From 440584f0f9108b8dce275d6fa385c5e989f44f13 Mon Sep 17 00:00:00 2001 From: Sidhant Arora <32369693+Sidhant29@users.noreply.github.com> Date: Fri, 19 Aug 2022 22:41:10 +1000 Subject: [PATCH 1/4] Updated Readme (#54) --- README.md | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 3fe7619..e6855f4 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,8 @@ The Python bindings have a similar API to Tantivy. To create a index first a sch needs to be built. After that documents can be added to the index and a reader can be created to search the index. +## Building an index and populating it + ```python import tantivy @@ -51,29 +53,112 @@ import tantivy schema_builder = tantivy.SchemaBuilder() schema_builder.add_text_field("title", stored=True) schema_builder.add_text_field("body", stored=True) +schema_builder.add_integer_field("doc_id",stored=True) schema = schema_builder.build() -# Creating our index (in memory, but filesystem is available too) +# Creating our index (in memory) index = tantivy.Index(schema) +``` +To have a persistent index, use the path +parameter to store the index on the disk, e.g: -# Adding one document. +```python +index = tantivy.Index(schema, path=os.getcwd() + '/index') +``` + +By default, tantivy offers the following tokenizers +which can be used in tantivy-py: + - `default` +`default` is the tokenizer that will be used if you do not + assign a specific tokenizer to your text field. + It will chop your text on punctuation and whitespaces, + removes tokens that are longer than 40 chars, and lowercase your text. + +- `raw` + Does not actual tokenizer your text. It keeps it entirely unprocessed. + It can be useful to index uuids, or urls for instance. + +- `en_stem` + + In addition to what `default` does, the `en_stem` tokenizer also + apply stemming to your tokens. Stemming consists in trimming words to + remove their inflection. 
This tokenizer is slower than the default one, + but is recommended to improve recall. + +To use the above tokenizers, simply provide them as a parameter to `add_text_field`, e.g.: +```python +schema_builder.add_text_field("body", stored=True, tokenizer_name='en_stem') +``` + +### Adding one document. + +```python writer = index.writer() writer.add_document(tantivy.Document( + doc_id=1, title=["The Old Man and the Sea"], body=["""He was an old man who fished alone in a skiff in the Gulf Stream and he had gone eighty-four days now without taking a fish."""], )) # ... and committing writer.commit() +``` +## Building and Executing Queries + +First you need to get a searcher for the index: + +```python # Reload the index to ensure it points to the last commit. index.reload() searcher = index.searcher() -query = index.parse_query("fish days", ["title", "body"]) +``` +Then you need to get a valid query object by parsing your query on the index: + +```python +query = index.parse_query("fish days", ["title", "body"]) (best_score, best_doc_address) = searcher.search(query, 3).hits[0] best_doc = searcher.doc(best_doc_address) assert best_doc["title"] == ["The Old Man and the Sea"] print(best_doc) ``` + +### Valid Query Formats + +tantivy-py supports the query language used in tantivy. +Some basic query formats: + + + - AND and OR conjunctions. +```python +query = index.parse_query('(Old AND Man) OR Stream', ["title", "body"]) +(best_score, best_doc_address) = searcher.search(query, 3).hits[0] +best_doc = searcher.doc(best_doc_address) +``` + + - +(includes) and -(excludes) operators. +```python +query = index.parse_query('+Old +Man chef -fished', ["title", "body"]) +(best_score, best_doc_address) = searcher.search(query, 3).hits[0] +best_doc = searcher.doc(best_doc_address) +``` +Note: in a query like the one above, a word with no +/- acts like an OR. + + - phrase search. 
+```python +query = index.parse_query('"eighty-four days"', ["title", "body"]) +(best_score, best_doc_address) = searcher.search(query, 3).hits[0] +best_doc = searcher.doc(best_doc_address) +``` + +- integer search +```python +query = index.parse_query('1', ["doc_id"]) +(best_score, best_doc_address) = searcher.search(query, 3).hits[0] +best_doc = searcher.doc(best_doc_address) +``` +Note: for integer search, the integer field should be indexed. + +For more possible query formats and possible query options, see [Tantivy Query Parser Docs.](https://docs.rs/tantivy/latest/tantivy/query/struct.QueryParser.html) \ No newline at end of file From 6eb867e08fb67f7e11a56505fe0195ddca99b3b1 Mon Sep 17 00:00:00 2001 From: amosdoublec <112558309+amosdoublec@users.noreply.github.com> Date: Mon, 28 Nov 2022 16:24:54 +0200 Subject: [PATCH 2/4] edit for the main readme (#60) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e6855f4..d869bbb 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ tantivy-py ========== -Python bindings for Tantivy. +Python bindings for [Tantivy](https://github.com/quickwit-oss/tantivy), the full-text search engine library written in Rust. 
# Installation From b2043793eec527e5a9aa7a1f922b1b13402bd238 Mon Sep 17 00:00:00 2001 From: Caleb Hattingh Date: Sat, 14 Jan 2023 02:29:38 +1000 Subject: [PATCH 3/4] Include Python 3.11 in nox tests and Github actions matrix (#56) * Include Python 3.11 in nox tests and Github actions matrix --- .github/workflows/ci.yml | 2 ++ noxfile.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 28b53a7..16f7425 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,6 +26,8 @@ jobs: os: [ubuntu-latest, macos-latest, windows-latest] python-version: [3.9] include: + - os: ubuntu-latest + python-version: "3.11" - os: ubuntu-latest python-version: "3.10" - os: ubuntu-latest diff --git a/noxfile.py b/noxfile.py index 6e832df..bf9c128 100644 --- a/noxfile.py +++ b/noxfile.py @@ -1,7 +1,7 @@ import nox -@nox.session(python=["3.7", "3.8", "3.9", "3.10"]) +@nox.session(python=["3.7", "3.8", "3.9", "3.10", "3.11"]) def test(session): session.install("-rrequirements-dev.txt") session.install("-e", ".", "--no-build-isolation") From 164adc87e1a033117001cf70e38c82a53014d985 Mon Sep 17 00:00:00 2001 From: Phill Mell-Davies Date: Tue, 14 Feb 2023 08:20:59 -0500 Subject: [PATCH 4/4] Tantivy 0.19.2 (#67) * Adding __init__.py file to the tantivy folder to make maturin happy Add Cargo.lock to the repo Set the git-fetch-with-cli cargo flag so that we can override fetch settings Renaming .cargo/config to .cargo/config.toml Adding github-quiq-sh cargo registry Point dependencies at our github-quiq-sh registry Trying to resolve this build issue, pointing pyo3-build-config at our github-quiq-sh registry SER-21487: Enable support for all standard Tantivy languages plus Chinese + Japanese in tantivy-py SER-21487: Use uname rather than UNAME in the Makefile SER-21487: Fix document date handling SER-23013: Upgrade Tantivy and other dependencies * Upgrade to Tantivy 0.19.1 * Apply rustfmt and fix bug when 
fast option = None * Upgrade to tantivy-0.19.2 * Standardize around using 'cargo fmt' rather than 'rustfmt' * Reverting to old style dependencies * Linting with clippy * Switching out hashmap for defining tokenizers for an array, and adding test for Spanish indexing * Use cargo fmt instead of rustfmt on the Lint ci step --- .cargo/{config => config.toml} | 2 +- .github/workflows/ci.yml | 2 +- .gitignore | 2 +- Cargo.lock | 1573 ++++++++++++++++++++++++++++++++ Cargo.toml | 18 +- Makefile | 12 +- pyproject.toml | 2 +- rustfmt.toml | 2 +- src/document.rs | 91 +- src/index.rs | 78 +- src/lib.rs | 5 +- src/schemabuilder.rs | 52 +- src/searcher.rs | 21 +- tantivy/__init__.py | 1 + tests/tantivy_test.py | 65 ++ 15 files changed, 1820 insertions(+), 106 deletions(-) rename .cargo/{config => config.toml} (98%) create mode 100644 Cargo.lock create mode 100644 tantivy/__init__.py diff --git a/.cargo/config b/.cargo/config.toml similarity index 98% rename from .cargo/config rename to .cargo/config.toml index 15d5d32..58103f6 100644 --- a/.cargo/config +++ b/.cargo/config.toml @@ -2,4 +2,4 @@ rustflags = [ "-C", "link-arg=-undefined", "-C", "link-arg=dynamic_lookup", -] +] \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 16f7425..909c278 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,7 @@ jobs: toolchain: stable components: rustfmt - name: Check Formatting - run: rustfmt --check src/*rs + run: cargo fmt --check Test: strategy: diff --git a/.gitignore b/.gitignore index 72ff37d..106eea0 100644 --- a/.gitignore +++ b/.gitignore @@ -3,9 +3,9 @@ build /target **/*.rs.bk -Cargo.lock dist/ __pycache__/ tantivy.so +tantivy.dylib tantivy/tantivy.cpython*.so tantivy.egg-info/ diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..64341ea --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,1573 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "0.7.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +dependencies = [ + "memchr", +] + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "arc-swap" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" + +[[package]] +name = "async-trait" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd7fce9ba8c3c042128ce72d8b2ddbf3a05747efb67ea0313c635e10bda47a2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitpacking" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a8c7d2ac73c167c06af4a5f37e6e59d84148d57ccbe4480b76f0273eefea82d7" +dependencies = [ + "crunchy", +] + +[[package]] +name = "bumpalo" +version = "3.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "cc" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" + +[[package]] +name = "census" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fafee10a5dd1cffcb5cc560e0d0df8803d7355a2b12272e3557dee57314cb6e" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b0a3d9ed01224b22057780a37bb8c5dbfe1be8ba48678e7bf57ec4b385411f" +dependencies = [ + "iana-time-zone", + "js-sys", + "num-integer", + "num-traits", + "time 0.1.45", + "wasm-bindgen", + "winapi", +] + +[[package]] +name = "codespan-reporting" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +dependencies = [ + "termcolor", + "unicode-width", +] + +[[package]] +name = "combine" +version = "4.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4" +dependencies = [ + "memchr", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset 0.7.1", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "cxx" +version = "1.0.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90d59d9acd2a682b4e40605a242f6670eaa58c5957471cbf85e8aa6a0b97a5e8" +dependencies = [ + "cc", + "cxxbridge-flags", + "cxxbridge-macro", + "link-cplusplus", +] + +[[package]] +name = "cxx-build" +version = "1.0.90" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebfa40bda659dd5c864e65f4c9a2b0aff19bea56b017b9b77c73d3766a453a38" +dependencies = [ + "cc", + "codespan-reporting", + "once_cell", + "proc-macro2", + "quote", + "scratch", + "syn", +] + +[[package]] +name = "cxxbridge-flags" +version = "1.0.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "457ce6757c5c70dc6ecdbda6925b958aae7f959bda7d8fb9bde889e34a09dc03" + +[[package]] +name = "cxxbridge-macro" +version = "1.0.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebf883b7aacd7b2aeb2a7b338648ee19f57c140d4ee8e52c68979c6b2f7f2263" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "downcast-rs" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650" + +[[package]] +name = "either" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" + +[[package]] +name = "fail" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe5e43d0f78a42ad591453aedb1d7ae631ce7ee445c7643691055a9ed8d3b01c" +dependencies = [ + "log", + "once_cell", + "rand", +] + +[[package]] +name = "fastdivide" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25c7df09945d65ea8d70b3321547ed414bbc540aad5bac6883d021b970f35b04" + +[[package]] +name = "fastfield_codecs" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "374a3a53c1bd5fb31b10084229290eafb0a05f260ec90f1f726afffda4877a8a" +dependencies = [ + "fastdivide", + "itertools", + "log", + "ownedbytes", + "tantivy-bitpacker", + "tantivy-common", +] + +[[package]] +name = "fastrand" +version = "1.8.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499" +dependencies = [ + "instant", +] + +[[package]] +name = "fs2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "futures" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13e2792b0ff0340399d58445b88fd9770e3489eff258a4cbc1523418f12abf84" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e5317663a9089767a1ec00a487df42e0ca174b61b4483213ac24448e4664df5" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec90ff4d0fe1f57d600049061dc6bb68ed03c7d2fbd697274c41805dcb3f8608" + +[[package]] +name = "futures-executor" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8de0a35a6ab97ec8869e32a2473f4b1324459e14c29275d14b10cb1fd19b50e" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfb8371b6fb2aeb2d280374607aeabfc99d95c72edfe51692e42d3d7f0d08531" + +[[package]] +name = "futures-macro" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95a73af87da33b5acf53acfebdc339fe592ecf5357ac7c0a7734ab9d8c876a70" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = 
"futures-sink" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f310820bb3e8cfd46c80db4d7fb8353e15dfff853a127158425f31e0be6c8364" + +[[package]] +name = "futures-task" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcf79a1bf610b10f42aea489289c5a2c478a786509693b80cd39c44ccd936366" + +[[package]] +name = "futures-util" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c1d6de3acfef38d2be4b1f543f553131788603495be83da675e180c8d6b7bd1" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generator" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d266041a359dfa931b370ef684cceb84b166beb14f7f0421f4a6a3d0c446d12e" +dependencies = [ + "cc", + "libc", + "log", + "rustversion", + "windows", +] + +[[package]] +name = "getrandom" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] + +[[package]] +name = "hermit-abi" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" +dependencies = [ + "libc", +] + +[[package]] +name = "htmlescape" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" + 
+[[package]] +name = "iana-time-zone" +version = "0.1.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64c122667b287044802d6ce17ee2ddf13207ed924c712de9a66a5814d5b64765" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "winapi", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" +dependencies = [ + "cxx", + "cxx-build", +] + +[[package]] +name = "indoc" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa799dd5ed20a7e349f3b4639aa80d74549c81716d9ec4f994c9b5815598306" + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440" + +[[package]] +name = "js-sys" +version = "0.3.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "levenshtein_automata" +version = 
"0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" + +[[package]] +name = "libc" +version = "0.2.139" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" + +[[package]] +name = "link-cplusplus" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" +dependencies = [ + "cc", +] + +[[package]] +name = "lock_api" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "loom" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5" +dependencies = [ + "cfg-if", + "generator", + "pin-utils", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "lru" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999beba7b6e8345721bd280141ed958096a2e4abdf74f67ff4ce49b4b54e47a" +dependencies = [ + "hashbrown", +] + +[[package]] +name = "lz4_flex" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a8cbbb2831780bc3b9c15a41f5b49222ef756b6730a95f3decfdd15903eb5a3" + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" 
+dependencies = [ + "regex-automata", +] + +[[package]] +name = "measure_time" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56220900f1a0923789ecd6bf25fbae8af3b2f1ff3e9e297fc9b6b8674dd4d852" +dependencies = [ + "instant", + "log", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "memmap2" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b182332558b18d807c4ce1ca8ca983b34c3ee32765e47b3f0f69b90355cc1dc" +dependencies = [ + "libc", +] + +[[package]] +name = "memoffset" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +dependencies = [ + "autocfg", +] + +[[package]] +name = "memoffset" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" +dependencies = [ + "autocfg", +] + +[[package]] +name = "murmurhash32" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d736ff882f0e85fe9689fb23db229616c4c00aee2b3ac282f666d8f20eb25d4a" +dependencies = [ + "byteorder", +] + +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66" + +[[package]] +name = "oneshot" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc22d22931513428ea6cc089e942d38600e3d00976eef8c86de6b8a3aadec6eb" +dependencies = [ + "loom", +] + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "ownedbytes" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e957eaa64a299f39755416e5b3128c505e9d63a91d0453771ad2ccd3907f8db" +dependencies = [ + "stable_deref_trait", +] + +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9069cbb9f99e3a5083476ccb29ceb1de18b9118cafa53e90c9551235de2b9521" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-sys", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "proc-macro2" +version = "1.0.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06a3d8e8a46ab2738109347433cb7b96dffda2e4a218b03ef27090238886b147" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset 0.8.0", + "parking_lot", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75439f995d07ddfad42b192dfcf3bc66a7ecfd8b4a1f5f6f046aa5c2c5d7677d" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "839526a5c07a17ff44823679b68add4a58004de00512a95b6c1c98a6dcac0ee5" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd44cf207476c6a9760c4653559be4f206efafb924d3e4cbf2721475fc0d6cc5" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "dc1f43d8e30460f36350d18631ccf85ded64c059829208fe680904c65bcd0a4c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rayon" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db3a213adf02b3bcfd2d3846bb41cb22857d131789e01df434fb7e7bc0759b7" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "356a0625f1954f730c0201cdab48611198dc6ce21f4acff55089b5a78e6e835b" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "num_cpus", +] + +[[package]] +name = "redox_syscall" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.7.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" + +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + +[[package]] +name = "rust-stemmers" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" +dependencies = [ + "serde", + "serde_derive", +] + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustversion" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5583e89e108996506031660fe09baa5011b9dd0341b89029313006d1fb508d70" + +[[package]] +name = "ryu" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde" + +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "scratch" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddccb15bcce173023b3fedd9436f882a0739b8dfb45e4f6b6002bee5929f61b2" + +[[package]] +name = "serde" +version = "1.0.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.93" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cad406b69c91885b5107daf2c29572f6c8cdb3c66826821e286c533490c0bc76" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "slab" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "syn" +version = 
"1.0.107" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tantivy" +version = "0.19.2" +dependencies = [ + "chrono", + "futures", + "itertools", + "pyo3", + "pyo3-build-config", + "serde_json", + "tantivy 0.19.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "tantivy" +version = "0.19.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bb26a6b22c84d8be41d99a14016d6f04d30d8d31a2ea411a8ab553af5cc490d" +dependencies = [ + "aho-corasick", + "arc-swap", + "async-trait", + "base64", + "bitpacking", + "byteorder", + "census", + "crc32fast", + "crossbeam-channel", + "downcast-rs", + "fail", + "fastdivide", + "fastfield_codecs", + "fs2", + "htmlescape", + "itertools", + "levenshtein_automata", + "log", + "lru", + "lz4_flex", + "measure_time", + "memmap2", + "murmurhash32", + "num_cpus", + "once_cell", + "oneshot", + "ownedbytes", + "rayon", + "regex", + "rust-stemmers", + "rustc-hash", + "serde", + "serde_json", + "smallvec", + "stable_deref_trait", + "tantivy-bitpacker", + "tantivy-common", + "tantivy-fst", + "tantivy-query-grammar", + "tempfile", + "thiserror", + "time 0.3.17", + "uuid", + "winapi", +] + +[[package]] +name = "tantivy-bitpacker" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e71a0c95b82d4292b097a09b989a6380d28c3a86800c841a2d03bae1fc8b9fa6" + +[[package]] +name = "tantivy-common" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14fef4182bb60df9a4b92cd8ecab39ba2e50a05542934af17eef1f49660705cb" +dependencies = [ + "byteorder", + "ownedbytes", +] + +[[package]] +name = "tantivy-fst" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fc3c506b1a8443a3a65352df6382a1fb6a7afe1a02e871cee0d25e2c3d5f3944" +dependencies = [ + "byteorder", + "regex-syntax", + "utf8-ranges", +] + +[[package]] +name = "tantivy-query-grammar" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "343e3ada4c1c480953f6960f8a21ce9c76611480ffdd4f4e230fdddce0fc5331" +dependencies = [ + "combine", + "once_cell", + "regex", +] + +[[package]] +name = "target-lexicon" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae9980cab1db3fceee2f6c6f643d5d8de2997c58ee8d25fb0cc8a9e9e7348e5" + +[[package]] +name = "tempfile" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +dependencies = [ + "cfg-if", + "fastrand", + "libc", + "redox_syscall", + "remove_dir_all", + "winapi", +] + +[[package]] +name = "termcolor" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "thiserror" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +dependencies = [ + "cfg-if", + "once_cell", +] + +[[package]] +name = "time" +version = "0.1.45" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" +dependencies = [ + "libc", + "wasi 0.10.0+wasi-snapshot-preview1", + "winapi", +] + +[[package]] +name = "time" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a561bf4617eebd33bca6434b988f39ed798e527f51a1e797d0ee4f61c0a38376" +dependencies = [ + "itoa", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" + +[[package]] +name = "time-macros" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d967f99f534ca7e495c575c62638eebc2898a8c84c119b89e250477bc4ba16b2" +dependencies = [ + "time-core", +] + +[[package]] +name = "tracing" +version = "0.1.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +dependencies = [ + "cfg-if", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4017f8f45139870ca7e672686113917c71c7a6e02d4924eda67186083c03081a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ddad33d2d10b1ed7eb9d1f518a5674713876e97e5bb9b7345a7984fbb4f922" +dependencies = [ + "lazy_static", + "log", + 
"tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6176eae26dd70d0c919749377897b54a9276bd7061339665dd68777926b5a70" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicode-ident" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" + +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" + +[[package]] +name = "unindent" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c" + +[[package]] +name = "utf8-ranges" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" + +[[package]] +name = "uuid" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1674845326ee10d37ca60470760d4288a6f80f304007d92e5c53bab78c9cfd79" +dependencies = [ + "getrandom", + "serde", +] + +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.84" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" + +[[package]] +name = "web-sys" +version = "0.3.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1c4bd0a50ac6020f65184721f758dba47bb9fbc2133df715ec74a237b26794a" +dependencies = [ + "windows_aarch64_msvc 0.39.0", + "windows_i686_gnu 0.39.0", + "windows_i686_msvc 0.39.0", + "windows_x86_64_gnu 0.39.0", + "windows_x86_64_msvc 0.39.0", +] + +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e2522491fbfcd58cc84d47aeb2958948c4b8982e9a2d8a2a35bbaed431390e7" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc 0.42.1", + "windows_i686_gnu 0.42.1", + "windows_i686_msvc 0.42.1", + "windows_x86_64_gnu 0.42.1", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc 0.42.1", +] + +[[package]] +name = 
"windows_aarch64_gnullvm" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c9864e83243fdec7fc9c5444389dcbbfd258f745e7853198f365e3c4968a608" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec7711666096bd4096ffa835238905bb33fb87267910e154b18b44eaabb340f2" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c8b1b673ffc16c47a9ff48570a9d85e25d265735c503681332589af6253c6c7" + +[[package]] +name = "windows_i686_gnu" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "763fc57100a5f7042e3057e7e8d9bdd7860d330070251a73d003563a3bb49e1b" + +[[package]] +name = "windows_i686_gnu" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3887528ad530ba7bdbb1faa8275ec7a1155a45ffa57c37993960277145d640" + +[[package]] +name = "windows_i686_msvc" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bc7cbfe58828921e10a9f446fcaaf649204dcfe6c1ddd712c5eebae6bda1106" + +[[package]] +name = "windows_i686_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf4d1122317eddd6ff351aa852118a2418ad4214e6613a50e0191f7004372605" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6868c165637d653ae1e8dc4d82c25d4f97dd6605eaa8d784b5c6e0ab2a252b65" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1040f221285e17ebccbc2591ffdc2d44ee1f9186324dd3e84e99ac68d699c45" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "628bfdf232daa22b0d64fdb62b09fcc36bb01f05a3939e20ab73aaf9470d0463" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e4d40883ae9cae962787ca76ba76390ffa29214667a111db9e0a1ad8377e809" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "447660ad36a13288b1db4d4248e857b510e8c3a225c822ba4fb748c0aafecffd" diff --git a/Cargo.toml b/Cargo.toml index 84a1bea..96a2fad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tantivy" -version = "0.17.0" +version = "0.19.2" readme = "README.md" authors = ["Damir Jelić "] edition = "2018" @@ -11,15 +11,15 @@ name = "tantivy" crate-type = ["cdylib"] [build-dependencies] -pyo3-build-config = "0.16.3" +pyo3-build-config = "0.18.0" [dependencies] -chrono = "0.4.19" -tantivy = "0.17" -itertools = "0.10.3" -futures = "0.3.21" -serde_json = "1.0.64" +chrono = "0.4.23" +tantivy = "0.19.2" +itertools = "0.10.5" +futures = "0.3.26" +serde_json = "1.0.91" [dependencies.pyo3] -version = "0.16.3" -features = ["extension-module"] +version = "0.18.0" +features = ["extension-module"] \ No newline at end of file diff --git a/Makefile b/Makefile index ce7bf0a..a57c65f 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -ifeq ($(shell UNAME),Darwin) +ifeq ($(shell uname),Darwin) EXT := dylib else EXT := so @@ -6,15 +6,21 @@ endif source_files := $(wildcard src/*.rs) -all: tantivy/tantivy.$(EXT) +all: format lint build test PHONY: test format +lint: + cargo clippy + test: tantivy/tantivy.$(EXT) python3 -m pytest format: - rustfmt src/*.rs + cargo fmt + +build: + maturin build --interpreter python3.7 python3.8 python3.9 python3.10 python3.11 tantivy/tantivy.$(EXT): target/debug/libtantivy.$(EXT) cp target/debug/libtantivy.$(EXT) tantivy/tantivy.so diff --git a/pyproject.toml b/pyproject.toml index b97966d..d61d69f 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["maturin"] +requires = ["maturin>=0.13,<0.14"] build-backend = "maturin" [project] diff --git a/rustfmt.toml b/rustfmt.toml index df99c69..5c8d931 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1 +1 @@ -max_width = 80 +max_width = 80 \ No newline at end of file diff --git a/src/document.rs b/src/document.rs index acb79f5..6601472 100644 --- a/src/document.rs +++ b/src/document.rs @@ -9,7 +9,7 @@ use pyo3::{ }, }; -use chrono::{offset::TimeZone, Datelike, Timelike, Utc}; +use chrono::{offset::TimeZone, Utc}; use tantivy as tv; @@ -56,37 +56,42 @@ fn value_to_py(py: Python, value: &Value) -> PyResult { // TODO implement me unimplemented!(); } - Value::Date(d) => PyDateTime::new( - py, - d.year(), - d.month() as u8, - d.day() as u8, - d.hour() as u8, - d.minute() as u8, - d.second() as u8, - d.timestamp_subsec_micros(), - None, - )? - .into_py(py), + Value::Date(d) => { + let utc = d.into_utc(); + PyDateTime::new( + py, + utc.year(), + utc.month() as u8, + utc.day(), + utc.hour(), + utc.minute(), + utc.second(), + utc.microsecond(), + None, + )? 
+ .into_py(py) + } Value::Facet(f) => Facet { inner: f.clone() }.into_py(py), Value::JsonObject(json_object) => { let inner: HashMap<_, _> = json_object .iter() - .map(|(k, v)| (k, value_to_object(&v, py))) + .map(|(k, v)| (k, value_to_object(v, py))) .collect(); inner.to_object(py) } + Value::Bool(b) => b.into_py(py), + Value::IpAddr(i) => (*i).to_string().into_py(py), }) } fn value_to_string(value: &Value) -> String { match value { Value::Str(text) => text.clone(), - Value::U64(num) => format!("{}", num), - Value::I64(num) => format!("{}", num), - Value::F64(num) => format!("{}", num), - Value::Bytes(bytes) => format!("{:?}", bytes), - Value::Date(d) => format!("{:?}", d), + Value::U64(num) => format!("{num}"), + Value::I64(num) => format!("{num}"), + Value::F64(num) => format!("{num}"), + Value::Bytes(bytes) => format!("{bytes:?}"), + Value::Date(d) => format!("{d:?}"), Value::Facet(facet) => facet.to_string(), Value::PreTokStr(_pretok) => { // TODO implement me @@ -95,6 +100,8 @@ fn value_to_string(value: &Value) -> String { Value::JsonObject(json_object) => { serde_json::to_string(&json_object).unwrap() } + Value::Bool(b) => format!("{b}"), + Value::IpAddr(i) => format!("{}", *i), } } @@ -141,10 +148,10 @@ impl fmt::Debug for Document { .chars() .take(10) .collect(); - format!("{}=[{}]", field_name, values_str) + format!("{field_name}=[{values_str}]") }) .join(","); - write!(f, "Document({})", doc_str) + write!(f, "Document({doc_str})") } } @@ -170,23 +177,24 @@ pub(crate) fn extract_value(any: &PyAny) -> PyResult { } if let Ok(py_datetime) = any.downcast::() { let datetime = Utc - .ymd( + .with_ymd_and_hms( py_datetime.get_year(), py_datetime.get_month().into(), py_datetime.get_day().into(), - ) - .and_hms_micro( py_datetime.get_hour().into(), py_datetime.get_minute().into(), py_datetime.get_second().into(), - py_datetime.get_microsecond(), - ); - return Ok(Value::Date(datetime)); + ) + .single() + .unwrap(); + return 
Ok(Value::Date(tv::DateTime::from_timestamp_secs( + datetime.timestamp(), + ))); } if let Ok(facet) = any.extract::() { - return Ok(Value::Facet(facet.inner.clone())); + return Ok(Value::Facet(facet.inner)); } - Err(to_pyerr(format!("Value unsupported {:?}", any))) + Err(to_pyerr(format!("Value unsupported {any:?}"))) } fn extract_value_single_or_list(any: &PyAny) -> PyResult> { @@ -200,7 +208,7 @@ fn extract_value_single_or_list(any: &PyAny) -> PyResult> { #[pymethods] impl Document { #[new] - #[args(kwargs = "**")] + #[pyo3(signature = (**kwargs))] fn new(kwargs: Option<&PyDict>) -> PyResult { let mut document = Document::default(); if let Some(field_dict) = kwargs { @@ -299,18 +307,21 @@ impl Document { /// value (datetime): The date that will be added to the document. fn add_date(&mut self, field_name: String, value: &PyDateTime) { let datetime = Utc - .ymd( + .with_ymd_and_hms( value.get_year(), value.get_month().into(), value.get_day().into(), - ) - .and_hms_micro( value.get_hour().into(), value.get_minute().into(), value.get_second().into(), - value.get_microsecond(), - ); - add_value(self, field_name, datetime); + ) + .single() + .unwrap(); + add_value( + self, + field_name, + tv::DateTime::from_timestamp_secs(datetime.timestamp()), + ); } /// Add a facet value to the document. 
@@ -387,13 +398,13 @@ impl Document { } fn __getitem__(&self, field_name: &str) -> PyResult> { - let gil = Python::acquire_gil(); - let py = gil.python(); - self.get_all(py, field_name) + Python::with_gil(|py| -> PyResult> { + self.get_all(py, field_name) + }) } fn __repr__(&self) -> PyResult { - Ok(format!("{:?}", self)) + Ok(format!("{self:?}")) } } diff --git a/src/index.rs b/src/index.rs index a24647b..00b544c 100644 --- a/src/index.rs +++ b/src/index.rs @@ -14,6 +14,10 @@ use tantivy as tv; use tantivy::{ directory::MmapDirectory, schema::{NamedFieldDocument, Term, Value}, + tokenizer::{ + Language, LowerCaser, RemoveLongFilter, SimpleTokenizer, Stemmer, + TextAnalyzer, + }, }; const RELOAD_POLICY: &str = "commit"; @@ -120,26 +124,25 @@ impl IndexWriter { Value::U64(num) => Term::from_field_u64(field, num), Value::I64(num) => Term::from_field_i64(field, num), Value::F64(num) => Term::from_field_f64(field, num), - Value::Date(d) => Term::from_field_date(field, &d), + Value::Date(d) => Term::from_field_date(field, d), Value::Facet(facet) => Term::from_facet(field, &facet), Value::Bytes(_) => { return Err(exceptions::PyValueError::new_err(format!( - "Field `{}` is bytes type not deletable.", - field_name + "Field `{field_name}` is bytes type not deletable." ))) } Value::PreTokStr(_pretok) => { return Err(exceptions::PyValueError::new_err(format!( - "Field `{}` is pretokenized. This is not authorized for delete.", - field_name + "Field `{field_name}` is pretokenized. This is not authorized for delete." ))) } Value::JsonObject(_) => { return Err(exceptions::PyValueError::new_err(format!( - "Field `{}` is json object type not deletable.", - field_name + "Field `{field_name}` is json object type not deletable." 
))) - } + }, + Value::Bool(b) => Term::from_field_bool(field, b), + Value::IpAddr(i) => Term::from_field_ip_addr(field, i) }; Ok(self.inner_index_writer.delete_term(term)) } @@ -167,12 +170,15 @@ impl Index { #[staticmethod] fn open(path: &str) -> PyResult { let index = tv::Index::open_in_dir(path).map_err(to_pyerr)?; + + Index::register_custom_text_analyzers(&index); + let reader = index.reader().map_err(to_pyerr)?; Ok(Index { index, reader }) } #[new] - #[args(reuse = true)] + #[pyo3(signature = (schema, path = None, reuse = true))] fn new(schema: &Schema, path: Option<&str>, reuse: bool) -> PyResult { let index = match path { Some(p) => { @@ -191,6 +197,8 @@ impl Index { None => tv::Index::create_in_ram(schema.inner.clone()), }; + Index::register_custom_text_analyzers(&index); + let reader = index.reader().map_err(to_pyerr)?; Ok(Index { index, reader }) } @@ -208,7 +216,7 @@ impl Index { /// automatically the number of threads. /// /// Raises ValueError if there was an error while creating the writer. - #[args(heap_size = 3000000, num_threads = 0)] + #[pyo3(signature = (heap_size = 3000000, num_threads = 0))] fn writer( &self, heap_size: usize, @@ -231,13 +239,13 @@ impl Index { /// Args: /// reload_policy (str, optional): The reload policy that the /// IndexReader should use. Can be `Manual` or `OnCommit`. - /// num_searchers (int, optional): The number of searchers that the + /// num_warmers (int, optional): The number of searchers that the /// reader should create. 
- #[args(reload_policy = "RELOAD_POLICY", num_searchers = 0)] + #[pyo3(signature = (reload_policy = RELOAD_POLICY, num_warmers = 0))] fn config_reader( &mut self, reload_policy: &str, - num_searchers: usize, + num_warmers: usize, ) -> Result<(), PyErr> { let reload_policy = reload_policy.to_lowercase(); let reload_policy = match reload_policy.as_ref() { @@ -251,8 +259,8 @@ impl Index { }; let builder = self.index.reader_builder(); let builder = builder.reload_policy(reload_policy); - let builder = if num_searchers > 0 { - builder.num_searchers(num_searchers) + let builder = if num_warmers > 0 { + builder.num_warming_threads(num_warmers) } else { builder }; @@ -313,7 +321,7 @@ impl Index { /// default_fields_names (List[Field]): A list of fields used to search if no /// field is specified in the query. /// - #[args(reload_policy = "RELOAD_POLICY")] + #[pyo3(signature = (query, default_field_names = None))] pub fn parse_query( &self, query: &str, @@ -328,16 +336,14 @@ impl Index { if !field_entry.is_indexed() { return Err(exceptions::PyValueError::new_err( format!( - "Field `{}` is not set as indexed in the schema.", - default_field_name + "Field `{default_field_name}` is not set as indexed in the schema." ), )); } default_fields.push(field); } else { return Err(exceptions::PyValueError::new_err(format!( - "Field `{}` is not defined in the schema.", - default_field_name + "Field `{default_field_name}` is not defined in the schema." 
))); } } @@ -355,3 +361,35 @@ impl Index { Ok(Query { inner: query }) } } + +impl Index { + fn register_custom_text_analyzers(index: &tv::Index) { + let analyzers = [ + ("ar_stem", Language::Arabic), + ("da_stem", Language::Danish), + ("nl_stem", Language::Dutch), + ("fi_stem", Language::Finnish), + ("fr_stem", Language::French), + ("de_stem", Language::German), + ("el_stem", Language::Greek), + ("hu_stem", Language::Hungarian), + ("it_stem", Language::Italian), + ("no_stem", Language::Norwegian), + ("pt_stem", Language::Portuguese), + ("ro_stem", Language::Romanian), + ("ru_stem", Language::Russian), + ("es_stem", Language::Spanish), + ("sv_stem", Language::Swedish), + ("ta_stem", Language::Tamil), + ("tr_stem", Language::Turkish), + ]; + + for (name, lang) in &analyzers { + let an = TextAnalyzer::from(SimpleTokenizer) + .filter(RemoveLongFilter::limit(40)) + .filter(LowerCaser) + .filter(Stemmer::new(*lang)); + index.tokenizers().register(name, an); + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 0593715..64c4f0c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,5 @@ +use ::tantivy as tv; use pyo3::{exceptions, prelude::*}; -use tantivy as tv; mod document; mod facet; @@ -88,8 +88,7 @@ pub(crate) fn get_field( ) -> PyResult { let field = schema.get_field(field_name).ok_or_else(|| { exceptions::PyValueError::new_err(format!( - "Field `{}` is not defined in the schema.", - field_name + "Field `{field_name}` is not defined in the schema." )) })?; diff --git a/src/schemabuilder.rs b/src/schemabuilder.rs index 30cbd29..7fd6e56 100644 --- a/src/schemabuilder.rs +++ b/src/schemabuilder.rs @@ -6,7 +6,7 @@ use tantivy::schema; use crate::schema::Schema; use std::sync::{Arc, RwLock}; -use tantivy::schema::INDEXED; +use tantivy::schema::{DateOptions, INDEXED}; /// Tantivy has a very strict schema. /// You need to specify in advance whether a field is indexed or not, @@ -60,11 +60,12 @@ impl SchemaBuilder { /// /// Returns the associated field handle. 
/// Raises a ValueError if there was an error with the field creation. - #[args( + #[pyo3(signature = ( + name, stored = false, - tokenizer_name = "TOKENIZER", - index_option = "RECORD" - )] + tokenizer_name = TOKENIZER, + index_option = RECORD + ))] fn add_text_field( &mut self, name: &str, @@ -109,7 +110,7 @@ impl SchemaBuilder { /// /// Returns the associated field handle. /// Raises a ValueError if there was an error with the field creation. - #[args(stored = false, indexed = false)] + #[pyo3(signature = (name, stored = false, indexed = false, fast = None))] fn add_integer_field( &mut self, name: &str, @@ -151,7 +152,7 @@ impl SchemaBuilder { /// /// Returns the associated field handle. /// Raises a ValueError if there was an error with the field creation. - #[args(stored = false, indexed = false)] + #[pyo3(signature = (name, stored = false, indexed = false, fast = None))] fn add_unsigned_field( &mut self, name: &str, @@ -185,7 +186,7 @@ impl SchemaBuilder { /// field. Fast fields are designed for random access. Access time /// are similar to a random lookup in an array. If more than one /// value is associated to a fast field, only the last one is kept. - /// Can be one of 'single' or 'multi'. If this is set to 'single, + /// Can be one of 'single' or 'multi'. If this is set to 'single', /// the document must have exactly one value associated to the /// document. If this is set to 'multi', the document can have any /// number of values associated to the document. Defaults to None, @@ -193,7 +194,7 @@ impl SchemaBuilder { /// /// Returns the associated field handle. /// Raises a ValueError if there was an error with the field creation. 
- #[args(stored = false, indexed = false)] + #[pyo3(signature = (name, stored = false, indexed = false, fast = None))] fn add_date_field( &mut self, name: &str, @@ -203,7 +204,29 @@ impl SchemaBuilder { ) -> PyResult { let builder = &mut self.builder; - let opts = SchemaBuilder::build_int_option(stored, indexed, fast)?; + let mut opts = DateOptions::default(); + if stored { + opts = opts.set_stored(); + } + if indexed { + opts = opts.set_indexed(); + } + let fast = match fast { + Some(f) => { + let f = f.to_lowercase(); + match f.as_ref() { + "single" => Some(schema::Cardinality::SingleValue), + "multi" => Some(schema::Cardinality::MultiValues), + _ => return Err(exceptions::PyValueError::new_err( + "Invalid index option, valid choices are: 'multi' and 'single'" + )), + } + } + None => None, + }; + if let Some(f) = fast { + opts = opts.set_fast(f); + } if let Some(builder) = builder.write().unwrap().as_mut() { builder.add_date_field(name, opts); @@ -234,11 +257,12 @@ impl SchemaBuilder { /// /// Returns the associated field handle. /// Raises a ValueError if there was an error with the field creation. - #[args( + #[pyo3(signature = ( + name, stored = false, - tokenizer_name = "TOKENIZER", - index_option = "RECORD" - )] + tokenizer_name = TOKENIZER, + index_option = RECORD + ))] fn add_json_field( &mut self, name: &str, diff --git a/src/searcher.rs b/src/searcher.rs index c2b6796..ea625d4 100644 --- a/src/searcher.rs +++ b/src/searcher.rs @@ -10,7 +10,7 @@ use tantivy::collector::{Count, MultiCollector, TopDocs}; /// A Searcher is used to search the index given a prepared Query. 
#[pyclass] pub(crate) struct Searcher { - pub(crate) inner: tv::LeasedItem, + pub(crate) inner: tv::Searcher, } #[derive(Clone)] @@ -22,8 +22,8 @@ enum Fruit { impl std::fmt::Debug for Fruit { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Fruit::Score(s) => f.write_str(&format!("{}", s)), - Fruit::Order(o) => f.write_str(&format!("{}", o)), + Fruit::Score(s) => f.write_str(&format!("{s}")), + Fruit::Order(o) => f.write_str(&format!("{o}")), } } } @@ -93,7 +93,7 @@ impl Searcher { /// Returns `SearchResult` object. /// /// Raises a ValueError if there was an error with the search. - #[args(limit = 10, offset = 0, count = true)] + #[pyo3(signature = (query, limit = 10, count = true, order_by_field = None, offset = 0))] fn search( &self, _py: Python, @@ -154,10 +154,7 @@ impl Searcher { } }; - let count = match count_handle { - Some(h) => Some(h.extract(&mut multifruit)), - None => None, - }; + let count = count_handle.map(|h| h.extract(&mut multifruit)); Ok(SearchResult { hits, count }) } @@ -230,11 +227,11 @@ impl From<&tv::DocAddress> for DocAddress { } } -impl Into for &DocAddress { - fn into(self) -> tv::DocAddress { +impl From<&DocAddress> for tv::DocAddress { + fn from(val: &DocAddress) -> Self { tv::DocAddress { - segment_ord: self.segment_ord(), - doc_id: self.doc(), + segment_ord: val.segment_ord(), + doc_id: val.doc(), } } } diff --git a/tantivy/__init__.py b/tantivy/__init__.py new file mode 100644 index 0000000..c7c118e --- /dev/null +++ b/tantivy/__init__.py @@ -0,0 +1 @@ +from .tantivy import * \ No newline at end of file diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index 0d6d898..74f444d 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -67,6 +67,56 @@ def create_index(dir=None): return index +def spanish_schema(): + return ( + SchemaBuilder() + .add_text_field("title", stored=True, tokenizer_name='es_stem') + .add_text_field("body", tokenizer_name='es_stem') + .build() + ) + + +def 
create_spanish_index(): + # assume all tests will use the same documents for now + # other methods may set up function-local indexes + index = Index(spanish_schema(), None) + writer = index.writer() + + # 2 ways of adding documents + # 1 + doc = Document() + # create a document instance + # add field-value pairs + doc.add_text("title", "El viejo y el mar") + doc.add_text( + "body", + ( + "Era un viejo que pescaba solo en un bote en el Gulf Stream y hacía ochenta y cuatro días que no cogía un pez. " + ), + ) + writer.add_document(doc) + # 2 use the built-in json support + # keys need to coincide with field names + doc = Document.from_dict( + { + "title": "De ratones y hombres", + "body": ( + "Unas millas al sur de Soledad, el río Salinas se ahonda junto al margen de la ladera y fluye profundo y verde. Es tibia el agua, porque se ha deslizado chispeante sobre la arena amarilla y al calor del sol antes de llegar a la angosta laguna. A un lado del río, la dorada falda de la ladera se curva hacia arriba trepando hasta las montañas Gabilán, fuertes y rocosas, pero del lado del valle los árboles bordean la orilla: sauces frescos y verdes cada primavera, que en la s junturas más bajas de sus hojas muestran las consecuencias de la crecida invernal; y sicomoros de troncos veteados, blancos, recostados, y ramas quesear quean sobre el estanque" + ), + } + ) + writer.add_document(doc) + writer.add_json( + """{ + "title": ["Frankenstein", "El moderno Prometeo"], + "body": "Te alegrará saber que no ha ocurrido ningún percance al principio de una aventura que siempre consideraste cargada de malos presagios. Llegué aquí ayer, y mi primera tarea es asegurarle a mi querida hermana que me hallo perfectamente y que tengo una gran confianza en el éxito de mi empresa." 
+ }""" + ) + writer.commit() + index.reload() + return index + + @pytest.fixture() def dir_index(tmpdir): return (tmpdir, create_index(str(tmpdir))) @@ -77,6 +127,11 @@ def ram_index(): return create_index() +@pytest.fixture(scope="class") +def spanish_index(): + return create_spanish_index() + + class TestClass(object): def test_simple_search_in_dir(self, dir_index): _, index = dir_index @@ -103,6 +158,16 @@ class TestClass(object): searched_doc = index.searcher().doc(doc_address) assert searched_doc["title"] == ["The Old Man and the Sea"] + def test_simple_search_in_spanish(self, spanish_index): + index = spanish_index + query = index.parse_query("vieja", ["title", "body"]) + + result = index.searcher().search(query, 10) + assert len(result.hits) == 1 + _, doc_address = result.hits[0] + search_doc = index.searcher().doc(doc_address) + assert search_doc["title"] == ["El viejo y el mar"] + def test_and_query(self, ram_index): index = ram_index query = index.parse_query(