From b857dfec15f7b3dbd0ada6c1ddedb9bf47401fa1 Mon Sep 17 00:00:00 2001 From: Tomoko Uchida Date: Sat, 13 Apr 2024 19:26:05 +0900 Subject: [PATCH] Fix fuzzy_term_query() documentation. (#236) --- src/query.rs | 4 ++-- tests/tantivy_test.py | 36 ++++++++++++++++++++++++++++++------ 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/query.rs b/src/query.rs index bafbba2..e14d599 100644 --- a/src/query.rs +++ b/src/query.rs @@ -61,8 +61,8 @@ impl Query { /// * `field_name` - Field name to be searched. /// * `text` - String representation of the query term. /// * `distance` - (Optional) Edit distance you are going to alow. When not specified, the default is 1. - /// * `transposition_cost_one` - (Optional) If true, a transposition cost will be 1; otherwise it will be 2. When not specified, the default is true. - /// * `prefix` - (Optional) If true, only prefix matched results are returned. When not specified, the default is false. + /// * `transposition_cost_one` - (Optional) If true, a transposition (swapping) cost will be 1; otherwise it will be 2. When not specified, the default is true. + /// * `prefix` - (Optional) If true, prefix Levenshtein distance is applied. When not specified, the default is false. 
#[staticmethod] #[pyo3(signature = (schema, field_name, text, distance = 1, transposition_cost_one = true, prefix = false))] pub(crate) fn fuzzy_term_query( diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index 9338c8e..f8300d0 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -775,19 +775,29 @@ class TestQuery(object): def test_fuzzy_term_query(self, ram_index): index = ram_index query = Query.fuzzy_term_query(index.schema, "title", "ice") - # the query "ice" should match "mice" result = index.searcher().search(query, 10) assert len(result.hits) == 1 _, doc_address = result.hits[0] searched_doc = index.searcher().doc(doc_address) assert searched_doc["title"] == ["Of Mice and Men"] - - def test_fuzzy_term_query_prefix(self, ram_index): - index = ram_index - query = Query.fuzzy_term_query(index.schema, "title", "man", prefix=True) - # the query "man" should match both "man" and "men" + query = Query.fuzzy_term_query(index.schema, "title", "mna") + # the query "mna" should match "man" since the default transposition cost is 1. + result = index.searcher().search(query, 10) + assert len(result.hits) == 1 + titles = set() + for _, doc_address in result.hits: + titles.update(index.searcher().doc(doc_address)["title"]) + assert titles == {"The Old Man and the Sea"} + + query = Query.fuzzy_term_query(index.schema, "title", "mna", transposition_cost_one=False) + # the query "mna" should not match any doc since the default distance is 1 and transposition cost is set to 2. + result = index.searcher().search(query, 10) + assert len(result.hits) == 0 + + query = Query.fuzzy_term_query(index.schema, "title", "mna", distance=2, transposition_cost_one=False) + # the query "mna" should match both "man" and "men" since distance is set to 2. 
result = index.searcher().search(query, 10) assert len(result.hits) == 2 titles = set() @@ -795,4 +805,18 @@ class TestQuery(object): titles.update(index.searcher().doc(doc_address)["title"]) assert titles == {"The Old Man and the Sea", "Of Mice and Men"} + query = Query.fuzzy_term_query(index.schema, "title", "fraken") + # the query "fraken" should not match any doc. + result = index.searcher().search(query, 10) + assert len(result.hits) == 0 + + query = Query.fuzzy_term_query(index.schema, "title", "fraken", prefix=True) + # the query "fraken" should match "franken", the prefix of "frankenstein", with edit distance 1. + result = index.searcher().search(query, 10) + assert len(result.hits) == 1 + titles = set() + for _, doc_address in result.hits: + titles.update(index.searcher().doc(doc_address)["title"]) + assert titles == {"Frankenstein", "The Modern Prometheus"} +