Expose Tantivy's PhraseQuery (#234)
parent
3d394959b8
commit
03b1c89fa3
42
src/query.rs
42
src/query.rs
|
@ -157,6 +157,48 @@ impl Query {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Construct a Tantivy's PhraseQuery with custom offsets and slop
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `schema` - Schema of the target index.
|
||||||
|
/// * `field_name` - Field name to be searched.
|
||||||
|
/// * `words` - Word list that constructs the phrase. A word can be a term text or a pair of term text and its offset in the phrase.
|
||||||
|
/// * `slop` - (Optional) The number of gaps permitted between the words in the query phrase. Default is 0.
|
||||||
|
#[staticmethod]
|
||||||
|
#[pyo3(signature = (schema, field_name, words, slop = 0))]
|
||||||
|
pub(crate) fn phrase_query(
|
||||||
|
schema: &Schema,
|
||||||
|
field_name: &str,
|
||||||
|
words: Vec<&PyAny>,
|
||||||
|
slop: u32,
|
||||||
|
) -> PyResult<Query> {
|
||||||
|
let mut terms_with_offset = Vec::with_capacity(words.len());
|
||||||
|
for (idx, word) in words.into_iter().enumerate() {
|
||||||
|
if let Ok((offset, value)) = word.extract() {
|
||||||
|
// Custom offset is provided.
|
||||||
|
let term = make_term(&schema.inner, field_name, value)?;
|
||||||
|
terms_with_offset.push((offset, term));
|
||||||
|
} else {
|
||||||
|
// Custom offset is not provided. Use the list index as the offset.
|
||||||
|
let term = make_term(&schema.inner, field_name, word)?;
|
||||||
|
terms_with_offset.push((idx, term));
|
||||||
|
};
|
||||||
|
}
|
||||||
|
if terms_with_offset.is_empty() {
|
||||||
|
return Err(exceptions::PyValueError::new_err(
|
||||||
|
"words must not be empty.",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
let inner = tv::query::PhraseQuery::new_with_offset_and_slop(
|
||||||
|
terms_with_offset,
|
||||||
|
slop,
|
||||||
|
);
|
||||||
|
Ok(Query {
|
||||||
|
inner: Box::new(inner),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
/// Construct a Tantivy's BooleanQuery
|
/// Construct a Tantivy's BooleanQuery
|
||||||
#[staticmethod]
|
#[staticmethod]
|
||||||
#[pyo3(signature = (subqueries))]
|
#[pyo3(signature = (subqueries))]
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
import datetime
|
import datetime
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Any, Optional, Sequence
|
from typing import Any, Optional, Sequence, Union
|
||||||
|
|
||||||
class Schema:
|
class Schema:
|
||||||
pass
|
pass
|
||||||
|
@ -206,16 +206,10 @@ class Query:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def fuzzy_term_query(
|
def phrase_query(schema: Schema, field_name: str, words: list[Union[str, tuple[int, str]]], slop: int = 0) -> Query:
|
||||||
schema: Schema,
|
|
||||||
field_name: str,
|
|
||||||
text: str,
|
|
||||||
distance: int = 1,
|
|
||||||
transposition_cost_one: bool = True,
|
|
||||||
prefix=False,
|
|
||||||
) -> Query:
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def boolean_query(subqueries: Sequence[tuple[Occur, Query]]) -> Query:
|
def boolean_query(subqueries: Sequence[tuple[Occur, Query]]) -> Query:
|
||||||
pass
|
pass
|
||||||
|
|
|
@ -801,6 +801,38 @@ class TestQuery(object):
|
||||||
result = index.searcher().search(query, 10)
|
result = index.searcher().search(query, 10)
|
||||||
assert len(result.hits) == 3
|
assert len(result.hits) == 3
|
||||||
|
|
||||||
|
def test_phrase_query(self, ram_index):
    """Check Query.phrase_query for word order, explicit offsets, slop and empty input."""
    index = ram_index
    searcher = index.searcher()

    query = Query.phrase_query(index.schema, "title", ["old", "man"])
    # should match the title "The Old Man and the Sea"
    result = searcher.search(query, 10)
    assert len(result.hits) == 1

    query = Query.phrase_query(index.schema, "title", ["man", "old"])
    # shouldn't match any document
    result = searcher.search(query, 10)
    assert len(result.hits) == 0

    query = Query.phrase_query(index.schema, "title", [(1, "man"), (0, "old")])
    # should match "The Old Man and the Sea" with the given offsets
    result = searcher.search(query, 10)
    assert len(result.hits) == 1

    query = Query.phrase_query(index.schema, "title", ["man", "sea"])
    # shouldn't match any document with default slop 0.
    result = searcher.search(query, 10)
    assert len(result.hits) == 0

    query = Query.phrase_query(index.schema, "title", ["man", "sea"], slop=2)
    # should match the title "The Old Man and the Sea" with slop 2.
    result = searcher.search(query, 10)
    assert len(result.hits) == 1

    # `match` is applied as a regular expression, so the trailing dot is
    # escaped to require the literal message.
    with pytest.raises(ValueError, match=r"words must not be empty\."):
        Query.phrase_query(index.schema, "title", [])
|
||||||
|
|
||||||
def test_fuzzy_term_query(self, ram_index):
|
def test_fuzzy_term_query(self, ram_index):
|
||||||
index = ram_index
|
index = ram_index
|
||||||
query = Query.fuzzy_term_query(index.schema, "title", "ice")
|
query = Query.fuzzy_term_query(index.schema, "title", "ice")
|
||||||
|
|
Loading…
Reference in New Issue