Expose Tantivy's PhraseQuery (#234)
parent
3d394959b8
commit
03b1c89fa3
42
src/query.rs
42
src/query.rs
|
@ -157,6 +157,48 @@ impl Query {
|
|||
})
|
||||
}
|
||||
|
||||
/// Construct a Tantivy PhraseQuery with optional custom offsets and slop.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `schema` - Schema of the target index.
|
||||
/// * `field_name` - Field name to be searched.
|
||||
/// * `words` - Word list that constructs the phrase. Each entry is either a term value alone (its position in the list is used as its offset) or an `(offset, value)` pair that gives the term's offset in the phrase explicitly.
|
||||
/// * `slop` - (Optional) The number of gaps permitted between the words in the query phrase. Default is 0.
///
/// # Errors
///
/// Raises a Python `ValueError` if `words` is empty. Any error returned by
/// `make_term` while converting a word into a term is propagated unchanged.
|
||||
#[staticmethod]
|
||||
#[pyo3(signature = (schema, field_name, words, slop = 0))]
|
||||
pub(crate) fn phrase_query(
|
||||
schema: &Schema,
|
||||
field_name: &str,
|
||||
words: Vec<&PyAny>,
|
||||
slop: u32,
|
||||
) -> PyResult<Query> {
|
||||
let mut terms_with_offset = Vec::with_capacity(words.len());
|
||||
for (idx, word) in words.into_iter().enumerate() {
|
||||
if let Ok((offset, value)) = word.extract() {
|
||||
// The entry extracted as an `(offset, value)` pair: use the caller-supplied offset.
|
||||
let term = make_term(&schema.inner, field_name, value)?;
|
||||
terms_with_offset.push((offset, term));
|
||||
} else {
|
||||
// The entry is not an `(offset, value)` pair: treat the whole object as the
// term value and use its list index as the offset.
|
||||
let term = make_term(&schema.inner, field_name, word)?;
|
||||
terms_with_offset.push((idx, term));
|
||||
};
|
||||
}
|
||||
// An empty `words` list produces no terms; reject it with a `ValueError`
// before constructing the query.
if terms_with_offset.is_empty() {
|
||||
return Err(exceptions::PyValueError::new_err(
|
||||
"words must not be empty.",
|
||||
));
|
||||
}
|
||||
let inner = tv::query::PhraseQuery::new_with_offset_and_slop(
|
||||
terms_with_offset,
|
||||
slop,
|
||||
);
|
||||
Ok(Query {
|
||||
inner: Box::new(inner),
|
||||
})
|
||||
}
|
||||
|
||||
/// Construct a Tantivy's BooleanQuery
|
||||
#[staticmethod]
|
||||
#[pyo3(signature = (subqueries))]
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
import datetime
|
||||
from enum import Enum
|
||||
from typing import Any, Optional, Sequence
|
||||
from typing import Any, Optional, Sequence, Union
|
||||
|
||||
class Schema:
|
||||
pass
|
||||
|
@ -206,16 +206,10 @@ class Query:
|
|||
pass
|
||||
|
||||
@staticmethod
|
||||
def fuzzy_term_query(
|
||||
schema: Schema,
|
||||
field_name: str,
|
||||
text: str,
|
||||
distance: int = 1,
|
||||
transposition_cost_one: bool = True,
|
||||
prefix=False,
|
||||
) -> Query:
|
||||
def phrase_query(schema: Schema, field_name: str, words: list[Union[str, tuple[int, str]]], slop: int = 0) -> Query:
|
||||
pass
|
||||
|
||||
|
||||
@staticmethod
|
||||
def boolean_query(subqueries: Sequence[tuple[Occur, Query]]) -> Query:
|
||||
pass
|
||||
|
|
|
@ -801,6 +801,38 @@ class TestQuery(object):
|
|||
result = index.searcher().search(query, 10)
|
||||
assert len(result.hits) == 3
|
||||
|
||||
def test_phrase_query(self, ram_index):
|
||||
index = ram_index
|
||||
searcher = index.searcher()
|
||||
|
||||
query = Query.phrase_query(index.schema, "title", ["old", "man"])
|
||||
# should match the title "The Old Man and the Sea"
|
||||
result = searcher.search(query, 10)
|
||||
assert len(result.hits) == 1
|
||||
|
||||
query = Query.phrase_query(index.schema, "title", ["man", "old"])
|
||||
# shouldn't match any document: the words are in the wrong order
|
||||
result = searcher.search(query, 10)
|
||||
assert len(result.hits) == 0
|
||||
|
||||
query = Query.phrase_query(index.schema, "title", [(1, "man"), (0, "old")])
|
||||
# should match "The Old Man and the Sea": explicit (offset, word) pairs restore the order
|
||||
result = searcher.search(query, 10)
|
||||
assert len(result.hits) == 1
|
||||
|
||||
query = Query.phrase_query(index.schema, "title", ["man", "sea"])
|
||||
# shouldn't match any document with the default slop of 0.
|
||||
result = searcher.search(query, 10)
|
||||
assert len(result.hits) == 0
|
||||
|
||||
query = Query.phrase_query(index.schema, "title", ["man", "sea"], slop=2)
|
||||
# should match the title "The Old Man and the Sea" with slop 2.
|
||||
result = searcher.search(query, 10)
|
||||
assert len(result.hits) == 1
|
||||
|
||||
# an empty word list is rejected with a ValueError
with pytest.raises(ValueError, match = "words must not be empty."):
|
||||
Query.phrase_query(index.schema, "title", [])
|
||||
|
||||
def test_fuzzy_term_query(self, ram_index):
|
||||
index = ram_index
|
||||
query = Query.fuzzy_term_query(index.schema, "title", "ice")
|
||||
|
|
Loading…
Reference in New Issue