Expose Tantivy's TermSetQuery (#249)

master
Aécio Santos 2024-04-26 08:21:46 -03:00 committed by GitHub
parent 7e57a00b23
commit 8216f17d60
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 53 additions and 0 deletions

View File

@ -89,6 +89,26 @@ impl Query {
})
}
/// Construct a Tantivy `TermSetQuery` for a single field.
///
/// Every element of `field_values` is turned into a term for `field_name`
/// via `make_term`, using the Tantivy schema wrapped by `schema`. The
/// resulting terms are handed to `TermSetQuery::new`, which (per Tantivy's
/// documentation) matches documents containing any of the terms.
///
/// # Errors
///
/// Returns the error of the first value for which `make_term` fails; no
/// query is built in that case.
#[staticmethod]
#[pyo3(signature = (schema, field_name, field_values))]
pub(crate) fn term_set_query(
    schema: &Schema,
    field_name: &str,
    field_values: Vec<&PyAny>,
) -> PyResult<Query> {
    // Convert values one at a time so the first failure aborts immediately.
    let mut terms = Vec::with_capacity(field_values.len());
    for value in field_values {
        terms.push(make_term(&schema.inner, field_name, &value)?);
    }

    let term_set = tv::query::TermSetQuery::new(terms);
    Ok(Query {
        inner: Box::new(term_set),
    })
}
/// Construct a Tantivy AllQuery
#[staticmethod]
pub(crate) fn all_query() -> PyResult<Query> {

View File

@ -197,6 +197,10 @@ class Query:
) -> Query:
pass
@staticmethod
def term_set_query(
    schema: Schema,
    field_name: str,
    field_values: Sequence[Any],
) -> Query:
    """Construct a Tantivy TermSetQuery.

    Args:
        schema: Schema used to build a term from each value.
        field_name: Name of the field the terms belong to.
        field_values: Values to turn into terms for `field_name`; may be
            empty, in which case the query matches no document.

    Returns:
        The constructed query.

    Raises:
        ValueError: If a term cannot be created for the field from one of
            the given values (for example a list object).
    """
    pass
@staticmethod
def all_query() -> Query:
    """Construct a Tantivy AllQuery."""
    pass

View File

@ -765,6 +765,35 @@ class TestQuery(object):
searched_doc = index.searcher().doc(doc_address)
assert searched_doc["title"] == ["The Old Man and the Sea"]
def test_term_set_query(self, ram_index):
    """Check Query.term_set_query with matching, unknown, empty and invalid term lists."""
    index = ram_index

    # A term set query matches a document when it contains at least one of
    # the terms (not necessarily all of them). Exactly one title in the
    # index contains "old" or "man".
    terms = ["old", "man"]
    query = Query.term_set_query(index.schema, "title", terms)
    result = index.searcher().search(query, 10)
    assert len(result.hits) == 1
    _, doc_address = result.hits[0]
    searched_doc = index.searcher().doc(doc_address)
    assert searched_doc["title"] == ["The Old Man and the Sea"]

    # One known term is enough to match, even next to a term that does not
    # exist in the index.
    terms = ["old", "a long term that does not exist in the index"]
    query = Query.term_set_query(index.schema, "title", terms)
    result = index.searcher().search(query, 10)
    assert len(result.hits) == 1
    _, doc_address = result.hits[0]
    searched_doc = index.searcher().doc(doc_address)
    assert searched_doc["title"] == ["The Old Man and the Sea"]

    # Should not match any document since the term does not exist in the index
    terms = ["a long term that does not exist in the index"]
    query = Query.term_set_query(index.schema, "title", terms)
    result = index.searcher().search(query, 10)
    assert len(result.hits) == 0

    # Should not match any document when the terms list is empty
    terms = []
    query = Query.term_set_query(index.schema, "title", terms)
    result = index.searcher().search(query, 10)
    assert len(result.hits) == 0

    # Should fail to create the query due to the invalid list object in the
    # terms list. Only the call under test lives inside the raises block so
    # an unrelated ValueError cannot satisfy the assertion.
    terms = ["old", [], "man"]
    with pytest.raises(ValueError, match=r"Can't create a term for Field `title` with value `\[\]`"):
        Query.term_set_query(index.schema, "title", terms)
def test_all_query(self, ram_index):
index = ram_index
query = Query.all_query()