Expose tantivy's TermQuery (#175)

master
Tomoko Uchida 2023-12-20 18:40:50 +09:00 committed by GitHub
parent 5391291541
commit f94e04637b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 71 additions and 3 deletions

View File

@ -1,4 +1,5 @@
use ::tantivy as tv; use ::tantivy as tv;
use ::tantivy::schema::{Term, Value};
use pyo3::{exceptions, prelude::*, wrap_pymodule}; use pyo3::{exceptions, prelude::*, wrap_pymodule};
mod document; mod document;
@ -20,6 +21,8 @@ use schemabuilder::SchemaBuilder;
use searcher::{DocAddress, Order, SearchResult, Searcher}; use searcher::{DocAddress, Order, SearchResult, Searcher};
use snippet::{Snippet, SnippetGenerator}; use snippet::{Snippet, SnippetGenerator};
use crate::document::extract_value;
/// Python bindings for the search engine library Tantivy. /// Python bindings for the search engine library Tantivy.
/// ///
/// Tantivy is a full text search engine library written in rust. /// Tantivy is a full text search engine library written in rust.
@ -153,3 +156,29 @@ pub(crate) fn get_field(
Ok(field) Ok(field)
} }
/// Build a tantivy `Term` for `field_name` from a Python value.
///
/// The field handle is resolved with `get_field` and the Python object is
/// converted with `extract_value`; an error from either call is returned
/// unchanged, in that order. The kind of term that is built follows the
/// `Value` variant produced by `extract_value`.
///
/// Returns a `PyValueError` when the extracted value is a variant that none
/// of the term constructors below handle.
pub(crate) fn make_term(
    schema: &tv::schema::Schema,
    field_name: &str,
    field_value: &PyAny,
) -> PyResult<tv::Term> {
    let field = get_field(schema, field_name)?;

    Ok(match extract_value(field_value)? {
        Value::Bool(flag) => Term::from_field_bool(field, flag),
        Value::U64(unsigned) => Term::from_field_u64(field, unsigned),
        Value::I64(signed) => Term::from_field_i64(field, signed),
        Value::F64(float) => Term::from_field_f64(field, float),
        Value::Str(text) => Term::from_field_text(field, &text),
        Value::Date(date) => Term::from_field_date(field, date),
        Value::IpAddr(addr) => Term::from_field_ip_addr(field, addr),
        Value::Facet(facet) => Term::from_facet(field, &facet),
        // Every remaining variant has no term constructor wired up here.
        _ => {
            let message = format!(
                "Can't create a term for Field `{field_name}` with value `{field_value}`."
            );
            return Err(exceptions::PyValueError::new_err(message));
        }
    })
}

View File

@ -1,4 +1,5 @@
use pyo3::prelude::*; use crate::{make_term, Schema};
use pyo3::{exceptions, prelude::*, types::PyAny};
use tantivy as tv; use tantivy as tv;
/// Tantivy's Query /// Tantivy's Query
@ -18,4 +19,28 @@ impl Query {
fn __repr__(&self) -> PyResult<String> { fn __repr__(&self) -> PyResult<String> {
Ok(format!("Query({:?})", self.get())) Ok(format!("Query({:?})", self.get()))
} }
/// Build a `Query` wrapping tantivy's `TermQuery`.
///
/// The term is created by `make_term` from `schema`, `field_name` and
/// `field_value`; any error it returns is propagated unchanged.
///
/// `index_option` selects the `IndexRecordOption` passed to the term query
/// and must be one of `"basic"`, `"freq"` or `"position"` (the default).
/// Any other string results in a `ValueError`.
#[staticmethod]
#[pyo3(signature = (schema, field_name, field_value, index_option = "position"))]
pub(crate) fn term_query(
    schema: &Schema,
    field_name: &str,
    field_value: &PyAny,
    index_option: &str,
) -> PyResult<Query> {
    // The term is built first, so its errors take precedence over an
    // invalid `index_option`.
    let term = make_term(&schema.inner, field_name, field_value)?;

    let record_option = match index_option {
        "basic" => tv::schema::IndexRecordOption::Basic,
        "freq" => tv::schema::IndexRecordOption::WithFreqs,
        "position" => tv::schema::IndexRecordOption::WithFreqsAndPositions,
        _ => {
            let message =
                "Invalid index option, valid choices are: 'basic', 'freq' and 'position'";
            return Err(exceptions::PyValueError::new_err(message));
        }
    };

    let tantivy_query = tv::query::TermQuery::new(term, record_option);
    Ok(Query {
        inner: Box::new(tantivy_query),
    })
}
} }

View File

@ -189,6 +189,8 @@ class Document:
class Query: class Query:
@staticmethod
def term_query(
    schema: Schema,
    field_name: str,
    field_value: Any,
    index_option: str = "position",
) -> Query:
    """Type stub for the static constructor of a term query.

    Takes a schema, a field name, a field value and an index option
    (defaulting to "position") and is annotated as returning a `Query`.
    """
    pass

View File

@ -7,7 +7,7 @@ import tantivy
import pickle import pickle
import pytest import pytest
import tantivy import tantivy
from tantivy import Document, Index, SchemaBuilder, SnippetGenerator from tantivy import Document, Index, SchemaBuilder, SnippetGenerator, Query
def schema(): def schema():
@ -925,3 +925,15 @@ class TestSnippets(object):
assert first.end == 23 assert first.end == 23
html_snippet = snippet.to_html() html_snippet = snippet.to_html()
assert html_snippet == "The Old Man and the <b>Sea</b>" assert html_snippet == "The Old Man and the <b>Sea</b>"
class TestQuery(object):
    """Tests for queries built through the `Query` class."""

    def test_term_query(self, ram_index):
        """A term query for "sea" on `title` yields exactly one hit."""
        term_query = Query.term_query(ram_index.schema, "title", "sea")
        search_result = ram_index.searcher().search(term_query, 10)
        assert len(search_result.hits) == 1

        # Each hit unpacks into two items; only the document address is used.
        _score, hit_address = search_result.hits[0]
        matched_doc = ram_index.searcher().doc(hit_address)
        assert matched_doc["title"] == ["The Old Man and the Sea"]