Support lenient parser (#114)
parent
11f8bc4611
commit
2040463d52
|
@ -1112,6 +1112,7 @@ dependencies = [
|
|||
name = "tantivy"
|
||||
version = "0.20.1"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"chrono",
|
||||
"futures",
|
||||
"itertools 0.10.5",
|
||||
|
|
|
@ -14,6 +14,7 @@ crate-type = ["cdylib"]
|
|||
pyo3-build-config = "0.19.1"
|
||||
|
||||
[dependencies]
|
||||
base64 = "0.21"
|
||||
chrono = "0.4.23"
|
||||
tantivy = "0.21.0"
|
||||
itertools = "0.10.5"
|
||||
|
|
66
src/index.rs
66
src/index.rs
|
@ -5,6 +5,7 @@ use pyo3::{exceptions, prelude::*, types::PyAny};
|
|||
use crate::{
|
||||
document::{extract_value, Document},
|
||||
get_field,
|
||||
parser_error::QueryParserErrorIntoPy,
|
||||
query::Query,
|
||||
schema::Schema,
|
||||
searcher::Searcher,
|
||||
|
@ -399,6 +400,71 @@ impl Index {
|
|||
|
||||
Ok(Query { inner: query })
|
||||
}
|
||||
|
||||
/// Parse a query leniently.
|
||||
///
|
||||
/// This variant parses invalid query on a best effort basis. If some part of the query can't
|
||||
/// reasonably be executed (range query without field, searching on a non existing field,
|
||||
/// searching without precising field when no default field is provided...), they may get turned
|
||||
/// into a "match-nothing" subquery.
|
||||
///
|
||||
/// Args:
|
||||
/// query: the query, following the tantivy query language.
|
||||
/// default_fields_names (List[Field]): A list of fields used to search if no
|
||||
/// field is specified in the query.
|
||||
///
|
||||
/// Returns a tuple containing the parsed query and a list of errors.
|
||||
///
|
||||
/// Raises ValueError if a field in `default_field_names` is not defined or marked as indexed.
|
||||
#[pyo3(signature = (query, default_field_names = None))]
|
||||
pub fn parse_query_lenient(
|
||||
&self,
|
||||
query: &str,
|
||||
default_field_names: Option<Vec<String>>,
|
||||
) -> PyResult<(Query, Vec<PyObject>)> {
|
||||
let schema = self.index.schema();
|
||||
|
||||
let default_fields = if let Some(default_field_names_vec) =
|
||||
default_field_names
|
||||
{
|
||||
default_field_names_vec
|
||||
.iter()
|
||||
.map(|field_name| {
|
||||
schema
|
||||
.get_field(field_name)
|
||||
.map_err(|_err| {
|
||||
exceptions::PyValueError::new_err(format!(
|
||||
"Field `{field_name}` is not defined in the schema."
|
||||
))
|
||||
})
|
||||
.and_then(|field| {
|
||||
schema.get_field_entry(field).is_indexed().then_some(field).ok_or(
|
||||
exceptions::PyValueError::new_err(
|
||||
format!(
|
||||
"Field `{field_name}` is not set as indexed in the schema."
|
||||
),
|
||||
))
|
||||
})
|
||||
}).collect::<Result<Vec<_>, _>>()?
|
||||
} else {
|
||||
self.index
|
||||
.schema()
|
||||
.fields()
|
||||
.filter_map(|(f, fe)| fe.is_indexed().then_some(f))
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
|
||||
let parser =
|
||||
tv::query::QueryParser::for_index(&self.index, default_fields);
|
||||
let (query, errors) = parser.parse_query_lenient(query);
|
||||
|
||||
Python::with_gil(|py| {
|
||||
let errors =
|
||||
errors.into_iter().map(|err| err.into_py(py)).collect();
|
||||
|
||||
Ok((Query { inner: query }, errors))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Index {
|
||||
|
|
53
src/lib.rs
53
src/lib.rs
|
@ -1,9 +1,10 @@
|
|||
use ::tantivy as tv;
|
||||
use pyo3::{exceptions, prelude::*};
|
||||
use pyo3::{exceptions, prelude::*, wrap_pymodule};
|
||||
|
||||
mod document;
|
||||
mod facet;
|
||||
mod index;
|
||||
mod parser_error;
|
||||
mod query;
|
||||
mod schema;
|
||||
mod schemabuilder;
|
||||
|
@ -83,6 +84,56 @@ fn tantivy(_py: Python, m: &PyModule) -> PyResult<()> {
|
|||
m.add_class::<Query>()?;
|
||||
m.add_class::<Snippet>()?;
|
||||
m.add_class::<SnippetGenerator>()?;
|
||||
|
||||
m.add_wrapped(wrap_pymodule!(query_parser_error))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Submodule containing all the possible errors that can be raised during
|
||||
/// query parsing.
|
||||
///
|
||||
/// Example:
|
||||
/// >>> import tantivy
|
||||
/// >>> from tantivy import query_parser_error
|
||||
///
|
||||
/// >>> builder = tantivy.SchemaBuilder()
|
||||
///
|
||||
/// >>> title = builder.add_text_field("title", stored=True)
|
||||
/// >>> body = builder.add_text_field("body")
|
||||
/// >>> id = builder.add_unsigned_field("id")
|
||||
/// >>> rating = builder.add_float_field("rating")
|
||||
///
|
||||
/// >>> schema = builder.build()
|
||||
/// >>> index = tantivy.Index(schema)
|
||||
///
|
||||
/// >>> query, errors = index.parse_query_lenient(
|
||||
/// "bod:'world' AND id:<3.5 AND rating:5.0"
|
||||
/// )
|
||||
///
|
||||
/// >>> assert len(errors) == 2
|
||||
/// >>> assert isinstance(errors[0], query_parser_error.FieldDoesNotExistError)
|
||||
/// >>> assert isinstance(errors[1], query_parser_error.ExpectedIntError)
|
||||
#[pymodule]
|
||||
fn query_parser_error(_py: Python, m: &PyModule) -> PyResult<()> {
|
||||
m.add_class::<parser_error::SyntaxError>()?;
|
||||
m.add_class::<parser_error::UnsupportedQueryError>()?;
|
||||
m.add_class::<parser_error::FieldDoesNotExistError>()?;
|
||||
m.add_class::<parser_error::ExpectedIntError>()?;
|
||||
m.add_class::<parser_error::ExpectedBase64Error>()?;
|
||||
m.add_class::<parser_error::ExpectedFloatError>()?;
|
||||
m.add_class::<parser_error::ExpectedBoolError>()?;
|
||||
m.add_class::<parser_error::AllButQueryForbiddenError>()?;
|
||||
m.add_class::<parser_error::NoDefaultFieldDeclaredError>()?;
|
||||
m.add_class::<parser_error::FieldNotIndexedError>()?;
|
||||
m.add_class::<parser_error::FieldDoesNotHavePositionsIndexedError>()?;
|
||||
m.add_class::<parser_error::PhrasePrefixRequiresAtLeastTwoTermsError>()?;
|
||||
m.add_class::<parser_error::UnknownTokenizerError>()?;
|
||||
m.add_class::<parser_error::RangeMustNotHavePhraseError>()?;
|
||||
m.add_class::<parser_error::DateFormatError>()?;
|
||||
m.add_class::<parser_error::FacetFormatError>()?;
|
||||
m.add_class::<parser_error::IpFormatError>()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,972 @@
|
|||
use std::{
|
||||
convert::TryFrom,
|
||||
net::AddrParseError,
|
||||
num::{IntErrorKind, ParseFloatError, ParseIntError},
|
||||
str::ParseBoolError,
|
||||
};
|
||||
|
||||
use pyo3::prelude::*;
|
||||
use tantivy::{self as tv, schema::FacetParseError};
|
||||
|
||||
// TODO(https://github.com/PyO3/pyo3/issues/1190): Expose this to bindings once trait <-> ABC is
|
||||
// supported in PyO3.
|
||||
pub(crate) trait QueryParserError {
|
||||
fn full_message(&self) -> String;
|
||||
}
|
||||
|
||||
/// A crate local version of the [`IntoPy`] trait to implement for
|
||||
/// [`QueryParserError`](tv::query::QueryParserError).
|
||||
pub(crate) trait QueryParserErrorIntoPy {
|
||||
fn into_py(self, py: Python) -> PyObject;
|
||||
}
|
||||
|
||||
impl QueryParserErrorIntoPy for tv::query::QueryParserError {
|
||||
fn into_py(self, py: Python) -> PyObject {
|
||||
match self {
|
||||
tv::query::QueryParserError::SyntaxError(message) => {
|
||||
SyntaxError { message }.into_py(py)
|
||||
}
|
||||
tv::query::QueryParserError::UnsupportedQuery(message) => {
|
||||
UnsupportedQueryError { message }.into_py(py)
|
||||
}
|
||||
tv::query::QueryParserError::FieldDoesNotExist(field) => {
|
||||
FieldDoesNotExistError { field }.into_py(py)
|
||||
}
|
||||
tv::query::QueryParserError::FieldDoesNotHavePositionsIndexed(
|
||||
field,
|
||||
) => FieldDoesNotHavePositionsIndexedError { field }.into_py(py),
|
||||
tv::query::QueryParserError::ExpectedInt(parse_int_error) => {
|
||||
ExpectedIntError { parse_int_error }.into_py(py)
|
||||
}
|
||||
tv::query::QueryParserError::ExpectedFloat(parse_float_error) => {
|
||||
ExpectedFloatError { parse_float_error }.into_py(py)
|
||||
}
|
||||
tv::query::QueryParserError::ExpectedBool(parse_bool_error) => {
|
||||
ExpectedBoolError { parse_bool_error }.into_py(py)
|
||||
}
|
||||
tv::query::QueryParserError::ExpectedBase64(decode_error) => {
|
||||
ExpectedBase64Error { decode_error }.into_py(py)
|
||||
}
|
||||
tv::query::QueryParserError::AllButQueryForbidden => {
|
||||
AllButQueryForbiddenError.into_py(py)
|
||||
}
|
||||
tv::query::QueryParserError::NoDefaultFieldDeclared => {
|
||||
NoDefaultFieldDeclaredError.into_py(py)
|
||||
}
|
||||
tv::query::QueryParserError::FieldNotIndexed(field) => {
|
||||
FieldNotIndexedError { field }.into_py(py)
|
||||
}
|
||||
tv::query::QueryParserError::PhrasePrefixRequiresAtLeastTwoTerms {
|
||||
phrase,
|
||||
tokenizer,
|
||||
} => {
|
||||
PhrasePrefixRequiresAtLeastTwoTermsError { phrase, tokenizer }.into_py(py)
|
||||
}
|
||||
tv::query::QueryParserError::UnknownTokenizer { tokenizer, field } => {
|
||||
UnknownTokenizerError { tokenizer, field }.into_py(py)
|
||||
}
|
||||
tv::query::QueryParserError::RangeMustNotHavePhrase => {
|
||||
RangeMustNotHavePhraseError.into_py(py)
|
||||
}
|
||||
tv::query::QueryParserError::DateFormatError(_) => {
|
||||
DateFormatError { inner: self }.into_py(py)
|
||||
}
|
||||
tv::query::QueryParserError::FacetFormatError(facet_parse_error) => {
|
||||
FacetFormatError { facet_parse_error }.into_py(py)
|
||||
}
|
||||
tv::query::QueryParserError::IpFormatError(addr_parse_error) => {
|
||||
IpFormatError { addr_parse_error }.into_py(py)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Error in the query syntax.
|
||||
#[pyclass(frozen)]
|
||||
pub(crate) struct SyntaxError {
|
||||
message: String,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl SyntaxError {
|
||||
#[getter]
|
||||
fn inner_message(&self) -> &str {
|
||||
self.message.as_str()
|
||||
}
|
||||
|
||||
fn __repr__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
|
||||
fn __str__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryParserError for SyntaxError {
|
||||
fn full_message(&self) -> String {
|
||||
format!("Syntax Error: {0}", self.message)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SyntaxError> for tv::query::QueryParserError {
|
||||
fn from(error: SyntaxError) -> Self {
|
||||
tv::query::QueryParserError::SyntaxError(error.message)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<tv::query::QueryParserError> for SyntaxError {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(
|
||||
error: tv::query::QueryParserError,
|
||||
) -> Result<Self, Self::Error> {
|
||||
match error {
|
||||
tv::query::QueryParserError::SyntaxError(message) => {
|
||||
Ok(Self { message })
|
||||
}
|
||||
_ => Err(format!("{error} is not a SyntaxError")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This query is unsupported.
|
||||
#[pyclass(frozen)]
|
||||
pub(crate) struct UnsupportedQueryError {
|
||||
message: String,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl UnsupportedQueryError {
|
||||
#[getter]
|
||||
fn inner_message(&self) -> &str {
|
||||
self.message.as_str()
|
||||
}
|
||||
|
||||
fn __repr__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
|
||||
fn __str__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryParserError for UnsupportedQueryError {
|
||||
fn full_message(&self) -> String {
|
||||
format!("Unsupported query: {0}", self.message)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<UnsupportedQueryError> for tv::query::QueryParserError {
|
||||
fn from(error: UnsupportedQueryError) -> Self {
|
||||
tv::query::QueryParserError::SyntaxError(error.message)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<tv::query::QueryParserError> for UnsupportedQueryError {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(
|
||||
error: tv::query::QueryParserError,
|
||||
) -> Result<Self, Self::Error> {
|
||||
match error {
|
||||
tv::query::QueryParserError::UnsupportedQuery(message) => {
|
||||
Ok(Self { message })
|
||||
}
|
||||
_ => Err(format!("{error} is not an UnsupportedQuery error")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The query references a field that is not in the schema.
|
||||
#[pyclass(frozen)]
|
||||
pub struct FieldDoesNotExistError {
|
||||
field: String,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl FieldDoesNotExistError {
|
||||
/// The name of the field causing the error.
|
||||
#[getter]
|
||||
fn field(&self) -> &str {
|
||||
self.field.as_str()
|
||||
}
|
||||
|
||||
fn __repr__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
|
||||
fn __str__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryParserError for FieldDoesNotExistError {
|
||||
fn full_message(&self) -> String {
|
||||
format!("Field does not exist: '{0}'", self.field)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<FieldDoesNotExistError> for tv::query::QueryParserError {
|
||||
fn from(error: FieldDoesNotExistError) -> Self {
|
||||
tv::query::QueryParserError::FieldDoesNotExist(error.field)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<tv::query::QueryParserError> for FieldDoesNotExistError {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(
|
||||
error: tv::query::QueryParserError,
|
||||
) -> Result<Self, Self::Error> {
|
||||
match error {
|
||||
tv::query::QueryParserError::FieldDoesNotExist(field) => {
|
||||
Ok(Self { field })
|
||||
}
|
||||
_ => Err(format!("{error} is not a FieldDoesNotExist error")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The query contains a term for a `u64` or `i64`-field, but the value is neither.
|
||||
#[pyclass(frozen)]
|
||||
pub(crate) struct ExpectedIntError {
|
||||
parse_int_error: ParseIntError,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl ExpectedIntError {
|
||||
/// If `true`, the value being parsed was empty.
|
||||
fn caused_by_empty(&self) -> bool {
|
||||
self.parse_int_error.kind() == &IntErrorKind::Empty
|
||||
}
|
||||
|
||||
/// If `true`, an invalid digit was found.
|
||||
fn caused_by_invalid_digit(&self) -> bool {
|
||||
self.parse_int_error.kind() == &IntErrorKind::InvalidDigit
|
||||
}
|
||||
|
||||
/// If `true`, the value being parsed was too large.
|
||||
fn caused_by_pos_overflow(&self) -> bool {
|
||||
self.parse_int_error.kind() == &IntErrorKind::PosOverflow
|
||||
}
|
||||
|
||||
/// If `true`, the value being parsed was too small.
|
||||
fn caused_by_neg_overflow(&self) -> bool {
|
||||
self.parse_int_error.kind() == &IntErrorKind::NegOverflow
|
||||
}
|
||||
|
||||
fn __repr__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
|
||||
fn __str__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryParserError for ExpectedIntError {
|
||||
fn full_message(&self) -> String {
|
||||
format!("Expected a valid integer: '{0:?}'", self.parse_int_error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ExpectedIntError> for tv::query::QueryParserError {
|
||||
fn from(error: ExpectedIntError) -> Self {
|
||||
tv::query::QueryParserError::ExpectedInt(error.parse_int_error)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<tv::query::QueryParserError> for ExpectedIntError {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(
|
||||
error: tv::query::QueryParserError,
|
||||
) -> Result<Self, Self::Error> {
|
||||
match error {
|
||||
tv::query::QueryParserError::ExpectedInt(parse_int_error) => {
|
||||
Ok(Self { parse_int_error })
|
||||
}
|
||||
_ => Err(format!("{error} is not an ExpectedInt error")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The query contains a term for a bytes field, but the value is not valid base64.
|
||||
#[pyclass(frozen)]
|
||||
pub(crate) struct ExpectedBase64Error {
|
||||
decode_error: base64::DecodeError,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl ExpectedBase64Error {
|
||||
/// If `true`, an invalid byte was found in the query. Padding characters (`=`) interspersed in
|
||||
/// the encoded form will be treated as invalid bytes.
|
||||
fn caused_by_invalid_byte(&self) -> bool {
|
||||
match self.decode_error {
|
||||
base64::DecodeError::InvalidByte { .. } => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// If the error was caused by an invalid byte, returns the offset and offending byte.
|
||||
fn invalid_byte_info(&self) -> Option<(usize, u8)> {
|
||||
match self.decode_error {
|
||||
base64::DecodeError::InvalidByte(position, byte) => {
|
||||
Some((position, byte))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// If `true`, the length of the base64 string was invalid.
|
||||
fn caused_by_invalid_length(&self) -> bool {
|
||||
match self.decode_error {
|
||||
base64::DecodeError::InvalidLength => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// The last non-padding input symbol's encoded 6 bits have nonzero bits that will be discarded.
|
||||
/// If `true`, this is indicative of corrupted or truncated Base64.
|
||||
fn caused_by_invalid_last_symbol(&self) -> bool {
|
||||
match self.decode_error {
|
||||
base64::DecodeError::InvalidLastSymbol { .. } => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// If the error was caused by an invalid last symbol, returns the offset and offending byte.
|
||||
fn invalid_last_symbol_info(&self) -> Option<(usize, u8)> {
|
||||
match self.decode_error {
|
||||
base64::DecodeError::InvalidLastSymbol(position, byte) => {
|
||||
Some((position, byte))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// The nature of the padding was not as configured: absent or incorrect when it must be
|
||||
/// canonical, or present when it must be absent, etc.
|
||||
fn caused_by_invalid_padding(&self) -> bool {
|
||||
match self.decode_error {
|
||||
base64::DecodeError::InvalidPadding => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn __repr__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
|
||||
fn __str__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryParserError for ExpectedBase64Error {
|
||||
fn full_message(&self) -> String {
|
||||
format!("Expected base64: {0:?}", self.decode_error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ExpectedBase64Error> for tv::query::QueryParserError {
|
||||
fn from(error: ExpectedBase64Error) -> Self {
|
||||
tv::query::QueryParserError::ExpectedBase64(error.decode_error)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<tv::query::QueryParserError> for ExpectedBase64Error {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(
|
||||
error: tv::query::QueryParserError,
|
||||
) -> Result<Self, Self::Error> {
|
||||
match error {
|
||||
tv::query::QueryParserError::ExpectedBase64(decode_error) => {
|
||||
Ok(Self { decode_error })
|
||||
}
|
||||
_ => Err(format!("{error} is not an ExpectedBase64 error")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The query contains a term for a `f64`-field, but the value is not a f64.
|
||||
#[pyclass(frozen)]
|
||||
pub(crate) struct ExpectedFloatError {
|
||||
parse_float_error: ParseFloatError,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl ExpectedFloatError {
|
||||
fn __repr__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
|
||||
fn __str__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryParserError for ExpectedFloatError {
|
||||
fn full_message(&self) -> String {
|
||||
format!("Expected a float value: '{0:?}'", self.parse_float_error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ExpectedFloatError> for tv::query::QueryParserError {
|
||||
fn from(error: ExpectedFloatError) -> Self {
|
||||
tv::query::QueryParserError::ExpectedFloat(error.parse_float_error)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<tv::query::QueryParserError> for ExpectedFloatError {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(
|
||||
error: tv::query::QueryParserError,
|
||||
) -> Result<Self, Self::Error> {
|
||||
match error {
|
||||
tv::query::QueryParserError::ExpectedFloat(parse_float_error) => {
|
||||
Ok(Self { parse_float_error })
|
||||
}
|
||||
_ => Err(format!("{error} is not an ExpectedFloat error")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The query contains a term for a `bool`-field, but the value is not a bool.
|
||||
#[pyclass(frozen)]
|
||||
pub(crate) struct ExpectedBoolError {
|
||||
parse_bool_error: ParseBoolError,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl ExpectedBoolError {
|
||||
fn __repr__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
|
||||
fn __str__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryParserError for ExpectedBoolError {
|
||||
fn full_message(&self) -> String {
|
||||
format!("Expected a bool value: '{0:?}'", self.parse_bool_error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ExpectedBoolError> for tv::query::QueryParserError {
|
||||
fn from(error: ExpectedBoolError) -> Self {
|
||||
tv::query::QueryParserError::ExpectedBool(error.parse_bool_error)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<tv::query::QueryParserError> for ExpectedBoolError {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(
|
||||
error: tv::query::QueryParserError,
|
||||
) -> Result<Self, Self::Error> {
|
||||
match error {
|
||||
tv::query::QueryParserError::ExpectedBool(parse_bool_error) => {
|
||||
Ok(Self { parse_bool_error })
|
||||
}
|
||||
_ => Err(format!("{error} is not an ExpectedBool error")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// It is forbidden queries that are only "excluding". (e.g. -title:pop)
|
||||
#[pyclass(frozen)]
|
||||
pub(crate) struct AllButQueryForbiddenError;
|
||||
|
||||
#[pymethods]
|
||||
impl AllButQueryForbiddenError {
|
||||
fn __repr__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
|
||||
fn __str__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryParserError for AllButQueryForbiddenError {
|
||||
fn full_message(&self) -> String {
|
||||
"Invalid query: Only excluding terms given".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<AllButQueryForbiddenError> for tv::query::QueryParserError {
|
||||
fn from(_error: AllButQueryForbiddenError) -> Self {
|
||||
tv::query::QueryParserError::AllButQueryForbidden
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<tv::query::QueryParserError> for AllButQueryForbiddenError {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(
|
||||
error: tv::query::QueryParserError,
|
||||
) -> Result<Self, Self::Error> {
|
||||
match error {
|
||||
tv::query::QueryParserError::AllButQueryForbidden => Ok(Self {}),
|
||||
_ => Err(format!("{error} is not an AllButQueryForbidden error")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// If no default field is declared, running a query without any field specified is forbbidden.
|
||||
#[pyclass(frozen)]
|
||||
pub(crate) struct NoDefaultFieldDeclaredError;
|
||||
|
||||
#[pymethods]
|
||||
impl NoDefaultFieldDeclaredError {
|
||||
fn __repr__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
|
||||
fn __str__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryParserError for NoDefaultFieldDeclaredError {
|
||||
fn full_message(&self) -> String {
|
||||
"No default field declared and no field specified in query".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<NoDefaultFieldDeclaredError> for tv::query::QueryParserError {
|
||||
fn from(_error: NoDefaultFieldDeclaredError) -> Self {
|
||||
tv::query::QueryParserError::NoDefaultFieldDeclared
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<tv::query::QueryParserError> for NoDefaultFieldDeclaredError {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(
|
||||
error: tv::query::QueryParserError,
|
||||
) -> Result<Self, Self::Error> {
|
||||
match error {
|
||||
tv::query::QueryParserError::NoDefaultFieldDeclared => Ok(Self {}),
|
||||
_ => Err(format!("{error} is not a NoDefaultFieldDeclared error")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The field searched for is not declared as indexed in the schema.
|
||||
#[pyclass(frozen)]
|
||||
pub(crate) struct FieldNotIndexedError {
|
||||
field: String,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl FieldNotIndexedError {
|
||||
fn field(&self) -> &str {
|
||||
self.field.as_str()
|
||||
}
|
||||
|
||||
fn __repr__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
|
||||
fn __str__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryParserError for FieldNotIndexedError {
|
||||
fn full_message(&self) -> String {
|
||||
format!("The field '{0}' is not declared as indexed", self.field)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<FieldNotIndexedError> for tv::query::QueryParserError {
|
||||
fn from(error: FieldNotIndexedError) -> Self {
|
||||
tv::query::QueryParserError::FieldNotIndexed(error.field)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<tv::query::QueryParserError> for FieldNotIndexedError {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(
|
||||
error: tv::query::QueryParserError,
|
||||
) -> Result<Self, Self::Error> {
|
||||
match error {
|
||||
tv::query::QueryParserError::FieldNotIndexed(field) => {
|
||||
Ok(Self { field })
|
||||
}
|
||||
_ => Err(format!("{error} is not an FieldNotIndexed error")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A phrase query was requested for a field that does not have any positions indexed.
|
||||
#[pyclass(frozen)]
|
||||
pub(crate) struct FieldDoesNotHavePositionsIndexedError {
|
||||
field: String,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl FieldDoesNotHavePositionsIndexedError {
|
||||
fn field(&self) -> &str {
|
||||
self.field.as_str()
|
||||
}
|
||||
|
||||
fn __repr__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
|
||||
fn __str__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryParserError for FieldDoesNotHavePositionsIndexedError {
|
||||
fn full_message(&self) -> String {
|
||||
format!(
|
||||
"The field '{0}' does not have positions indexed",
|
||||
self.field
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<FieldDoesNotHavePositionsIndexedError>
|
||||
for tv::query::QueryParserError
|
||||
{
|
||||
fn from(error: FieldDoesNotHavePositionsIndexedError) -> Self {
|
||||
tv::query::QueryParserError::FieldDoesNotHavePositionsIndexed(
|
||||
error.field,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<tv::query::QueryParserError>
|
||||
for FieldDoesNotHavePositionsIndexedError
|
||||
{
|
||||
type Error = String;
|
||||
|
||||
fn try_from(
|
||||
error: tv::query::QueryParserError,
|
||||
) -> Result<Self, Self::Error> {
|
||||
match error {
|
||||
tv::query::QueryParserError::FieldDoesNotHavePositionsIndexed(
|
||||
field,
|
||||
) => Ok(Self { field }),
|
||||
_ => Err(format!(
|
||||
"{error} is not a FieldDoesNotHavePositionsIndexed error"
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A phrase-prefix query requires at least two terms
|
||||
#[pyclass(frozen)]
|
||||
pub(crate) struct PhrasePrefixRequiresAtLeastTwoTermsError {
|
||||
/// The phrase which triggered the issue.
|
||||
phrase: String,
|
||||
/// The tokenizer configured for the field.
|
||||
tokenizer: String,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl PhrasePrefixRequiresAtLeastTwoTermsError {
|
||||
fn phrase(&self) -> &str {
|
||||
self.phrase.as_str()
|
||||
}
|
||||
|
||||
fn tokenizer(&self) -> &str {
|
||||
self.tokenizer.as_str()
|
||||
}
|
||||
|
||||
fn __repr__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
|
||||
fn __str__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryParserError for PhrasePrefixRequiresAtLeastTwoTermsError {
|
||||
fn full_message(&self) -> String {
|
||||
format!(
|
||||
"The phrase '{0:?}' does not produce at least two terms using the tokenizer '{1:?}'",
|
||||
self.phrase, self.tokenizer
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<PhrasePrefixRequiresAtLeastTwoTermsError>
|
||||
for tv::query::QueryParserError
|
||||
{
|
||||
fn from(error: PhrasePrefixRequiresAtLeastTwoTermsError) -> Self {
|
||||
tv::query::QueryParserError::PhrasePrefixRequiresAtLeastTwoTerms {
|
||||
phrase: error.phrase,
|
||||
tokenizer: error.tokenizer,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<tv::query::QueryParserError>
|
||||
for PhrasePrefixRequiresAtLeastTwoTermsError
|
||||
{
|
||||
type Error = String;
|
||||
|
||||
fn try_from(
|
||||
error: tv::query::QueryParserError,
|
||||
) -> Result<Self, Self::Error> {
|
||||
match error {
|
||||
tv::query::QueryParserError::PhrasePrefixRequiresAtLeastTwoTerms {
|
||||
phrase,
|
||||
tokenizer,
|
||||
} => Ok(Self { phrase, tokenizer }),
|
||||
_ => Err(format!(
|
||||
"{error} is not a PhrasePrefixRequiresAtLeastTwoTerms error"
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The tokenizer for the given field is unknown.
|
||||
#[pyclass(frozen)]
|
||||
pub(crate) struct UnknownTokenizerError {
|
||||
/// The name of the tokenizer.
|
||||
tokenizer: String,
|
||||
/// The field name.
|
||||
field: String,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl UnknownTokenizerError {
|
||||
fn tokenizer(&self) -> &str {
|
||||
self.tokenizer.as_str()
|
||||
}
|
||||
|
||||
fn field(&self) -> &str {
|
||||
self.field.as_str()
|
||||
}
|
||||
|
||||
fn __repr__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
|
||||
fn __str__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryParserError for UnknownTokenizerError {
|
||||
fn full_message(&self) -> String {
|
||||
format!(
|
||||
"The tokenizer '{0:?}' for the field '{1:?}' is unknown",
|
||||
self.tokenizer, self.field
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<UnknownTokenizerError> for tv::query::QueryParserError {
|
||||
fn from(error: UnknownTokenizerError) -> Self {
|
||||
tv::query::QueryParserError::UnknownTokenizer {
|
||||
tokenizer: error.tokenizer,
|
||||
field: error.field,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<tv::query::QueryParserError> for UnknownTokenizerError {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(
|
||||
error: tv::query::QueryParserError,
|
||||
) -> Result<Self, Self::Error> {
|
||||
match error {
|
||||
tv::query::QueryParserError::UnknownTokenizer {
|
||||
tokenizer,
|
||||
field,
|
||||
} => Ok(Self { tokenizer, field }),
|
||||
_ => Err(format!("{error} is not an UnknownTokenizer error")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The query contains a range query with a phrase as one of the bounds. Only terms can be used as
|
||||
/// bounds.
|
||||
#[pyclass(frozen)]
|
||||
pub(crate) struct RangeMustNotHavePhraseError;
|
||||
|
||||
#[pymethods]
|
||||
impl RangeMustNotHavePhraseError {
|
||||
fn __repr__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
|
||||
fn __str__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryParserError for RangeMustNotHavePhraseError {
|
||||
fn full_message(&self) -> String {
|
||||
"A range query cannot have a phrase as one of the bounds".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<RangeMustNotHavePhraseError> for tv::query::QueryParserError {
|
||||
fn from(_error: RangeMustNotHavePhraseError) -> Self {
|
||||
tv::query::QueryParserError::RangeMustNotHavePhrase
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<tv::query::QueryParserError> for RangeMustNotHavePhraseError {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(
|
||||
error: tv::query::QueryParserError,
|
||||
) -> Result<Self, Self::Error> {
|
||||
match error {
|
||||
tv::query::QueryParserError::RangeMustNotHavePhrase => Ok(Self {}),
|
||||
_ => Err(format!("{error} is not a RangeMustNotHavePhrase error")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The format for the date field is not RFC 3339 compliant.
|
||||
#[pyclass(frozen)]
|
||||
pub(crate) struct DateFormatError {
|
||||
// Keep around the entire `QueryParserError` to avoid importing the `time` crate.
|
||||
inner: tv::query::QueryParserError,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl DateFormatError {
|
||||
fn __repr__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
|
||||
fn __str__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryParserError for DateFormatError {
|
||||
fn full_message(&self) -> String {
|
||||
"The date field has an invalid format".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<DateFormatError> for tv::query::QueryParserError {
|
||||
fn from(error: DateFormatError) -> Self {
|
||||
error.inner
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<tv::query::QueryParserError> for DateFormatError {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(
|
||||
error: tv::query::QueryParserError,
|
||||
) -> Result<Self, Self::Error> {
|
||||
match error {
|
||||
tv::query::QueryParserError::DateFormatError { .. } => {
|
||||
Ok(Self { inner: error })
|
||||
}
|
||||
_ => Err(format!("{error} is not a DateFormatError")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The format for the facet field is invalid.
|
||||
#[pyclass(frozen)]
|
||||
pub(crate) struct FacetFormatError {
|
||||
facet_parse_error: FacetParseError,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl FacetFormatError {
|
||||
fn __repr__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
|
||||
fn __str__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryParserError for FacetFormatError {
|
||||
fn full_message(&self) -> String {
|
||||
format!("The facet field is malformed: {0}", self.facet_parse_error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<FacetFormatError> for tv::query::QueryParserError {
|
||||
fn from(error: FacetFormatError) -> Self {
|
||||
tv::query::QueryParserError::FacetFormatError(error.facet_parse_error)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<tv::query::QueryParserError> for FacetFormatError {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(
|
||||
error: tv::query::QueryParserError,
|
||||
) -> Result<Self, Self::Error> {
|
||||
match error {
|
||||
tv::query::QueryParserError::FacetFormatError(
|
||||
facet_parse_error,
|
||||
) => Ok(Self { facet_parse_error }),
|
||||
_ => Err(format!("{error} is not a FacetFormatError")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The format for the ip field is invalid.
|
||||
#[pyclass(frozen)]
|
||||
pub(crate) struct IpFormatError {
|
||||
addr_parse_error: AddrParseError,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl IpFormatError {
|
||||
fn __repr__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
|
||||
fn __str__(&self) -> String {
|
||||
self.full_message()
|
||||
}
|
||||
}
|
||||
|
||||
impl QueryParserError for IpFormatError {
|
||||
fn full_message(&self) -> String {
|
||||
format!("The facet field is malformed: {0}", self.addr_parse_error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<IpFormatError> for tv::query::QueryParserError {
|
||||
fn from(error: IpFormatError) -> Self {
|
||||
tv::query::QueryParserError::IpFormatError(error.addr_parse_error)
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<tv::query::QueryParserError> for IpFormatError {
|
||||
type Error = String;
|
||||
|
||||
fn try_from(
|
||||
error: tv::query::QueryParserError,
|
||||
) -> Result<Self, Self::Error> {
|
||||
match error {
|
||||
tv::query::QueryParserError::IpFormatError(addr_parse_error) => {
|
||||
Ok(Self { addr_parse_error })
|
||||
}
|
||||
_ => Err(format!("{error} is not an IpFormatError")),
|
||||
}
|
||||
}
|
||||
}
|
|
@ -286,6 +286,30 @@ class TestClass(object):
|
|||
with pytest.raises(ValueError):
|
||||
index.parse_query("bod:men", ["title", "body"])
|
||||
|
||||
def test_query_lenient(self, ram_index_numeric_fields):
|
||||
from tantivy import query_parser_error
|
||||
|
||||
index = ram_index_numeric_fields
|
||||
|
||||
query, errors = index.parse_query_lenient("rating:3.5")
|
||||
assert len(errors) == 0
|
||||
assert repr(query) == """Query(TermQuery(Term(field=1, type=F64, 3.5)))"""
|
||||
|
||||
_, errors = index.parse_query_lenient("bod:men")
|
||||
assert len(errors) == 1
|
||||
assert isinstance(errors[0], query_parser_error.FieldDoesNotExistError)
|
||||
|
||||
query, errors = index.parse_query_lenient(
|
||||
"body:'hello' AND id:<3.5 OR rating:'hi'"
|
||||
)
|
||||
assert len(errors) == 2
|
||||
assert isinstance(errors[0], query_parser_error.ExpectedIntError)
|
||||
assert isinstance(errors[1], query_parser_error.ExpectedFloatError)
|
||||
assert (
|
||||
repr(query)
|
||||
== """Query(BooleanQuery { subqueries: [(Should, BooleanQuery { subqueries: [(Must, TermQuery(Term(field=3, type=Str, "hello")))] })] })"""
|
||||
)
|
||||
|
||||
def test_order_by_search(self):
|
||||
schema = (
|
||||
SchemaBuilder()
|
||||
|
|
Loading…
Reference in New Issue