From e91726e01080fe8f910b76f7ce01c7a88e8743c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Damir=20Jeli=C4=87?= Date: Sat, 19 Sep 2020 11:36:31 +0200 Subject: [PATCH] tantivy-py: Upgrade PyO3. This removes our nightly requirement but sadly it adds a requirement for things that are kept inside a Python class to be Send. Luckily for us almost everything in Tantivy is Send, except for the Query trait. This patch works around this by keeping the parser and query string inside our python Query object. This sadly means that we are going to parse the query string twice. --- Cargo.toml | 6 +++--- rust-toolchain | 1 - src/index.rs | 24 ++++++++++++++++-------- src/lib.rs | 4 ++-- src/query.rs | 14 ++++++++++++-- src/schemabuilder.rs | 18 +++++++++--------- src/searcher.rs | 10 +++++----- 7 files changed, 47 insertions(+), 30 deletions(-) delete mode 100644 rust-toolchain diff --git a/Cargo.toml b/Cargo.toml index 44f7e17..38a3402 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,13 +12,13 @@ name = "tantivy" crate-type = ["cdylib"] [dependencies] -chrono = "0.4.11" +chrono = "0.4.15" tantivy = "0.13.0" itertools = "0.9.0" -futures = "0.3.4" +futures = "0.3.5" [dependencies.pyo3] -version = "0.9.2" +version = "0.12.1" features = ["extension-module"] [package.metadata.maturin] diff --git a/rust-toolchain b/rust-toolchain deleted file mode 100644 index 1b77fcf..0000000 --- a/rust-toolchain +++ /dev/null @@ -1 +0,0 @@ -nightly-2020-04-18 diff --git a/src/index.rs b/src/index.rs index 704b2b4..911cf17 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1,5 +1,7 @@ #![allow(clippy::new_ret_no_self)] +use std::sync::Arc; + use pyo3::exceptions; use pyo3::prelude::*; use pyo3::types::PyAny; @@ -120,13 +122,13 @@ impl IndexWriter { Value::Date(d) => Term::from_field_date(field, &d), Value::Facet(facet) => Term::from_facet(field, &facet), Value::Bytes(_) => { - return Err(exceptions::ValueError::py_err(format!( + return Err(exceptions::PyValueError::new_err(format!( "Field `{}` is bytes type not deletable.", field_name ))) } Value::PreTokStr(_pretok) => { - return Err(exceptions::ValueError::py_err(format!( + return Err(exceptions::PyValueError::new_err(format!( "Field `{}` is pretokenized. This is not authorized for delete.", field_name ))) @@ -232,7 +234,7 @@ impl Index { "on-commit" => tv::ReloadPolicy::OnCommit, "oncommit" => tv::ReloadPolicy::OnCommit, "manual" => tv::ReloadPolicy::Manual, - _ => return Err(exceptions::ValueError::py_err( + _ => return Err(exceptions::PyValueError::new_err( "Invalid reload policy, valid choices are: 'manual' and 'OnCommit'" )) }; @@ -313,14 +315,16 @@ impl Index { if let Some(field) = schema.get_field(default_field_name) { let field_entry = schema.get_field_entry(field); if !field_entry.is_indexed() { - return Err(exceptions::ValueError::py_err(format!( + return Err(exceptions::PyValueError::new_err( + format!( "Field `{}` is not set as indexed in the schema.", default_field_name - ))); + ), + )); } default_fields.push(field); } else { - return Err(exceptions::ValueError::py_err(format!( + return Err(exceptions::PyValueError::new_err(format!( "Field `{}` is not defined in the schema.", default_field_name ))); @@ -335,7 +339,11 @@ impl Index { } let parser = tv::query::QueryParser::for_index(&self.index, default_fields); - let query = parser.parse_query(query).map_err(to_pyerr)?; - Ok(Query { inner: query }) + parser.parse_query(query).map_err(to_pyerr)?; + + Ok(Query { + query: Arc::new(query.to_owned()), + parser, + }) } } diff --git a/src/lib.rs b/src/lib.rs index 680fe6d..7adb19e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -80,7 +80,7 @@ fn tantivy(_py: Python, m: &PyModule) -> PyResult<()> { } pub(crate) fn to_pyerr(err: E) -> PyErr { - exceptions::ValueError::py_err(err.to_string()) + exceptions::PyValueError::new_err(err.to_string()) } pub(crate) fn get_field( @@ -88,7 +88,7 @@ pub(crate) fn get_field( field_name: &str, ) -> PyResult { let field = schema.get_field(field_name).ok_or_else(|| { - exceptions::ValueError::py_err(format!( + exceptions::PyValueError::new_err(format!( "Field `{}` is not defined in the schema.", field_name )) diff --git a/src/query.rs b/src/query.rs index 027c453..849318d 100644 --- a/src/query.rs +++ b/src/query.rs @@ -1,16 +1,26 @@ use pyo3::prelude::*; use pyo3::PyObjectProtocol; +use std::sync::Arc; use tantivy as tv; /// Tantivy's Query #[pyclass] pub(crate) struct Query { - pub(crate) inner: Box, + pub(crate) query: Arc, + pub(crate) parser: tv::query::QueryParser, +} + +impl Query { + pub(crate) fn get(&self) -> Box { + self.parser + .parse_query(&self.query) + .expect("Created a query that returns a parse error") + } } #[pyproto] impl PyObjectProtocol for Query { fn __repr__(&self) -> PyResult { - Ok(format!("Query({:?})", self.inner)) + Ok(format!("Query({:?})", self.get())) } } diff --git a/src/schemabuilder.rs b/src/schemabuilder.rs index 6fd7128..683e48f 100644 --- a/src/schemabuilder.rs +++ b/src/schemabuilder.rs @@ -77,7 +77,7 @@ impl SchemaBuilder { "position" => schema::IndexRecordOption::WithFreqsAndPositions, "freq" => schema::IndexRecordOption::WithFreqs, "basic" => schema::IndexRecordOption::Basic, - _ => return Err(exceptions::ValueError::py_err( + _ => return Err(exceptions::PyValueError::new_err( "Invalid index option, valid choices are: 'basic', 'freq' and 'position'" )) }; @@ -97,7 +97,7 @@ impl SchemaBuilder { if let Some(builder) = builder.write().unwrap().as_mut() { builder.add_text_field(name, options); } else { - return Err(exceptions::ValueError::py_err( + return Err(exceptions::PyValueError::new_err( "Schema builder object isn't valid anymore.", )); } @@ -139,7 +139,7 @@ impl SchemaBuilder { if let Some(builder) = builder.write().unwrap().as_mut() { builder.add_i64_field(name, opts); } else { - return Err(exceptions::ValueError::py_err( + return Err(exceptions::PyValueError::new_err( "Schema builder object isn't valid anymore.", )); } @@ -181,7 +181,7 @@ impl SchemaBuilder { if let Some(builder) = builder.write().unwrap().as_mut() { builder.add_u64_field(name, opts); } else { - return Err(exceptions::ValueError::py_err( + return Err(exceptions::PyValueError::new_err( "Schema builder object isn't valid anymore.", )); } @@ -223,7 +223,7 @@ impl SchemaBuilder { if let Some(builder) = builder.write().unwrap().as_mut() { builder.add_date_field(name, opts); } else { - return Err(exceptions::ValueError::py_err( + return Err(exceptions::PyValueError::new_err( "Schema builder object isn't valid anymore.", )); } @@ -239,7 +239,7 @@ impl SchemaBuilder { if let Some(builder) = builder.write().unwrap().as_mut() { builder.add_facet_field(name); } else { - return Err(exceptions::ValueError::py_err( + return Err(exceptions::PyValueError::new_err( "Schema builder object isn't valid anymore.", )); } @@ -260,7 +260,7 @@ impl SchemaBuilder { if let Some(builder) = builder.write().unwrap().as_mut() { builder.add_bytes_field(name); } else { - return Err(exceptions::ValueError::py_err( + return Err(exceptions::PyValueError::new_err( "Schema builder object isn't valid anymore.", )); } @@ -277,7 +277,7 @@ impl SchemaBuilder { let schema = builder.build(); Ok(Schema { inner: schema }) } else { - Err(exceptions::ValueError::py_err( + Err(exceptions::PyValueError::new_err( "Schema builder object isn't valid anymore.", )) } @@ -301,7 +301,7 @@ impl SchemaBuilder { match f.as_ref() { "single" => Some(schema::Cardinality::SingleValue), "multi" => Some(schema::Cardinality::MultiValues), - _ => return Err(exceptions::ValueError::py_err( + _ => return Err(exceptions::PyValueError::new_err( "Invalid index option, valid choices are: 'multivalue' and 'singlevalue'" )), } diff --git a/src/searcher.rs b/src/searcher.rs index f408ef5..a22b29e 100644 --- a/src/searcher.rs +++ b/src/searcher.rs @@ -3,7 +3,7 @@ use crate::document::Document; use crate::query::Query; use crate::{get_field, to_pyerr}; -use pyo3::exceptions::ValueError; +use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::PyObjectProtocol; use tantivy as tv; @@ -125,7 +125,7 @@ impl Searcher { .and_offset(offset) .order_by_u64_field(field); let top_docs_handle = multicollector.add_collector(collector); - let ret = self.inner.search(&query.inner, &multicollector); + let ret = self.inner.search(&query.get(), &multicollector); match ret { Ok(mut r) => { @@ -138,12 +138,12 @@ impl Searcher { .collect(); (r, result) } - Err(e) => return Err(ValueError::py_err(e.to_string())), + Err(e) => return Err(PyValueError::new_err(e.to_string())), } } else { let collector = TopDocs::with_limit(limit).and_offset(offset); let top_docs_handle = multicollector.add_collector(collector); - let ret = self.inner.search(&query.inner, &multicollector); + let ret = self.inner.search(&query.get(), &multicollector); match ret { Ok(mut r) => { @@ -156,7 +156,7 @@ impl Searcher { .collect(); (r, result) } - Err(e) => return Err(ValueError::py_err(e.to_string())), + Err(e) => return Err(PyValueError::new_err(e.to_string())), } } };