feat: tantivy_0.20.1_upgrade (#82)
* Added api changes from tantivy-0.20.1
* lint fix
* Increase test writer heap to 10_000_000
* Revert test back to original check
* Update src/searcher.rs

Co-authored-by: Cameron <561860+wallies@users.noreply.github.com>

---------

Co-authored-by: Caleb Hattingh <caleb.hattingh@gmail.com>
Co-authored-by: Cameron <561860+wallies@users.noreply.github.com>
master
parent
1fe7244af7
commit
a266f41974
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "tantivy"
|
name = "tantivy"
|
||||||
version = "0.19.2"
|
version = "0.20.1"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
authors = ["Damir Jelić <poljar@termina.org.uk>"]
|
authors = ["Damir Jelić <poljar@termina.org.uk>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
@ -15,11 +15,11 @@ pyo3-build-config = "0.18.0"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
chrono = "0.4.23"
|
chrono = "0.4.23"
|
||||||
tantivy = "0.19.2"
|
tantivy = "0.20.1"
|
||||||
itertools = "0.10.5"
|
itertools = "0.10.5"
|
||||||
futures = "0.3.26"
|
futures = "0.3.26"
|
||||||
serde_json = "1.0.91"
|
serde_json = "1.0.91"
|
||||||
|
|
||||||
[dependencies.pyo3]
|
[dependencies.pyo3]
|
||||||
version = "0.18.0"
|
version = "0.18.0"
|
||||||
features = ["extension-module"]
|
features = ["extension-module"]
|
||||||
|
|
|
@ -331,7 +331,7 @@ impl Index {
|
||||||
let schema = self.index.schema();
|
let schema = self.index.schema();
|
||||||
if let Some(default_field_names_vec) = default_field_names {
|
if let Some(default_field_names_vec) = default_field_names {
|
||||||
for default_field_name in &default_field_names_vec {
|
for default_field_name in &default_field_names_vec {
|
||||||
if let Some(field) = schema.get_field(default_field_name) {
|
if let Ok(field) = schema.get_field(default_field_name) {
|
||||||
let field_entry = schema.get_field_entry(field);
|
let field_entry = schema.get_field_entry(field);
|
||||||
if !field_entry.is_indexed() {
|
if !field_entry.is_indexed() {
|
||||||
return Err(exceptions::PyValueError::new_err(
|
return Err(exceptions::PyValueError::new_err(
|
||||||
|
@ -385,10 +385,11 @@ impl Index {
|
||||||
];
|
];
|
||||||
|
|
||||||
for (name, lang) in &analyzers {
|
for (name, lang) in &analyzers {
|
||||||
let an = TextAnalyzer::from(SimpleTokenizer)
|
let an = TextAnalyzer::builder(SimpleTokenizer::default())
|
||||||
.filter(RemoveLongFilter::limit(40))
|
.filter(RemoveLongFilter::limit(40))
|
||||||
.filter(LowerCaser)
|
.filter(LowerCaser)
|
||||||
.filter(Stemmer::new(*lang));
|
.filter(Stemmer::new(*lang))
|
||||||
|
.build();
|
||||||
index.tokenizers().register(name, an);
|
index.tokenizers().register(name, an);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -86,7 +86,7 @@ pub(crate) fn get_field(
|
||||||
schema: &tv::schema::Schema,
|
schema: &tv::schema::Schema,
|
||||||
field_name: &str,
|
field_name: &str,
|
||||||
) -> PyResult<tv::schema::Field> {
|
) -> PyResult<tv::schema::Field> {
|
||||||
let field = schema.get_field(field_name).ok_or_else(|| {
|
let field = schema.get_field(field_name).map_err(|_err| {
|
||||||
exceptions::PyValueError::new_err(format!(
|
exceptions::PyValueError::new_err(format!(
|
||||||
"Field `{field_name}` is not defined in the schema."
|
"Field `{field_name}` is not defined in the schema."
|
||||||
))
|
))
|
||||||
|
|
|
@ -110,13 +110,13 @@ impl SchemaBuilder {
|
||||||
///
|
///
|
||||||
/// Returns the associated field handle.
|
/// Returns the associated field handle.
|
||||||
/// Raises a ValueError if there was an error with the field creation.
|
/// Raises a ValueError if there was an error with the field creation.
|
||||||
#[pyo3(signature = (name, stored = false, indexed = false, fast = None))]
|
#[pyo3(signature = (name, stored = false, indexed = false, fast = false))]
|
||||||
fn add_integer_field(
|
fn add_integer_field(
|
||||||
&mut self,
|
&mut self,
|
||||||
name: &str,
|
name: &str,
|
||||||
stored: bool,
|
stored: bool,
|
||||||
indexed: bool,
|
indexed: bool,
|
||||||
fast: Option<&str>,
|
fast: bool,
|
||||||
) -> PyResult<Self> {
|
) -> PyResult<Self> {
|
||||||
let builder = &mut self.builder;
|
let builder = &mut self.builder;
|
||||||
|
|
||||||
|
@ -132,13 +132,13 @@ impl SchemaBuilder {
|
||||||
Ok(self.clone())
|
Ok(self.clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyo3(signature = (name, stored = false, indexed = false, fast = None))]
|
#[pyo3(signature = (name, stored = false, indexed = false, fast = false))]
|
||||||
fn add_float_field(
|
fn add_float_field(
|
||||||
&mut self,
|
&mut self,
|
||||||
name: &str,
|
name: &str,
|
||||||
stored: bool,
|
stored: bool,
|
||||||
indexed: bool,
|
indexed: bool,
|
||||||
fast: Option<&str>,
|
fast: bool,
|
||||||
) -> PyResult<Self> {
|
) -> PyResult<Self> {
|
||||||
let builder = &mut self.builder;
|
let builder = &mut self.builder;
|
||||||
|
|
||||||
|
@ -174,13 +174,13 @@ impl SchemaBuilder {
|
||||||
///
|
///
|
||||||
/// Returns the associated field handle.
|
/// Returns the associated field handle.
|
||||||
/// Raises a ValueError if there was an error with the field creation.
|
/// Raises a ValueError if there was an error with the field creation.
|
||||||
#[pyo3(signature = (name, stored = false, indexed = false, fast = None))]
|
#[pyo3(signature = (name, stored = false, indexed = false, fast = false))]
|
||||||
fn add_unsigned_field(
|
fn add_unsigned_field(
|
||||||
&mut self,
|
&mut self,
|
||||||
name: &str,
|
name: &str,
|
||||||
stored: bool,
|
stored: bool,
|
||||||
indexed: bool,
|
indexed: bool,
|
||||||
fast: Option<&str>,
|
fast: bool,
|
||||||
) -> PyResult<Self> {
|
) -> PyResult<Self> {
|
||||||
let builder = &mut self.builder;
|
let builder = &mut self.builder;
|
||||||
|
|
||||||
|
@ -216,13 +216,13 @@ impl SchemaBuilder {
|
||||||
///
|
///
|
||||||
/// Returns the associated field handle.
|
/// Returns the associated field handle.
|
||||||
/// Raises a ValueError if there was an error with the field creation.
|
/// Raises a ValueError if there was an error with the field creation.
|
||||||
#[pyo3(signature = (name, stored = false, indexed = false, fast = None))]
|
#[pyo3(signature = (name, stored = false, indexed = false, fast = false))]
|
||||||
fn add_date_field(
|
fn add_date_field(
|
||||||
&mut self,
|
&mut self,
|
||||||
name: &str,
|
name: &str,
|
||||||
stored: bool,
|
stored: bool,
|
||||||
indexed: bool,
|
indexed: bool,
|
||||||
fast: Option<&str>,
|
fast: bool,
|
||||||
) -> PyResult<Self> {
|
) -> PyResult<Self> {
|
||||||
let builder = &mut self.builder;
|
let builder = &mut self.builder;
|
||||||
|
|
||||||
|
@ -233,21 +233,8 @@ impl SchemaBuilder {
|
||||||
if indexed {
|
if indexed {
|
||||||
opts = opts.set_indexed();
|
opts = opts.set_indexed();
|
||||||
}
|
}
|
||||||
let fast = match fast {
|
if fast {
|
||||||
Some(f) => {
|
opts = opts.set_fast();
|
||||||
let f = f.to_lowercase();
|
|
||||||
match f.as_ref() {
|
|
||||||
"single" => Some(schema::Cardinality::SingleValue),
|
|
||||||
"multi" => Some(schema::Cardinality::MultiValues),
|
|
||||||
_ => return Err(exceptions::PyValueError::new_err(
|
|
||||||
"Invalid index option, valid choices are: 'multi' and 'single'"
|
|
||||||
)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None => None,
|
|
||||||
};
|
|
||||||
if let Some(f) = fast {
|
|
||||||
opts = opts.set_fast(f);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(builder) = builder.write().unwrap().as_mut() {
|
if let Some(builder) = builder.write().unwrap().as_mut() {
|
||||||
|
@ -368,33 +355,12 @@ impl SchemaBuilder {
|
||||||
fn build_numeric_option(
|
fn build_numeric_option(
|
||||||
stored: bool,
|
stored: bool,
|
||||||
indexed: bool,
|
indexed: bool,
|
||||||
fast: Option<&str>,
|
fast: bool,
|
||||||
) -> PyResult<schema::NumericOptions> {
|
) -> PyResult<schema::NumericOptions> {
|
||||||
let opts = schema::NumericOptions::default();
|
let opts = schema::NumericOptions::default();
|
||||||
|
|
||||||
let opts = if stored { opts.set_stored() } else { opts };
|
let opts = if stored { opts.set_stored() } else { opts };
|
||||||
let opts = if indexed { opts.set_indexed() } else { opts };
|
let opts = if indexed { opts.set_indexed() } else { opts };
|
||||||
|
let opts = if fast { opts.set_fast() } else { opts };
|
||||||
let fast = match fast {
|
|
||||||
Some(f) => {
|
|
||||||
let f = f.to_lowercase();
|
|
||||||
match f.as_ref() {
|
|
||||||
"single" => Some(schema::Cardinality::SingleValue),
|
|
||||||
"multi" => Some(schema::Cardinality::MultiValues),
|
|
||||||
_ => return Err(exceptions::PyValueError::new_err(
|
|
||||||
"Invalid index option, valid choices are: 'multivalue' and 'singlevalue'"
|
|
||||||
)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None => None,
|
|
||||||
};
|
|
||||||
|
|
||||||
let opts = if let Some(f) = fast {
|
|
||||||
opts.set_fast(f)
|
|
||||||
} else {
|
|
||||||
opts
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(opts)
|
Ok(opts)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#![allow(clippy::new_ret_no_self)]
|
#![allow(clippy::new_ret_no_self)]
|
||||||
|
|
||||||
use crate::{document::Document, get_field, query::Query, to_pyerr};
|
use crate::{document::Document, query::Query, to_pyerr};
|
||||||
use pyo3::{exceptions::PyValueError, prelude::*};
|
use pyo3::{exceptions::PyValueError, prelude::*};
|
||||||
use tantivy as tv;
|
use tantivy as tv;
|
||||||
use tantivy::collector::{Count, MultiCollector, TopDocs};
|
use tantivy::collector::{Count, MultiCollector, TopDocs};
|
||||||
|
@ -113,10 +113,9 @@ impl Searcher {
|
||||||
|
|
||||||
let (mut multifruit, hits) = {
|
let (mut multifruit, hits) = {
|
||||||
if let Some(order_by) = order_by_field {
|
if let Some(order_by) = order_by_field {
|
||||||
let field = get_field(&self.inner.index().schema(), order_by)?;
|
|
||||||
let collector = TopDocs::with_limit(limit)
|
let collector = TopDocs::with_limit(limit)
|
||||||
.and_offset(offset)
|
.and_offset(offset)
|
||||||
.order_by_u64_field(field);
|
.order_by_u64_field(order_by);
|
||||||
let top_docs_handle = multicollector.add_collector(collector);
|
let top_docs_handle = multicollector.add_collector(collector);
|
||||||
let ret = self.inner.search(query.get(), &multicollector);
|
let ret = self.inner.search(query.get(), &multicollector);
|
||||||
|
|
||||||
|
|
|
@ -26,7 +26,7 @@ def create_index(dir=None):
|
||||||
# assume all tests will use the same documents for now
|
# assume all tests will use the same documents for now
|
||||||
# other methods may set up function-local indexes
|
# other methods may set up function-local indexes
|
||||||
index = Index(schema(), dir)
|
index = Index(schema(), dir)
|
||||||
writer = index.writer()
|
writer = index.writer(10_000_000, 1)
|
||||||
|
|
||||||
# 2 ways of adding documents
|
# 2 ways of adding documents
|
||||||
# 1
|
# 1
|
||||||
|
@ -77,7 +77,7 @@ def create_index(dir=None):
|
||||||
|
|
||||||
def create_index_with_numeric_fields(dir=None):
|
def create_index_with_numeric_fields(dir=None):
|
||||||
index = Index(schema_numeric_fields(), dir)
|
index = Index(schema_numeric_fields(), dir)
|
||||||
writer = index.writer()
|
writer = index.writer(10_000_000, 1)
|
||||||
|
|
||||||
doc = Document()
|
doc = Document()
|
||||||
doc.add_integer("id", 1)
|
doc.add_integer("id", 1)
|
||||||
|
@ -260,13 +260,13 @@ class TestClass(object):
|
||||||
|
|
||||||
def test_and_query_parser_default_fields(self, ram_index):
|
def test_and_query_parser_default_fields(self, ram_index):
|
||||||
query = ram_index.parse_query("winter", default_field_names=["title"])
|
query = ram_index.parse_query("winter", default_field_names=["title"])
|
||||||
assert repr(query) == """Query(TermQuery(Term(type=Str, field=0, "winter")))"""
|
assert repr(query) == """Query(TermQuery(Term(field=0, type=Str, "winter")))"""
|
||||||
|
|
||||||
def test_and_query_parser_default_fields_undefined(self, ram_index):
|
def test_and_query_parser_default_fields_undefined(self, ram_index):
|
||||||
query = ram_index.parse_query("winter")
|
query = ram_index.parse_query("winter")
|
||||||
assert (
|
assert (
|
||||||
repr(query)
|
repr(query)
|
||||||
== """Query(BooleanQuery { subqueries: [(Should, TermQuery(Term(type=Str, field=0, "winter"))), (Should, TermQuery(Term(type=Str, field=1, "winter")))] })"""
|
== """Query(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, "winter"))), (Should, TermQuery(Term(field=1, type=Str, "winter")))] })"""
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_query_errors(self, ram_index):
|
def test_query_errors(self, ram_index):
|
||||||
|
@ -278,7 +278,7 @@ class TestClass(object):
|
||||||
def test_order_by_search(self):
|
def test_order_by_search(self):
|
||||||
schema = (
|
schema = (
|
||||||
SchemaBuilder()
|
SchemaBuilder()
|
||||||
.add_unsigned_field("order", fast="single")
|
.add_unsigned_field("order", fast=True)
|
||||||
.add_text_field("title", stored=True)
|
.add_text_field("title", stored=True)
|
||||||
.build()
|
.build()
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue