feat: tantivy_0.20.1_upgrade (#82)
* Added api changes from tantivy-0.20.1
* lint fix
* Increase test writer heap to 10_000_000
* Revert test back to original check
* Update src/searcher.rs

Co-authored-by: Cameron <561860+wallies@users.noreply.github.com>

---------

Co-authored-by: Caleb Hattingh <caleb.hattingh@gmail.com>
Co-authored-by: Cameron <561860+wallies@users.noreply.github.com>

master
parent
1fe7244af7
commit
a266f41974
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "tantivy"
|
||||
version = "0.19.2"
|
||||
version = "0.20.1"
|
||||
readme = "README.md"
|
||||
authors = ["Damir Jelić <poljar@termina.org.uk>"]
|
||||
edition = "2018"
|
||||
|
@ -15,7 +15,7 @@ pyo3-build-config = "0.18.0"
|
|||
|
||||
[dependencies]
|
||||
chrono = "0.4.23"
|
||||
tantivy = "0.19.2"
|
||||
tantivy = "0.20.1"
|
||||
itertools = "0.10.5"
|
||||
futures = "0.3.26"
|
||||
serde_json = "1.0.91"
|
||||
|
|
|
@ -331,7 +331,7 @@ impl Index {
|
|||
let schema = self.index.schema();
|
||||
if let Some(default_field_names_vec) = default_field_names {
|
||||
for default_field_name in &default_field_names_vec {
|
||||
if let Some(field) = schema.get_field(default_field_name) {
|
||||
if let Ok(field) = schema.get_field(default_field_name) {
|
||||
let field_entry = schema.get_field_entry(field);
|
||||
if !field_entry.is_indexed() {
|
||||
return Err(exceptions::PyValueError::new_err(
|
||||
|
@ -385,10 +385,11 @@ impl Index {
|
|||
];
|
||||
|
||||
for (name, lang) in &analyzers {
|
||||
let an = TextAnalyzer::from(SimpleTokenizer)
|
||||
let an = TextAnalyzer::builder(SimpleTokenizer::default())
|
||||
.filter(RemoveLongFilter::limit(40))
|
||||
.filter(LowerCaser)
|
||||
.filter(Stemmer::new(*lang));
|
||||
.filter(Stemmer::new(*lang))
|
||||
.build();
|
||||
index.tokenizers().register(name, an);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -86,7 +86,7 @@ pub(crate) fn get_field(
|
|||
schema: &tv::schema::Schema,
|
||||
field_name: &str,
|
||||
) -> PyResult<tv::schema::Field> {
|
||||
let field = schema.get_field(field_name).ok_or_else(|| {
|
||||
let field = schema.get_field(field_name).map_err(|_err| {
|
||||
exceptions::PyValueError::new_err(format!(
|
||||
"Field `{field_name}` is not defined in the schema."
|
||||
))
|
||||
|
|
|
@ -110,13 +110,13 @@ impl SchemaBuilder {
|
|||
///
|
||||
/// Returns the associated field handle.
|
||||
/// Raises a ValueError if there was an error with the field creation.
|
||||
#[pyo3(signature = (name, stored = false, indexed = false, fast = None))]
|
||||
#[pyo3(signature = (name, stored = false, indexed = false, fast = false))]
|
||||
fn add_integer_field(
|
||||
&mut self,
|
||||
name: &str,
|
||||
stored: bool,
|
||||
indexed: bool,
|
||||
fast: Option<&str>,
|
||||
fast: bool,
|
||||
) -> PyResult<Self> {
|
||||
let builder = &mut self.builder;
|
||||
|
||||
|
@ -132,13 +132,13 @@ impl SchemaBuilder {
|
|||
Ok(self.clone())
|
||||
}
|
||||
|
||||
#[pyo3(signature = (name, stored = false, indexed = false, fast = None))]
|
||||
#[pyo3(signature = (name, stored = false, indexed = false, fast = false))]
|
||||
fn add_float_field(
|
||||
&mut self,
|
||||
name: &str,
|
||||
stored: bool,
|
||||
indexed: bool,
|
||||
fast: Option<&str>,
|
||||
fast: bool,
|
||||
) -> PyResult<Self> {
|
||||
let builder = &mut self.builder;
|
||||
|
||||
|
@ -174,13 +174,13 @@ impl SchemaBuilder {
|
|||
///
|
||||
/// Returns the associated field handle.
|
||||
/// Raises a ValueError if there was an error with the field creation.
|
||||
#[pyo3(signature = (name, stored = false, indexed = false, fast = None))]
|
||||
#[pyo3(signature = (name, stored = false, indexed = false, fast = false))]
|
||||
fn add_unsigned_field(
|
||||
&mut self,
|
||||
name: &str,
|
||||
stored: bool,
|
||||
indexed: bool,
|
||||
fast: Option<&str>,
|
||||
fast: bool,
|
||||
) -> PyResult<Self> {
|
||||
let builder = &mut self.builder;
|
||||
|
||||
|
@ -216,13 +216,13 @@ impl SchemaBuilder {
|
|||
///
|
||||
/// Returns the associated field handle.
|
||||
/// Raises a ValueError if there was an error with the field creation.
|
||||
#[pyo3(signature = (name, stored = false, indexed = false, fast = None))]
|
||||
#[pyo3(signature = (name, stored = false, indexed = false, fast = false))]
|
||||
fn add_date_field(
|
||||
&mut self,
|
||||
name: &str,
|
||||
stored: bool,
|
||||
indexed: bool,
|
||||
fast: Option<&str>,
|
||||
fast: bool,
|
||||
) -> PyResult<Self> {
|
||||
let builder = &mut self.builder;
|
||||
|
||||
|
@ -233,21 +233,8 @@ impl SchemaBuilder {
|
|||
if indexed {
|
||||
opts = opts.set_indexed();
|
||||
}
|
||||
let fast = match fast {
|
||||
Some(f) => {
|
||||
let f = f.to_lowercase();
|
||||
match f.as_ref() {
|
||||
"single" => Some(schema::Cardinality::SingleValue),
|
||||
"multi" => Some(schema::Cardinality::MultiValues),
|
||||
_ => return Err(exceptions::PyValueError::new_err(
|
||||
"Invalid index option, valid choices are: 'multi' and 'single'"
|
||||
)),
|
||||
}
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
if let Some(f) = fast {
|
||||
opts = opts.set_fast(f);
|
||||
if fast {
|
||||
opts = opts.set_fast();
|
||||
}
|
||||
|
||||
if let Some(builder) = builder.write().unwrap().as_mut() {
|
||||
|
@ -368,33 +355,12 @@ impl SchemaBuilder {
|
|||
fn build_numeric_option(
|
||||
stored: bool,
|
||||
indexed: bool,
|
||||
fast: Option<&str>,
|
||||
fast: bool,
|
||||
) -> PyResult<schema::NumericOptions> {
|
||||
let opts = schema::NumericOptions::default();
|
||||
|
||||
let opts = if stored { opts.set_stored() } else { opts };
|
||||
let opts = if indexed { opts.set_indexed() } else { opts };
|
||||
|
||||
let fast = match fast {
|
||||
Some(f) => {
|
||||
let f = f.to_lowercase();
|
||||
match f.as_ref() {
|
||||
"single" => Some(schema::Cardinality::SingleValue),
|
||||
"multi" => Some(schema::Cardinality::MultiValues),
|
||||
_ => return Err(exceptions::PyValueError::new_err(
|
||||
"Invalid index option, valid choices are: 'multivalue' and 'singlevalue'"
|
||||
)),
|
||||
}
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
let opts = if let Some(f) = fast {
|
||||
opts.set_fast(f)
|
||||
} else {
|
||||
opts
|
||||
};
|
||||
|
||||
let opts = if fast { opts.set_fast() } else { opts };
|
||||
Ok(opts)
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#![allow(clippy::new_ret_no_self)]
|
||||
|
||||
use crate::{document::Document, get_field, query::Query, to_pyerr};
|
||||
use crate::{document::Document, query::Query, to_pyerr};
|
||||
use pyo3::{exceptions::PyValueError, prelude::*};
|
||||
use tantivy as tv;
|
||||
use tantivy::collector::{Count, MultiCollector, TopDocs};
|
||||
|
@ -113,10 +113,9 @@ impl Searcher {
|
|||
|
||||
let (mut multifruit, hits) = {
|
||||
if let Some(order_by) = order_by_field {
|
||||
let field = get_field(&self.inner.index().schema(), order_by)?;
|
||||
let collector = TopDocs::with_limit(limit)
|
||||
.and_offset(offset)
|
||||
.order_by_u64_field(field);
|
||||
.order_by_u64_field(order_by);
|
||||
let top_docs_handle = multicollector.add_collector(collector);
|
||||
let ret = self.inner.search(query.get(), &multicollector);
|
||||
|
||||
|
|
|
@ -26,7 +26,7 @@ def create_index(dir=None):
|
|||
# assume all tests will use the same documents for now
|
||||
# other methods may set up function-local indexes
|
||||
index = Index(schema(), dir)
|
||||
writer = index.writer()
|
||||
writer = index.writer(10_000_000, 1)
|
||||
|
||||
# 2 ways of adding documents
|
||||
# 1
|
||||
|
@ -77,7 +77,7 @@ def create_index(dir=None):
|
|||
|
||||
def create_index_with_numeric_fields(dir=None):
|
||||
index = Index(schema_numeric_fields(), dir)
|
||||
writer = index.writer()
|
||||
writer = index.writer(10_000_000, 1)
|
||||
|
||||
doc = Document()
|
||||
doc.add_integer("id", 1)
|
||||
|
@ -260,13 +260,13 @@ class TestClass(object):
|
|||
|
||||
def test_and_query_parser_default_fields(self, ram_index):
|
||||
query = ram_index.parse_query("winter", default_field_names=["title"])
|
||||
assert repr(query) == """Query(TermQuery(Term(type=Str, field=0, "winter")))"""
|
||||
assert repr(query) == """Query(TermQuery(Term(field=0, type=Str, "winter")))"""
|
||||
|
||||
def test_and_query_parser_default_fields_undefined(self, ram_index):
|
||||
query = ram_index.parse_query("winter")
|
||||
assert (
|
||||
repr(query)
|
||||
== """Query(BooleanQuery { subqueries: [(Should, TermQuery(Term(type=Str, field=0, "winter"))), (Should, TermQuery(Term(type=Str, field=1, "winter")))] })"""
|
||||
== """Query(BooleanQuery { subqueries: [(Should, TermQuery(Term(field=0, type=Str, "winter"))), (Should, TermQuery(Term(field=1, type=Str, "winter")))] })"""
|
||||
)
|
||||
|
||||
def test_query_errors(self, ram_index):
|
||||
|
@ -278,7 +278,7 @@ class TestClass(object):
|
|||
def test_order_by_search(self):
|
||||
schema = (
|
||||
SchemaBuilder()
|
||||
.add_unsigned_field("order", fast="single")
|
||||
.add_unsigned_field("order", fast=True)
|
||||
.add_text_field("title", stored=True)
|
||||
.build()
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue