commit
ff51f61a1f
13
Cargo.toml
13
Cargo.toml
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "tantivy"
|
name = "tantivy"
|
||||||
version = "0.16.0"
|
version = "0.17.0"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
authors = ["Damir Jelić <poljar@termina.org.uk>"]
|
authors = ["Damir Jelić <poljar@termina.org.uk>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
@ -11,14 +11,15 @@ name = "tantivy"
|
||||||
crate-type = ["cdylib"]
|
crate-type = ["cdylib"]
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
pyo3-build-config = "0.15.1"
|
pyo3-build-config = "0.16.3"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
chrono = "0.4.19"
|
chrono = "0.4.19"
|
||||||
tantivy = "0.16.1"
|
tantivy = "0.17"
|
||||||
itertools = "0.10.0"
|
itertools = "0.10.3"
|
||||||
futures = "0.3.5"
|
futures = "0.3.21"
|
||||||
|
serde_json = "1.0.64"
|
||||||
|
|
||||||
[dependencies.pyo3]
|
[dependencies.pyo3]
|
||||||
version = "0.15.1"
|
version = "0.16.3"
|
||||||
features = ["extension-module"]
|
features = ["extension-module"]
|
||||||
|
|
|
@ -14,10 +14,37 @@ use chrono::{offset::TimeZone, Datelike, Timelike, Utc};
|
||||||
use tantivy as tv;
|
use tantivy as tv;
|
||||||
|
|
||||||
use crate::{facet::Facet, to_pyerr};
|
use crate::{facet::Facet, to_pyerr};
|
||||||
use pyo3::{PyMappingProtocol, PyObjectProtocol};
|
use serde_json::Value as JsonValue;
|
||||||
use std::{collections::BTreeMap, fmt};
|
use std::{
|
||||||
|
collections::{BTreeMap, HashMap},
|
||||||
|
fmt,
|
||||||
|
};
|
||||||
use tantivy::schema::Value;
|
use tantivy::schema::Value;
|
||||||
|
|
||||||
|
fn value_to_object(val: &JsonValue, py: Python<'_>) -> PyObject {
|
||||||
|
match val {
|
||||||
|
JsonValue::Null => py.None(),
|
||||||
|
JsonValue::Bool(b) => b.to_object(py),
|
||||||
|
JsonValue::Number(n) => match n {
|
||||||
|
n if n.is_i64() => n.as_i64().to_object(py),
|
||||||
|
n if n.is_u64() => n.as_u64().to_object(py),
|
||||||
|
n if n.is_f64() => n.as_f64().to_object(py),
|
||||||
|
_ => panic!("number too large"),
|
||||||
|
},
|
||||||
|
JsonValue::String(s) => s.to_object(py),
|
||||||
|
JsonValue::Array(v) => {
|
||||||
|
let inner: Vec<_> =
|
||||||
|
v.iter().map(|x| value_to_object(x, py)).collect();
|
||||||
|
inner.to_object(py)
|
||||||
|
}
|
||||||
|
JsonValue::Object(m) => {
|
||||||
|
let inner: HashMap<_, _> =
|
||||||
|
m.iter().map(|(k, v)| (k, value_to_object(v, py))).collect();
|
||||||
|
inner.to_object(py)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn value_to_py(py: Python, value: &Value) -> PyResult<PyObject> {
|
fn value_to_py(py: Python, value: &Value) -> PyResult<PyObject> {
|
||||||
Ok(match value {
|
Ok(match value {
|
||||||
Value::Str(text) => text.into_py(py),
|
Value::Str(text) => text.into_py(py),
|
||||||
|
@ -42,6 +69,13 @@ fn value_to_py(py: Python, value: &Value) -> PyResult<PyObject> {
|
||||||
)?
|
)?
|
||||||
.into_py(py),
|
.into_py(py),
|
||||||
Value::Facet(f) => Facet { inner: f.clone() }.into_py(py),
|
Value::Facet(f) => Facet { inner: f.clone() }.into_py(py),
|
||||||
|
Value::JsonObject(json_object) => {
|
||||||
|
let inner: HashMap<_, _> = json_object
|
||||||
|
.iter()
|
||||||
|
.map(|(k, v)| (k, value_to_object(&v, py)))
|
||||||
|
.collect();
|
||||||
|
inner.to_object(py)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,6 +92,9 @@ fn value_to_string(value: &Value) -> String {
|
||||||
// TODO implement me
|
// TODO implement me
|
||||||
unimplemented!();
|
unimplemented!();
|
||||||
}
|
}
|
||||||
|
Value::JsonObject(json_object) => {
|
||||||
|
serde_json::to_string(&json_object).unwrap()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -293,6 +330,17 @@ impl Document {
|
||||||
add_value(self, field_name, bytes);
|
add_value(self, field_name, bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Add a bytes value to the document.
|
||||||
|
///
|
||||||
|
/// Args:
|
||||||
|
/// field_name (str): The field for which we are adding the bytes.
|
||||||
|
/// value (str): The json object that will be added to the document.
|
||||||
|
fn add_json(&mut self, field_name: String, json: &str) {
|
||||||
|
let json_object: serde_json::Value =
|
||||||
|
serde_json::from_str(json).unwrap();
|
||||||
|
add_value(self, field_name, json_object);
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the number of fields that have been added to the document
|
/// Returns the number of fields that have been added to the document
|
||||||
#[getter]
|
#[getter]
|
||||||
fn num_fields(&self) -> usize {
|
fn num_fields(&self) -> usize {
|
||||||
|
@ -337,6 +385,16 @@ impl Document {
|
||||||
.map(|value| value_to_py(py, value))
|
.map(|value| value_to_py(py, value))
|
||||||
.collect::<PyResult<Vec<_>>>()
|
.collect::<PyResult<Vec<_>>>()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn __getitem__(&self, field_name: &str) -> PyResult<Vec<PyObject>> {
|
||||||
|
let gil = Python::acquire_gil();
|
||||||
|
let py = gil.python();
|
||||||
|
self.get_all(py, field_name)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Python `repr()` of the document: its `Debug` rendering.
fn __repr__(&self) -> PyResult<String> {
    let rendered = format!("{:?}", self);
    Ok(rendered)
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Document {
|
impl Document {
|
||||||
|
@ -350,19 +408,3 @@ impl Document {
|
||||||
.flat_map(|values| values.iter())
|
.flat_map(|values| values.iter())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyproto]
|
|
||||||
impl PyMappingProtocol for Document {
|
|
||||||
fn __getitem__(&self, field_name: &str) -> PyResult<Vec<PyObject>> {
|
|
||||||
let gil = Python::acquire_gil();
|
|
||||||
let py = gil.python();
|
|
||||||
self.get_all(py, field_name)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[pyproto]
|
|
||||||
impl PyObjectProtocol for Document {
|
|
||||||
fn __repr__(&self) -> PyResult<String> {
|
|
||||||
Ok(format!("{:?}", self))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
use pyo3::{basic::PyObjectProtocol, prelude::*, types::PyType};
|
use pyo3::{prelude::*, types::PyType};
|
||||||
use tantivy::schema;
|
use tantivy::schema;
|
||||||
|
|
||||||
/// A Facet represents a point in a given hierarchy.
|
/// A Facet represents a point in a given hierarchy.
|
||||||
|
@ -63,10 +63,7 @@ impl Facet {
|
||||||
fn to_path_str(&self) -> String {
|
fn to_path_str(&self) -> String {
|
||||||
self.inner.to_string()
|
self.inner.to_string()
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
#[pyproto]
|
|
||||||
impl PyObjectProtocol for Facet {
|
|
||||||
fn __repr__(&self) -> PyResult<String> {
|
fn __repr__(&self) -> PyResult<String> {
|
||||||
Ok(format!("Facet({})", self.to_path_str()))
|
Ok(format!("Facet({})", self.to_path_str()))
|
||||||
}
|
}
|
||||||
|
|
14
src/index.rs
14
src/index.rs
|
@ -41,7 +41,7 @@ impl IndexWriter {
|
||||||
pub fn add_document(&mut self, doc: &Document) -> PyResult<u64> {
|
pub fn add_document(&mut self, doc: &Document) -> PyResult<u64> {
|
||||||
let named_doc = NamedFieldDocument(doc.field_values.clone());
|
let named_doc = NamedFieldDocument(doc.field_values.clone());
|
||||||
let doc = self.schema.convert_named_doc(named_doc).map_err(to_pyerr)?;
|
let doc = self.schema.convert_named_doc(named_doc).map_err(to_pyerr)?;
|
||||||
Ok(self.inner_index_writer.add_document(doc))
|
self.inner_index_writer.add_document(doc).map_err(to_pyerr)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Helper for the `add_document` method, but passing a json string.
|
/// Helper for the `add_document` method, but passing a json string.
|
||||||
|
@ -55,7 +55,7 @@ impl IndexWriter {
|
||||||
pub fn add_json(&mut self, json: &str) -> PyResult<u64> {
|
pub fn add_json(&mut self, json: &str) -> PyResult<u64> {
|
||||||
let doc = self.schema.parse_document(json).map_err(to_pyerr)?;
|
let doc = self.schema.parse_document(json).map_err(to_pyerr)?;
|
||||||
let opstamp = self.inner_index_writer.add_document(doc);
|
let opstamp = self.inner_index_writer.add_document(doc);
|
||||||
Ok(opstamp)
|
opstamp.map_err(to_pyerr)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Commits all of the pending changes
|
/// Commits all of the pending changes
|
||||||
|
@ -134,6 +134,12 @@ impl IndexWriter {
|
||||||
field_name
|
field_name
|
||||||
)))
|
)))
|
||||||
}
|
}
|
||||||
|
Value::JsonObject(_) => {
|
||||||
|
return Err(exceptions::PyValueError::new_err(format!(
|
||||||
|
"Field `{}` is json object type not deletable.",
|
||||||
|
field_name
|
||||||
|
)))
|
||||||
|
}
|
||||||
};
|
};
|
||||||
Ok(self.inner_index_writer.delete_term(term))
|
Ok(self.inner_index_writer.delete_term(term))
|
||||||
}
|
}
|
||||||
|
@ -281,7 +287,7 @@ impl Index {
|
||||||
#[staticmethod]
|
#[staticmethod]
|
||||||
fn exists(path: &str) -> PyResult<bool> {
|
fn exists(path: &str) -> PyResult<bool> {
|
||||||
let directory = MmapDirectory::open(path).map_err(to_pyerr)?;
|
let directory = MmapDirectory::open(path).map_err(to_pyerr)?;
|
||||||
Ok(tv::Index::exists(&directory).unwrap())
|
tv::Index::exists(&directory).map_err(to_pyerr)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The schema of the current index.
|
/// The schema of the current index.
|
||||||
|
@ -304,7 +310,7 @@ impl Index {
|
||||||
///
|
///
|
||||||
/// Args:
|
/// Args:
|
||||||
/// query: the query, following the tantivy query language.
|
/// query: the query, following the tantivy query language.
|
||||||
/// default_fields (List[Field]): A list of fields used to search if no
|
/// default_field_names (List[str]): A list of field names used to search if no
|
||||||
/// field is specified in the query.
|
/// field is specified in the query.
|
||||||
///
|
///
|
||||||
#[args(reload_policy = "RELOAD_POLICY")]
|
#[args(reload_policy = "RELOAD_POLICY")]
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
use pyo3::{prelude::*, PyObjectProtocol};
|
use pyo3::prelude::*;
|
||||||
use tantivy as tv;
|
use tantivy as tv;
|
||||||
|
|
||||||
/// Tantivy's Query
|
/// Tantivy's Query
|
||||||
|
@ -13,8 +13,8 @@ impl Query {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyproto]
|
#[pymethods]
|
||||||
impl PyObjectProtocol for Query {
|
impl Query {
|
||||||
fn __repr__(&self) -> PyResult<String> {
|
fn __repr__(&self) -> PyResult<String> {
|
||||||
Ok(format!("Query({:?})", self.get()))
|
Ok(format!("Query({:?})", self.get()))
|
||||||
}
|
}
|
||||||
|
|
|
@ -73,26 +73,11 @@ impl SchemaBuilder {
|
||||||
index_option: &str,
|
index_option: &str,
|
||||||
) -> PyResult<Self> {
|
) -> PyResult<Self> {
|
||||||
let builder = &mut self.builder;
|
let builder = &mut self.builder;
|
||||||
let index_option = match index_option {
|
let options = SchemaBuilder::build_text_option(
|
||||||
"position" => schema::IndexRecordOption::WithFreqsAndPositions,
|
stored,
|
||||||
"freq" => schema::IndexRecordOption::WithFreqs,
|
tokenizer_name,
|
||||||
"basic" => schema::IndexRecordOption::Basic,
|
index_option,
|
||||||
_ => return Err(exceptions::PyValueError::new_err(
|
)?;
|
||||||
"Invalid index option, valid choices are: 'basic', 'freq' and 'position'"
|
|
||||||
))
|
|
||||||
};
|
|
||||||
|
|
||||||
let indexing = schema::TextFieldIndexing::default()
|
|
||||||
.set_tokenizer(tokenizer_name)
|
|
||||||
.set_index_option(index_option);
|
|
||||||
|
|
||||||
let options =
|
|
||||||
schema::TextOptions::default().set_indexing_options(indexing);
|
|
||||||
let options = if stored {
|
|
||||||
options.set_stored()
|
|
||||||
} else {
|
|
||||||
options
|
|
||||||
};
|
|
||||||
|
|
||||||
if let Some(builder) = builder.write().unwrap().as_mut() {
|
if let Some(builder) = builder.write().unwrap().as_mut() {
|
||||||
builder.add_text_field(name, options);
|
builder.add_text_field(name, options);
|
||||||
|
@ -230,6 +215,55 @@ impl SchemaBuilder {
|
||||||
Ok(self.clone())
|
Ok(self.clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Add a new json field to the schema.
|
||||||
|
///
|
||||||
|
/// Args:
|
||||||
|
/// name (str): the name of the field.
|
||||||
|
/// stored (bool, optional): If true sets the field as stored, the
|
||||||
|
/// content of the field can be later restored from a Searcher.
|
||||||
|
/// Defaults to False.
|
||||||
|
/// tokenizer_name (str, optional): The name of the tokenizer that
|
||||||
|
/// should be used to process the field. Defaults to 'default'
|
||||||
|
/// index_option (str, optional): Sets which information should be
|
||||||
|
/// indexed with the tokens. Can be one of 'position', 'freq' or
|
||||||
|
/// 'basic'. Defaults to 'position'. The 'basic' index_option
|
||||||
|
/// records only the document ID, the 'freq' option records the
|
||||||
|
/// document id and the term frequency, while the 'position' option
|
||||||
|
/// records the document id, term frequency and the positions of
|
||||||
|
/// the term occurrences in the document.
|
||||||
|
///
|
||||||
|
/// Returns the associated field handle.
|
||||||
|
/// Raises a ValueError if there was an error with the field creation.
|
||||||
|
#[args(
|
||||||
|
stored = false,
|
||||||
|
tokenizer_name = "TOKENIZER",
|
||||||
|
index_option = "RECORD"
|
||||||
|
)]
|
||||||
|
fn add_json_field(
|
||||||
|
&mut self,
|
||||||
|
name: &str,
|
||||||
|
stored: bool,
|
||||||
|
tokenizer_name: &str,
|
||||||
|
index_option: &str,
|
||||||
|
) -> PyResult<Self> {
|
||||||
|
let builder = &mut self.builder;
|
||||||
|
let options = SchemaBuilder::build_text_option(
|
||||||
|
stored,
|
||||||
|
tokenizer_name,
|
||||||
|
index_option,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
if let Some(builder) = builder.write().unwrap().as_mut() {
|
||||||
|
builder.add_json_field(name, options);
|
||||||
|
} else {
|
||||||
|
return Err(exceptions::PyValueError::new_err(
|
||||||
|
"Schema builder object isn't valid anymore.",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(self.clone())
|
||||||
|
}
|
||||||
|
|
||||||
/// Add a Facet field to the schema.
|
/// Add a Facet field to the schema.
|
||||||
/// Args:
|
/// Args:
|
||||||
/// name (str): The name of the field.
|
/// name (str): The name of the field.
|
||||||
|
@ -289,8 +323,8 @@ impl SchemaBuilder {
|
||||||
stored: bool,
|
stored: bool,
|
||||||
indexed: bool,
|
indexed: bool,
|
||||||
fast: Option<&str>,
|
fast: Option<&str>,
|
||||||
) -> PyResult<schema::IntOptions> {
|
) -> PyResult<schema::NumericOptions> {
|
||||||
let opts = schema::IntOptions::default();
|
let opts = schema::NumericOptions::default();
|
||||||
|
|
||||||
let opts = if stored { opts.set_stored() } else { opts };
|
let opts = if stored { opts.set_stored() } else { opts };
|
||||||
let opts = if indexed { opts.set_indexed() } else { opts };
|
let opts = if indexed { opts.set_indexed() } else { opts };
|
||||||
|
@ -317,4 +351,33 @@ impl SchemaBuilder {
|
||||||
|
|
||||||
Ok(opts)
|
Ok(opts)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn build_text_option(
|
||||||
|
stored: bool,
|
||||||
|
tokenizer_name: &str,
|
||||||
|
index_option: &str,
|
||||||
|
) -> PyResult<schema::TextOptions> {
|
||||||
|
let index_option = match index_option {
|
||||||
|
"position" => schema::IndexRecordOption::WithFreqsAndPositions,
|
||||||
|
"freq" => schema::IndexRecordOption::WithFreqs,
|
||||||
|
"basic" => schema::IndexRecordOption::Basic,
|
||||||
|
_ => return Err(exceptions::PyValueError::new_err(
|
||||||
|
"Invalid index option, valid choices are: 'basic', 'freq' and 'position'"
|
||||||
|
))
|
||||||
|
};
|
||||||
|
|
||||||
|
let indexing = schema::TextFieldIndexing::default()
|
||||||
|
.set_tokenizer(tokenizer_name)
|
||||||
|
.set_index_option(index_option);
|
||||||
|
|
||||||
|
let options =
|
||||||
|
schema::TextOptions::default().set_indexing_options(indexing);
|
||||||
|
let options = if stored {
|
||||||
|
options.set_stored()
|
||||||
|
} else {
|
||||||
|
options
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(options)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
#![allow(clippy::new_ret_no_self)]
|
#![allow(clippy::new_ret_no_self)]
|
||||||
|
|
||||||
use crate::{document::Document, get_field, query::Query, to_pyerr};
|
use crate::{document::Document, get_field, query::Query, to_pyerr};
|
||||||
use pyo3::{exceptions::PyValueError, prelude::*, PyObjectProtocol};
|
use pyo3::{exceptions::PyValueError, prelude::*};
|
||||||
use tantivy as tv;
|
use tantivy as tv;
|
||||||
use tantivy::collector::{Count, MultiCollector, TopDocs};
|
use tantivy::collector::{Count, MultiCollector, TopDocs};
|
||||||
|
|
||||||
|
@ -47,8 +47,8 @@ pub(crate) struct SearchResult {
|
||||||
count: Option<usize>,
|
count: Option<usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyproto]
|
#[pymethods]
|
||||||
impl PyObjectProtocol for SearchResult {
|
impl SearchResult {
|
||||||
fn __repr__(&self) -> PyResult<String> {
|
fn __repr__(&self) -> PyResult<String> {
|
||||||
if let Some(count) = self.count {
|
if let Some(count) = self.count {
|
||||||
Ok(format!(
|
Ok(format!(
|
||||||
|
@ -59,10 +59,7 @@ impl PyObjectProtocol for SearchResult {
|
||||||
Ok(format!("SearchResult(hits: {:?})", self.hits))
|
Ok(format!("SearchResult(hits: {:?})", self.hits))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
#[pymethods]
|
|
||||||
impl SearchResult {
|
|
||||||
#[getter]
|
#[getter]
|
||||||
/// The list of tuples that contains the scores and DocAddress of the
|
/// The list of tuples that contains the scores and DocAddress of the
|
||||||
/// search results.
|
/// search results.
|
||||||
|
@ -185,6 +182,14 @@ impl Searcher {
|
||||||
field_values: named_doc.0,
|
field_values: named_doc.0,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Python `repr()` of the searcher, reporting its document and segment counts.
fn __repr__(&self) -> PyResult<String> {
    let num_docs = self.inner.num_docs();
    let num_segments = self.inner.segment_readers().len();
    Ok(format!(
        "Searcher(num_docs={}, num_segments={})",
        num_docs, num_segments
    ))
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// DocAddress contains all the necessary information to identify a document
|
/// DocAddress contains all the necessary information to identify a document
|
||||||
|
@ -233,14 +238,3 @@ impl Into<tv::DocAddress> for &DocAddress {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[pyproto]
|
|
||||||
impl PyObjectProtocol for Searcher {
|
|
||||||
fn __repr__(&self) -> PyResult<String> {
|
|
||||||
Ok(format!(
|
|
||||||
"Searcher(num_docs={}, num_segments={})",
|
|
||||||
self.inner.num_docs(),
|
|
||||||
self.inner.segment_readers().len()
|
|
||||||
))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -5,7 +5,13 @@ from tantivy import Document, Index, SchemaBuilder
|
||||||
|
|
||||||
|
|
||||||
def schema():
|
def schema():
|
||||||
return SchemaBuilder().add_text_field("title", stored=True).add_text_field("body").build()
|
return (
|
||||||
|
SchemaBuilder()
|
||||||
|
.add_text_field("title", stored=True)
|
||||||
|
.add_text_field("body")
|
||||||
|
.build()
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def create_index(dir=None):
|
def create_index(dir=None):
|
||||||
# assume all tests will use the same documents for now
|
# assume all tests will use the same documents for now
|
||||||
|
@ -99,7 +105,9 @@ class TestClass(object):
|
||||||
|
|
||||||
def test_and_query(self, ram_index):
|
def test_and_query(self, ram_index):
|
||||||
index = ram_index
|
index = ram_index
|
||||||
query = index.parse_query("title:men AND body:summer", default_field_names=["title", "body"])
|
query = index.parse_query(
|
||||||
|
"title:men AND body:summer", default_field_names=["title", "body"]
|
||||||
|
)
|
||||||
# look for an intersection of documents
|
# look for an intersection of documents
|
||||||
searcher = index.searcher()
|
searcher = index.searcher()
|
||||||
result = searcher.search(query, 10)
|
result = searcher.search(query, 10)
|
||||||
|
@ -114,15 +122,13 @@ class TestClass(object):
|
||||||
|
|
||||||
def test_and_query_parser_default_fields(self, ram_index):
|
def test_and_query_parser_default_fields(self, ram_index):
|
||||||
query = ram_index.parse_query("winter", default_field_names=["title"])
|
query = ram_index.parse_query("winter", default_field_names=["title"])
|
||||||
assert repr(query) == """Query(TermQuery(Term(field=0,bytes=[119, 105, 110, 116, 101, 114])))"""
|
assert repr(query) == """Query(TermQuery(Term(type=Str, field=0, "winter")))"""
|
||||||
|
|
||||||
def test_and_query_parser_default_fields_undefined(self, ram_index):
|
def test_and_query_parser_default_fields_undefined(self, ram_index):
|
||||||
query = ram_index.parse_query("winter")
|
query = ram_index.parse_query("winter")
|
||||||
assert (
|
assert (
|
||||||
repr(query) == "Query(BooleanQuery { subqueries: ["
|
repr(query)
|
||||||
"(Should, TermQuery(Term(field=0,bytes=[119, 105, 110, 116, 101, 114]))), "
|
== """Query(BooleanQuery { subqueries: [(Should, TermQuery(Term(type=Str, field=0, "winter"))), (Should, TermQuery(Term(type=Str, field=1, "winter")))] })"""
|
||||||
"(Should, TermQuery(Term(field=1,bytes=[119, 105, 110, 116, 101, 114])))] "
|
|
||||||
"})"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_query_errors(self, ram_index):
|
def test_query_errors(self, ram_index):
|
||||||
|
@ -132,9 +138,11 @@ class TestClass(object):
|
||||||
index.parse_query("bod:men", ["title", "body"])
|
index.parse_query("bod:men", ["title", "body"])
|
||||||
|
|
||||||
def test_order_by_search(self):
|
def test_order_by_search(self):
|
||||||
schema = (SchemaBuilder()
|
schema = (
|
||||||
|
SchemaBuilder()
|
||||||
.add_unsigned_field("order", fast="single")
|
.add_unsigned_field("order", fast="single")
|
||||||
.add_text_field("title", stored=True).build()
|
.add_text_field("title", stored=True)
|
||||||
|
.build()
|
||||||
)
|
)
|
||||||
|
|
||||||
index = Index(schema)
|
index = Index(schema)
|
||||||
|
@ -155,7 +163,6 @@ class TestClass(object):
|
||||||
doc.add_unsigned("order", 1)
|
doc.add_unsigned("order", 1)
|
||||||
doc.add_text("title", "Another test title")
|
doc.add_text("title", "Another test title")
|
||||||
|
|
||||||
|
|
||||||
writer.add_document(doc)
|
writer.add_document(doc)
|
||||||
|
|
||||||
writer.commit()
|
writer.commit()
|
||||||
|
@ -163,7 +170,6 @@ class TestClass(object):
|
||||||
|
|
||||||
query = index.parse_query("test")
|
query = index.parse_query("test")
|
||||||
|
|
||||||
|
|
||||||
searcher = index.searcher()
|
searcher = index.searcher()
|
||||||
|
|
||||||
result = searcher.search(query, 10, offset=2, order_by_field="order")
|
result = searcher.search(query, 10, offset=2, order_by_field="order")
|
||||||
|
@ -187,9 +193,11 @@ class TestClass(object):
|
||||||
assert searched_doc["title"] == ["Test title"]
|
assert searched_doc["title"] == ["Test title"]
|
||||||
|
|
||||||
def test_order_by_search_without_fast_field(self):
|
def test_order_by_search_without_fast_field(self):
|
||||||
schema = (SchemaBuilder()
|
schema = (
|
||||||
|
SchemaBuilder()
|
||||||
.add_unsigned_field("order")
|
.add_unsigned_field("order")
|
||||||
.add_text_field("title", stored=True).build()
|
.add_text_field("title", stored=True)
|
||||||
|
.build()
|
||||||
)
|
)
|
||||||
|
|
||||||
index = Index(schema)
|
index = Index(schema)
|
||||||
|
@ -316,3 +324,72 @@ class TestDocument(object):
|
||||||
def test_document_error(self):
|
def test_document_error(self):
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
tantivy.Document(name={})
|
tantivy.Document(name={})
|
||||||
|
|
||||||
|
|
||||||
|
class TestJsonField:
    def test_query_from_json_field(self):
        """Index two JSON documents and query them through the json field."""
        builder = SchemaBuilder().add_json_field(
            "attributes",
            stored=True,
            tokenizer_name="default",
            index_option="position",
        )
        index = Index(builder.build())
        writer = index.writer()

        payloads = [
            """{
                "order":1.1,
                "target": "submit-button",
                "cart": {"product_id": 103},
                "description": "the best vacuum cleaner ever"
            }""",
            """{
                "order":1.2,
                "target": "submit-button",
                "cart": {"product_id": 133},
                "description": "das keyboard"
            }""",
        ]
        for payload in payloads:
            doc = Document()
            doc.add_json("attributes", payload)
            writer.add_document(doc)

        writer.commit()
        index.reload()

        # (query text, expected number of hits) against the json field.
        expectations = [
            ("target:submit-button", 2),
            ("target:submit", 2),
            ("order:1.1", 1),
        ]
        for query_text, expected_hits in expectations:
            query = index.parse_query(query_text, ["attributes"])
            result = index.searcher().search(query, 2)
            assert len(result.hits) == expected_hits

        # Nested-path queries are not exercised yet:
        # query = index.parse_query_for_attributes("cart.product_id:103")
        # result = index.searcher().search(query, 1)
        # assert len(result.hits) == 1

        # query = index.parse_query_for_attributes(
        #     "target:submit-button AND cart.product_id:133"
        # )
        # result = index.searcher().search(query, 2)
        # assert len(result.hits) == 1
|
||||||
|
|
Loading…
Reference in New Issue