API simplification.

See #9
master
Paul Masurel 2019-08-02 20:23:10 +09:00
parent b1b3689c55
commit 0498f941b0
13 changed files with 615 additions and 548 deletions

View File

@ -12,6 +12,7 @@ crate-type = ["cdylib"]
[dependencies]
chrono = "0.4"
tantivy = { git = "https://github.com/tantivy-search/tantivy" }
itertools = "0.8"
[dependencies.pyo3]
version = "0.7.0"

View File

@ -24,35 +24,37 @@ needs to be built. After that documents can be added to the index and a reader
can be created to search the index.
```python
builder = tantivy.SchemaBuilder()
import tantivy
title = builder.add_text_field("title", stored=True)
body = builder.add_text_field("body")
# Declaring our schema.
schema_builder = tantivy.SchemaBuilder()
schema_builder.add_text_field("title", stored=True)
schema_builder.add_text_field("body", stored=True)
schema = schema_builder.build()
schema = builder.build()
# Creating our index (in memory, but filesystem is available too)
index = tantivy.Index(schema)
writer = index.writer()
doc = tantivy.Document()
doc.add_text(title, "The Old Man and the Sea")
doc.add_text(body, ("He was an old man who fished alone in a skiff in"
"the Gulf Stream and he had gone eighty-four days "
"now without taking a fish."))
writer.add_document(doc)
# Adding one document.
writer = index.writer()
writer.add_document({
"title": "The Old Man and the Sea",
"body": """He was an old man who fished alone in a skiff in
the Gulf Stream and he had gone eighty-four days
now without taking a fish."""
})
# ... and committing
writer.commit()
reader = index.reader()
searcher = reader.searcher()
query_parser = tantivy.QueryParser.for_index(index, [title, body])
query = query_parser.parse_query("sea whale")
# Reload the index to ensure it points to the last commit.
index.reload();
searcher = index.searcher()
query = index.parse_query("sea whale", ["title", "body"])
top_docs = tantivy.TopDocs(3)
top_docs = tantivy.TopDocs(10)
result = searcher.search(query, top_docs)
_, doc_address = result[0]
searched_doc = searcher.doc(doc_address)
assert searched_doc.get_first(title) == "The Old Man and the Sea"
(best_score, best_doc_address) = searcher.search(query, nhits=3)[0]
best_doc = searcher.doc(best_doc_address)
assert best_doc["title"] == ["The Old Man and the Sea"]
```

View File

@ -1,40 +1,220 @@
#![allow(clippy::new_ret_no_self)]
#![allow(clippy::wrong_self_convention)]
use itertools::Itertools;
use pyo3::prelude::*;
use pyo3::types::PyDateTime;
use pyo3::types::{PyAny, PyDateTime, PyDict, PyList, PyTuple};
use pyo3::types::{PyDateAccess, PyTimeAccess};
use chrono::offset::TimeZone;
use chrono::Utc;
use chrono::{Datelike, Timelike, Utc};
use tantivy as tv;
use crate::facet::Facet;
use crate::field::{Field, FieldValue};
use crate::to_pyerr;
use pyo3::{PyMappingProtocol, PyObjectProtocol};
use std::collections::BTreeMap;
use std::fmt;
use tantivy::schema::Value;
/// Converts a tantivy `Value` into a Python object.
///
/// Every variant is handed to pyo3's `into_object`/`to_object`, except:
/// * `Value::Date`, which is rebuilt field by field as a Python `datetime`
///   (microsecond precision, no tzinfo passed);
/// * `Value::Facet`, which is wrapped into this crate's `Facet` class.
///
/// Returns an error only if the Python `datetime` cannot be constructed.
fn value_to_py(py: Python, value: &Value) -> PyResult<PyObject> {
    let object = match value {
        Value::Str(text) => text.into_object(py),
        Value::U64(num) => num.into_object(py),
        Value::I64(num) => num.into_object(py),
        Value::F64(num) => num.into_object(py),
        Value::Bytes(b) => b.to_object(py),
        Value::Date(d) => {
            let py_datetime = PyDateTime::new(
                py,
                d.year(),
                d.month() as u8,
                d.day() as u8,
                d.hour() as u8,
                d.minute() as u8,
                d.second() as u8,
                d.timestamp_subsec_micros(),
                None,
            )?;
            py_datetime.into_object(py)
        }
        Value::Facet(f) => {
            let facet = Facet { inner: f.clone() };
            facet.into_object(py)
        }
    };
    Ok(object)
}
/// Renders a tantivy `Value` as a human-readable string.
///
/// Text is returned verbatim, numbers use their `Display` form, bytes and
/// dates use their `Debug` form, and facets use `to_string()`.
fn value_to_string(value: &Value) -> String {
    match value {
        Value::Str(text) => text.to_owned(),
        Value::U64(num) => num.to_string(),
        Value::I64(num) => num.to_string(),
        Value::F64(num) => num.to_string(),
        Value::Bytes(bytes) => format!("{:?}", bytes),
        Value::Date(d) => format!("{:?}", d),
        Value::Facet(facet) => facet.to_string(),
    }
}
/// Tantivy's Document is the object that can be indexed and then searched for.
///
/// Documents are fundamentally a collection of unordered tuples
/// (field, value). In this list, one field may appear more than once.
/// (field_name, value). In this list, one field may appear more than once.
///
/// Example:
/// >>> doc = tantivy.Document()
/// >>> doc.add_text(title, "The Old Man and the Sea")
/// >>> doc.add_text(body, ("He was an old man who fished alone in a "
/// >>> doc.add_text("title", "The Old Man and the Sea")
/// >>> doc.add_text("body", ("He was an old man who fished alone in a "
/// "skiff in the Gulf Stream and he had gone "
/// "eighty-four days now without taking a fish."))
///
/// For simplicity, it is also possible to build a `Document` by passing the field
/// values directly as constructor arguments.
///
/// Example:
/// >>> doc = tantivy.Document(title=["The Old Man and the Sea"], body=["..."])
///
/// As syntactic sugar, tantivy also allows the user to pass a single value
/// if there is only one. In other words, the following is also legal.
///
/// Example:
/// >>> doc = tantivy.Document(title="The Old Man and the Sea", body="...")
#[pyclass]
#[derive(Default)]
pub(crate) struct Document {
pub(crate) inner: tv::Document,
pub(crate) field_values: BTreeMap<String, Vec<tv::schema::Value>>,
}
impl fmt::Debug for Document {
    /// Formats the document as `Document(field=[values],field=[values],...)`.
    ///
    /// For each field the values are joined with `,` and the joined string is
    /// shortened to at most 10 bytes so that large documents stay readable.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Maximum number of bytes of joined values displayed per field.
        const MAX_VALUES_LEN: usize = 10;
        let doc_str = self
            .field_values
            .iter()
            .map(|(field_name, field_values)| {
                let mut values_str =
                    field_values.iter().map(value_to_string).join(",");
                if values_str.len() > MAX_VALUES_LEN {
                    // `String::truncate` panics when the cut position falls
                    // inside a multi-byte UTF-8 character, so back up to the
                    // closest char boundary first. Index 0 is always a
                    // boundary, hence the loop terminates.
                    let mut cut = MAX_VALUES_LEN;
                    while !values_str.is_char_boundary(cut) {
                        cut -= 1;
                    }
                    values_str.truncate(cut);
                }
                format!("{}=[{}]", field_name, values_str)
            })
            .join(",");
        write!(f, "Document({})", doc_str)
    }
}
/// Appends `value` to the list of values stored under `field_name`.
///
/// The list is created on first use; `value` is converted into a tantivy
/// `Value` through its `From` implementation.
fn add_value<T>(doc: &mut Document, field_name: String, value: T)
where
    Value: From<T>,
{
    let converted = Value::from(value);
    let values = doc.field_values.entry(field_name).or_default();
    values.push(converted);
}
/// Converts a single Python object into a tantivy `Value`.
///
/// The conversions are attempted in this order and the first success wins:
/// `str`, signed 64-bit integer, float, `datetime`, `Facet`.
///
/// For a `datetime`, the year/month/day/hour/minute/second/microsecond
/// fields are read as-is and used to build a UTC date; the object's tzinfo
/// is not consulted.
///
/// Returns an error built with `to_pyerr` when no conversion applies.
fn extract_value(any: &PyAny) -> PyResult<Value> {
    if let Ok(text) = any.extract::<String>() {
        Ok(Value::Str(text))
    } else if let Ok(int) = any.extract::<i64>() {
        Ok(Value::I64(int))
    } else if let Ok(float) = any.extract::<f64>() {
        Ok(Value::F64(float))
    } else if let Ok(py_datetime) = any.downcast_ref::<PyDateTime>() {
        let date = Utc.ymd(
            py_datetime.get_year(),
            py_datetime.get_month().into(),
            py_datetime.get_day().into(),
        );
        let datetime = date.and_hms_micro(
            py_datetime.get_hour().into(),
            py_datetime.get_minute().into(),
            py_datetime.get_second().into(),
            py_datetime.get_microsecond(),
        );
        Ok(Value::Date(datetime))
    } else if let Ok(facet) = any.downcast_ref::<Facet>() {
        Ok(Value::Facet(facet.inner.clone()))
    } else {
        Err(to_pyerr(format!("Value unsupported {:?}", any)))
    }
}
/// Converts a Python object into a list of tantivy `Value`s.
///
/// A Python `list` is converted element by element (the first failing
/// element aborts the conversion); any other object is converted on its own
/// and returned as a one-element list.
fn extract_value_single_or_list(any: &PyAny) -> PyResult<Vec<Value>> {
    match any.downcast_ref::<PyList>() {
        Ok(py_list) => py_list.iter().map(extract_value).collect(),
        Err(_) => extract_value(any).map(|value| vec![value]),
    }
}
#[pymethods]
impl Document {
#[new]
fn new(obj: &PyRawObject) {
obj.init(Document {
inner: tv::Document::default(),
});
#[args(kwargs = "**")]
fn new(obj: &PyRawObject, kwargs: Option<&PyDict>) -> PyResult<()> {
let mut document = Document::default();
if let Some(field_dict) = kwargs {
document.extend(field_dict)?;
}
obj.init(document);
Ok(())
}
/// Adds the content of a Python dict to the document.
///
/// Each key must be a field name and each value either a single value or
/// a list of values. The whole dict is converted before the document is
/// touched, so a conversion error leaves the document unchanged. Values
/// given for a field that is already present replace the existing ones.
fn extend(&mut self, py_dict: &PyDict) -> PyResult<()> {
    let mut parsed: BTreeMap<String, Vec<tv::schema::Value>> =
        BTreeMap::new();
    for item in py_dict.items() {
        // Anything that is not a (key, value) pair is silently skipped.
        let pair = match item.downcast_ref::<PyTuple>() {
            Ok(pair) if pair.len() == 2 => pair,
            _ => continue,
        };
        let field_name: String = pair.get_item(0).extract()?;
        let values = extract_value_single_or_list(pair.get_item(1))?;
        parsed.insert(field_name, values);
    }
    self.field_values.extend(parsed);
    Ok(())
}
#[staticmethod]
fn from_dict(py_dict: &PyDict) -> PyResult<Document> {
let mut field_values: BTreeMap<String, Vec<tv::schema::Value>> =
BTreeMap::new();
for key_value_any in py_dict.items() {
if let Ok(key_value) = key_value_any.downcast_ref::<PyTuple>() {
if key_value.len() != 2 {
continue;
}
let key: String = key_value.get_item(0).extract()?;
let value_list =
extract_value_single_or_list(key_value.get_item(1))?;
field_values.insert(key, value_list);
}
}
Ok(Document { field_values })
}
/// Returns a dictionary holding every field value of the document.
///
/// In tantivy, a `Document` can hold multiple values for a single field.
/// For this reason, the dictionary associates each field name with a
/// list of values.
fn to_dict(&self, py: Python) -> PyResult<PyObject> {
    let dict = PyDict::new(py);
    for (field_name, values) in self.field_values.iter() {
        let mut py_values: Vec<PyObject> = Vec::with_capacity(values.len());
        for value in values {
            py_values.push(value_to_py(py, value)?);
        }
        dict.set_item(field_name, py_values)?;
    }
    Ok(dict.into())
}
/// Add a text value to the document.
@ -42,8 +222,8 @@ impl Document {
/// Args:
/// field (Field): The field for which we are adding the text.
/// text (str): The text that will be added to the document.
fn add_text(&mut self, field: &Field, text: &str) {
self.inner.add_text(field.inner, text);
fn add_text(&mut self, field_name: String, text: &str) {
add_value(self, field_name, text);
}
/// Add an unsigned integer value to the document.
@ -51,8 +231,8 @@ impl Document {
/// Args:
/// field (Field): The field for which we are adding the integer.
/// value (int): The integer that will be added to the document.
fn add_unsigned(&mut self, field: &Field, value: u64) {
self.inner.add_u64(field.inner, value);
fn add_unsigned(&mut self, field_name: String, value: u64) {
add_value(self, field_name, value);
}
/// Add a signed integer value to the document.
@ -60,8 +240,8 @@ impl Document {
/// Args:
/// field (Field): The field for which we are adding the integer.
/// value (int): The integer that will be added to the document.
fn add_integer(&mut self, field: &Field, value: i64) {
self.inner.add_i64(field.inner, value);
fn add_integer(&mut self, field_name: String, value: i64) {
add_value(self, field_name, value);
}
/// Add a date value to the document.
@ -69,7 +249,7 @@ impl Document {
/// Args:
/// field (Field): The field for which we are adding the integer.
/// value (datetime): The date that will be added to the document.
fn add_date(&mut self, field: &Field, value: &PyDateTime) {
fn add_date(&mut self, field_name: String, value: &PyDateTime) {
let datetime = Utc
.ymd(
value.get_year(),
@ -82,16 +262,15 @@ impl Document {
value.get_second().into(),
value.get_microsecond(),
);
self.inner.add_date(field.inner, &datetime);
add_value(self, field_name, datetime);
}
/// Add a facet value to the document.
/// Args:
/// field (Field): The field for which we are adding the facet.
/// value (Facet): The Facet that will be added to the document.
fn add_facet(&mut self, field: &Field, value: &Facet) {
self.inner.add_facet(field.inner, value.inner.clone());
fn add_facet(&mut self, field_name: String, facet: &Facet) {
add_value(self, field_name, facet.inner.clone());
}
/// Add a bytes value to the document.
@ -99,20 +278,20 @@ impl Document {
/// Args:
/// field (Field): The field for which we are adding the bytes.
/// value (bytes): The bytes that will be added to the document.
fn add_bytes(&mut self, field: &Field, value: Vec<u8>) {
self.inner.add_bytes(field.inner, value);
fn add_bytes(&mut self, field_name: String, bytes: Vec<u8>) {
add_value(self, field_name, bytes);
}
/// Returns the number of fields that have been added to the document
#[getter]
fn len(&self) -> usize {
self.inner.len()
fn num_fields(&self) -> usize {
self.field_values.len()
}
/// True if the document is empty, False otherwise.
#[getter]
fn is_empty(&self) -> bool {
self.inner.is_empty()
self.field_values.is_empty()
}
/// Get the first value associated with the given field.
@ -122,9 +301,17 @@ impl Document {
///
/// Returns the value if one is found, otherwise None.
/// The type of the value depends on the field.
fn get_first(&self, py: Python, field: &Field) -> Option<PyObject> {
let value = self.inner.get_first(field.inner)?;
FieldValue::value_to_py(py, value)
/// Looks up the first value stored under `fieldname` and converts it to a
/// Python object; yields `None` when the field has no value.
fn get_first(
    &self,
    py: Python,
    fieldname: &str,
) -> PyResult<Option<PyObject>> {
    match self.iter_values_for_field(fieldname).next() {
        Some(value) => value_to_py(py, value).map(Some),
        None => Ok(None),
    }
}
/// Get all the values associated with the given field.
@ -134,21 +321,37 @@ impl Document {
///
/// Returns a list of values.
/// The type of the value depends on the field.
fn get_all(&self, py: Python, field: &Field) -> Vec<PyObject> {
let values = self.inner.get_all(field.inner);
values
.iter()
.map(|&v| FieldValue::value_to_py(py, v))
.filter_map(|x| x)
.collect()
/// Converts every value stored under `field_name` to a Python object, in
/// insertion order; the list is empty when the field has no value.
fn get_all(&self, py: Python, field_name: &str) -> PyResult<Vec<PyObject>> {
    let mut py_values = Vec::new();
    for value in self.iter_values_for_field(field_name) {
        py_values.push(value_to_py(py, value)?);
    }
    Ok(py_values)
}
}
/// Get all the fields and values contained in the document.
fn field_values(&self, py: Python) -> Vec<FieldValue> {
let field_values = self.inner.field_values();
field_values
.iter()
.map(|v| FieldValue::field_value_to_py(py, v))
.collect()
impl Document {
    /// Iterates over the values stored under `field`.
    ///
    /// The iterator is empty when the document has no such field.
    fn iter_values_for_field<'a>(
        &'a self,
        field: &str,
    ) -> impl Iterator<Item = &'a Value> + 'a {
        // `Option<&Vec<Value>>` yields zero or one vector; flattening it
        // walks the values of that vector, if any.
        self.field_values.get(field).into_iter().flatten()
    }
}
#[pyproto]
impl PyMappingProtocol for Document {
    /// Implements `doc[field_name]`: returns the list of values stored under
    /// `field_name` (an empty list when the field is absent).
    fn __getitem__(&self, field_name: &str) -> PyResult<Vec<PyObject>> {
        let gil_guard = Python::acquire_gil();
        self.get_all(gil_guard.python(), field_name)
    }
}
#[pyproto]
impl PyObjectProtocol for Document {
    /// Implements `repr(doc)` by reusing the `Debug` formatting.
    fn __repr__(&self) -> PyResult<String> {
        let repr = format!("{:?}", self);
        Ok(repr)
    }
}

View File

@ -1,6 +1,6 @@
use pyo3::basic::PyObjectProtocol;
use pyo3::prelude::*;
use pyo3::types::PyType;
use tantivy::schema;
/// A Facet represents a point in a given hierarchy.
@ -52,4 +52,23 @@ impl Facet {
inner: schema::Facet::from_text(facet_string),
}
}
/// Returns the list of `segments` that forms a facet path.
///
/// For instance `//europe/france` becomes `["europe", "france"]`.
///
/// This is a direct delegation to the wrapped tantivy facet; the returned
/// string slices borrow from it.
fn to_path(&self) -> Vec<&str> {
self.inner.to_path()
}
/// Returns the facet string representation.
///
/// The string is produced by `to_string()` on the wrapped tantivy facet.
fn to_path_str(&self) -> String {
self.inner.to_string()
}
}
#[pyproto]
impl PyObjectProtocol for Facet {
    /// Implements `repr(facet)` as `Facet(<path string>)`.
    fn __repr__(&self) -> PyResult<String> {
        let path = self.to_path_str();
        Ok(format!("Facet({})", path))
    }
}

View File

@ -1,73 +0,0 @@
use pyo3::prelude::*;
use pyo3::types::PyDateTime;
use tantivy::schema;
use crate::facet::Facet;
/// Field is a numeric identifier that represents an entry in the Schema.
#[pyclass]
#[derive(Clone)]
pub(crate) struct Field {
pub(crate) inner: schema::Field,
}
/// FieldValue holds together a Field and its Value.
#[pyclass]
pub(crate) struct FieldValue {
pub(crate) field: Field,
pub(crate) value: PyObject,
}
#[pymethods]
impl FieldValue {
#[getter]
fn field(&self) -> Field {
self.field.clone()
}
#[getter]
fn value(&self) -> &PyObject {
&self.value
}
}
impl FieldValue {
pub(crate) fn value_to_py(
py: Python,
value: &schema::Value,
) -> Option<PyObject> {
match value {
schema::Value::Str(text) => Some(text.into_object(py)),
schema::Value::U64(num) => Some(num.into_object(py)),
schema::Value::I64(num) => Some(num.into_object(py)),
schema::Value::F64(num) => Some(num.into_object(py)),
schema::Value::Bytes(b) => Some(b.to_object(py)),
schema::Value::Date(d) => {
let date =
PyDateTime::from_timestamp(py, d.timestamp() as f64, None);
match date {
Ok(d) => Some(d.into_object(py)),
Err(_e) => None,
}
}
schema::Value::Facet(f) => {
Some(Facet { inner: f.clone() }.into_object(py))
}
}
}
pub(crate) fn field_value_to_py(
py: Python,
field_value: &schema::FieldValue,
) -> FieldValue {
let value = field_value.value();
let field = field_value.field();
FieldValue {
field: Field { inner: field },
value: FieldValue::value_to_py(py, value).unwrap(),
}
}
}

View File

@ -4,60 +4,24 @@ use pyo3::exceptions;
use pyo3::prelude::*;
use crate::document::Document;
use crate::query::Query;
use crate::schema::Schema;
use crate::searcher::Searcher;
use crate::to_pyerr;
use tantivy as tv;
use tantivy::directory::MmapDirectory;
use tantivy::schema::{Field, NamedFieldDocument};
const RELOAD_POLICY: &str = "commit";
/// IndexReader is the entry point to read and search the index.
///
/// IndexReader controls when a new version of the index should be loaded and
/// lends you instances of Searcher for the last loaded version.
///
/// To create an IndexReader first create an Index and call the reader() method
/// on the index object.
#[pyclass]
pub(crate) struct IndexReader {
inner: tv::IndexReader,
}
#[pymethods]
impl IndexReader {
/// Update searchers so that they reflect the state of the last .commit().
///
/// If you set up the the reload policy to be on 'commit' (which is the
/// default) every commit should be rapidly reflected on your IndexReader
/// and you should not need to call reload() at all.
fn reload(&self) -> PyResult<()> {
let ret = self.inner.reload();
match ret {
Ok(_) => Ok(()),
Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
}
}
/// Get a Searcher for the index.
///
/// This method should be called every single time a search query is
/// performed. The searchers are taken from a pool of num_searchers
/// searchers.
///
/// Returns a Searcher object, if no searcher is available this may block.
fn searcher(&self) -> Searcher {
let searcher = self.inner.searcher();
Searcher { inner: searcher }
}
}
/// IndexWriter is the user entry-point to add documents to the index.
///
/// To create an IndexWriter first create an Index and call the writer() method
/// on the index object.
#[pyclass]
pub(crate) struct IndexWriter {
inner: tv::IndexWriter,
inner_index_writer: tv::IndexWriter,
schema: tv::schema::Schema,
}
#[pymethods]
@ -70,9 +34,24 @@ impl IndexWriter {
/// by the client to align commits with its own document queue.
/// The `opstamp` represents the number of documents that have been added
/// since the creation of the index.
fn add_document(&mut self, document: &Document) -> PyResult<()> {
self.inner.add_document(document.inner.clone());
Ok(())
pub fn add_document(&mut self, doc: &Document) -> PyResult<u64> {
    // The Python-side document is keyed by field name; the schema resolves
    // those names into a tantivy document. The values are cloned because
    // `NamedFieldDocument` takes ownership of the map.
    let field_values = doc.field_values.clone();
    let tantivy_doc = self
        .schema
        .convert_named_doc(NamedFieldDocument(field_values))
        .map_err(to_pyerr)?;
    let opstamp = self.inner_index_writer.add_document(tantivy_doc);
    Ok(opstamp)
}
/// Adds a document given as a json string.
///
/// The string is parsed against the writer's schema and the resulting
/// document is added exactly as `add_document` would.
///
/// If the indexing pipeline is full, this call may block.
///
/// Returns an `opstamp`, which is an increasing integer that can be used
/// by the client to align commits with its own document queue.
/// The `opstamp` represents the number of documents that have been added
/// since the creation of the index.
pub fn add_json(&mut self, json: &str) -> PyResult<u64> {
    let parsed = self.schema.parse_document(json).map_err(to_pyerr)?;
    Ok(self.inner_index_writer.add_document(parsed))
}
/// Commits all of the pending changes
@ -84,12 +63,8 @@ impl IndexWriter {
/// spared), it will be possible to resume indexing from this point.
///
/// Returns the `opstamp` of the last document that made it in the commit.
fn commit(&mut self) -> PyResult<()> {
let ret = self.inner.commit();
match ret {
Ok(_) => Ok(()),
Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
}
fn commit(&mut self) -> PyResult<u64> {
// Delegates to the tantivy writer and hands its `u64` result back to
// Python; a failure is converted into a Python exception by `to_pyerr`.
self.inner_index_writer.commit().map_err(to_pyerr)
}
/// Rollback to the last commit
@ -97,23 +72,15 @@ impl IndexWriter {
/// This cancels all of the updates that happened after the last
/// commit. After calling rollback, the index is in the same state as it
/// was after the last commit.
fn rollback(&mut self) -> PyResult<()> {
let ret = self.inner.rollback();
match ret {
Ok(_) => Ok(()),
Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
}
fn rollback(&mut self) -> PyResult<u64> {
// Delegates to the tantivy writer and hands its `u64` result back to
// Python; a failure is converted into a Python exception by `to_pyerr`.
self.inner_index_writer.rollback().map_err(to_pyerr)
}
/// Detect and removes the files that are not used by the index anymore.
fn garbage_collect_files(&mut self) -> PyResult<()> {
let ret = self.inner.garbage_collect_files();
match ret {
Ok(_) => Ok(()),
Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
}
self.inner_index_writer
.garbage_collect_files()
.map_err(to_pyerr)
}
/// The opstamp of the last successful commit.
@ -125,7 +92,7 @@ impl IndexWriter {
/// for searchers.
#[getter]
fn commit_opstamp(&self) -> u64 {
self.inner.commit_opstamp()
self.inner_index_writer.commit_opstamp()
}
}
@ -142,11 +109,19 @@ impl IndexWriter {
/// if there was a problem during the opening or creation of the index.
#[pyclass]
pub(crate) struct Index {
pub(crate) inner: tv::Index,
pub(crate) index: tv::Index,
reader: tv::IndexReader,
}
#[pymethods]
impl Index {
/// Opens the index stored in the directory `path` and creates the reader
/// used for subsequent searches.
///
/// Any failure is converted into a Python exception by `to_pyerr`.
#[staticmethod]
fn open(path: &str) -> PyResult<Index> {
    let index = tv::Index::open_in_dir(path).map_err(to_pyerr)?;
    index
        .reader()
        .map_err(to_pyerr)
        .map(|reader| Index { index, reader })
}
#[new]
#[args(reuse = true)]
fn new(
@ -157,32 +132,20 @@ impl Index {
) -> PyResult<()> {
let index = match path {
Some(p) => {
let directory = MmapDirectory::open(p);
let dir = match directory {
Ok(d) => d,
Err(e) => {
return Err(exceptions::OSError::py_err(e.to_string()))
}
};
let i = if reuse {
tv::Index::open_or_create(dir, schema.inner.clone())
let directory = MmapDirectory::open(p).map_err(to_pyerr)?;
if reuse {
tv::Index::open_or_create(directory, schema.inner.clone())
} else {
tv::Index::create(dir, schema.inner.clone())
};
match i {
Ok(index) => index,
Err(e) => {
return Err(exceptions::OSError::py_err(e.to_string()))
}
tv::Index::create(directory, schema.inner.clone())
}
.map_err(to_pyerr)?
}
None => tv::Index::create_in_ram(schema.inner.clone()),
};
obj.init(Index { inner: index });
let reader = index.reader().map_err(to_pyerr)?;
println!("reader {}", reader.searcher().segment_readers().len());
obj.init(Index { index, reader });
Ok(())
}
@ -206,32 +169,30 @@ impl Index {
num_threads: usize,
) -> PyResult<IndexWriter> {
let writer = match num_threads {
0 => self.inner.writer(heap_size),
_ => self.inner.writer_with_num_threads(num_threads, heap_size),
};
match writer {
Ok(w) => Ok(IndexWriter { inner: w }),
Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
0 => self.index.writer(heap_size),
_ => self.index.writer_with_num_threads(num_threads, heap_size),
}
.map_err(to_pyerr)?;
let schema = self.index.schema();
Ok(IndexWriter {
inner_index_writer: writer,
schema,
})
}
/// Create an IndexReader for the index.
/// Configure the index reader.
///
/// Args:
/// reload_policy (str, optional): The reload policy that the
/// IndexReader should use. Can be manual or OnCommit.
/// IndexReader should use. Can be `Manual` or `OnCommit`.
/// num_searchers (int, optional): The number of searchers that the
/// reader should create.
///
/// Returns the IndexReader on success, raises ValueError if an IndexReader
/// couldn't be created.
#[args(reload_policy = "RELOAD_POLICY", num_searchers = 0)]
fn reader(
&self,
fn config_reader(
&mut self,
reload_policy: &str,
num_searchers: usize,
) -> PyResult<IndexReader> {
) -> Result<(), PyErr> {
let reload_policy = reload_policy.to_lowercase();
let reload_policy = match reload_policy.as_ref() {
"commit" => tv::ReloadPolicy::OnCommit,
@ -242,9 +203,7 @@ impl Index {
"Invalid reload policy, valid choices are: 'manual' and 'OnCommit'"
))
};
let builder = self.inner.reader_builder();
let builder = self.index.reader_builder();
let builder = builder.reload_policy(reload_policy);
let builder = if num_searchers > 0 {
builder.num_searchers(num_searchers)
@ -252,10 +211,13 @@ impl Index {
builder
};
let reader = builder.try_into();
match reader {
Ok(r) => Ok(IndexReader { inner: r }),
Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
self.reader = builder.try_into().map_err(to_pyerr)?;
Ok(())
}
/// Returns a `Searcher` obtained from the index's internal reader.
fn searcher(&self) -> Searcher {
    let inner = self.reader.searcher();
    Searcher { inner }
}
@ -268,19 +230,71 @@ impl Index {
/// Raises OSError if the directory cannot be opened.
#[staticmethod]
fn exists(path: &str) -> PyResult<bool> {
let directory = MmapDirectory::open(path);
let dir = match directory {
Ok(d) => d,
Err(e) => return Err(exceptions::OSError::py_err(e.to_string())),
};
Ok(tv::Index::exists(&dir))
let directory = MmapDirectory::open(path).map_err(to_pyerr)?;
Ok(tv::Index::exists(&directory))
}
/// The schema of the current index.
#[getter]
fn schema(&self) -> Schema {
let schema = self.inner.schema();
let schema = self.index.schema();
Schema { inner: schema }
}
/// Update searchers so that they reflect the state of the last .commit().
///
/// If you set up the reload policy to be on 'commit' (which is the
/// default) every commit should be rapidly reflected on your IndexReader
/// and you should not need to call reload() at all.
///
/// A failure of the underlying reader is converted into a Python exception
/// by `to_pyerr`.
fn reload(&self) -> PyResult<()> {
self.reader.reload().map_err(to_pyerr)
}
/// Parse a query
///
/// Args:
/// query: the query, following the tantivy query language.
/// default_fields (List[Field]): A list of fields used to search if no
/// field is specified in the query.
///
#[args(reload_policy = "RELOAD_POLICY")]
pub fn parse_query(
&self,
query: &str,
default_field_names: Option<Vec<String>>,
) -> PyResult<Query> {
let mut default_fields = vec![];
let schema = self.index.schema();
if let Some(default_field_names_vec) = default_field_names {
for default_field_name in &default_field_names_vec {
if let Some(field) = schema.get_field(default_field_name) {
let field_entry = schema.get_field_entry(field);
if !field_entry.is_indexed() {
return Err(exceptions::ValueError::py_err(format!(
"Field `{}` is not set as indexed in the schema.",
default_field_name
)));
}
default_fields.push(field);
} else {
return Err(exceptions::ValueError::py_err(format!(
"Field `{}` is not defined in the schema.",
default_field_name
)));
}
}
} else {
for (field_id, field_entry) in
self.index.schema().fields().iter().enumerate()
{
if field_entry.is_indexed() {
default_fields.push(Field(field_id as u32));
}
}
}
let parser =
tv::query::QueryParser::for_index(&self.index, default_fields);
let query = parser.parse_query(query).map_err(to_pyerr)?;
Ok(Query { inner: query })
}
}

View File

@ -1,8 +1,8 @@
use pyo3::exceptions;
use pyo3::prelude::*;
mod document;
mod facet;
mod field;
mod index;
mod query;
mod schema;
@ -11,9 +11,7 @@ mod searcher;
use document::Document;
use facet::Facet;
use field::{Field, FieldValue};
use index::Index;
use query::QueryParser;
use schema::Schema;
use schemabuilder::SchemaBuilder;
use searcher::{DocAddress, Searcher, TopDocs};
@ -75,14 +73,14 @@ fn tantivy(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<Schema>()?;
m.add_class::<SchemaBuilder>()?;
m.add_class::<Searcher>()?;
m.add_class::<Index>()?;
m.add_class::<QueryParser>()?;
m.add_class::<Document>()?;
m.add_class::<Index>()?;
m.add_class::<DocAddress>()?;
m.add_class::<TopDocs>()?;
m.add_class::<Field>()?;
m.add_class::<FieldValue>()?;
m.add_class::<Facet>()?;
Ok(())
}
/// Converts any error that can be rendered as a string into a Python
/// `ValueError` carrying that string as its message.
pub(crate) fn to_pyerr<E: ToString>(err: E) -> PyErr {
    let message = err.to_string();
    exceptions::ValueError::py_err(message)
}

View File

@ -1,70 +1,16 @@
use pyo3::exceptions;
use pyo3::prelude::*;
use pyo3::types::PyType;
use pyo3::PyObjectProtocol;
use tantivy as tv;
use crate::field::Field;
use crate::index::Index;
/// Tantivy's Query
#[pyclass]
pub(crate) struct Query {
pub(crate) inner: Box<dyn tv::query::Query>,
}
/// Tantivy's Query parser
#[pyclass]
pub(crate) struct QueryParser {
inner: tv::query::QueryParser,
}
#[pymethods]
impl QueryParser {
/// Creates a QueryParser for an Index.
///
/// Args:
/// index (Index): The index for which the query will be created.
/// default_fields (List[Field]): A list of fields used to search if no
/// field is specified in the query.
///
/// Returns the QueryParser.
#[classmethod]
fn for_index(
_cls: &PyType,
index: &Index,
default_fields: Vec<&Field>,
) -> PyResult<QueryParser> {
let default_fields: Vec<tv::schema::Field> =
default_fields.iter().map(|&f| f.inner).collect();
let parser =
tv::query::QueryParser::for_index(&index.inner, default_fields);
Ok(QueryParser { inner: parser })
}
/// Parse a string into a query that can be given to a searcher.
///
/// Args:
/// query (str): A query string that should be parsed into a query.
///
/// Returns the parsed Query object. Raises ValueError if there was an
/// error with the query string.
fn parse_query(&self, query: &str) -> PyResult<Query> {
let ret = self.inner.parse_query(query);
match ret {
Ok(q) => Ok(Query { inner: q }),
Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
}
}
/// Set the default way to compose queries to a conjunction.
///
/// By default, the query happy tax payer is equivalent to the query happy
/// OR tax OR payer. After calling .set_conjunction_by_default() happy tax
/// payer will be interpreted by the parser as happy AND tax AND payer.
fn set_conjunction_by_default(&mut self) {
self.inner.set_conjunction_by_default();
#[pyproto]
impl PyObjectProtocol for Query {
    /// Implements `repr(query)` as `Query(<Debug form of the tantivy query>)`.
    fn __repr__(&self) -> PyResult<String> {
        let repr = format!("Query({:?})", self.inner);
        Ok(repr)
    }
}

View File

@ -1,10 +1,5 @@
use pyo3::exceptions;
use pyo3::prelude::*;
use tantivy::schema;
use crate::document::Document;
use crate::field::Field;
use tantivy as tv;
/// Tantivy schema.
///
@ -12,54 +7,8 @@ use crate::field::Field;
/// provided.
#[pyclass]
pub(crate) struct Schema {
pub(crate) inner: schema::Schema,
pub(crate) inner: tv::schema::Schema,
}
#[pymethods]
impl Schema {
/// Build a document object from a json string.
///
/// Args:
/// doc_json (str) - A string containing json that should be parsed
/// into a `Document`
///
/// Returns the parsed document, raises a ValueError if the parsing failed.
fn parse_document(&self, doc_json: &str) -> PyResult<Document> {
let ret = self.inner.parse_document(doc_json);
match ret {
Ok(d) => Ok(Document { inner: d }),
Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
}
}
/// Convert a `Document` object into a json string.
///
/// Args:
/// doc (Document): The document that will be converted into a json
/// string.
fn to_json(&self, doc: &Document) -> String {
self.inner.to_json(&doc.inner)
}
/// Return the field name for a given `Field`.
///
/// Args:
/// field (Field): The field for which the name will be returned.
fn get_field_name(&self, field: &Field) -> &str {
self.inner.get_field_name(field.inner)
}
/// Returns the field option associated with a given name.
///
/// Args:
/// name (str): The name of the field that we want to retrieve.
///
/// Returns the Field if one is found, None otherwise.
fn get_field(&self, name: &str) -> Option<Field> {
let f = self.inner.get_field(name);
match f {
Some(field) => Some(Field { inner: field }),
None => None,
}
}
}
impl Schema {}

View File

@ -5,8 +5,8 @@ use pyo3::prelude::*;
use tantivy::schema;
use crate::field::Field;
use crate::schema::Schema;
use std::sync::{Arc, RwLock};
/// Tantivy has a very strict schema.
/// You need to specify in advance whether a field is indexed or not,
@ -24,8 +24,9 @@ use crate::schema::Schema;
///
/// >>> schema = builder.build()
#[pyclass]
#[derive(Clone)]
pub(crate) struct SchemaBuilder {
pub(crate) builder: Option<schema::SchemaBuilder>,
pub(crate) builder: Arc<RwLock<Option<schema::SchemaBuilder>>>,
}
const TOKENIZER: &str = "default";
@ -36,7 +37,7 @@ impl SchemaBuilder {
#[new]
fn new(obj: &PyRawObject) {
obj.init(SchemaBuilder {
builder: Some(schema::Schema::builder()),
builder: Arc::new(From::from(Some(schema::Schema::builder()))),
});
}
@ -70,9 +71,8 @@ impl SchemaBuilder {
stored: bool,
tokenizer_name: &str,
index_option: &str,
) -> PyResult<Field> {
) -> PyResult<Self> {
let builder = &mut self.builder;
let index_option = match index_option {
"position" => schema::IndexRecordOption::WithFreqsAndPositions,
"freq" => schema::IndexRecordOption::WithFreqs,
@ -94,14 +94,14 @@ impl SchemaBuilder {
options
};
if let Some(builder) = builder {
let field = builder.add_text_field(name, options);
Ok(Field { inner: field })
if let Some(builder) = builder.write().unwrap().as_mut() {
builder.add_text_field(name, options);
} else {
Err(exceptions::ValueError::py_err(
return Err(exceptions::ValueError::py_err(
"Schema builder object isn't valid anymore.",
))
));
}
Ok(self.clone())
}
/// Add a new signed integer field to the schema.
@ -131,19 +131,19 @@ impl SchemaBuilder {
stored: bool,
indexed: bool,
fast: Option<&str>,
) -> PyResult<Field> {
) -> PyResult<Self> {
let builder = &mut self.builder;
let opts = SchemaBuilder::build_int_option(stored, indexed, fast)?;
if let Some(builder) = builder {
let field = builder.add_i64_field(name, opts);
Ok(Field { inner: field })
if let Some(builder) = builder.write().unwrap().as_mut() {
builder.add_i64_field(name, opts);
} else {
Err(exceptions::ValueError::py_err(
return Err(exceptions::ValueError::py_err(
"Schema builder object isn't valid anymore.",
))
));
}
Ok(self.clone())
}
/// Add a new unsigned integer field to the schema.
@ -173,19 +173,19 @@ impl SchemaBuilder {
stored: bool,
indexed: bool,
fast: Option<&str>,
) -> PyResult<Field> {
) -> PyResult<Self> {
let builder = &mut self.builder;
let opts = SchemaBuilder::build_int_option(stored, indexed, fast)?;
if let Some(builder) = builder {
let field = builder.add_u64_field(name, opts);
Ok(Field { inner: field })
if let Some(builder) = builder.write().unwrap().as_mut() {
builder.add_u64_field(name, opts);
} else {
Err(exceptions::ValueError::py_err(
return Err(exceptions::ValueError::py_err(
"Schema builder object isn't valid anymore.",
))
));
}
Ok(self.clone())
}
/// Add a new date field to the schema.
@ -215,35 +215,35 @@ impl SchemaBuilder {
stored: bool,
indexed: bool,
fast: Option<&str>,
) -> PyResult<Field> {
) -> PyResult<Self> {
let builder = &mut self.builder;
let opts = SchemaBuilder::build_int_option(stored, indexed, fast)?;
if let Some(builder) = builder {
let field = builder.add_date_field(name, opts);
Ok(Field { inner: field })
if let Some(builder) = builder.write().unwrap().as_mut() {
builder.add_date_field(name, opts);
} else {
Err(exceptions::ValueError::py_err(
return Err(exceptions::ValueError::py_err(
"Schema builder object isn't valid anymore.",
))
));
}
Ok(self.clone())
}
/// Add a Facet field to the schema.
/// Args:
/// name (str): The name of the field.
fn add_facet_field(&mut self, name: &str) -> PyResult<Field> {
fn add_facet_field(&mut self, name: &str) -> PyResult<Self> {
let builder = &mut self.builder;
if let Some(builder) = builder {
let field = builder.add_facet_field(name);
Ok(Field { inner: field })
if let Some(builder) = builder.write().unwrap().as_mut() {
builder.add_facet_field(name);
} else {
Err(exceptions::ValueError::py_err(
return Err(exceptions::ValueError::py_err(
"Schema builder object isn't valid anymore.",
))
));
}
Ok(self.clone())
}
/// Add a fast bytes field to the schema.
@ -254,17 +254,17 @@ impl SchemaBuilder {
///
/// Args:
/// name (str): The name of the field.
fn add_bytes_field(&mut self, name: &str) -> PyResult<Field> {
fn add_bytes_field(&mut self, name: &str) -> PyResult<Self> {
let builder = &mut self.builder;
if let Some(builder) = builder {
let field = builder.add_bytes_field(name);
Ok(Field { inner: field })
if let Some(builder) = builder.write().unwrap().as_mut() {
builder.add_bytes_field(name);
} else {
Err(exceptions::ValueError::py_err(
return Err(exceptions::ValueError::py_err(
"Schema builder object isn't valid anymore.",
))
));
}
Ok(self.clone())
}
/// Finalize the creation of a Schema.
@ -272,7 +272,7 @@ impl SchemaBuilder {
/// Returns a Schema object. After this is called the SchemaBuilder cannot
/// be used anymore.
fn build(&mut self) -> PyResult<Schema> {
let builder = self.builder.take();
let builder = self.builder.write().unwrap().take();
if let Some(builder) = builder {
let schema = builder.build();
Ok(Schema { inner: schema })

View File

@ -1,12 +1,11 @@
#![allow(clippy::new_ret_no_self)]
use pyo3::exceptions;
use pyo3::prelude::*;
use tantivy as tv;
use crate::document::Document;
use crate::query::Query;
use crate::to_pyerr;
use pyo3::prelude::*;
use pyo3::{exceptions, PyObjectProtocol};
use tantivy as tv;
/// Tantivy's Searcher class
///
@ -60,11 +59,11 @@ impl Searcher {
///
/// Returns the Document, raises ValueError if the document can't be found.
fn doc(&self, doc_address: &DocAddress) -> PyResult<Document> {
let ret = self.inner.doc(doc_address.into());
match ret {
Ok(doc) => Ok(Document { inner: doc }),
Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
}
let doc = self.inner.doc(doc_address.into()).map_err(to_pyerr)?;
let named_doc = self.inner.schema().to_named_doc(&doc);
Ok(Document {
field_values: named_doc.0,
})
}
}
@ -132,3 +131,14 @@ impl TopDocs {
Ok(())
}
}
#[pyproto]
impl PyObjectProtocol for Searcher {
    // Python `repr()` support: summarizes the searcher by the number of
    // documents it reports and by how many segment readers it holds.
    fn __repr__(&self) -> PyResult<String> {
        let num_docs = self.inner.num_docs();
        let num_segments = self.inner.segment_readers().len();
        let summary = format!(
            "Searcher(num_docs={}, num_segments={})",
            num_docs, num_segments
        );
        Ok(summary)
    }
}

View File

@ -1,36 +1,34 @@
import json
import tantivy
import pytest
from tantivy import Document, Index, SchemaBuilder, Schema
def schema():
    """Build the schema shared by the tests.

    It declares a stored text field named "title" and a text field named
    "body" (added with default options), then finalizes the builder.
    """
    return (
        SchemaBuilder()
        .add_text_field("title", stored=True)
        .add_text_field("body")
        .build()
    )
@pytest.fixture(scope="class")
def ram_index():
# assume all tests will use the same documents for now
# other methods may set up function-local indexes
builder = tantivy.SchemaBuilder()
title = builder.add_text_field("title", stored=True)
body = builder.add_text_field("body")
schema = builder.build()
index = tantivy.Index(schema)
index = Index(schema())
writer = index.writer()
# 2 ways of adding documents
# 1
doc = tantivy.Document()
doc = Document()
# create a document instance
# add field-value pairs
doc.add_text(title, "The Old Man and the Sea")
doc.add_text(body, ("He was an old man who fished alone in a skiff in"
doc.add_text("title", "The Old Man and the Sea")
doc.add_text("body", ("He was an old man who fished alone in a skiff in"
"the Gulf Stream and he had gone eighty-four days "
"now without taking a fish."))
writer.add_document(doc)
# 2 use the built-in json support
# keys need to coincide with field names
doc = schema.parse_document(json.dumps({
doc = Document.from_dict({
"title": "Of Mice and Men",
"body": ("A few miles south of Soledad, the Salinas River drops "
"in close to the hillside bank and runs deep and "
@ -44,149 +42,149 @@ def ram_index():
"junctures the debris of the winters flooding; and "
"sycamores with mottled, white, recumbent limbs and "
"branches that arch over the pool")
}))
})
writer.add_document(doc)
doc = schema.parse_document(json.dumps({
writer.add_json("""{
"title": ["Frankenstein", "The Modern Prometheus"],
"body": ("You will rejoice to hear that no disaster has "
"accompanied the commencement of an enterprise which you "
"have regarded with such evil forebodings. I arrived "
"here yesterday, and my first task is to assure my dear "
"sister of my welfare and increasing confidence in the "
"success of my undertaking.")
}))
"body": "You will rejoice to hear that no disaster has accompanied the commencement of an enterprise which you have regarded with such evil forebodings. I arrived here yesterday, and my first task is to assure my dear sister of my welfare and increasing confidence in the success of my undertaking."
}""")
writer.add_document(doc)
writer.commit()
index.reload()
return index
reader = index.reader()
searcher = reader.searcher()
index = index
schema = schema
default_args = [title, body]
ret = (index, searcher, schema, default_args, title, body)
return ret
class TestClass(object):
def test_simple_search(self, ram_index):
index, searcher, schema, default_args, title, body = ram_index
query_parser = tantivy.QueryParser.for_index(index, default_args)
query = query_parser.parse_query("sea whale")
index = ram_index
query = index.parse_query("sea whale", ["title", "body"])
top_docs = tantivy.TopDocs(10)
result = searcher.search(query, top_docs)
print(result)
result = index.searcher().search(query, top_docs)
assert len(result) == 1
_, doc_address = result[0]
searched_doc = searcher.doc(doc_address)
assert searched_doc.get_first(title) == "The Old Man and the Sea"
def test_doc(self):
builder = tantivy.SchemaBuilder()
title = builder.add_text_field("title", stored=True)
doc = tantivy.Document()
assert doc.is_empty
doc.add_text(title, "The Old Man and the Sea")
assert doc.get_first(title) == "The Old Man and the Sea"
assert doc.len == 1
assert not doc.is_empty
searched_doc = index.searcher().doc(doc_address)
assert searched_doc["title"] == ["The Old Man and the Sea"]
def test_and_query(self, ram_index):
index, searcher, schema, default_args, title, body = ram_index
q_parser = tantivy.QueryParser.for_index(index, default_args)
index = ram_index
query = index.parse_query("title:men AND body:summer", default_field_names=["title", "body"])
# look for an intersection of documents
query = q_parser.parse_query("title:men AND body:summer")
top_docs = tantivy.TopDocs(10)
searcher = index.searcher()
result = searcher.search(query, top_docs)
print(result)
# summer isn't present
assert len(result) == 0
query = q_parser.parse_query("title:men AND body:winter")
query = index.parse_query("title:men AND body:winter", ["title", "body"])
result = searcher.search(query, top_docs)
assert len(result) == 1
def test_and_query_parser_default_fields(self, ram_index):
query = ram_index.parse_query("winter", default_field_names=["title"])
assert repr(query) == """Query(TermQuery(Term(field=0,bytes=[119, 105, 110, 116, 101, 114])))"""
def test_and_query_parser_default_fields_undefined(self, ram_index):
query = ram_index.parse_query("winter")
assert repr(query) == "Query(BooleanQuery { subqueries: [" \
"(Should, TermQuery(Term(field=0,bytes=[119, 105, 110, 116, 101, 114]))), " \
"(Should, TermQuery(Term(field=1,bytes=[119, 105, 110, 116, 101, 114])))] " \
"})"
def test_query_errors(self, ram_index):
index, searcher, schema, default_args, title, body = ram_index
q_parser = tantivy.QueryParser.for_index(index, default_args)
index = ram_index
# no "bod" field
with pytest.raises(ValueError):
q_parser.parse_query("bod:title")
index.parse_query("bod:men", ["title", "body"])
@pytest.fixture(scope="class")
def disk_index():
builder = tantivy.SchemaBuilder()
title = builder.add_text_field("title", stored=True)
body = builder.add_text_field("body")
default_args = [title, body]
schema = builder.build()
schema = schema
index = tantivy.Index(schema)
path_to_index = "tests/test_index/"
return index, path_to_index, schema, default_args, title, body
PATH_TO_INDEX = "tests/test_index/"
class TestFromDiskClass(object):
def test_exists(self, disk_index):
def test_exists(self):
# prefer to keep it separate in case anyone deletes this
# runs from the root directory
index, path_to_index, _, _, _, _ = disk_index
assert index.exists(path_to_index)
assert Index.exists(PATH_TO_INDEX)
def test_opens_from_dir(self, disk_index):
_, path_to_index, schema, _, _, _ = disk_index
tantivy.Index(schema, path_to_index)
def test_opens_from_dir(self):
index = Index(schema(), PATH_TO_INDEX, reuse=True)
assert index.searcher().num_docs == 3
def test_create_readers(self, disk_index):
_, path_to_index, schema, _, _, _ = disk_index
idx = tantivy.Index(schema, path_to_index)
reload_policy = "OnCommit" # or "Manual"
assert idx.reader(reload_policy, 4)
assert idx.reader("Manual", 4)
def test_create_readers(self):
    """Check when committed documents become visible to new searchers.

    Under the default reload setting (manual, per the original notes in this
    test) a commit is only observed after ``idx.reload()``. After
    ``idx.config_reader("OnCommit", 4)`` the index is expected to pick up a
    new commit on its own within a few seconds.

    Raises:
        AssertionError: if a document count does not match, or if the
            automatic reload is not observed within about 5 seconds.
    """
    import time

    idx = Index(schema())
    assert idx.searcher().num_docs == 0

    writer = idx.writer(30000000, 1)
    writer.add_document(Document(title="mytitle", body="mybody"))
    writer.commit()
    # Manual is the default setting: changes are reflected only once the
    # index is manually reloaded, so the commit is not visible yet.
    assert idx.searcher().num_docs == 0
    idx.reload()
    assert idx.searcher().num_docs == 1

    idx.config_reader("OnCommit", 4)
    writer.add_document(Document(title="mytitle2", body="mybody2"))
    writer.commit()

    # The index should now be reloaded automatically.
    # Poll for at most 5s (50 x 0.1s) for that to happen.
    for _ in range(50):
        time.sleep(0.1)
        if idx.searcher().num_docs == 2:
            return
    # Explicit raise instead of `assert False`: it carries a diagnostic and
    # is not stripped when Python runs with -O.
    raise AssertionError(
        "index was not automatically reloaded within 5s of the commit"
    )
def test_create_writer_and_reader(self, disk_index):
_, path_to_index, schema, default_args, title, body = disk_index
idx = tantivy.Index(schema, path_to_index)
writer = idx.writer()
reload_policy = "OnCommit" # or "Manual"
reader = idx.reader(reload_policy, 4)
class TestSearcher(object):
def test_searcher_repr(self, ram_index):
assert repr(ram_index.searcher()) == "Searcher(num_docs=3, num_segments=1)"
# check against the opstamp in the meta file
meta_fname = "meta.json"
with open("{}{}".format(path_to_index, meta_fname)) as f:
json_file = json.load(f)
expected_last_opstamp = json_file["opstamp"]
# ASSUMPTION
# We haven't had any deletes in the index
# so max_doc per index coincides with the value of `num_docs`
# summing them in all segments, gives the number of documents
expected_num_docs = sum([segment["max_doc"]
for segment in json_file["segments"]])
assert writer.commit_opstamp == expected_last_opstamp
q_parser = tantivy.QueryParser.for_index(idx, default_args)
# get all documents
query = q_parser.parse_query("*")
top_docs = tantivy.TopDocs(10)
class TestDocument(object):
docs = reader.searcher().search(query, top_docs)
for (_score, doc_addr) in docs:
print(reader.searcher().doc(doc_addr))
assert expected_num_docs == len(docs)
def test_document(self):
    """Fields passed as keyword arguments are readable by key, by first value and as a dict.

    A scalar value ("Bill") is expected back as a one-element list, and a
    list value is expected back unchanged.
    """
    fields = {"name": "Bill", "reference": [1, 2]}
    document = tantivy.Document(**fields)
    assert document["reference"] == [1, 2]
    assert document["name"] == ["Bill"]
    assert document.get_first("name") == "Bill"
    assert document.get_first("reference") == 1
    assert document.to_dict() == {"name": ["Bill"], "reference": [1, 2]}
def test_document_with_date(self):
    """A datetime given as a field value is read back equal to the original."""
    from datetime import datetime

    when = datetime(2019, 8, 12, 13, 0, 0)
    document = tantivy.Document(name="Bill", date=when)
    stored_dates = document["date"]
    assert stored_dates[0] == when
def test_document_repr(self):
    """The repr of a document lists each field name with its values."""
    expected_repr = "Document(name=[Bill],reference=[1,2])"
    document = tantivy.Document(name="Bill", reference=[1, 2])
    assert repr(document) == expected_repr
def test_document_with_facet(self):
    """Facets can be attached with ``add_facet`` or as a keyword argument.

    Also checks that an escaped slash inside a facet segment is kept as part
    of that segment by ``to_path`` and preserved by ``to_path_str`` and repr.
    """
    # Plain two-segment facet added through add_facet.
    simple_doc = tantivy.Document()
    simple_facet = tantivy.Facet.from_string("/europe/france")
    simple_doc.add_facet("facet", simple_facet)
    assert simple_doc["facet"][0].to_path() == ['europe', 'france']

    # Facet whose first segment contains an escaped slash.
    escaped_doc = tantivy.Document()
    escaped_facet = tantivy.Facet.from_string("/asia\\/oceania/fiji")
    escaped_doc.add_facet("facet", escaped_facet)
    assert escaped_doc["facet"][0].to_path() == ['asia/oceania', 'fiji']
    assert escaped_doc["facet"][0].to_path_str() == "/asia\\/oceania/fiji"
    assert repr(escaped_doc["facet"][0]) == "Facet(/asia\\/oceania/fiji)"

    # Same facet supplied directly to the constructor.
    kwarg_doc = tantivy.Document(facet=escaped_facet)
    assert kwarg_doc["facet"][0].to_path() == ['asia/oceania', 'fiji']
def test_document_error(self):
with pytest.raises(ValueError):
tantivy.Document(name={})