API simplification.

See #9
2019-08-02 20:23:10 +09:00 · 2019-08-02 20:23:10 +09:00 · 0498f941b0
commit 0498f941b0
parent b1b3689c55
13 changed files with 615 additions and 548 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -12,6 +12,7 @@ crate-type = ["cdylib"]
 [dependencies]
 chrono = "0.4"
 tantivy = { git = "https://github.com/tantivy-search/tantivy" }
 itertools = "0.8"
 [dependencies.pyo3]
 version = "0.7.0"
--- a/README.md
+++ b/README.md
@ -24,35 +24,37 @@ needs to be built. After that documents can be added to the index and a reader
 can be created to search the index.
 ```python
-    builder = tantivy.SchemaBuilder()
+import tantivy
-    title = builder.add_text_field("title", stored=True)
+# Declaring our schema.
-    body = builder.add_text_field("body")
+schema_builder = tantivy.SchemaBuilder()
 schema_builder.add_text_field("title", stored=True)
 schema_builder.add_text_field("body", stored=True)
 schema = schema_builder.build()
-    schema = builder.build()
+# Creating our index (in memory, but filesystem is available too)
 index = tantivy.Index(schema)
    writer = index.writer()
-    doc = tantivy.Document()
+# Adding one document.
-    doc.add_text(title, "The Old Man and the Sea")
+writer = index.writer()
-    doc.add_text(body, ("He was an old man who fished alone in a skiff in"
+writer.add_document({
-                        "the Gulf Stream and he had gone eighty-four days "
+    "title": "The Old Man and the Sea",
-                        "now without taking a fish."))
+    "body": """He was an old man who fished alone in a skiff in
-    writer.add_document(doc)
+               the Gulf Stream and he had gone eighty-four days 
               now without taking a fish."""
 })
 # ... and committing
 writer.commit()
    reader = index.reader()
    searcher = reader.searcher()
-    query_parser = tantivy.QueryParser.for_index(index, [title, body])
+# Reload the index to ensure it points to the last commit.
-    query = query_parser.parse_query("sea whale")
+index.reload();
 searcher = index.searcher()
 query = index.parse_query("sea whale", ["title", "body"])
 top_docs = tantivy.TopDocs(3)
-    top_docs = tantivy.TopDocs(10)
+(best_score, best_doc_address) = searcher.search(query, nhits=3)[0]
-    result = searcher.search(query, top_docs)
+best_doc = searcher.doc(best_doc_address) 
-
+assert best_doc["title"] == ["The Old Man and the Sea"]
    _, doc_address = result[0]
    searched_doc = searcher.doc(doc_address)
    assert searched_doc.get_first(title) == "The Old Man and the Sea"
 ```
--- a/src/document.rs
+++ b/src/document.rs
@ -1,40 +1,220 @@
 #![allow(clippy::new_ret_no_self)]
 #![allow(clippy::wrong_self_convention)]
 use itertools::Itertools;
 use pyo3::prelude::*;
-use pyo3::types::PyDateTime;
+use pyo3::types::{PyAny, PyDateTime, PyDict, PyList, PyTuple};
 use pyo3::types::{PyDateAccess, PyTimeAccess};
 use chrono::offset::TimeZone;
-use chrono::Utc;
+use chrono::{Datelike, Timelike, Utc};
 use tantivy as tv;
 use crate::facet::Facet;
-use crate::field::{Field, FieldValue};
+use crate::to_pyerr;
 use pyo3::{PyMappingProtocol, PyObjectProtocol};
 use std::collections::BTreeMap;
 use std::fmt;
 use tantivy::schema::Value;
 /// Converts a tantivy `Value` into the corresponding Python object.
 ///
 /// Dates are rebuilt component by component into a Python `datetime`, with
 /// `None` passed as the trailing (tzinfo) argument. Building that `datetime`
 /// is the only step of the conversion that can fail.
 fn value_to_py(py: Python, value: &Value) -> PyResult<PyObject> {
    let object = match value {
        Value::Str(s) => s.into_object(py),
        Value::U64(n) => n.into_object(py),
        Value::I64(n) => n.into_object(py),
        Value::F64(x) => x.into_object(py),
        Value::Bytes(raw) => raw.to_object(py),
        Value::Date(date) => {
            let py_datetime = PyDateTime::new(
                py,
                date.year(),
                date.month() as u8,
                date.day() as u8,
                date.hour() as u8,
                date.minute() as u8,
                date.second() as u8,
                date.timestamp_subsec_micros(),
                None,
            )?;
            py_datetime.into_object(py)
        }
        Value::Facet(facet) => Facet {
            inner: facet.clone(),
        }
        .into_object(py),
    };
    Ok(object)
 }
 /// Renders a single `Value` as text.
 ///
 /// Strings, numbers and facets use their plain textual form; bytes and dates
 /// use their `Debug` representation.
 fn value_to_string(value: &Value) -> String {
    match value {
        Value::Str(s) => s.to_owned(),
        Value::U64(n) => n.to_string(),
        Value::I64(n) => n.to_string(),
        Value::F64(x) => x.to_string(),
        Value::Bytes(raw) => format!("{:?}", raw),
        Value::Date(date) => format!("{:?}", date),
        Value::Facet(facet) => facet.to_string(),
    }
 }
 /// Tantivy's Document is the object that can be indexed and then searched for.
 ///
 /// Documents are fundamentally a collection of unordered tuples
-/// (field, value). In this list, one field may appear more than once.
+/// (field_name, value). In this list, one field may appear more than once.
 ///
 /// Example:
 ///     >>> doc = tantivy.Document()
-///     >>> doc.add_text(title, "The Old Man and the Sea")
+///     >>> doc.add_text("title", "The Old Man and the Sea")
-///     >>> doc.add_text(body, ("He was an old man who fished alone in a "
+///     >>> doc.add_text("body", ("He was an old man who fished alone in a "
 ///                             "skiff in the Gulf Stream and he had gone "
 ///                             "eighty-four days now without taking a fish."))
 ///
 /// For simplicity, it is also possible to build a `Document` by passing the field
 /// values directly as constructor arguments.
 ///
 /// Example:
 ///     >>> doc = tantivy.Document(title=["The Old Man and the Sea"], body=["..."])
 ///
 /// As syntactic sugar, tantivy also allows the user to pass a single value
 /// if there is only one. In other words, the following is also legal.
 ///
 /// Example:
 ///     >>> doc = tantivy.Document(title="The Old Man and the Sea", body="...")
 #[pyclass]
 #[derive(Default)]
 pub(crate) struct Document {
-    pub(crate) inner: tv::Document,
+    pub(crate) field_values: BTreeMap<String, Vec<tv::schema::Value>>,
 }
 impl fmt::Debug for Document {
    /// Formats the document as `Document(field=[v1,v2],other=[...])`.
    ///
    /// For each field, the comma-joined values are cut down to at most 10
    /// bytes so that the representation stays short.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let doc_str = self
            .field_values
            .iter()
            .map(|(field_name, field_values)| {
                let mut values_str =
                    field_values.iter().map(value_to_string).join(",");
                // `String::truncate` panics when the cut lands inside a
                // multi-byte UTF-8 character, so back up to the closest
                // char boundary at or before byte 10. Offset 0 is always a
                // boundary, which guarantees the loop terminates.
                let mut cut = values_str.len().min(10);
                while !values_str.is_char_boundary(cut) {
                    cut -= 1;
                }
                values_str.truncate(cut);
                format!("{}=[{}]", field_name, values_str)
            })
            .join(",");
        write!(f, "Document({})", doc_str)
    }
 }
 /// Appends `value` to the values stored under `field_name`, creating the
 /// entry first when the field has no value yet.
 fn add_value<T>(doc: &mut Document, field_name: String, value: T)
 where
    Value: From<T>,
 {
    let values = doc.field_values.entry(field_name).or_default();
    values.push(Value::from(value));
 }
 /// Converts a Python object into a tantivy `Value`.
 ///
 /// The conversions are attempted in a fixed order and the first one that
 /// succeeds wins: `str`, then signed 64-bit integer, then float, then
 /// `datetime`, then `Facet`. The date and time components of a `datetime`
 /// are copied as-is into a UTC date.
 ///
 /// Returns an error (built with `to_pyerr`) if none of the conversions apply.
 fn extract_value(any: &PyAny) -> PyResult<Value> {
    if let Ok(text) = any.extract::<String>() {
        Ok(Value::Str(text))
    } else if let Ok(integer) = any.extract::<i64>() {
        Ok(Value::I64(integer))
    } else if let Ok(float) = any.extract::<f64>() {
        Ok(Value::F64(float))
    } else if let Ok(dt) = any.downcast_ref::<PyDateTime>() {
        let date =
            Utc.ymd(dt.get_year(), dt.get_month().into(), dt.get_day().into());
        let datetime = date.and_hms_micro(
            dt.get_hour().into(),
            dt.get_minute().into(),
            dt.get_second().into(),
            dt.get_microsecond(),
        );
        Ok(Value::Date(datetime))
    } else if let Ok(py_facet) = any.downcast_ref::<Facet>() {
        Ok(Value::Facet(py_facet.inner.clone()))
    } else {
        Err(to_pyerr(format!("Value unsupported {:?}", any)))
    }
 }
 /// Converts either a Python list of values or a single value into a
 /// `Vec<Value>`.
 ///
 /// A list is converted element by element and the first failing element
 /// aborts the conversion; anything that is not a list is treated as one value.
 fn extract_value_single_or_list(any: &PyAny) -> PyResult<Vec<Value>> {
    match any.downcast_ref::<PyList>() {
        Ok(list) => list.iter().map(extract_value).collect(),
        Err(_) => extract_value(any).map(|single| vec![single]),
    }
 }
 #[pymethods]
 impl Document {
    #[new]
-    fn new(obj: &PyRawObject) {
+    #[args(kwargs = "**")]
-        obj.init(Document {
+    fn new(obj: &PyRawObject, kwargs: Option<&PyDict>) -> PyResult<()> {
-            inner: tv::Document::default(),
+        let mut document = Document::default();
-        });
+        if let Some(field_dict) = kwargs {
            document.extend(field_dict)?;
        }
        obj.init(document);
        Ok(())
    }
    /// Merges the entries of `py_dict` into the document.
    ///
    /// Each key is a field name and each value is either a single value or a
    /// list of values. A field that is already present in the document has
    /// its values replaced by the ones found in `py_dict`.
    ///
    /// The entries are first gathered in a temporary map, so the document is
    /// left untouched if any key or value fails to convert.
    fn extend(&mut self, py_dict: &PyDict) -> PyResult<()> {
        let mut incoming: BTreeMap<String, Vec<tv::schema::Value>> =
            BTreeMap::new();
        for item in py_dict.items() {
            // Items that are not 2-tuples are skipped.
            let pair = match item.downcast_ref::<PyTuple>() {
                Ok(tuple) if tuple.len() == 2 => tuple,
                _ => continue,
            };
            let field_name: String = pair.get_item(0).extract()?;
            let values = extract_value_single_or_list(pair.get_item(1))?;
            incoming.insert(field_name, values);
        }
        self.field_values.extend(incoming);
        Ok(())
    }
    #[staticmethod]
    fn from_dict(py_dict: &PyDict) -> PyResult<Document> {
        let mut field_values: BTreeMap<String, Vec<tv::schema::Value>> =
            BTreeMap::new();
        for key_value_any in py_dict.items() {
            if let Ok(key_value) = key_value_any.downcast_ref::<PyTuple>() {
                if key_value.len() != 2 {
                    continue;
                }
                let key: String = key_value.get_item(0).extract()?;
                let value_list =
                    extract_value_single_or_list(key_value.get_item(1))?;
                field_values.insert(key, value_list);
            }
        }
        Ok(Document { field_values })
    }
    /// Returns a dictionary holding the field values of the document.
    ///
    /// In tantivy, a `Document` can hold multiple values for a single field.
    /// For this reason, the dictionary associates a list of values with every
    /// field name.
    fn to_dict(&self, py: Python) -> PyResult<PyObject> {
        let dict = PyDict::new(py);
        for (field_name, values) in self.field_values.iter() {
            let mut py_values: Vec<PyObject> = Vec::with_capacity(values.len());
            for value in values {
                py_values.push(value_to_py(py, value)?);
            }
            dict.set_item(field_name, py_values)?;
        }
        Ok(dict.into())
    }
    /// Add a text value to the document.
@ -42,8 +222,8 @@ impl Document {
    /// Args:
    ///     field_name (str): The name of the field for which we are adding the text.
    ///     text (str): The text that will be added to the document.
-    fn add_text(&mut self, field: &Field, text: &str) {
+    fn add_text(&mut self, field_name: String, text: &str) {
-        self.inner.add_text(field.inner, text);
+        add_value(self, field_name, text);
    }
    /// Add an unsigned integer value to the document.
@ -51,8 +231,8 @@ impl Document {
    /// Args:
    ///     field_name (str): The name of the field for which we are adding the integer.
    ///     value (int): The integer that will be added to the document.
-    fn add_unsigned(&mut self, field: &Field, value: u64) {
+    fn add_unsigned(&mut self, field_name: String, value: u64) {
-        self.inner.add_u64(field.inner, value);
+        add_value(self, field_name, value);
    }
    /// Add a signed integer value to the document.
@ -60,8 +240,8 @@ impl Document {
    /// Args:
    ///     field (Field): The field for which we are adding the integer.
    ///     value (int): The integer that will be added to the document.
-    fn add_integer(&mut self, field: &Field, value: i64) {
+    fn add_integer(&mut self, field_name: String, value: i64) {
-        self.inner.add_i64(field.inner, value);
+        add_value(self, field_name, value);
    }
    /// Add a date value to the document.
@ -69,7 +249,7 @@ impl Document {
    /// Args:
    ///     field_name (str): The name of the field for which we are adding the date.
    ///     value (datetime): The date that will be added to the document.
-    fn add_date(&mut self, field: &Field, value: &PyDateTime) {
+    fn add_date(&mut self, field_name: String, value: &PyDateTime) {
        let datetime = Utc
            .ymd(
                value.get_year(),
@ -82,16 +262,15 @@ impl Document {
                value.get_second().into(),
                value.get_microsecond(),
            );
-
+        add_value(self, field_name, datetime);
        self.inner.add_date(field.inner, &datetime);
    }
    /// Add a facet value to the document.
    /// Args:
    ///     field_name (str): The name of the field for which we are adding the facet.
    ///     facet (Facet): The Facet that will be added to the document.
-    fn add_facet(&mut self, field: &Field, value: &Facet) {
+    fn add_facet(&mut self, field_name: String, facet: &Facet) {
-        self.inner.add_facet(field.inner, value.inner.clone());
+        add_value(self, field_name, facet.inner.clone());
    }
    /// Add a bytes value to the document.
@ -99,20 +278,20 @@ impl Document {
    /// Args:
    ///     field_name (str): The name of the field for which we are adding the bytes.
    ///     bytes (bytes): The bytes that will be added to the document.
-    fn add_bytes(&mut self, field: &Field, value: Vec<u8>) {
+    fn add_bytes(&mut self, field_name: String, bytes: Vec<u8>) {
-        self.inner.add_bytes(field.inner, value);
+        add_value(self, field_name, bytes);
    }
    /// Returns the number of fields that have been added to the document.
    #[getter]
-    fn len(&self) -> usize {
+    fn num_fields(&self) -> usize {
-        self.inner.len()
+        self.field_values.len()
    }
    /// True if the document is empty, False otherwise.
    #[getter]
    fn is_empty(&self) -> bool {
-        self.inner.is_empty()
+        self.field_values.is_empty()
    }
    /// Get the first value associated with the given field.
@ -122,9 +301,17 @@ impl Document {
    ///
    /// Returns the value if one is found, otherwise None.
    /// The type of the value depends on the field.
-    fn get_first(&self, py: Python, field: &Field) -> Option<PyObject> {
+    fn get_first(
-        let value = self.inner.get_first(field.inner)?;
+        &self,
-        FieldValue::value_to_py(py, value)
+        py: Python,
        fieldname: &str,
    ) -> PyResult<Option<PyObject>> {
        if let Some(value) = self.iter_values_for_field(fieldname).next() {
            let py_value = value_to_py(py, value)?;
            Ok(Some(py_value))
        } else {
            Ok(None)
        }
    }
    /// Get all the values associated with the given field.
@ -134,21 +321,37 @@ impl Document {
    ///
    /// Returns a list of values.
    /// The type of the value depends on the field.
-    fn get_all(&self, py: Python, field: &Field) -> Vec<PyObject> {
+    fn get_all(&self, py: Python, field_name: &str) -> PyResult<Vec<PyObject>> {
-        let values = self.inner.get_all(field.inner);
+        self.iter_values_for_field(field_name)
-        values
+            .map(|value| value_to_py(py, value))
-            .iter()
+            .collect::<PyResult<Vec<_>>>()
-            .map(|&v| FieldValue::value_to_py(py, v))
+    }
            .filter_map(|x| x)
            .collect()
 }
-    /// Get all the fields and values contained in the document.
+impl Document {
-    fn field_values(&self, py: Python) -> Vec<FieldValue> {
+    fn iter_values_for_field<'a>(
-        let field_values = self.inner.field_values();
+        &'a self,
-        field_values
+        field: &str,
-            .iter()
+    ) -> impl Iterator<Item = &'a Value> + 'a {
-            .map(|v| FieldValue::field_value_to_py(py, v))
+        self.field_values
-            .collect()
+            .get(field)
            .into_iter()
            .flat_map(|values| values.iter())
    }
 }
 #[pyproto]
 impl PyMappingProtocol for Document {
    // Backs `doc[field_name]`: acquires the GIL and delegates to `get_all`.
    fn __getitem__(&self, field_name: &str) -> PyResult<Vec<PyObject>> {
        let gil_guard = Python::acquire_gil();
        self.get_all(gil_guard.python(), field_name)
    }
 }
 #[pyproto]
 impl PyObjectProtocol for Document {
    // The Python `repr()` of a document is its Rust `Debug` output.
    fn __repr__(&self) -> PyResult<String> {
        let repr = format!("{:?}", self);
        Ok(repr)
    }
 }
--- a/src/facet.rs
+++ b/src/facet.rs
@ -1,6 +1,6 @@
 use pyo3::basic::PyObjectProtocol;
 use pyo3::prelude::*;
 use pyo3::types::PyType;
 use tantivy::schema;
 /// A Facet represent a point in a given hierarchy.
@ -52,4 +52,23 @@ impl Facet {
            inner: schema::Facet::from_text(facet_string),
        }
    }
    /// Returns the list of `segments` that form the facet path.
    ///
    /// For instance `//europe/france` becomes `["europe", "france"]`.
    fn to_path(&self) -> Vec<&str> {
        let segments: Vec<&str> = self.inner.to_path();
        segments
    }
    /// Returns the string representation of the facet.
    fn to_path_str(&self) -> String {
        let path: String = self.inner.to_string();
        path
    }
 }
 #[pyproto]
 impl PyObjectProtocol for Facet {
    // Renders the facet as `Facet(<path string>)`.
    fn __repr__(&self) -> PyResult<String> {
        let path = self.to_path_str();
        Ok(format!("Facet({})", path))
    }
 }
--- a/src/field.rs
+++ b/src/field.rs
@ -1,73 +0,0 @@
 use pyo3::prelude::*;
 use pyo3::types::PyDateTime;
 use tantivy::schema;
 use crate::facet::Facet;
 /// Field is a numeric identifier that represents an entry in the Schema.
 #[pyclass]
 #[derive(Clone)]
 pub(crate) struct Field {
    pub(crate) inner: schema::Field,
 }
 /// FieldValue holds together a Field and its Value.
 #[pyclass]
 pub(crate) struct FieldValue {
    pub(crate) field: Field,
    pub(crate) value: PyObject,
 }
 #[pymethods]
 impl FieldValue {
    #[getter]
    fn field(&self) -> Field {
        self.field.clone()
    }
    #[getter]
    fn value(&self) -> &PyObject {
        &self.value
    }
 }
 impl FieldValue {
    pub(crate) fn value_to_py(
        py: Python,
        value: &schema::Value,
    ) -> Option<PyObject> {
        match value {
            schema::Value::Str(text) => Some(text.into_object(py)),
            schema::Value::U64(num) => Some(num.into_object(py)),
            schema::Value::I64(num) => Some(num.into_object(py)),
            schema::Value::F64(num) => Some(num.into_object(py)),
            schema::Value::Bytes(b) => Some(b.to_object(py)),
            schema::Value::Date(d) => {
                let date =
                    PyDateTime::from_timestamp(py, d.timestamp() as f64, None);
                match date {
                    Ok(d) => Some(d.into_object(py)),
                    Err(_e) => None,
                }
            }
            schema::Value::Facet(f) => {
                Some(Facet { inner: f.clone() }.into_object(py))
            }
        }
    }
    pub(crate) fn field_value_to_py(
        py: Python,
        field_value: &schema::FieldValue,
    ) -> FieldValue {
        let value = field_value.value();
        let field = field_value.field();
        FieldValue {
            field: Field { inner: field },
            value: FieldValue::value_to_py(py, value).unwrap(),
        }
    }
 }
--- a/src/index.rs
+++ b/src/index.rs
@ -4,60 +4,24 @@ use pyo3::exceptions;
 use pyo3::prelude::*;
 use crate::document::Document;
 use crate::query::Query;
 use crate::schema::Schema;
 use crate::searcher::Searcher;
 use crate::to_pyerr;
 use tantivy as tv;
 use tantivy::directory::MmapDirectory;
 use tantivy::schema::{Field, NamedFieldDocument};
 const RELOAD_POLICY: &str = "commit";
 /// IndexReader is the entry point to read and search the index.
 ///
 /// IndexReader controls when a new version of the index should be loaded and
 /// lends you instances of Searcher for the last loaded version.
 ///
 /// To create an IndexReader first create an Index and call the reader() method
 /// on the index object.
 #[pyclass]
 pub(crate) struct IndexReader {
    inner: tv::IndexReader,
 }
 #[pymethods]
 impl IndexReader {
    /// Update searchers so that they reflect the state of the last .commit().
    ///
    /// If you set up the the reload policy to be on 'commit' (which is the
    /// default) every commit should be rapidly reflected on your IndexReader
    /// and you should not need to call reload() at all.
    fn reload(&self) -> PyResult<()> {
        let ret = self.inner.reload();
        match ret {
            Ok(_) => Ok(()),
            Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
        }
    }
    /// Get a Searcher for the index.
    ///
    /// This method should be called every single time a search query is
    /// performed. The searchers are taken from a pool of num_searchers
    /// searchers.
    ///
    /// Returns a Searcher object, if no searcher is available this may block.
    fn searcher(&self) -> Searcher {
        let searcher = self.inner.searcher();
        Searcher { inner: searcher }
    }
 }
 /// IndexWriter is the user entry-point to add documents to the index.
 ///
 /// To create an IndexWriter first create an Index and call the writer() method
 /// on the index object.
 #[pyclass]
 pub(crate) struct IndexWriter {
-    inner: tv::IndexWriter,
+    inner_index_writer: tv::IndexWriter,
    schema: tv::schema::Schema,
 }
 #[pymethods]
@ -70,9 +34,24 @@ impl IndexWriter {
    /// by the client to align commits with its own document queue.
    /// The `opstamp` represents the number of documents that have been added
    /// since the creation of the index.
-    fn add_document(&mut self, document: &Document) -> PyResult<()> {
+    pub fn add_document(&mut self, doc: &Document) -> PyResult<u64> {
-        self.inner.add_document(document.inner.clone());
+        let named_doc = NamedFieldDocument(doc.field_values.clone());
-        Ok(())
+        let doc = self.schema.convert_named_doc(named_doc).map_err(to_pyerr)?;
        Ok(self.inner_index_writer.add_document(doc))
    }
    /// Helper for the `add_document` method, taking the document as a json
    /// string that is parsed against the writer's schema.
    ///
    /// If the indexing pipeline is full, this call may block.
    ///
    /// Returns an `opstamp`, which is an increasing integer that can be used
    /// by the client to align commits with its own document queue.
    /// The `opstamp` represents the number of documents that have been added
    /// since the creation of the index.
    ///
    /// Raises ValueError if the json string cannot be parsed into a document.
    pub fn add_json(&mut self, json: &str) -> PyResult<u64> {
        let parsed = self.schema.parse_document(json).map_err(to_pyerr)?;
        Ok(self.inner_index_writer.add_document(parsed))
    }
    /// Commits all of the pending changes
@ -84,12 +63,8 @@ impl IndexWriter {
    /// spared), it will be possible to resume indexing from this point.
    ///
    /// Returns the `opstamp` of the last document that made it in the commit.
-    fn commit(&mut self) -> PyResult<()> {
+    fn commit(&mut self) -> PyResult<u64> {
-        let ret = self.inner.commit();
+        self.inner_index_writer.commit().map_err(to_pyerr)
        match ret {
            Ok(_) => Ok(()),
            Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
        }
    }
    /// Rollback to the last commit
@ -97,23 +72,15 @@ impl IndexWriter {
    /// This cancels all of the updates that happened after the last
    /// commit. After calling rollback, the index is in the same state as it
    /// was after the last commit.
-    fn rollback(&mut self) -> PyResult<()> {
+    fn rollback(&mut self) -> PyResult<u64> {
-        let ret = self.inner.rollback();
+        self.inner_index_writer.rollback().map_err(to_pyerr)
        match ret {
            Ok(_) => Ok(()),
            Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
        }
    }
    /// Detect and removes the files that are not used by the index anymore.
    fn garbage_collect_files(&mut self) -> PyResult<()> {
-        let ret = self.inner.garbage_collect_files();
+        self.inner_index_writer
-
+            .garbage_collect_files()
-        match ret {
+            .map_err(to_pyerr)
            Ok(_) => Ok(()),
            Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
        }
    }
    /// The opstamp of the last successful commit.
@ -125,7 +92,7 @@ impl IndexWriter {
    /// for searchers.
    #[getter]
    fn commit_opstamp(&self) -> u64 {
-        self.inner.commit_opstamp()
+        self.inner_index_writer.commit_opstamp()
    }
 }
@ -142,11 +109,19 @@ impl IndexWriter {
 /// if there was a problem during the opening or creation of the index.
 #[pyclass]
 pub(crate) struct Index {
-    pub(crate) inner: tv::Index,
+    pub(crate) index: tv::Index,
    reader: tv::IndexReader,
 }
 #[pymethods]
 impl Index {
    /// Opens the index stored in the directory at `path` and creates a
    /// reader for it.
    ///
    /// Raises ValueError if the index cannot be opened or if the reader
    /// cannot be created.
    #[staticmethod]
    fn open(path: &str) -> PyResult<Index> {
        let opened = tv::Index::open_in_dir(path).map_err(to_pyerr)?;
        let index_reader = opened.reader().map_err(to_pyerr)?;
        Ok(Index {
            index: opened,
            reader: index_reader,
        })
    }
    #[new]
    #[args(reuse = true)]
    fn new(
@ -157,32 +132,20 @@ impl Index {
    ) -> PyResult<()> {
        let index = match path {
            Some(p) => {
-                let directory = MmapDirectory::open(p);
+                let directory = MmapDirectory::open(p).map_err(to_pyerr)?;
-
+                if reuse {
-                let dir = match directory {
+                    tv::Index::open_or_create(directory, schema.inner.clone())
                    Ok(d) => d,
                    Err(e) => {
                        return Err(exceptions::OSError::py_err(e.to_string()))
                    }
                };
                let i = if reuse {
                    tv::Index::open_or_create(dir, schema.inner.clone())
                } else {
-                    tv::Index::create(dir, schema.inner.clone())
+                    tv::Index::create(directory, schema.inner.clone())
                };
                match i {
                    Ok(index) => index,
                    Err(e) => {
                        return Err(exceptions::OSError::py_err(e.to_string()))
                    }
                }
                .map_err(to_pyerr)?
            }
            None => tv::Index::create_in_ram(schema.inner.clone()),
        };
-        obj.init(Index { inner: index });
+        let reader = index.reader().map_err(to_pyerr)?;
        println!("reader {}", reader.searcher().segment_readers().len());
        obj.init(Index { index, reader });
        Ok(())
    }
@ -206,32 +169,30 @@ impl Index {
        num_threads: usize,
    ) -> PyResult<IndexWriter> {
        let writer = match num_threads {
-            0 => self.inner.writer(heap_size),
+            0 => self.index.writer(heap_size),
-            _ => self.inner.writer_with_num_threads(num_threads, heap_size),
+            _ => self.index.writer_with_num_threads(num_threads, heap_size),
        };
        match writer {
            Ok(w) => Ok(IndexWriter { inner: w }),
            Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
        }
        .map_err(to_pyerr)?;
        let schema = self.index.schema();
        Ok(IndexWriter {
            inner_index_writer: writer,
            schema,
        })
    }
-    /// Create an IndexReader for the index.
+    /// Configure the index reader.
    ///
    /// Args:
    ///     reload_policy (str, optional): The reload policy that the
-    ///         IndexReader should use. Can be manual or OnCommit.
+    ///         IndexReader should use. Can be `Manual` or `OnCommit`.
    ///     num_searchers (int, optional): The number of searchers that the
    ///         reader should create.
    ///
    /// Returns the IndexReader on success, raises ValueError if a IndexReader
    /// couldn't be created.
    #[args(reload_policy = "RELOAD_POLICY", num_searchers = 0)]
-    fn reader(
+    fn config_reader(
-        &self,
+        &mut self,
        reload_policy: &str,
        num_searchers: usize,
-    ) -> PyResult<IndexReader> {
+    ) -> Result<(), PyErr> {
        let reload_policy = reload_policy.to_lowercase();
        let reload_policy = match reload_policy.as_ref() {
            "commit" => tv::ReloadPolicy::OnCommit,
@ -242,9 +203,7 @@ impl Index {
                "Invalid reload policy, valid choices are: 'manual' and 'OnCommit'"
            ))
        };
-
+        let builder = self.index.reader_builder();
        let builder = self.inner.reader_builder();
        let builder = builder.reload_policy(reload_policy);
        let builder = if num_searchers > 0 {
            builder.num_searchers(num_searchers)
@ -252,10 +211,13 @@ impl Index {
            builder
        };
-        let reader = builder.try_into();
+        self.reader = builder.try_into().map_err(to_pyerr)?;
-        match reader {
+        Ok(())
-            Ok(r) => Ok(IndexReader { inner: r }),
+    }
-            Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
+
    /// Returns a `Searcher` obtained from the reader held by this index.
    fn searcher(&self) -> Searcher {
        let inner = self.reader.searcher();
        Searcher { inner }
    }
@ -268,19 +230,71 @@ impl Index {
    /// Raises OSError if the directory cannot be opened.
    #[staticmethod]
    fn exists(path: &str) -> PyResult<bool> {
-        let directory = MmapDirectory::open(path);
+        let directory = MmapDirectory::open(path).map_err(to_pyerr)?;
-        let dir = match directory {
+        Ok(tv::Index::exists(&directory))
            Ok(d) => d,
            Err(e) => return Err(exceptions::OSError::py_err(e.to_string())),
        };
        Ok(tv::Index::exists(&dir))
    }
    /// The schema of the current index.
    #[getter]
    fn schema(&self) -> Schema {
-        let schema = self.inner.schema();
+        let schema = self.index.schema();
        Schema { inner: schema }
    }
    /// Update searchers so that they reflect the state of the last .commit().
    ///
    /// If you set up the the reload policy to be on 'commit' (which is the
    /// default) every commit should be rapidly reflected on your IndexReader
    /// and you should not need to call reload() at all.
    fn reload(&self) -> PyResult<()> {
        self.reader.reload().map_err(to_pyerr)
    }
    /// Parse a query
    ///
    /// Args:
    ///     query (str): the query, following the tantivy query language.
    ///     default_field_names (List[str], optional): The names of the fields
    ///         used to search if no field is specified in the query. Every
    ///         name must refer to a field that is indexed in the schema.
    ///         When omitted, all the indexed fields of the schema are used.
    ///
    /// Raises ValueError if a default field is not defined in the schema or
    /// is not indexed, or if the query cannot be parsed.
    pub fn parse_query(
        &self,
        query: &str,
        default_field_names: Option<Vec<String>>,
    ) -> PyResult<Query> {
        let schema = self.index.schema();
        let mut default_fields = vec![];
        if let Some(default_field_names_vec) = default_field_names {
            for default_field_name in &default_field_names_vec {
                let field =
                    schema.get_field(default_field_name).ok_or_else(|| {
                        exceptions::ValueError::py_err(format!(
                            "Field `{}` is not defined in the schema.",
                            default_field_name
                        ))
                    })?;
                // A default field that is not indexed could never match.
                if !schema.get_field_entry(field).is_indexed() {
                    return Err(exceptions::ValueError::py_err(format!(
                        "Field `{}` is not set as indexed in the schema.",
                        default_field_name
                    )));
                }
                default_fields.push(field);
            }
        } else {
            // No explicit default: fall back to every indexed field. The
            // position of an entry in the schema is used as its field id.
            for (field_id, field_entry) in schema.fields().iter().enumerate()
            {
                if field_entry.is_indexed() {
                    default_fields.push(Field(field_id as u32));
                }
            }
        }
        let parser =
            tv::query::QueryParser::for_index(&self.index, default_fields);
        let query = parser.parse_query(query).map_err(to_pyerr)?;
        Ok(Query { inner: query })
    }
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,8 +1,8 @@
 use pyo3::exceptions;
 use pyo3::prelude::*;
 mod document;
 mod facet;
 mod field;
 mod index;
 mod query;
 mod schema;
@ -11,9 +11,7 @@ mod searcher;
 use document::Document;
 use facet::Facet;
 use field::{Field, FieldValue};
 use index::Index;
 use query::QueryParser;
 use schema::Schema;
 use schemabuilder::SchemaBuilder;
 use searcher::{DocAddress, Searcher, TopDocs};
@ -75,14 +73,14 @@ fn tantivy(_py: Python, m: &PyModule) -> PyResult<()> {
    m.add_class::<Schema>()?;
    m.add_class::<SchemaBuilder>()?;
    m.add_class::<Searcher>()?;
    m.add_class::<Index>()?;
    m.add_class::<QueryParser>()?;
    m.add_class::<Document>()?;
    m.add_class::<Index>()?;
    m.add_class::<DocAddress>()?;
    m.add_class::<TopDocs>()?;
    m.add_class::<Field>()?;
    m.add_class::<FieldValue>()?;
    m.add_class::<Facet>()?;
    Ok(())
 }
 /// Converts any value that can be rendered as a string into a Python
 /// `ValueError` carrying that string as its message.
 pub(crate) fn to_pyerr<E>(err: E) -> PyErr
 where
    E: ToString,
 {
    let message = err.to_string();
    exceptions::ValueError::py_err(message)
 }
--- a/src/query.rs
+++ b/src/query.rs
@ -1,70 +1,16 @@
 use pyo3::exceptions;
 use pyo3::prelude::*;
-use pyo3::types::PyType;
+use pyo3::PyObjectProtocol;
 use tantivy as tv;
 use crate::field::Field;
 use crate::index::Index;
 /// Tantivy's Query
 #[pyclass]
 pub(crate) struct Query {
    pub(crate) inner: Box<dyn tv::query::Query>,
 }
-/// Tantivy's Query parser
+#[pyproto]
-#[pyclass]
+impl PyObjectProtocol for Query {
-pub(crate) struct QueryParser {
+    fn __repr__(&self) -> PyResult<String> {
-    inner: tv::query::QueryParser,
+        Ok(format!("Query({:?})", self.inner))
 }
 #[pymethods]
 impl QueryParser {
    /// Build a QueryParser bound to an Index.
    ///
    /// Args:
    ///     index (Index): The index the parsed queries will target.
    ///     default_fields (List[Field]): Fields searched when the query
    ///         string does not name a field.
    ///
    /// Returns the QueryParser.
    #[classmethod]
    fn for_index(
        _cls: &PyType,
        index: &Index,
        default_fields: Vec<&Field>,
    ) -> PyResult<QueryParser> {
        // Unwrap the Python-facing wrappers into tantivy's own field handles.
        let mut tv_fields: Vec<tv::schema::Field> =
            Vec::with_capacity(default_fields.len());
        for field in &default_fields {
            tv_fields.push(field.inner);
        }
        let inner = tv::query::QueryParser::for_index(&index.inner, tv_fields);
        Ok(QueryParser { inner })
    }
    /// Turn a query string into a Query usable by a searcher.
    ///
    /// Args:
    ///     query (str): The query string to parse.
    ///
    /// Returns the parsed Query object. Raises ValueError if the query
    /// string could not be parsed.
    fn parse_query(&self, query: &str) -> PyResult<Query> {
        self.inner
            .parse_query(query)
            .map(|inner| Query { inner })
            .map_err(|e| exceptions::ValueError::py_err(e.to_string()))
    }
    /// Make conjunction the default way of combining query terms.
    ///
    /// By default, the query `happy tax payer` is equivalent to
    /// `happy OR tax OR payer`. After calling this method it is interpreted
    /// as `happy AND tax AND payer`.
    fn set_conjunction_by_default(&mut self) {
        self.inner.set_conjunction_by_default();
    }
 }
--- a/src/schema.rs
+++ b/src/schema.rs
@ -1,10 +1,5 @@
 use pyo3::exceptions;
 use pyo3::prelude::*;
-
+use tantivy as tv;
 use tantivy::schema;
 use crate::document::Document;
 use crate::field::Field;
 /// Tantivy schema.
 ///
@ -12,54 +7,8 @@ use crate::field::Field;
 /// provided.
 #[pyclass]
 pub(crate) struct Schema {
-    pub(crate) inner: schema::Schema,
+    pub(crate) inner: tv::schema::Schema,
 }
 #[pymethods]
-impl Schema {
+impl Schema {}
    /// Parse a json string into a `Document`.
    ///
    /// Args:
    ///     doc_json (str) - The json text to parse into a `Document`.
    ///
    /// Returns the parsed document, raises a ValueError if the parsing failed.
    fn parse_document(&self, doc_json: &str) -> PyResult<Document> {
        self.inner
            .parse_document(doc_json)
            .map(|inner| Document { inner })
            .map_err(|e| exceptions::ValueError::py_err(e.to_string()))
    }
    /// Serialize a `Document` into a json string using this schema.
    ///
    /// Args:
    ///     doc (Document): The document to serialize.
    fn to_json(&self, doc: &Document) -> String {
        let tv_doc = &doc.inner;
        self.inner.to_json(tv_doc)
    }
    /// Look up the name under which a `Field` is registered in this schema.
    ///
    /// Args:
    ///     field (Field): The field whose name is requested.
    fn get_field_name(&self, field: &Field) -> &str {
        let tv_field = field.inner;
        self.inner.get_field_name(tv_field)
    }
    /// Look up a field of this schema by name.
    ///
    /// Args:
    ///     name (str): The name of the field to retrieve.
    ///
    /// Returns the Field if one is found, None otherwise.
    fn get_field(&self, name: &str) -> Option<Field> {
        self.inner.get_field(name).map(|inner| Field { inner })
    }
 }
--- a/src/schemabuilder.rs
+++ b/src/schemabuilder.rs
@ -5,8 +5,8 @@ use pyo3::prelude::*;
 use tantivy::schema;
 use crate::field::Field;
 use crate::schema::Schema;
 use std::sync::{Arc, RwLock};
 /// Tantivy has a very strict schema.
 /// You need to specify in advance whether a field is indexed or not,
@ -24,8 +24,9 @@ use crate::schema::Schema;
 ///
 ///     >>> schema = builder.build()
 #[pyclass]
 #[derive(Clone)]
 pub(crate) struct SchemaBuilder {
-    pub(crate) builder: Option<schema::SchemaBuilder>,
+    pub(crate) builder: Arc<RwLock<Option<schema::SchemaBuilder>>>,
 }
 const TOKENIZER: &str = "default";
@ -36,7 +37,7 @@ impl SchemaBuilder {
    #[new]
    fn new(obj: &PyRawObject) {
        obj.init(SchemaBuilder {
-            builder: Some(schema::Schema::builder()),
+            builder: Arc::new(From::from(Some(schema::Schema::builder()))),
        });
    }
@ -70,9 +71,8 @@ impl SchemaBuilder {
        stored: bool,
        tokenizer_name: &str,
        index_option: &str,
-    ) -> PyResult<Field> {
+    ) -> PyResult<Self> {
        let builder = &mut self.builder;
        let index_option = match index_option {
            "position" => schema::IndexRecordOption::WithFreqsAndPositions,
            "freq" => schema::IndexRecordOption::WithFreqs,
@ -94,14 +94,14 @@ impl SchemaBuilder {
            options
        };
-        if let Some(builder) = builder {
+        if let Some(builder) = builder.write().unwrap().as_mut() {
-            let field = builder.add_text_field(name, options);
+            builder.add_text_field(name, options);
            Ok(Field { inner: field })
        } else {
-            Err(exceptions::ValueError::py_err(
+            return Err(exceptions::ValueError::py_err(
                "Schema builder object isn't valid anymore.",
-            ))
+            ));
        }
        Ok(self.clone())
    }
    /// Add a new signed integer field to the schema.
@ -131,19 +131,19 @@ impl SchemaBuilder {
        stored: bool,
        indexed: bool,
        fast: Option<&str>,
-    ) -> PyResult<Field> {
+    ) -> PyResult<Self> {
        let builder = &mut self.builder;
        let opts = SchemaBuilder::build_int_option(stored, indexed, fast)?;
-        if let Some(builder) = builder {
+        if let Some(builder) = builder.write().unwrap().as_mut() {
-            let field = builder.add_i64_field(name, opts);
+            builder.add_i64_field(name, opts);
            Ok(Field { inner: field })
        } else {
-            Err(exceptions::ValueError::py_err(
+            return Err(exceptions::ValueError::py_err(
                "Schema builder object isn't valid anymore.",
-            ))
+            ));
        }
        Ok(self.clone())
    }
    /// Add a new unsigned integer field to the schema.
@ -173,19 +173,19 @@ impl SchemaBuilder {
        stored: bool,
        indexed: bool,
        fast: Option<&str>,
-    ) -> PyResult<Field> {
+    ) -> PyResult<Self> {
        let builder = &mut self.builder;
        let opts = SchemaBuilder::build_int_option(stored, indexed, fast)?;
-        if let Some(builder) = builder {
+        if let Some(builder) = builder.write().unwrap().as_mut() {
-            let field = builder.add_u64_field(name, opts);
+            builder.add_u64_field(name, opts);
            Ok(Field { inner: field })
        } else {
-            Err(exceptions::ValueError::py_err(
+            return Err(exceptions::ValueError::py_err(
                "Schema builder object isn't valid anymore.",
-            ))
+            ));
        }
        Ok(self.clone())
    }
    /// Add a new date field to the schema.
@ -215,35 +215,35 @@ impl SchemaBuilder {
        stored: bool,
        indexed: bool,
        fast: Option<&str>,
-    ) -> PyResult<Field> {
+    ) -> PyResult<Self> {
        let builder = &mut self.builder;
        let opts = SchemaBuilder::build_int_option(stored, indexed, fast)?;
-        if let Some(builder) = builder {
+        if let Some(builder) = builder.write().unwrap().as_mut() {
-            let field = builder.add_date_field(name, opts);
+            builder.add_date_field(name, opts);
            Ok(Field { inner: field })
        } else {
-            Err(exceptions::ValueError::py_err(
+            return Err(exceptions::ValueError::py_err(
                "Schema builder object isn't valid anymore.",
-            ))
+            ));
        }
        Ok(self.clone())
    }
    /// Add a Facet field to the schema.
    /// Args:
    ///     name (str): The name of the field.
-    fn add_facet_field(&mut self, name: &str) -> PyResult<Field> {
+    fn add_facet_field(&mut self, name: &str) -> PyResult<Self> {
        let builder = &mut self.builder;
-        if let Some(builder) = builder {
+        if let Some(builder) = builder.write().unwrap().as_mut() {
-            let field = builder.add_facet_field(name);
+            builder.add_facet_field(name);
            Ok(Field { inner: field })
        } else {
-            Err(exceptions::ValueError::py_err(
+            return Err(exceptions::ValueError::py_err(
                "Schema builder object isn't valid anymore.",
-            ))
+            ));
        }
        Ok(self.clone())
    }
    /// Add a fast bytes field to the schema.
@ -254,17 +254,17 @@ impl SchemaBuilder {
    ///
    /// Args:
    ///     name (str): The name of the field.
-    fn add_bytes_field(&mut self, name: &str) -> PyResult<Field> {
+    fn add_bytes_field(&mut self, name: &str) -> PyResult<Self> {
        let builder = &mut self.builder;
-        if let Some(builder) = builder {
+        if let Some(builder) = builder.write().unwrap().as_mut() {
-            let field = builder.add_bytes_field(name);
+            builder.add_bytes_field(name);
            Ok(Field { inner: field })
        } else {
-            Err(exceptions::ValueError::py_err(
+            return Err(exceptions::ValueError::py_err(
                "Schema builder object isn't valid anymore.",
-            ))
+            ));
        }
        Ok(self.clone())
    }
    /// Finalize the creation of a Schema.
@ -272,7 +272,7 @@ impl SchemaBuilder {
    /// Returns a Schema object. After this is called the SchemaBuilder cannot
    /// be used anymore.
    fn build(&mut self) -> PyResult<Schema> {
-        let builder = self.builder.take();
+        let builder = self.builder.write().unwrap().take();
        if let Some(builder) = builder {
            let schema = builder.build();
            Ok(Schema { inner: schema })
--- a/src/searcher.rs
+++ b/src/searcher.rs
@ -1,12 +1,11 @@
 #![allow(clippy::new_ret_no_self)]
 use pyo3::exceptions;
 use pyo3::prelude::*;
 use tantivy as tv;
 use crate::document::Document;
 use crate::query::Query;
 use crate::to_pyerr;
 use pyo3::prelude::*;
 use pyo3::{exceptions, PyObjectProtocol};
 use tantivy as tv;
 /// Tantivy's Searcher class
 ///
@ -60,11 +59,11 @@ impl Searcher {
    ///
    /// Returns the Document, raises ValueError if the document can't be found.
    fn doc(&self, doc_address: &DocAddress) -> PyResult<Document> {
-        let ret = self.inner.doc(doc_address.into());
+        let doc = self.inner.doc(doc_address.into()).map_err(to_pyerr)?;
-        match ret {
+        let named_doc = self.inner.schema().to_named_doc(&doc);
-            Ok(doc) => Ok(Document { inner: doc }),
+        Ok(Document {
-            Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
+            field_values: named_doc.0,
-        }
+        })
    }
 }
@ -132,3 +131,14 @@ impl TopDocs {
        Ok(())
    }
 }
 #[pyproto]
 impl PyObjectProtocol for Searcher {
    /// Python `repr()`: reports the searcher's document count and the
    /// number of segment readers it holds.
    fn __repr__(&self) -> PyResult<String> {
        let num_docs = self.inner.num_docs();
        let num_segments = self.inner.segment_readers().len();
        Ok(format!(
            "Searcher(num_docs={}, num_segments={})",
            num_docs, num_segments
        ))
    }
 }
--- a/tests/tantivy_test.py
+++ b/tests/tantivy_test.py
@ -1,36 +1,34 @@
 import json
 import tantivy
 import pytest
 from tantivy import Document, Index, SchemaBuilder, Schema
 def schema():
    """Build the two-field schema used by the tests.

    ``title`` is a stored text field; ``body`` is a text field added with
    the builder's default options.
    """
    builder = SchemaBuilder()
    builder = builder.add_text_field("title", stored=True)
    builder = builder.add_text_field("body")
    return builder.build()
@pytest.fixture(scope="class")
 def ram_index():
        # assume all tests will use the same documents for now
        # other methods may set up function-local indexes
-        builder = tantivy.SchemaBuilder()
+        index = Index(schema())
        title = builder.add_text_field("title", stored=True)
        body = builder.add_text_field("body")
        schema = builder.build()
        index = tantivy.Index(schema)
        writer = index.writer()
        # 2 ways of adding documents
        # 1
-        doc = tantivy.Document()
+        doc = Document()
        # create a document instance
        # add field-value pairs
-        doc.add_text(title, "The Old Man and the Sea")
+        doc.add_text("title", "The Old Man and the Sea")
-        doc.add_text(body, ("He was an old man who fished alone in a skiff in"
+        doc.add_text("body", ("He was an old man who fished alone in a skiff in"
                            "the Gulf Stream and he had gone eighty-four days "
                            "now without taking a fish."))
        writer.add_document(doc)
        # 2 use the built-in json support
        # keys need to coincide with field names
-        doc = schema.parse_document(json.dumps({
+        doc = Document.from_dict({
            "title": "Of Mice and Men",
            "body": ("A few miles south of Soledad, the Salinas River drops "
                     "in close to the hillside bank and runs deep and "
@ -44,149 +42,149 @@ def ram_index():
                     "junctures the debris of the winter’s flooding; and "
                     "sycamores with mottled, white, recumbent limbs and "
                     "branches that arch over the pool")
-        }))
+        })
        writer.add_document(doc)
-        doc = schema.parse_document(json.dumps({
+        writer.add_json("""{
            "title": ["Frankenstein", "The Modern Prometheus"],
-            "body": ("You will rejoice to hear that no disaster has "
+            "body": "You will rejoice to hear that no disaster has accompanied the commencement of an enterprise which you have regarded with such evil forebodings.  I arrived here yesterday, and my first task is to assure my dear sister of my welfare and increasing confidence in the success of my undertaking."
-                     "accompanied the commencement of an enterprise which you "
+        }""")
                     "have regarded with such evil forebodings.  I arrived "
                     "here yesterday, and my first task is to assure my dear "
                     "sister of my welfare and increasing confidence in the "
                     "success of my undertaking.")
        }))
        writer.add_document(doc)
        writer.commit()
        index.reload()
        return index
        reader = index.reader()
        searcher = reader.searcher()
        index = index
        schema = schema
        default_args = [title, body]
        ret = (index, searcher, schema, default_args, title, body)
        return ret
 class TestClass(object):
    def test_simple_search(self, ram_index):
-        index, searcher, schema, default_args, title, body = ram_index
+        index = ram_index
-        query_parser = tantivy.QueryParser.for_index(index, default_args)
+        query = index.parse_query("sea whale", ["title", "body"])
        query = query_parser.parse_query("sea whale")
        top_docs = tantivy.TopDocs(10)
-        result = searcher.search(query, top_docs)
+        result = index.searcher().search(query, top_docs)
        print(result)
        assert len(result) == 1
        _, doc_address = result[0]
-
+        searched_doc = index.searcher().doc(doc_address)
-        searched_doc = searcher.doc(doc_address)
+        assert searched_doc["title"] == ["The Old Man and the Sea"]
        assert searched_doc.get_first(title) == "The Old Man and the Sea"
    def test_doc(self):
        builder = tantivy.SchemaBuilder()
        title = builder.add_text_field("title", stored=True)
        doc = tantivy.Document()
        assert doc.is_empty
        doc.add_text(title, "The Old Man and the Sea")
        assert doc.get_first(title) == "The Old Man and the Sea"
        assert doc.len == 1
        assert not doc.is_empty
    def test_and_query(self, ram_index):
-        index, searcher, schema, default_args, title, body = ram_index
+        index = ram_index
-        q_parser = tantivy.QueryParser.for_index(index, default_args)
+        query = index.parse_query("title:men AND body:summer", default_field_names=["title", "body"])
        # look for an intersection of documents
        query = q_parser.parse_query("title:men AND body:summer")
        top_docs = tantivy.TopDocs(10)
-
+        searcher = index.searcher()
        result = searcher.search(query, top_docs)
        print(result)
        # summer isn't present
        assert len(result) == 0
-        query = q_parser.parse_query("title:men AND body:winter")
+        query = index.parse_query("title:men AND body:winter", ["title", "body"])
        result = searcher.search(query, top_docs)
        assert len(result) == 1
    def test_and_query_parser_default_fields(self, ram_index):
        """A bare term with one default field parses to a single TermQuery."""
        # The byte list is the ASCII encoding of "winter".
        expected = "Query(TermQuery(Term(field=0,bytes=[119, 105, 110, 116, 101, 114])))"
        parsed = ram_index.parse_query("winter", default_field_names=["title"])
        assert repr(parsed) == expected
    def test_and_query_parser_default_fields_undefined(self, ram_index):
        """Without default fields, the term is searched in fields 0 and 1."""
        # The byte list is the ASCII encoding of "winter".
        expected = "".join([
            "Query(BooleanQuery { subqueries: [",
            "(Should, TermQuery(Term(field=0,bytes=[119, 105, 110, 116, 101, 114]))), ",
            "(Should, TermQuery(Term(field=1,bytes=[119, 105, 110, 116, 101, 114])))] ",
            "})",
        ])
        parsed = ram_index.parse_query("winter")
        assert repr(parsed) == expected
    def test_query_errors(self, ram_index):
-        index, searcher, schema, default_args, title, body = ram_index
+        index = ram_index
        q_parser = tantivy.QueryParser.for_index(index, default_args)
        # no "bod" field
        with pytest.raises(ValueError):
-            q_parser.parse_query("bod:title")
+            index.parse_query("bod:men", ["title", "body"])
@pytest.fixture(scope="class")
 def disk_index():
    builder = tantivy.SchemaBuilder()
    title = builder.add_text_field("title", stored=True)
    body = builder.add_text_field("body")
    default_args = [title, body]
    schema = builder.build()
    schema = schema
    index = tantivy.Index(schema)
    path_to_index = "tests/test_index/"
    return index, path_to_index, schema, default_args, title, body
 PATH_TO_INDEX = "tests/test_index/"
 class TestFromDiskClass(object):
-    def test_exists(self, disk_index):
+    def test_exists(self):
        # prefer to keep it separate in case anyone deletes this
        # runs from the root directory
-        index, path_to_index, _, _, _, _ = disk_index
+        assert Index.exists(PATH_TO_INDEX)
        assert index.exists(path_to_index)
-    def test_opens_from_dir(self, disk_index):
+    def test_opens_from_dir(self):
-        _, path_to_index, schema, _, _, _ = disk_index
+        index = Index(schema(), PATH_TO_INDEX, reuse=True)
-        tantivy.Index(schema, path_to_index)
+        assert index.searcher().num_docs == 3
-    def test_create_readers(self, disk_index):
+    def test_create_readers(self):
-        _, path_to_index, schema, _, _, _ = disk_index
+        # not sure what is the point of this test.
-        idx = tantivy.Index(schema, path_to_index)
+        idx = Index(schema())
-        reload_policy = "OnCommit"  # or "Manual"
+        assert idx.searcher().num_docs  == 0
-        assert idx.reader(reload_policy, 4)
+        # by default this is manual mode
-        assert idx.reader("Manual", 4)
+        writer = idx.writer(30000000, 1)
        writer.add_document(Document(title="mytitle",body="mybody"))
        writer.commit()
        assert idx.searcher().num_docs  == 0
        # Manual is the default setting.
        # In this case, change are reflected only when
        # the index is manually reloaded.
        idx.reload()
        assert idx.searcher().num_docs  == 1
        idx.config_reader("OnCommit", 4)
        writer.add_document(Document(title="mytitle2",body="mybody2"))
        writer.commit()
        import time
        for i in range(50):
            # The index should be automatically reloaded.
            # Wait for at most 5s for it to happen.
            time.sleep(0.1)
            if idx.searcher().num_docs  == 2:
                return
        assert False
-    def test_create_writer_and_reader(self, disk_index):
+class TestSearcher(object):
-        _, path_to_index, schema, default_args, title, body = disk_index
+    def test_searcher_repr(self, ram_index):
-        idx = tantivy.Index(schema, path_to_index)
+        assert repr(ram_index.searcher()) == "Searcher(num_docs=3, num_segments=1)"
        writer = idx.writer()
        reload_policy = "OnCommit"  # or "Manual"
        reader = idx.reader(reload_policy, 4)
        # check against the opstamp in the meta file
        meta_fname = "meta.json"
        with open("{}{}".format(path_to_index, meta_fname)) as f:
            json_file = json.load(f)
            expected_last_opstamp = json_file["opstamp"]
            # ASSUMPTION
            # We haven't had any deletes in the index
            # so max_doc per index coincides with the value of `num_docs`
            # summing them in all segments, gives the number of documents
            expected_num_docs = sum([segment["max_doc"]
                                     for segment in json_file["segments"]])
        assert writer.commit_opstamp == expected_last_opstamp
-        q_parser = tantivy.QueryParser.for_index(idx, default_args)
+class TestDocument(object):
        # get all documents
        query = q_parser.parse_query("*")
        top_docs = tantivy.TopDocs(10)
-        docs = reader.searcher().search(query, top_docs)
+    def test_document(self):
-        for (_score, doc_addr) in docs:
+        doc = tantivy.Document(name="Bill", reference=[1, 2])
-            print(reader.searcher().doc(doc_addr))
+        assert doc["reference"] == [1, 2]
-        assert expected_num_docs == len(docs)
+        assert doc["name"] == ["Bill"]
        assert doc.get_first("name") == "Bill"
        assert doc.get_first("reference") == 1
        assert doc.to_dict() == {"name": ["Bill"], "reference": [1, 2]}
    def test_document_with_date(self):
        """A datetime passed to the Document constructor is read back equal."""
        import datetime
        moment = datetime.datetime(2019, 8, 12, 13, 0, 0)
        doc = tantivy.Document(name="Bill", date=moment)
        stored_dates = doc["date"]
        assert stored_dates[0] == moment
    def test_document_repr(self):
        """repr() of a Document lists each field with its values."""
        expected = "Document(name=[Bill],reference=[1,2])"
        doc = tantivy.Document(name="Bill", reference=[1, 2])
        assert repr(doc) == expected
    def test_document_with_facet(self):
        """Facets can be added to a Document and read back.

        Covers a plain path, a path whose segment contains an escaped
        slash, and passing a facet through the Document constructor.
        """
        plain_doc = tantivy.Document()
        plain_facet = tantivy.Facet.from_string("/europe/france")
        plain_doc.add_facet("facet", plain_facet)
        assert plain_doc["facet"][0].to_path() == ['europe', 'france']

        # "\\/" in the source is an escaped slash inside one path segment.
        escaped_doc = tantivy.Document()
        escaped_facet = tantivy.Facet.from_string("/asia\\/oceania/fiji")
        escaped_doc.add_facet("facet", escaped_facet)
        assert escaped_doc["facet"][0].to_path() == ['asia/oceania', 'fiji']
        assert escaped_doc["facet"][0].to_path_str() == "/asia\\/oceania/fiji"
        assert repr(escaped_doc["facet"][0]) == "Facet(/asia\\/oceania/fiji)"

        kwarg_doc = tantivy.Document(facet=escaped_facet)
        assert kwarg_doc["facet"][0].to_path() == ['asia/oceania', 'fiji']
    def test_document_error(self):
        """Constructing a Document with a dict as a field value raises ValueError."""
        invalid_value = {}
        with pytest.raises(ValueError):
            tantivy.Document(name=invalid_value)