Merge pull request #3 from tantivy-search/api-change

Different API Change.
2019-09-02 09:52:29 +09:00 · 2019-09-02 09:52:29 +09:00 · f57c4669c8
commit f57c4669c8
parent b1b3689c55 c91234cd73
12 changed files with 656 additions and 582 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -11,7 +11,8 @@ crate-type = ["cdylib"]

 [dependencies]
 chrono = "0.4"
-tantivy = { git = "https://github.com/tantivy-search/tantivy" }
+tantivy = { git = "https://github.com/tantivy-search/tantivy", branch = "master" }
+itertools = "0.8"

 [dependencies.pyo3]
 version = "0.7.0"
--- a/README.md
+++ b/README.md
@ -24,35 +24,37 @@ needs to be built. After that documents can be added to the index and a reader
 can be created to search the index.

 ```python
-    builder = tantivy.SchemaBuilder()
+import tantivy

-    title = builder.add_text_field("title", stored=True)
-    body = builder.add_text_field("body")
+# Declaring our schema.
+schema_builder = tantivy.SchemaBuilder()
+schema_builder.add_text_field("title", stored=True)
+schema_builder.add_text_field("body", stored=True)
+schema = schema_builder.build()

-    schema = builder.build()
-    index = tantivy.Index(schema)
+# Creating our index (in memory, but filesystem is available too)
+index = tantivy.Index(schema)

-    writer = index.writer()

-    doc = tantivy.Document()
-    doc.add_text(title, "The Old Man and the Sea")
-    doc.add_text(body, ("He was an old man who fished alone in a skiff in"
-                        "the Gulf Stream and he had gone eighty-four days "
-                        "now without taking a fish."))
-    writer.add_document(doc)
-    writer.commit()
+# Adding one document.
+writer = index.writer()
+writer.add_document({
+    "title": "The Old Man and the Sea",
+    "body": """He was an old man who fished alone in a skiff in
+               the Gulf Stream and he had gone eighty-four days 
+               now without taking a fish."""
+})
+# ... and committing
+writer.commit()

-    reader = index.reader()
-    searcher = reader.searcher()

-    query_parser = tantivy.QueryParser.for_index(index, [title, body])
-    query = query_parser.parse_query("sea whale")
+# Reload the index to ensure it points to the last commit.
+index.reload()
+searcher = index.searcher()
+query = index.parse_query("sea whale", ["title", "body"])
+top_docs = tantivy.TopDocs(3)

-    top_docs = tantivy.TopDocs(10)
-    result = searcher.search(query, top_docs)
-
-    _, doc_address = result[0]
-
-    searched_doc = searcher.doc(doc_address)
-    assert searched_doc.get_first(title) == "The Old Man and the Sea"
+(best_score, best_doc_address) = searcher.search(query, nhits=3)[0]
+best_doc = searcher.doc(best_doc_address) 
+assert best_doc["title"] == ["The Old Man and the Sea"]
 ```
--- a/src/document.rs
+++ b/src/document.rs
@ -1,75 +1,255 @@
 #![allow(clippy::new_ret_no_self)]
+#![allow(clippy::wrong_self_convention)]

+use itertools::Itertools;
 use pyo3::prelude::*;
-use pyo3::types::PyDateTime;
+use pyo3::types::{PyAny, PyDateTime, PyDict, PyList, PyTuple};
 use pyo3::types::{PyDateAccess, PyTimeAccess};

 use chrono::offset::TimeZone;
-use chrono::Utc;
+use chrono::{Datelike, Timelike, Utc};

 use tantivy as tv;

 use crate::facet::Facet;
-use crate::field::{Field, FieldValue};
+use crate::to_pyerr;
+use pyo3::{PyMappingProtocol, PyObjectProtocol};
+use std::collections::BTreeMap;
+use std::fmt;
+use tantivy::schema::Value;
+
+/// Converts a tantivy `Value` into the corresponding Python object.
+///
+/// Strings, integers, floats and bytes go through pyo3's conversion traits.
+/// Dates are rebuilt component by component into a `PyDateTime` created
+/// with no tzinfo (`None`). Facets are wrapped into this crate's `Facet`.
+///
+/// Returns an error if the `PyDateTime` cannot be constructed.
+fn value_to_py(py: Python, value: &Value) -> PyResult<PyObject> {
+    Ok(match value {
+        Value::Str(text) => text.into_object(py),
+        Value::U64(num) => num.into_object(py),
+        Value::I64(num) => num.into_object(py),
+        Value::F64(num) => num.into_object(py),
+        Value::Bytes(b) => b.to_object(py),
+        Value::Date(d) => PyDateTime::new(
+            py,
+            d.year(),
+            d.month() as u8,
+            d.day() as u8,
+            d.hour() as u8,
+            d.minute() as u8,
+            d.second() as u8,
+            d.timestamp_subsec_micros(),
+            None,
+        )?
+        .into_object(py),
+        Value::Facet(f) => Facet { inner: f.clone() }.into_object(py),
+    })
+}
+
+/// Renders a tantivy `Value` as text.
+///
+/// Text is copied as is, numbers and facets use their `to_string`
+/// representation, bytes and dates use their `Debug` representation.
+fn value_to_string(value: &Value) -> String {
+    match value {
+        Value::Facet(facet) => facet.to_string(),
+        Value::Date(date) => format!("{:?}", date),
+        Value::Bytes(payload) => format!("{:?}", payload),
+        Value::F64(float) => float.to_string(),
+        Value::I64(signed) => signed.to_string(),
+        Value::U64(unsigned) => unsigned.to_string(),
+        Value::Str(text) => text.to_owned(),
+    }
+}

 /// Tantivy's Document is the object that can be indexed and then searched for.
 ///
 /// Documents are fundamentally a collection of unordered tuples
-/// (field, value). In this list, one field may appear more than once.
+/// (field_name, value). In this list, one field may appear more than once.
 ///
 /// Example:
 ///     >>> doc = tantivy.Document()
-///     >>> doc.add_text(title, "The Old Man and the Sea")
-///     >>> doc.add_text(body, ("He was an old man who fished alone in a "
+///     >>> doc.add_text("title", "The Old Man and the Sea")
+///     >>> doc.add_text("body", ("He was an old man who fished alone in a "
 ///                             "skiff in the Gulf Stream and he had gone "
 ///                             "eighty-four days now without taking a fish."))
+///
+/// For simplicity, it is also possible to build a `Document` by passing the field
+/// values directly as constructor arguments.
+///
+/// Example:
+///     >>> doc = tantivy.Document(title=["The Old Man and the Sea"], body=["..."])
+///
+/// As syntactic sugar, tantivy also allows the user to pass a single value
+/// if there is only one. In other words, the following is also legal.
+///
+/// Example:
+///     >>> doc = tantivy.Document(title="The Old Man and the Sea", body="...")
+
 #[pyclass]
+#[derive(Default)]
 pub(crate) struct Document {
-    pub(crate) inner: tv::Document,
+    pub(crate) field_values: BTreeMap<String, Vec<tv::schema::Value>>,
+}
+
+impl fmt::Debug for Document {
+    /// Formats the document as `Document(name=[values],name=[values],...)`.
+    ///
+    /// The comma-joined values of each field are cut down to at most 10
+    /// bytes in order to keep the output short.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let doc_str = self
+            .field_values
+            .iter()
+            .map(|(field_name, field_values)| {
+                let mut values_str =
+                    field_values.iter().map(value_to_string).join(",");
+                // `String::truncate` panics when the cut does not fall on a
+                // char boundary (e.g. in the middle of a multi-byte UTF-8
+                // character), so back up to the closest boundary at or below
+                // 10 bytes. Offset 0 is always a boundary, hence the loop
+                // terminates.
+                let mut cut = values_str.len().min(10);
+                while !values_str.is_char_boundary(cut) {
+                    cut -= 1;
+                }
+                values_str.truncate(cut);
+                format!("{}=[{}]", field_name, values_str)
+            })
+            .join(",");
+        write!(f, "Document({})", doc_str)
+    }
+}
+
+/// Appends `value`, converted into a tantivy `Value`, to the values stored
+/// in `doc` under `field_name`.
+///
+/// The list of values for `field_name` is created if it does not exist yet.
+fn add_value<T>(doc: &mut Document, field_name: String, value: T)
+where
+    Value: From<T>,
+{
+    let converted = Value::from(value);
+    let values = doc.field_values.entry(field_name).or_insert_with(Vec::new);
+    values.push(converted);
+}
+
+/// Converts a Python object into a tantivy `Value`.
+///
+/// The conversions are attempted in the following order and the first one
+/// that succeeds wins: `String`, `i64`, `f64`, `PyDateTime`, `Facet`.
+///
+/// Returns an error if none of these conversions applies.
+fn extract_value(any: &PyAny) -> PyResult<Value> {
+    if let Ok(s) = any.extract::<String>() {
+        return Ok(Value::Str(s));
+    }
+    // Integers are tried before floats so that they are stored as `I64`.
+    if let Ok(num) = any.extract::<i64>() {
+        return Ok(Value::I64(num));
+    }
+    if let Ok(num) = any.extract::<f64>() {
+        return Ok(Value::F64(num));
+    }
+    if let Ok(py_datetime) = any.downcast_ref::<PyDateTime>() {
+        // The date and time components are read as is and interpreted as
+        // UTC; the tzinfo of the Python object is not consulted here.
+        let datetime = Utc
+            .ymd(
+                py_datetime.get_year(),
+                py_datetime.get_month().into(),
+                py_datetime.get_day().into(),
+            )
+            .and_hms_micro(
+                py_datetime.get_hour().into(),
+                py_datetime.get_minute().into(),
+                py_datetime.get_second().into(),
+                py_datetime.get_microsecond(),
+            );
+        return Ok(Value::Date(datetime));
+    }
+    if let Ok(facet) = any.downcast_ref::<Facet>() {
+        return Ok(Value::Facet(facet.inner.clone()));
+    }
+    Err(to_pyerr(format!("Value unsupported {:?}", any)))
+}
+
+/// Converts a Python object into a list of tantivy `Value`s.
+///
+/// A Python list is converted element by element, anything else is
+/// converted as a single value and wrapped into a one-element vector.
+fn extract_value_single_or_list(any: &PyAny) -> PyResult<Vec<Value>> {
+    match any.downcast_ref::<PyList>() {
+        Ok(py_list) => py_list.iter().map(extract_value).collect(),
+        Err(_) => extract_value(any).map(|single| vec![single]),
+    }
 }

 #[pymethods]
 impl Document {
    #[new]
-    fn new(obj: &PyRawObject) {
-        obj.init(Document {
-            inner: tv::Document::default(),
-        });
+    #[args(kwargs = "**")]
+    fn new(obj: &PyRawObject, kwargs: Option<&PyDict>) -> PyResult<()> {
+        let mut document = Document::default();
+        if let Some(field_dict) = kwargs {
+            document.extend(field_dict)?;
+        }
+        obj.init(document);
+        Ok(())
+    }
+
+    /// Adds the fields of `py_dict` to the document.
+    ///
+    /// Each value of the dictionary is either a single value or a list of
+    /// values. The entries are first collected into a temporary map, so the
+    /// document is left untouched if a key or a value cannot be converted.
+    ///
+    /// A field that is already present in the document gets its values
+    /// replaced by the ones of `py_dict`, they are not appended.
+    fn extend(&mut self, py_dict: &PyDict) -> PyResult<()> {
+        let mut field_values: BTreeMap<String, Vec<tv::schema::Value>> =
+            BTreeMap::new();
+        for key_value_any in py_dict.items() {
+            // Items that are not `(key, value)` tuples are silently skipped.
+            if let Ok(key_value) = key_value_any.downcast_ref::<PyTuple>() {
+                if key_value.len() != 2 {
+                    continue;
+                }
+                let key: String = key_value.get_item(0).extract()?;
+                let value_list =
+                    extract_value_single_or_list(key_value.get_item(1))?;
+                field_values.insert(key, value_list);
+            }
+        }
+        self.field_values.extend(field_values.into_iter());
+        Ok(())
+    }
+
+    /// Builds a `Document` from a dictionary mapping field names to values.
+    ///
+    /// Each value of the dictionary is either a single value or a list of
+    /// values.
+    ///
+    /// Returns an error if a key is not a string or if a value cannot be
+    /// converted.
+    #[staticmethod]
+    fn from_dict(py_dict: &PyDict) -> PyResult<Document> {
+        // Filling an empty document through `extend` yields exactly the
+        // fields found in `py_dict`, without duplicating the parsing logic.
+        let mut document = Document::default();
+        document.extend(py_dict)?;
+        Ok(document)
+    }
+
+    /// Returns a dictionary with the different
+    /// field values.
+    ///
+    /// In tantivy, a `Document` can hold multiple
+    /// values for a single field.
+    ///
+    /// For this reason, the dictionary associates
+    /// a list of values with every field.
+    fn to_dict(&self, py: Python) -> PyResult<PyObject> {
+        let dict = PyDict::new(py);
+        for (key, values) in &self.field_values {
+            let values_py: Vec<PyObject> = values
+                .iter()
+                .map(|v| value_to_py(py, v))
+                .collect::<PyResult<_>>()?;
+            dict.set_item(key, values_py)?;
+        }
+        Ok(dict.into())
    }

    /// Add a text value to the document.
    ///
    /// Args:
-    ///     field (Field): The field for which we are adding the text.
+    ///     field_name (str): The field name for which we are adding the text.
    ///     text (str): The text that will be added to the document.
-    fn add_text(&mut self, field: &Field, text: &str) {
-        self.inner.add_text(field.inner, text);
+    fn add_text(&mut self, field_name: String, text: &str) {
+        add_value(self, field_name, text);
    }

    /// Add an unsigned integer value to the document.
    ///
    /// Args:
-    ///     field (Field): The field for which we are adding the integer.
+    ///     field_name (str): The field name for which we are adding the unsigned integer.
    ///     value (int): The integer that will be added to the document.
-    fn add_unsigned(&mut self, field: &Field, value: u64) {
-        self.inner.add_u64(field.inner, value);
+    fn add_unsigned(&mut self, field_name: String, value: u64) {
+        add_value(self, field_name, value);
    }

    /// Add a signed integer value to the document.
    ///
    /// Args:
-    ///     field (Field): The field for which we are adding the integer.
+    ///     field_name (str): The field name for which we are adding the integer.
    ///     value (int): The integer that will be added to the document.
-    fn add_integer(&mut self, field: &Field, value: i64) {
-        self.inner.add_i64(field.inner, value);
+    fn add_integer(&mut self, field_name: String, value: i64) {
+        add_value(self, field_name, value);
    }

    /// Add a date value to the document.
    ///
    /// Args:
-    ///     field (Field): The field for which we are adding the integer.
+    ///     field_name (str): The field name for which we are adding the date.
    ///     value (datetime): The date that will be added to the document.
-    fn add_date(&mut self, field: &Field, value: &PyDateTime) {
+    fn add_date(&mut self, field_name: String, value: &PyDateTime) {
        let datetime = Utc
            .ymd(
                value.get_year(),
@ -82,37 +262,36 @@ impl Document {
                value.get_second().into(),
                value.get_microsecond(),
            );
-
-        self.inner.add_date(field.inner, &datetime);
+        add_value(self, field_name, datetime);
    }

    /// Add a facet value to the document.
    /// Args:
-    ///     field (Field): The field for which we are adding the facet.
+    ///     field_name (str): The field name for which we are adding the facet.
    ///     value (Facet): The Facet that will be added to the document.
-    fn add_facet(&mut self, field: &Field, value: &Facet) {
-        self.inner.add_facet(field.inner, value.inner.clone());
+    fn add_facet(&mut self, field_name: String, facet: &Facet) {
+        add_value(self, field_name, facet.inner.clone());
    }

    /// Add a bytes value to the document.
    ///
    /// Args:
-    ///     field (Field): The field for which we are adding the bytes.
+    ///     field_name (str): The field for which we are adding the bytes.
    ///     value (bytes): The bytes that will be added to the document.
-    fn add_bytes(&mut self, field: &Field, value: Vec<u8>) {
-        self.inner.add_bytes(field.inner, value);
+    fn add_bytes(&mut self, field_name: String, bytes: Vec<u8>) {
+        add_value(self, field_name, bytes);
    }

    /// Returns the number of added fields that have been added to the document
    #[getter]
-    fn len(&self) -> usize {
-        self.inner.len()
+    fn num_fields(&self) -> usize {
+        self.field_values.len()
    }

    /// True if the document is empty, False otherwise.
    #[getter]
    fn is_empty(&self) -> bool {
-        self.inner.is_empty()
+        self.field_values.is_empty()
    }

    /// Get the first value associated with the given field.
@ -122,9 +301,17 @@ impl Document {
    ///
    /// Returns the value if one is found, otherwise None.
    /// The type of the value depends on the field.
-    fn get_first(&self, py: Python, field: &Field) -> Option<PyObject> {
-        let value = self.inner.get_first(field.inner)?;
-        FieldValue::value_to_py(py, value)
+    fn get_first(
+        &self,
+        py: Python,
+        fieldname: &str,
+    ) -> PyResult<Option<PyObject>> {
+        if let Some(value) = self.iter_values_for_field(fieldname).next() {
+            let py_value = value_to_py(py, value)?;
+            Ok(Some(py_value))
+        } else {
+            Ok(None)
+        }
    }

    /// Get the all values associated with the given field.
@ -134,21 +321,37 @@ impl Document {
    ///
    /// Returns a list of values.
    /// The type of the value depends on the field.
-    fn get_all(&self, py: Python, field: &Field) -> Vec<PyObject> {
-        let values = self.inner.get_all(field.inner);
-        values
-            .iter()
-            .map(|&v| FieldValue::value_to_py(py, v))
-            .filter_map(|x| x)
-            .collect()
-    }
-
-    /// Get all the fields and values contained in the document.
-    fn field_values(&self, py: Python) -> Vec<FieldValue> {
-        let field_values = self.inner.field_values();
-        field_values
-            .iter()
-            .map(|v| FieldValue::field_value_to_py(py, v))
-            .collect()
+    fn get_all(&self, py: Python, field_name: &str) -> PyResult<Vec<PyObject>> {
+        self.iter_values_for_field(field_name)
+            .map(|value| value_to_py(py, value))
+            .collect::<PyResult<Vec<_>>>()
+    }
+}
+
+impl Document {
+    /// Iterates over the values stored in the document for `field`.
+    ///
+    /// The iterator is empty if the document does not hold such a field.
+    fn iter_values_for_field<'a>(
+        &'a self,
+        field: &str,
+    ) -> impl Iterator<Item = &'a Value> + 'a {
+        let values_opt = self.field_values.get(field);
+        values_opt.into_iter().flatten()
+    }
+}
+
+// Mapping protocol: makes `doc[field_name]` available on the Python side.
+#[pyproto]
+impl PyMappingProtocol for Document {
+    // Returns the list of all the values associated with `field_name`, as
+    // `get_all` does. A field that is absent from the document yields an
+    // empty list.
+    fn __getitem__(&self, field_name: &str) -> PyResult<Vec<PyObject>> {
+        // No `Python` token is handed to this method, so the GIL is
+        // acquired explicitly in order to build the Python objects.
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        self.get_all(py, field_name)
+    }
+}
+
+// Object protocol: `repr(doc)` reuses the `fmt::Debug` output of `Document`.
+#[pyproto]
+impl PyObjectProtocol for Document {
+    fn __repr__(&self) -> PyResult<String> {
+        Ok(format!("{:?}", self))
    }
 }
--- a/src/facet.rs
+++ b/src/facet.rs
@ -1,6 +1,6 @@
+use pyo3::basic::PyObjectProtocol;
 use pyo3::prelude::*;
 use pyo3::types::PyType;
-
 use tantivy::schema;

 /// A Facet represent a point in a given hierarchy.
@ -52,4 +52,23 @@ impl Facet {
            inner: schema::Facet::from_text(facet_string),
        }
    }
+
+    /// Returns the list of segments that form the facet path.
+    ///
+    /// For instance `//europe/france` becomes `["europe", "france"]`.
+    fn to_path(&self) -> Vec<&str> {
+        self.inner.to_path()
+    }
+
+    /// Returns the string representation of the facet, as produced by
+    /// `to_string()` on the underlying tantivy facet.
+    fn to_path_str(&self) -> String {
+        self.inner.to_string()
+    }
+}
+
+#[pyproto]
+impl PyObjectProtocol for Facet {
+    // Renders the facet as `Facet(<path>)`, where `<path>` is the string
+    // representation returned by `to_path_str`.
+    fn __repr__(&self) -> PyResult<String> {
+        let path = self.to_path_str();
+        Ok(format!("Facet({})", path))
+    }
 }
--- a/src/field.rs
+++ b/src/field.rs
@ -1,73 +0,0 @@
-use pyo3::prelude::*;
-use pyo3::types::PyDateTime;
-
-use tantivy::schema;
-
-use crate::facet::Facet;
-
-/// Field is a numeric indentifier that represents an entry in the Schema.
-#[pyclass]
-#[derive(Clone)]
-pub(crate) struct Field {
-    pub(crate) inner: schema::Field,
-}
-
-/// FieldValue holds together a Field and its Value.
-#[pyclass]
-pub(crate) struct FieldValue {
-    pub(crate) field: Field,
-    pub(crate) value: PyObject,
-}
-
-#[pymethods]
-impl FieldValue {
-    #[getter]
-    fn field(&self) -> Field {
-        self.field.clone()
-    }
-
-    #[getter]
-    fn value(&self) -> &PyObject {
-        &self.value
-    }
-}
-
-impl FieldValue {
-    pub(crate) fn value_to_py(
-        py: Python,
-        value: &schema::Value,
-    ) -> Option<PyObject> {
-        match value {
-            schema::Value::Str(text) => Some(text.into_object(py)),
-            schema::Value::U64(num) => Some(num.into_object(py)),
-            schema::Value::I64(num) => Some(num.into_object(py)),
-            schema::Value::F64(num) => Some(num.into_object(py)),
-            schema::Value::Bytes(b) => Some(b.to_object(py)),
-            schema::Value::Date(d) => {
-                let date =
-                    PyDateTime::from_timestamp(py, d.timestamp() as f64, None);
-
-                match date {
-                    Ok(d) => Some(d.into_object(py)),
-                    Err(_e) => None,
-                }
-            }
-            schema::Value::Facet(f) => {
-                Some(Facet { inner: f.clone() }.into_object(py))
-            }
-        }
-    }
-
-    pub(crate) fn field_value_to_py(
-        py: Python,
-        field_value: &schema::FieldValue,
-    ) -> FieldValue {
-        let value = field_value.value();
-        let field = field_value.field();
-
-        FieldValue {
-            field: Field { inner: field },
-            value: FieldValue::value_to_py(py, value).unwrap(),
-        }
-    }
-}
--- a/src/index.rs
+++ b/src/index.rs
@ -4,60 +4,24 @@ use pyo3::exceptions;
 use pyo3::prelude::*;

 use crate::document::Document;
+use crate::query::Query;
 use crate::schema::Schema;
 use crate::searcher::Searcher;
+use crate::to_pyerr;
 use tantivy as tv;
 use tantivy::directory::MmapDirectory;
+use tantivy::schema::{Field, NamedFieldDocument};

 const RELOAD_POLICY: &str = "commit";

-/// IndexReader is the entry point to read and search the index.
-///
-/// IndexReader controls when a new version of the index should be loaded and
-/// lends you instances of Searcher for the last loaded version.
-///
-/// To create an IndexReader first create an Index and call the reader() method
-/// on the index object.
-#[pyclass]
-pub(crate) struct IndexReader {
-    inner: tv::IndexReader,
-}
-
-#[pymethods]
-impl IndexReader {
-    /// Update searchers so that they reflect the state of the last .commit().
-    ///
-    /// If you set up the the reload policy to be on 'commit' (which is the
-    /// default) every commit should be rapidly reflected on your IndexReader
-    /// and you should not need to call reload() at all.
-    fn reload(&self) -> PyResult<()> {
-        let ret = self.inner.reload();
-        match ret {
-            Ok(_) => Ok(()),
-            Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
-        }
-    }
-
-    /// Get a Searcher for the index.
-    ///
-    /// This method should be called every single time a search query is
-    /// performed. The searchers are taken from a pool of num_searchers
-    /// searchers.
-    ///
-    /// Returns a Searcher object, if no searcher is available this may block.
-    fn searcher(&self) -> Searcher {
-        let searcher = self.inner.searcher();
-        Searcher { inner: searcher }
-    }
-}
-
 /// IndexWriter is the user entry-point to add documents to the index.
 ///
 /// To create an IndexWriter first create an Index and call the writer() method
 /// on the index object.
 #[pyclass]
 pub(crate) struct IndexWriter {
-    inner: tv::IndexWriter,
+    inner_index_writer: tv::IndexWriter,
+    schema: tv::schema::Schema,
 }

 #[pymethods]
@ -70,9 +34,24 @@ impl IndexWriter {
    /// by the client to align commits with its own document queue.
    /// The `opstamp` represents the number of documents that have been added
    /// since the creation of the index.
-    fn add_document(&mut self, document: &Document) -> PyResult<()> {
-        self.inner.add_document(document.inner.clone());
-        Ok(())
+    pub fn add_document(&mut self, doc: &Document) -> PyResult<u64> {
+        let named_doc = NamedFieldDocument(doc.field_values.clone());
+        let doc = self.schema.convert_named_doc(named_doc).map_err(to_pyerr)?;
+        Ok(self.inner_index_writer.add_document(doc))
+    }
+
+    /// Adds a document given as a json string.
+    ///
+    /// The json is parsed according to the schema of the index, and the
+    /// resulting document is added like `add_document` would.
+    ///
+    /// If the indexing pipeline is full, this call may block.
+    ///
+    /// Returns an `opstamp`, which is an increasing integer that can be used
+    /// by the client to align commits with its own document queue.
+    /// The `opstamp` represents the number of documents that have been added
+    /// since the creation of the index.
+    pub fn add_json(&mut self, json: &str) -> PyResult<u64> {
+        let parsed_doc = self.schema.parse_document(json).map_err(to_pyerr)?;
+        Ok(self.inner_index_writer.add_document(parsed_doc))
+    }

    /// Commits all of the pending changes
@ -84,12 +63,8 @@ impl IndexWriter {
    /// spared), it will be possible to resume indexing from this point.
    ///
    /// Returns the `opstamp` of the last document that made it in the commit.
-    fn commit(&mut self) -> PyResult<()> {
-        let ret = self.inner.commit();
-        match ret {
-            Ok(_) => Ok(()),
-            Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
-        }
+    fn commit(&mut self) -> PyResult<u64> {
+        self.inner_index_writer.commit().map_err(to_pyerr)
    }

    /// Rollback to the last commit
@ -97,23 +72,15 @@ impl IndexWriter {
    /// This cancels all of the update that happened before after the last
    /// commit. After calling rollback, the index is in the same state as it
    /// was after the last commit.
-    fn rollback(&mut self) -> PyResult<()> {
-        let ret = self.inner.rollback();
-
-        match ret {
-            Ok(_) => Ok(()),
-            Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
-        }
+    fn rollback(&mut self) -> PyResult<u64> {
+        self.inner_index_writer.rollback().map_err(to_pyerr)
    }

    /// Detect and removes the files that are not used by the index anymore.
    fn garbage_collect_files(&mut self) -> PyResult<()> {
-        let ret = self.inner.garbage_collect_files();
-
-        match ret {
-            Ok(_) => Ok(()),
-            Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
-        }
+        self.inner_index_writer
+            .garbage_collect_files()
+            .map_err(to_pyerr)
    }

    /// The opstamp of the last successful commit.
@ -125,7 +92,7 @@ impl IndexWriter {
    /// for searchers.
    #[getter]
    fn commit_opstamp(&self) -> u64 {
-        self.inner.commit_opstamp()
+        self.inner_index_writer.commit_opstamp()
    }
 }

@ -142,11 +109,19 @@ impl IndexWriter {
 /// if there was a problem during the opening or creation of the index.
 #[pyclass]
 pub(crate) struct Index {
-    pub(crate) inner: tv::Index,
+    pub(crate) index: tv::Index,
+    reader: tv::IndexReader,
 }

 #[pymethods]
 impl Index {
+    /// Opens the index stored in the directory `path`.
+    ///
+    /// A reader is created along with the index, through `index.reader()`.
+    ///
+    /// Raises ValueError if the index cannot be opened or if the reader
+    /// cannot be created.
+    #[staticmethod]
+    fn open(path: &str) -> PyResult<Index> {
+        let index = tv::Index::open_in_dir(path).map_err(to_pyerr)?;
+        let reader = index.reader().map_err(to_pyerr)?;
+        Ok(Index { index, reader })
+    }
+
    #[new]
    #[args(reuse = true)]
    fn new(
@ -157,32 +132,19 @@ impl Index {
    ) -> PyResult<()> {
        let index = match path {
            Some(p) => {
-                let directory = MmapDirectory::open(p);
-
-                let dir = match directory {
-                    Ok(d) => d,
-                    Err(e) => {
-                        return Err(exceptions::OSError::py_err(e.to_string()))
-                    }
-                };
-
-                let i = if reuse {
-                    tv::Index::open_or_create(dir, schema.inner.clone())
+                let directory = MmapDirectory::open(p).map_err(to_pyerr)?;
+                if reuse {
+                    tv::Index::open_or_create(directory, schema.inner.clone())
                } else {
-                    tv::Index::create(dir, schema.inner.clone())
-                };
-
-                match i {
-                    Ok(index) => index,
-                    Err(e) => {
-                        return Err(exceptions::OSError::py_err(e.to_string()))
-                    }
+                    tv::Index::create(directory, schema.inner.clone())
                }
+                .map_err(to_pyerr)?
            }
            None => tv::Index::create_in_ram(schema.inner.clone()),
        };

-        obj.init(Index { inner: index });
+        let reader = index.reader().map_err(to_pyerr)?;
+        obj.init(Index { index, reader });
        Ok(())
    }

@ -206,32 +168,30 @@ impl Index {
        num_threads: usize,
    ) -> PyResult<IndexWriter> {
        let writer = match num_threads {
-            0 => self.inner.writer(heap_size),
-            _ => self.inner.writer_with_num_threads(num_threads, heap_size),
-        };
-
-        match writer {
-            Ok(w) => Ok(IndexWriter { inner: w }),
-            Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
+            0 => self.index.writer(heap_size),
+            _ => self.index.writer_with_num_threads(num_threads, heap_size),
        }
+        .map_err(to_pyerr)?;
+        let schema = self.index.schema();
+        Ok(IndexWriter {
+            inner_index_writer: writer,
+            schema,
+        })
    }

-    /// Create an IndexReader for the index.
+    /// Configure the index reader.
    ///
    /// Args:
    ///     reload_policy (str, optional): The reload policy that the
-    ///         IndexReader should use. Can be manual or OnCommit.
+    ///         IndexReader should use. Can be `Manual` or `OnCommit`.
    ///     num_searchers (int, optional): The number of searchers that the
    ///         reader should create.
-    ///
-    /// Returns the IndexReader on success, raises ValueError if a IndexReader
-    /// couldn't be created.
    #[args(reload_policy = "RELOAD_POLICY", num_searchers = 0)]
-    fn reader(
-        &self,
+    fn config_reader(
+        &mut self,
        reload_policy: &str,
        num_searchers: usize,
-    ) -> PyResult<IndexReader> {
+    ) -> Result<(), PyErr> {
        let reload_policy = reload_policy.to_lowercase();
        let reload_policy = match reload_policy.as_ref() {
            "commit" => tv::ReloadPolicy::OnCommit,
@ -242,9 +202,7 @@ impl Index {
                "Invalid reload policy, valid choices are: 'manual' and 'OnCommit'"
            ))
        };
-
-        let builder = self.inner.reader_builder();
-
+        let builder = self.index.reader_builder();
        let builder = builder.reload_policy(reload_policy);
        let builder = if num_searchers > 0 {
            builder.num_searchers(num_searchers)
@ -252,10 +210,23 @@ impl Index {
            builder
        };

-        let reader = builder.try_into();
-        match reader {
-            Ok(r) => Ok(IndexReader { inner: r }),
-            Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
+        self.reader = builder.try_into().map_err(to_pyerr)?;
+        Ok(())
+    }
+
+    /// Acquires a Searcher from the searcher pool.
+    ///
+    /// If no searcher is available during the call, note that
+    /// this call will block until one is made available.
+    ///
+    /// Searchers are automatically released back into the pool when
+    /// they are dropped. If you observe this function to block forever
+    /// you probably should configure the Index to have a larger
+    /// searcher pool, or you are holding references to previous searchers
+    /// forever.
+    fn searcher(&self) -> Searcher {
+        Searcher {
+            inner: self.reader.searcher(),
        }
    }

@ -268,19 +239,71 @@ impl Index {
    /// Raises OSError if the directory cannot be opened.
    #[staticmethod]
    fn exists(path: &str) -> PyResult<bool> {
-        let directory = MmapDirectory::open(path);
-        let dir = match directory {
-            Ok(d) => d,
-            Err(e) => return Err(exceptions::OSError::py_err(e.to_string())),
-        };
-
-        Ok(tv::Index::exists(&dir))
+        let directory = MmapDirectory::open(path).map_err(to_pyerr)?;
+        Ok(tv::Index::exists(&directory))
    }

    /// The schema of the current index.
    #[getter]
    fn schema(&self) -> Schema {
-        let schema = self.inner.schema();
+        let schema = self.index.schema();
        Schema { inner: schema }
    }
+
+    /// Update searchers so that they reflect the state of the last .commit().
+    ///
+    /// If you set up the reload policy to be on 'commit' (which is the
+    /// default) every commit should be rapidly reflected on your IndexReader
+    /// and you should not need to call reload() at all.
+    fn reload(&self) -> PyResult<()> {
+        self.reader.reload().map_err(to_pyerr)
+    }
+
+    /// Parse a query
+    ///
+    /// Args:
+    ///     query (str): The query, following the tantivy query language.
+    ///     default_field_names (List[str], optional): The names of the
+    ///         fields used to search if no field is specified in the query.
+    ///         If it is not set, all the indexed fields of the schema are
+    ///         used.
+    ///
+    /// Returns the parsed Query. Raises ValueError if one of the default
+    /// fields is not defined in the schema or is not indexed, or if the
+    /// query cannot be parsed.
+    pub fn parse_query(
+        &self,
+        query: &str,
+        default_field_names: Option<Vec<String>>,
+    ) -> PyResult<Query> {
+        let schema = self.index.schema();
+        let mut default_fields = vec![];
+        if let Some(default_field_names_vec) = default_field_names {
+            for default_field_name in &default_field_names_vec {
+                let field =
+                    schema.get_field(default_field_name).ok_or_else(|| {
+                        exceptions::ValueError::py_err(format!(
+                            "Field `{}` is not defined in the schema.",
+                            default_field_name
+                        ))
+                    })?;
+                // Searching a field that is not indexed cannot match
+                // anything, so it is rejected upfront.
+                if !schema.get_field_entry(field).is_indexed() {
+                    return Err(exceptions::ValueError::py_err(format!(
+                        "Field `{}` is not set as indexed in the schema.",
+                        default_field_name
+                    )));
+                }
+                default_fields.push(field);
+            }
+        } else {
+            // No explicit default fields: fall back to every indexed field.
+            // The position of an entry in the schema is its field id.
+            for (field_id, field_entry) in schema.fields().iter().enumerate()
+            {
+                if field_entry.is_indexed() {
+                    default_fields.push(Field(field_id as u32));
+                }
+            }
+        }
+        let parser =
+            tv::query::QueryParser::for_index(&self.index, default_fields);
+        let query = parser.parse_query(query).map_err(to_pyerr)?;
+        Ok(Query { inner: query })
+    }
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,8 +1,8 @@
+use pyo3::exceptions;
 use pyo3::prelude::*;

 mod document;
 mod facet;
-mod field;
 mod index;
 mod query;
 mod schema;
@ -11,9 +11,7 @@ mod searcher;

 use document::Document;
 use facet::Facet;
-use field::{Field, FieldValue};
 use index::Index;
-use query::QueryParser;
 use schema::Schema;
 use schemabuilder::SchemaBuilder;
 use searcher::{DocAddress, Searcher, TopDocs};
@ -75,14 +73,14 @@ fn tantivy(_py: Python, m: &PyModule) -> PyResult<()> {
    m.add_class::<Schema>()?;
    m.add_class::<SchemaBuilder>()?;
    m.add_class::<Searcher>()?;
-    m.add_class::<Index>()?;
-    m.add_class::<QueryParser>()?;
    m.add_class::<Document>()?;
+    m.add_class::<Index>()?;
    m.add_class::<DocAddress>()?;
    m.add_class::<TopDocs>()?;
-    m.add_class::<Field>()?;
-    m.add_class::<FieldValue>()?;
    m.add_class::<Facet>()?;
-
    Ok(())
 }
+
+pub(crate) fn to_pyerr<E: ToString>(err: E) -> PyErr {
+    exceptions::ValueError::py_err(err.to_string())
+}
--- a/src/query.rs
+++ b/src/query.rs
@ -1,70 +1,16 @@
-use pyo3::exceptions;
 use pyo3::prelude::*;
-use pyo3::types::PyType;
-
+use pyo3::PyObjectProtocol;
 use tantivy as tv;

-use crate::field::Field;
-use crate::index::Index;
-
 /// Tantivy's Query
 #[pyclass]
 pub(crate) struct Query {
    pub(crate) inner: Box<dyn tv::query::Query>,
 }

-/// Tantivy's Query parser
-#[pyclass]
-pub(crate) struct QueryParser {
-    inner: tv::query::QueryParser,
-}
-
-#[pymethods]
-impl QueryParser {
-    /// Creates a QueryParser for an Index.
-    ///
-    /// Args:
-    ///     index (Index): The index for which the query will be created.
-    ///     default_fields (List[Field]): A list of fields used to search if no
-    ///         field is specified in the query.
-    ///
-    /// Returns the QueryParser.
-    #[classmethod]
-    fn for_index(
-        _cls: &PyType,
-        index: &Index,
-        default_fields: Vec<&Field>,
-    ) -> PyResult<QueryParser> {
-        let default_fields: Vec<tv::schema::Field> =
-            default_fields.iter().map(|&f| f.inner).collect();
-
-        let parser =
-            tv::query::QueryParser::for_index(&index.inner, default_fields);
-        Ok(QueryParser { inner: parser })
-    }
-
-    /// Parse a string into a query that can be given to a searcher.
-    ///
-    /// Args:
-    ///     query (str): A query string that should be parsed into a query.
-    ///
-    /// Returns the parsed Query object. Raises ValueError if there was an
-    /// error with the query string.
-    fn parse_query(&self, query: &str) -> PyResult<Query> {
-        let ret = self.inner.parse_query(query);
-
-        match ret {
-            Ok(q) => Ok(Query { inner: q }),
-            Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
-        }
-    }
-
-    /// Set the default way to compose queries to a conjunction.
-    ///
-    /// By default, the query happy tax payer is equivalent to the query happy
-    /// OR tax OR payer. After calling .set_conjunction_by_default() happy tax
-    /// payer will be interpreted by the parser as happy AND tax AND payer.
-    fn set_conjunction_by_default(&mut self) {
-        self.inner.set_conjunction_by_default();
+#[pyproto]
+impl PyObjectProtocol for Query {
+    // Python `repr()`: the `Debug` rendering of the wrapped tantivy query,
+    // wrapped in `Query(...)`.
+    fn __repr__(&self) -> PyResult<String> {
+        let rendered = format!("Query({:?})", self.inner);
+        Ok(rendered)
     }
 }
--- a/src/schema.rs
+++ b/src/schema.rs
@ -1,10 +1,5 @@
-use pyo3::exceptions;
 use pyo3::prelude::*;
-
-use tantivy::schema;
-
-use crate::document::Document;
-use crate::field::Field;
+use tantivy as tv;

 /// Tantivy schema.
 ///
@ -12,54 +7,8 @@ use crate::field::Field;
 /// provided.
 #[pyclass]
 pub(crate) struct Schema {
-    pub(crate) inner: schema::Schema,
+    pub(crate) inner: tv::schema::Schema,
 }

 #[pymethods]
-impl Schema {
-    /// Build a document object from a json string.
-    ///
-    /// Args:
-    ///     doc_json (str) - A string containing json that should be parsed
-    ///         into a `Document`
-    ///
-    /// Returns the parsed document, raises a ValueError if the parsing failed.
-    fn parse_document(&self, doc_json: &str) -> PyResult<Document> {
-        let ret = self.inner.parse_document(doc_json);
-        match ret {
-            Ok(d) => Ok(Document { inner: d }),
-            Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
-        }
-    }
-
-    /// Convert a `Document` object into a json string.
-    ///
-    /// Args:
-    ///     doc (Document): The document that will be converted into a json
-    ///         string.
-    fn to_json(&self, doc: &Document) -> String {
-        self.inner.to_json(&doc.inner)
-    }
-
-    /// Return the field name for a given `Field`.
-    ///
-    /// Args:
-    ///     field (Field): The field for which the name will be returned.
-    fn get_field_name(&self, field: &Field) -> &str {
-        self.inner.get_field_name(field.inner)
-    }
-
-    /// Returns the field option associated with a given name.
-    ///
-    /// Args:
-    ///     name (str): The name of the field that we want to retrieve.
-    ///
-    /// Returns the Field if one is found, None otherwise.
-    fn get_field(&self, name: &str) -> Option<Field> {
-        let f = self.inner.get_field(name);
-        match f {
-            Some(field) => Some(Field { inner: field }),
-            None => None,
-        }
-    }
-}
+impl Schema {}
--- a/src/schemabuilder.rs
+++ b/src/schemabuilder.rs
@ -5,8 +5,8 @@ use pyo3::prelude::*;

 use tantivy::schema;

-use crate::field::Field;
 use crate::schema::Schema;
+use std::sync::{Arc, RwLock};

 /// Tantivy has a very strict schema.
 /// You need to specify in advance whether a field is indexed or not,
@ -24,8 +24,9 @@ use crate::schema::Schema;
 ///
 ///     >>> schema = builder.build()
 #[pyclass]
+#[derive(Clone)]
 pub(crate) struct SchemaBuilder {
-    pub(crate) builder: Option<schema::SchemaBuilder>,
+    pub(crate) builder: Arc<RwLock<Option<schema::SchemaBuilder>>>,
 }

 const TOKENIZER: &str = "default";
@ -36,7 +37,7 @@ impl SchemaBuilder {
    #[new]
    fn new(obj: &PyRawObject) {
        obj.init(SchemaBuilder {
-            builder: Some(schema::Schema::builder()),
+            builder: Arc::new(From::from(Some(schema::Schema::builder()))),
        });
    }

@ -70,9 +71,8 @@ impl SchemaBuilder {
        stored: bool,
        tokenizer_name: &str,
        index_option: &str,
-    ) -> PyResult<Field> {
+    ) -> PyResult<Self> {
        let builder = &mut self.builder;
-
        let index_option = match index_option {
            "position" => schema::IndexRecordOption::WithFreqsAndPositions,
            "freq" => schema::IndexRecordOption::WithFreqs,
@ -94,14 +94,14 @@ impl SchemaBuilder {
            options
        };

-        if let Some(builder) = builder {
-            let field = builder.add_text_field(name, options);
-            Ok(Field { inner: field })
+        if let Some(builder) = builder.write().unwrap().as_mut() {
+            builder.add_text_field(name, options);
        } else {
-            Err(exceptions::ValueError::py_err(
+            return Err(exceptions::ValueError::py_err(
                "Schema builder object isn't valid anymore.",
-            ))
+            ));
        }
+        Ok(self.clone())
    }

    /// Add a new signed integer field to the schema.
@ -131,19 +131,19 @@ impl SchemaBuilder {
        stored: bool,
        indexed: bool,
        fast: Option<&str>,
-    ) -> PyResult<Field> {
+    ) -> PyResult<Self> {
        let builder = &mut self.builder;

        let opts = SchemaBuilder::build_int_option(stored, indexed, fast)?;

-        if let Some(builder) = builder {
-            let field = builder.add_i64_field(name, opts);
-            Ok(Field { inner: field })
+        if let Some(builder) = builder.write().unwrap().as_mut() {
+            builder.add_i64_field(name, opts);
        } else {
-            Err(exceptions::ValueError::py_err(
+            return Err(exceptions::ValueError::py_err(
                "Schema builder object isn't valid anymore.",
-            ))
+            ));
        }
+        Ok(self.clone())
    }

    /// Add a new unsigned integer field to the schema.
@ -173,19 +173,19 @@ impl SchemaBuilder {
        stored: bool,
        indexed: bool,
        fast: Option<&str>,
-    ) -> PyResult<Field> {
+    ) -> PyResult<Self> {
        let builder = &mut self.builder;

        let opts = SchemaBuilder::build_int_option(stored, indexed, fast)?;

-        if let Some(builder) = builder {
-            let field = builder.add_u64_field(name, opts);
-            Ok(Field { inner: field })
+        if let Some(builder) = builder.write().unwrap().as_mut() {
+            builder.add_u64_field(name, opts);
        } else {
-            Err(exceptions::ValueError::py_err(
+            return Err(exceptions::ValueError::py_err(
                "Schema builder object isn't valid anymore.",
-            ))
+            ));
        }
+        Ok(self.clone())
    }

    /// Add a new date field to the schema.
@ -215,35 +215,35 @@ impl SchemaBuilder {
        stored: bool,
        indexed: bool,
        fast: Option<&str>,
-    ) -> PyResult<Field> {
+    ) -> PyResult<Self> {
        let builder = &mut self.builder;

        let opts = SchemaBuilder::build_int_option(stored, indexed, fast)?;

-        if let Some(builder) = builder {
-            let field = builder.add_date_field(name, opts);
-            Ok(Field { inner: field })
+        if let Some(builder) = builder.write().unwrap().as_mut() {
+            builder.add_date_field(name, opts);
        } else {
-            Err(exceptions::ValueError::py_err(
+            return Err(exceptions::ValueError::py_err(
                "Schema builder object isn't valid anymore.",
-            ))
+            ));
        }
+        Ok(self.clone())
    }

    /// Add a Facet field to the schema.
    /// Args:
    ///     name (str): The name of the field.
-    fn add_facet_field(&mut self, name: &str) -> PyResult<Field> {
+    fn add_facet_field(&mut self, name: &str) -> PyResult<Self> {
        let builder = &mut self.builder;

-        if let Some(builder) = builder {
-            let field = builder.add_facet_field(name);
-            Ok(Field { inner: field })
+        if let Some(builder) = builder.write().unwrap().as_mut() {
+            builder.add_facet_field(name);
        } else {
-            Err(exceptions::ValueError::py_err(
+            return Err(exceptions::ValueError::py_err(
                "Schema builder object isn't valid anymore.",
-            ))
+            ));
        }
+        Ok(self.clone())
    }

    /// Add a fast bytes field to the schema.
@ -254,17 +254,17 @@ impl SchemaBuilder {
    ///
    /// Args:
    ///     name (str): The name of the field.
-    fn add_bytes_field(&mut self, name: &str) -> PyResult<Field> {
+    fn add_bytes_field(&mut self, name: &str) -> PyResult<Self> {
        let builder = &mut self.builder;

-        if let Some(builder) = builder {
-            let field = builder.add_bytes_field(name);
-            Ok(Field { inner: field })
+        if let Some(builder) = builder.write().unwrap().as_mut() {
+            builder.add_bytes_field(name);
        } else {
-            Err(exceptions::ValueError::py_err(
+            return Err(exceptions::ValueError::py_err(
                "Schema builder object isn't valid anymore.",
-            ))
+            ));
        }
+        Ok(self.clone())
    }

    /// Finalize the creation of a Schema.
@ -272,7 +272,7 @@ impl SchemaBuilder {
    /// Returns a Schema object. After this is called the SchemaBuilder cannot
    /// be used anymore.
    fn build(&mut self) -> PyResult<Schema> {
-        let builder = self.builder.take();
+        let builder = self.builder.write().unwrap().take();
        if let Some(builder) = builder {
            let schema = builder.build();
            Ok(Schema { inner: schema })
--- a/src/searcher.rs
+++ b/src/searcher.rs
@ -1,12 +1,11 @@
 #![allow(clippy::new_ret_no_self)]

-use pyo3::exceptions;
-use pyo3::prelude::*;
-
-use tantivy as tv;
-
 use crate::document::Document;
 use crate::query::Query;
+use crate::to_pyerr;
+use pyo3::prelude::*;
+use pyo3::{exceptions, PyObjectProtocol};
+use tantivy as tv;

 /// Tantivy's Searcher class
 ///
@ -60,11 +59,11 @@ impl Searcher {
    ///
    /// Returns the Document, raises ValueError if the document can't be found.
    fn doc(&self, doc_address: &DocAddress) -> PyResult<Document> {
-        let ret = self.inner.doc(doc_address.into());
-        match ret {
-            Ok(doc) => Ok(Document { inner: doc }),
-            Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
-        }
+        let doc = self.inner.doc(doc_address.into()).map_err(to_pyerr)?;
+        let named_doc = self.inner.schema().to_named_doc(&doc);
+        Ok(Document {
+            field_values: named_doc.0,
+        })
    }
 }

@ -132,3 +131,14 @@ impl TopDocs {
        Ok(())
    }
 }
+
+#[pyproto]
+impl PyObjectProtocol for Searcher {
+    // Python `repr()`: reports the searcher's document and segment counts.
+    fn __repr__(&self) -> PyResult<String> {
+        let num_docs = self.inner.num_docs();
+        let num_segments = self.inner.segment_readers().len();
+        let rendered = format!(
+            "Searcher(num_docs={}, num_segments={})",
+            num_docs, num_segments
+        );
+        Ok(rendered)
+    }
+}
--- a/tests/tantivy_test.py
+++ b/tests/tantivy_test.py
@ -1,192 +1,188 @@
-import json
 import tantivy
-
 import pytest

+from tantivy import Document, Index, SchemaBuilder, Schema
+
+
+def schema():
+    return SchemaBuilder() \
+        .add_text_field("title", stored=True) \
+        .add_text_field("body") \
+        .build()
+

@pytest.fixture(scope="class")
 def ram_index():
-        # assume all tests will use the same documents for now
-        # other methods may set up function-local indexes
-        builder = tantivy.SchemaBuilder()
+    # assume all tests will use the same documents for now
+    # other methods may set up function-local indexes
+    index = Index(schema())
+    writer = index.writer()

-        title = builder.add_text_field("title", stored=True)
-        body = builder.add_text_field("body")
-
-        schema = builder.build()
-        index = tantivy.Index(schema)
-
-        writer = index.writer()
-
-        # 2 ways of adding documents
-        # 1
-        doc = tantivy.Document()
-        # create a document instance
-        # add field-value pairs
-        doc.add_text(title, "The Old Man and the Sea")
-        doc.add_text(body, ("He was an old man who fished alone in a skiff in"
-                            "the Gulf Stream and he had gone eighty-four days "
-                            "now without taking a fish."))
-        writer.add_document(doc)
-        # 2 use the built-in json support
-        # keys need to coincide with field names
-        doc = schema.parse_document(json.dumps({
-            "title": "Of Mice and Men",
-            "body": ("A few miles south of Soledad, the Salinas River drops "
-                     "in close to the hillside bank and runs deep and "
-                     "green. The water is warm too, for it has slipped "
-                     "twinkling over the yellow sands in the sunlight "
-                     "before reaching the narrow pool. On one side of the "
-                     "river the golden foothill slopes curve up to the "
-                     "strong and rocky Gabilan Mountains, but on the valley "
-                     "side the water is lined with trees—willows fresh and "
-                     "green with every spring, carrying in their lower leaf "
-                     "junctures the debris of the winter’s flooding; and "
-                     "sycamores with mottled, white, recumbent limbs and "
-                     "branches that arch over the pool")
-        }))
-
-        writer.add_document(doc)
-
-        doc = schema.parse_document(json.dumps({
+    # 2 ways of adding documents
+    # 1
+    doc = Document()
+    # create a document instance
+    # add field-value pairs
+    doc.add_text("title", "The Old Man and the Sea")
+    doc.add_text("body", ("He was an old man who fished alone in a skiff in"
+                          "the Gulf Stream and he had gone eighty-four days "
+                          "now without taking a fish."))
+    writer.add_document(doc)
+    # 2 use the built-in json support
+    # keys need to coincide with field names
+    doc = Document.from_dict({
+        "title": "Of Mice and Men",
+        "body": ("A few miles south of Soledad, the Salinas River drops "
+                 "in close to the hillside bank and runs deep and "
+                 "green. The water is warm too, for it has slipped "
+                 "twinkling over the yellow sands in the sunlight "
+                 "before reaching the narrow pool. On one side of the "
+                 "river the golden foothill slopes curve up to the "
+                 "strong and rocky Gabilan Mountains, but on the valley "
+                 "side the water is lined with trees—willows fresh and "
+                 "green with every spring, carrying in their lower leaf "
+                 "junctures the debris of the winter’s flooding; and "
+                 "sycamores with mottled, white, recumbent limbs and "
+                 "branches that arch over the pool")
+    })
+    writer.add_document(doc)
+    writer.add_json("""{
            "title": ["Frankenstein", "The Modern Prometheus"],
-            "body": ("You will rejoice to hear that no disaster has "
-                     "accompanied the commencement of an enterprise which you "
-                     "have regarded with such evil forebodings.  I arrived "
-                     "here yesterday, and my first task is to assure my dear "
-                     "sister of my welfare and increasing confidence in the "
-                     "success of my undertaking.")
-        }))
-
-        writer.add_document(doc)
-        writer.commit()
-
-        reader = index.reader()
-        searcher = reader.searcher()
-        index = index
-        schema = schema
-        default_args = [title, body]
-        ret = (index, searcher, schema, default_args, title, body)
-        return ret
+            "body": "You will rejoice to hear that no disaster has accompanied the commencement of an enterprise which you have regarded with such evil forebodings.  I arrived here yesterday, and my first task is to assure my dear sister of my welfare and increasing confidence in the success of my undertaking."
+        }""")
+    writer.commit()
+    index.reload()
+    return index


 class TestClass(object):

    def test_simple_search(self, ram_index):
-        index, searcher, schema, default_args, title, body = ram_index
-        query_parser = tantivy.QueryParser.for_index(index, default_args)
-        query = query_parser.parse_query("sea whale")
+        index = ram_index
+        query = index.parse_query("sea whale", ["title", "body"])

        top_docs = tantivy.TopDocs(10)

-        result = searcher.search(query, top_docs)
-        print(result)
-
+        result = index.searcher().search(query, top_docs)
        assert len(result) == 1
-
        _, doc_address = result[0]
-
-        searched_doc = searcher.doc(doc_address)
-        assert searched_doc.get_first(title) == "The Old Man and the Sea"
-
-    def test_doc(self):
-        builder = tantivy.SchemaBuilder()
-        title = builder.add_text_field("title", stored=True)
-
-        doc = tantivy.Document()
-        assert doc.is_empty
-
-        doc.add_text(title, "The Old Man and the Sea")
-
-        assert doc.get_first(title) == "The Old Man and the Sea"
-
-        assert doc.len == 1
-        assert not doc.is_empty
+        searched_doc = index.searcher().doc(doc_address)
+        assert searched_doc["title"] == ["The Old Man and the Sea"]

    def test_and_query(self, ram_index):
-        index, searcher, schema, default_args, title, body = ram_index
-        q_parser = tantivy.QueryParser.for_index(index, default_args)
+        index = ram_index
+        query = index.parse_query("title:men AND body:summer", default_field_names=["title", "body"])
        # look for an intersection of documents
-        query = q_parser.parse_query("title:men AND body:summer")
        top_docs = tantivy.TopDocs(10)
-
+        searcher = index.searcher()
        result = searcher.search(query, top_docs)
-        print(result)

        # summer isn't present
        assert len(result) == 0

-        query = q_parser.parse_query("title:men AND body:winter")
+        query = index.parse_query("title:men AND body:winter", ["title", "body"])
        result = searcher.search(query, top_docs)

        assert len(result) == 1

+    def test_and_query_parser_default_fields(self, ram_index):
+        query = ram_index.parse_query("winter", default_field_names=["title"])
+        assert repr(query) == """Query(TermQuery(Term(field=0,bytes=[119, 105, 110, 116, 101, 114])))"""
+
+    def test_and_query_parser_default_fields_undefined(self, ram_index):
+        query = ram_index.parse_query("winter")
+        assert repr(query) == "Query(BooleanQuery { subqueries: [" \
+                              "(Should, TermQuery(Term(field=0,bytes=[119, 105, 110, 116, 101, 114]))), " \
+                              "(Should, TermQuery(Term(field=1,bytes=[119, 105, 110, 116, 101, 114])))] " \
+                              "})"
+
    def test_query_errors(self, ram_index):
-        index, searcher, schema, default_args, title, body = ram_index
-        q_parser = tantivy.QueryParser.for_index(index, default_args)
+        index = ram_index
        # no "bod" field
        with pytest.raises(ValueError):
-            q_parser.parse_query("bod:title")
+            index.parse_query("bod:men", ["title", "body"])


-@pytest.fixture(scope="class")
-def disk_index():
-    builder = tantivy.SchemaBuilder()
-    title = builder.add_text_field("title", stored=True)
-    body = builder.add_text_field("body")
-    default_args = [title, body]
-    schema = builder.build()
-    schema = schema
-    index = tantivy.Index(schema)
-    path_to_index = "tests/test_index/"
-    return index, path_to_index, schema, default_args, title, body
+PATH_TO_INDEX = "tests/test_index/"


 class TestFromDiskClass(object):

-    def test_exists(self, disk_index):
+    def test_exists(self):
        # prefer to keep it separate in case anyone deletes this
        # runs from the root directory
-        index, path_to_index, _, _, _, _ = disk_index
-        assert index.exists(path_to_index)
+        assert Index.exists(PATH_TO_INDEX)

-    def test_opens_from_dir(self, disk_index):
-        _, path_to_index, schema, _, _, _ = disk_index
-        tantivy.Index(schema, path_to_index)
+    def test_opens_from_dir(self):
+        index = Index(schema(), PATH_TO_INDEX, reuse=True)
+        assert index.searcher().num_docs == 3

-    def test_create_readers(self, disk_index):
-        _, path_to_index, schema, _, _, _ = disk_index
-        idx = tantivy.Index(schema, path_to_index)
-        reload_policy = "OnCommit"  # or "Manual"
-        assert idx.reader(reload_policy, 4)
-        assert idx.reader("Manual", 4)
+    def test_create_readers(self):
+        # not sure what is the point of this test.
+        idx = Index(schema())
+        assert idx.searcher().num_docs == 0
+        # by default this is manual mode
+        writer = idx.writer(30000000, 1)
+        writer.add_document(Document(title="mytitle", body="mybody"))
+        writer.commit()
+        assert idx.searcher().num_docs == 0
+        # Manual is the default setting.
+        # In this case, change are reflected only when
+        # the index is manually reloaded.
+        idx.reload()
+        assert idx.searcher().num_docs == 1
+        idx.config_reader("OnCommit", 4)
+        writer.add_document(Document(title="mytitle2", body="mybody2"))
+        writer.commit()
+        import time
+        for i in range(50):
+            # The index should be automatically reloaded.
+            # Wait for at most 5s for it to happen.
+            time.sleep(0.1)
+            if idx.searcher().num_docs == 2:
+                return
+        assert False

-    def test_create_writer_and_reader(self, disk_index):
-        _, path_to_index, schema, default_args, title, body = disk_index
-        idx = tantivy.Index(schema, path_to_index)
-        writer = idx.writer()
-        reload_policy = "OnCommit"  # or "Manual"
-        reader = idx.reader(reload_policy, 4)

-        # check against the opstamp in the meta file
-        meta_fname = "meta.json"
-        with open("{}{}".format(path_to_index, meta_fname)) as f:
-            json_file = json.load(f)
-            expected_last_opstamp = json_file["opstamp"]
-            # ASSUMPTION
-            # We haven't had any deletes in the index
-            # so max_doc per index coincides with the value of `num_docs`
-            # summing them in all segments, gives the number of documents
-            expected_num_docs = sum([segment["max_doc"]
-                                     for segment in json_file["segments"]])
-        assert writer.commit_opstamp == expected_last_opstamp
+class TestSearcher(object):
+    def test_searcher_repr(self, ram_index):
+        assert repr(ram_index.searcher()) == "Searcher(num_docs=3, num_segments=1)"

-        q_parser = tantivy.QueryParser.for_index(idx, default_args)
-        # get all documents
-        query = q_parser.parse_query("*")
-        top_docs = tantivy.TopDocs(10)

-        docs = reader.searcher().search(query, top_docs)
-        for (_score, doc_addr) in docs:
-            print(reader.searcher().doc(doc_addr))
-        assert expected_num_docs == len(docs)
+class TestDocument(object):
+
+    def test_document(self):
+        doc = tantivy.Document(name="Bill", reference=[1, 2])
+        assert doc["reference"] == [1, 2]
+        assert doc["name"] == ["Bill"]
+        assert doc.get_first("name") == "Bill"
+        assert doc.get_first("reference") == 1
+        assert doc.to_dict() == {"name": ["Bill"], "reference": [1, 2]}
+
+    def test_document_with_date(self):
+        import datetime
+        date = datetime.datetime(2019, 8, 12, 13, 0, 0, )
+        doc = tantivy.Document(name="Bill", date=date)
+        assert doc["date"][0] == date
+
+    def test_document_repr(self):
+        doc = tantivy.Document(name="Bill", reference=[1, 2])
+        assert repr(doc) == "Document(name=[Bill],reference=[1,2])"
+
+    def test_document_with_facet(self):
+        doc = tantivy.Document()
+        facet = tantivy.Facet.from_string("/europe/france")
+        doc.add_facet("facet", facet)
+        assert doc["facet"][0].to_path() == ['europe', 'france']
+        doc = tantivy.Document()
+        facet = tantivy.Facet.from_string("/asia\\/oceania/fiji")
+        doc.add_facet("facet", facet)
+        assert doc["facet"][0].to_path() == ['asia/oceania', 'fiji']
+        assert doc["facet"][0].to_path_str() == "/asia\\/oceania/fiji"
+        assert repr(doc["facet"][0]) == "Facet(/asia\\/oceania/fiji)"
+        doc = tantivy.Document(facet=facet)
+        assert doc["facet"][0].to_path() == ['asia/oceania', 'fiji']
+
+    def test_document_error(self):
+        with pytest.raises(ValueError):
+            tantivy.Document(name={})