Merge pull request #1 from matrix-org/initial-bindings

Initial python bindings implementation.
2019-08-01 16:05:15 +09:00 · 2019-08-01 16:05:15 +09:00 · 9158a4fd7d
parent 5f4e1ef253 5d92452604
commit 9158a4fd7d
21 changed files with 1443 additions and 10 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,10 +1,7 @@
-# Generated by Cargo
+/target
 # will have compiled files and executables
 /target/
 # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
 # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
 Cargo.lock
 # These are backup files generated by rustfmt
 **/*.rs.bk
 Cargo.lock
 dist/
 __pycache__/
 tantivy.so
 tantivy.egg-info/
--- a/Cargo.toml
+++ b/Cargo.toml
@ -0,0 +1,18 @@
 [package]
 name = "tantivy-py"
 version = "0.1.0"
 authors = ["Damir Jelić <poljar@termina.org.uk>"]
 edition = "2018"
 license = "MIT"
 [lib]
 name = "tantivy"
 # The library is loaded by the Python interpreter as an extension module, so
 # it is built as a C-compatible dynamic library (`cdylib`) rather than as a
 # Rust-to-Rust `dylib`.
 crate-type = ["cdylib"]
 [dependencies]
 chrono = "0.4"
 tantivy = { git = "https://github.com/tantivy-search/tantivy" }
 [dependencies.pyo3]
 version = "0.7.0"
 features = ["extension-module"]
--- a/2
+++ b/2
@ -1,6 +1,6 @@
 MIT License
-Copyright (c) 2019 tantivy
+Copyright (c) 2019 The Matrix.org Foundation CIC
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -0,0 +1,4 @@
 include Cargo.toml
 include Makefile
 include rust-toolchain
 recursive-include src *
--- a/17
+++ b/17
@ -0,0 +1,17 @@
 # Rust sources; the debug library is rebuilt whenever one of them changes.
 source_files := $(wildcard src/*.rs)
 all: tantivy/tantivy.so
 # Targets that do not produce a file of the same name. The special target
 # must be spelled `.PHONY` (with the leading dot) for make to honour it.
 .PHONY: all test format
 # Run the Python test suite against a freshly copied extension module.
 test: tantivy/tantivy.so
 	python3 -m pytest
 # Format the Rust sources in place.
 format:
 	rustfmt src/*.rs
 # Copy the compiled library next to the Python package under the module name.
 tantivy/tantivy.so: target/debug/libtantivy.so
 	cp target/debug/libtantivy.so tantivy/tantivy.so
 target/debug/libtantivy.so: $(source_files)
 	cargo build
--- a/README.md
+++ b/README.md
@ -0,0 +1,54 @@
 tantivy-py
 ==========
 Python bindings for tantivy.
 # Installation
 The bindings can be installed using setuptools:
    python3 setup.py install --user
 Note that this requires setuptools-rust to be installed. Another thing to note
 is that the bindings are using [PyO3](https://github.com/PyO3/pyo3), which
 requires rust nightly and only supports python3.
 # Usage
 tantivy-py has a similar API to tantivy. To create an index, a schema first
 needs to be built. After that, documents can be added to the index and a reader
 can be created to search the index.
 ```python
    builder = tantivy.SchemaBuilder()
    title = builder.add_text_field("title", stored=True)
    body = builder.add_text_field("body")
    schema = builder.build()
    index = tantivy.Index(schema)
    writer = index.writer()
    doc = tantivy.Document()
    doc.add_text(title, "The Old Man and the Sea")
    doc.add_text(body, ("He was an old man who fished alone in a skiff in "
                        "the Gulf Stream and he had gone eighty-four days "
                        "now without taking a fish."))
    writer.add_document(doc)
    writer.commit()
    reader = index.reader()
    searcher = reader.searcher()
    query_parser = tantivy.QueryParser.for_index(index, [title, body])
    query = query_parser.parse_query("sea whale")
    top_docs = tantivy.TopDocs(10)
    result = searcher.search(query, top_docs)
    _, doc_address = result[0]
    searched_doc = searcher.doc(doc_address)
    assert searched_doc.get_first(title) == "The Old Man and the Sea"
 ```
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,2 @@
 [build-system]
 requires = ["setuptools", "wheel", "setuptools-rust"]
--- a/1
+++ b/1
@ -0,0 +1 @@
 nightly-2019-05-22
--- a/rustfmt.toml
+++ b/rustfmt.toml
@ -0,0 +1 @@
 max_width = 80
--- a/setup.py
+++ b/setup.py
@ -0,0 +1,15 @@
 # Build script for the `tantivy` Python package. The Rust extension module is
 # compiled through setuptools-rust.
 from setuptools import setup
 try:
    from setuptools_rust import Binding, RustExtension
 except ImportError:
    # setuptools-rust is needed to build the extension: print a hint and exit
    # with a non-zero status instead of surfacing the ImportError traceback.
    print("Please install setuptools-rust package")
    raise SystemExit(1)
 setup(
    name="tantivy",
    version="0.9.1",
    # Build the Rust crate as the `tantivy.tantivy` extension using PyO3.
    rust_extensions=[RustExtension("tantivy.tantivy", binding=Binding.PyO3)],
    packages=["tantivy"],
    # The package ships a native extension, so it is not marked zip-safe.
    zip_safe=False,
 )
--- a/src/document.rs
+++ b/src/document.rs
@ -0,0 +1,152 @@
 use pyo3::prelude::*;
 use pyo3::types::PyDateTime;
 use pyo3::types::{PyDateAccess, PyTimeAccess};
 use chrono::offset::TimeZone;
 use chrono::Utc;
 use tantivy as tv;
 use crate::facet::Facet;
 use crate::field::{Field, FieldValue};
 /// Tantivy's Document is the object that can be indexed and then searched for.
 ///
 /// Documents are fundamentally a collection of unordered tuples
 /// (field, value). In this list, one field may appear more than once.
 ///
 /// Example:
 ///     >>> doc = tantivy.Document()
 ///     >>> doc.add_text(title, "The Old Man and the Sea")
 ///     >>> doc.add_text(body, ("He was an old man who fished alone in a "
 ///                             "skiff in the Gulf Stream and he had gone "
 ///                             "eighty-four days now without taking a fish."))
 #[pyclass]
 pub(crate) struct Document {
    // The wrapped tantivy document; every method delegates to it.
    pub(crate) inner: tv::Document,
 }
 #[pymethods]
 impl Document {
    /// Create a new, empty document.
    #[new]
    fn new(obj: &PyRawObject) {
        obj.init(Document {
            inner: tv::Document::default(),
        });
    }
    /// Add a text value to the document.
    ///
    /// Args:
    ///     field (Field): The field for which we are adding the text.
    ///     text (str): The text that will be added to the document.
    fn add_text(&mut self, field: &Field, text: &str) {
        self.inner.add_text(field.inner, text);
    }
    /// Add an unsigned integer value to the document.
    ///
    /// Args:
    ///     field (Field): The field for which we are adding the integer.
    ///     value (int): The integer that will be added to the document.
    fn add_unsigned(&mut self, field: &Field, value: u64) {
        self.inner.add_u64(field.inner, value);
    }
    /// Add a signed integer value to the document.
    ///
    /// Args:
    ///     field (Field): The field for which we are adding the integer.
    ///     value (int): The integer that will be added to the document.
    fn add_integer(&mut self, field: &Field, value: i64) {
        self.inner.add_i64(field.inner, value);
    }
    /// Add a date value to the document.
    ///
    /// Args:
    ///     field (Field): The field for which we are adding the date.
    ///     value (datetime): The date that will be added to the document.
    ///         Its year, month, day, hour, minute, second and microsecond
    ///         fields are read as they are and stored as a UTC timestamp;
    ///         any tzinfo attached to the value is not consulted.
    fn add_date(&mut self, field: &Field, value: &PyDateTime) {
        // Rebuild the timestamp from the individual calendar/clock fields,
        // interpreting them as UTC.
        let datetime = Utc
            .ymd(
                value.get_year().into(),
                value.get_month().into(),
                value.get_day().into(),
            )
            .and_hms_micro(
                value.get_hour().into(),
                value.get_minute().into(),
                value.get_second().into(),
                value.get_microsecond().into(),
            );
        self.inner.add_date(field.inner, &datetime);
    }
    /// Add a facet value to the document.
    ///
    /// Args:
    ///     field (Field): The field for which we are adding the facet.
    ///     value (Facet): The Facet that will be added to the document.
    fn add_facet(&mut self, field: &Field, value: &Facet) {
        self.inner.add_facet(field.inner, value.inner.clone());
    }
    /// Add a bytes value to the document.
    ///
    /// Args:
    ///     field (Field): The field for which we are adding the bytes.
    ///     value (bytes): The bytes that will be added to the document.
    fn add_bytes(&mut self, field: &Field, value: Vec<u8>) {
        self.inner.add_bytes(field.inner, value);
    }
    /// The number of field values that have been added to the document.
    #[getter]
    fn len(&self) -> usize {
        self.inner.len()
    }
    /// True if the document is empty, False otherwise.
    #[getter]
    fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }
    /// Get the first value associated with the given field.
    ///
    /// Args:
    ///     field (Field): The field for which we would like to get the value.
    ///
    /// Returns the value if one is found, otherwise None. None is also
    /// returned when the stored value cannot be converted to a Python object.
    /// The type of the value depends on the field.
    fn get_first(&self, py: Python, field: &Field) -> Option<PyObject> {
        let value = self.inner.get_first(field.inner)?;
        FieldValue::value_to_py(py, value)
    }
    /// Get all the values associated with the given field.
    ///
    /// Args:
    ///     field (Field): The field for which we would like to get the values.
    ///
    /// Returns a list of values; values that cannot be converted to a Python
    /// object are left out.
    /// The type of the value depends on the field.
    fn get_all(&self, py: Python, field: &Field) -> Vec<PyObject> {
        self.inner
            .get_all(field.inner)
            .into_iter()
            .filter_map(|value| FieldValue::value_to_py(py, value))
            .collect()
    }
    /// Get all the fields and values contained in the document.
    ///
    /// Returns a list of FieldValue objects, one per stored (field, value)
    /// pair.
    fn field_values(&self, py: Python) -> Vec<FieldValue> {
        self.inner
            .field_values()
            .iter()
            .map(|field_value| FieldValue::field_value_to_py(py, field_value))
            .collect()
    }
 }
--- a/src/facet.rs
+++ b/src/facet.rs
@ -0,0 +1,55 @@
 use pyo3::prelude::*;
 use pyo3::types::PyType;
 use tantivy::schema;
 /// A Facet represents a point in a given hierarchy.
 ///
 /// They are typically represented similarly to a filepath. For instance, an
 /// e-commerce website could have a Facet for /electronics/tv_and_video/led_tv.
 ///
 /// A document can be associated with any number of facets. The hierarchy
 /// implicitly implies that a document belonging to a facet also belongs to
 /// the ancestors of its facet. In the example above, /electronics/tv_and_video/
 /// and /electronics.
 #[pyclass]
 pub(crate) struct Facet {
    // The wrapped tantivy facet.
    pub(crate) inner: schema::Facet,
 }
 #[pymethods]
 impl Facet {
    /// Build the "root facet", i.e. the facet equivalent to /.
    #[classmethod]
    fn root(_cls: &PyType) -> Facet {
        let inner = schema::Facet::root();
        Facet { inner }
    }
    /// True when this facet is the root facet /.
    #[getter]
    fn is_root(&self) -> bool {
        let facet = &self.inner;
        facet.is_root()
    }
    /// Returns true if this facet is a prefix of another Facet.
    ///
    /// Args:
    ///     other (Facet): The Facet that is checked against this facet.
    fn is_prefix_of(&self, other: &Facet) -> bool {
        let candidate = &other.inner;
        self.inner.is_prefix_of(candidate)
    }
    /// Build a Facet object out of its textual form.
    ///
    /// Args:
    ///     facet_string (str): The string that contains a facet.
    ///
    /// Returns the created Facet.
    #[classmethod]
    fn from_string(_cls: &PyType, facet_string: &str) -> Facet {
        let inner = schema::Facet::from_text(facet_string);
        Facet { inner }
    }
 }
--- a/src/field.rs
+++ b/src/field.rs
@ -0,0 +1,72 @@
 use pyo3::prelude::*;
 use pyo3::types::PyDateTime;
 use tantivy::schema;
 use crate::facet::Facet;
 /// Field is a numeric identifier that represents an entry in the Schema.
 #[pyclass]
 #[derive(Clone)]
 pub(crate) struct Field {
    // The wrapped tantivy field handle.
    pub(crate) inner: schema::Field,
 }
 /// FieldValue holds together a Field and its Value.
 #[pyclass]
 pub(crate) struct FieldValue {
    // The field the value belongs to.
    pub(crate) field: Field,
    // The value, already converted to a Python object.
    pub(crate) value: PyObject,
 }
 #[pymethods]
 impl FieldValue {
    /// The Field this value belongs to (returned as a copy of the handle).
    #[getter]
    fn field(&self) -> Field {
        self.field.clone()
    }
    /// The stored value as a Python object.
    #[getter]
    fn value(&self) -> &PyObject {
        &self.value
    }
 }
 impl FieldValue {
    pub(crate) fn value_to_py(
        py: Python,
        value: &schema::Value,
    ) -> Option<PyObject> {
        match value {
            schema::Value::Str(text) => Some(text.into_object(py)),
            schema::Value::U64(num) => Some(num.into_object(py)),
            schema::Value::I64(num) => Some(num.into_object(py)),
            schema::Value::Bytes(b) => Some(b.to_object(py)),
            schema::Value::Date(d) => {
                let date =
                    PyDateTime::from_timestamp(py, d.timestamp() as f64, None);
                match date {
                    Ok(d) => Some(d.into_object(py)),
                    Err(_e) => None,
                }
            }
            schema::Value::Facet(f) => {
                Some(Facet { inner: f.clone() }.into_object(py))
            }
        }
    }
    pub(crate) fn field_value_to_py(
        py: Python,
        field_value: &schema::FieldValue,
    ) -> FieldValue {
        let value = field_value.value();
        let field = field_value.field();
        FieldValue {
            field: Field { inner: field },
            value: FieldValue::value_to_py(py, value).unwrap(),
        }
    }
 }
--- a/src/index.rs
+++ b/src/index.rs
@ -0,0 +1,284 @@
 use pyo3::exceptions;
 use pyo3::prelude::*;
 use crate::document::Document;
 use crate::schema::Schema;
 use crate::searcher::Searcher;
 use tantivy as tv;
 use tantivy::directory::MmapDirectory;
 // Name of the default reload policy.
 // NOTE(review): presumably picked up as the default of `Index::reader`
 // through its `#[args(reload_policy = "RELOAD_POLICY", ..)]` attribute.
 const RELOAD_POLICY: &str = "commit";
 /// IndexReader is the entry point to read and search the index.
 ///
 /// IndexReader controls when a new version of the index should be loaded and
 /// lends you instances of Searcher for the last loaded version.
 ///
 /// To create an IndexReader first create an Index and call the reader() method
 /// on the index object.
 #[pyclass]
 pub(crate) struct IndexReader {
    // The wrapped tantivy reader.
    inner: tv::IndexReader,
 }
 #[pymethods]
 impl IndexReader {
    /// Refresh the searchers so that they reflect the last .commit().
    ///
    /// With the 'commit' reload policy (the default) every commit should be
    /// rapidly reflected on the IndexReader, so calling reload() should not
    /// be necessary at all.
    ///
    /// Raises ValueError if the reload fails.
    fn reload(&self) -> PyResult<()> {
        self.inner
            .reload()
            .map(|_| ())
            .map_err(|err| exceptions::ValueError::py_err(err.to_string()))
    }
    /// Get a Searcher for the index.
    ///
    /// Call this method every single time a search query is performed. The
    /// searchers are taken from a pool of num_searchers searchers.
    ///
    /// Returns a Searcher object, if no searcher is available this may block.
    fn searcher(&self) -> Searcher {
        Searcher {
            inner: self.inner.searcher(),
        }
    }
 }
 /// IndexWriter is the user entry-point to add documents to the index.
 ///
 /// To create an IndexWriter first create an Index and call the writer() method
 /// on the index object.
 #[pyclass]
 pub(crate) struct IndexWriter {
    // The wrapped tantivy writer.
    inner: tv::IndexWriter,
 }
 #[pymethods]
 impl IndexWriter {
    /// Add a document to the index.
    ///
    /// If the indexing pipeline is full, this call may block.
    ///
    /// Args:
    ///     document (Document): The document to add; it is copied, so the
    ///         Python object can be reused afterwards.
    ///
    /// Returns None. The value returned by the underlying tantivy call is
    /// currently discarded; use the `commit_opstamp` property to inspect the
    /// opstamp of the last commit.
    fn add_document(&mut self, document: &Document) -> PyResult<()> {
        self.inner.add_document(document.inner.clone());
        Ok(())
    }
    /// Commits all of the pending changes.
    ///
    /// A call to commit blocks. After it returns, all of the documents that
    /// were added since the last commit are published and persisted.
    ///
    /// In case of a crash or a hardware failure (as long as the hard disk is
    /// spared), it will be possible to resume indexing from this point.
    ///
    /// Returns None; the opstamp of the commit is available through the
    /// `commit_opstamp` property. Raises ValueError if the commit fails.
    fn commit(&mut self) -> PyResult<()> {
        self.inner
            .commit()
            .map(|_| ())
            .map_err(|e| exceptions::ValueError::py_err(e.to_string()))
    }
    /// Rollback to the last commit.
    ///
    /// This cancels all of the updates that happened after the last commit.
    /// After calling rollback, the index is in the same state as it was
    /// after the last commit.
    ///
    /// Raises ValueError if the rollback fails.
    fn rollback(&mut self) -> PyResult<()> {
        self.inner
            .rollback()
            .map(|_| ())
            .map_err(|e| exceptions::ValueError::py_err(e.to_string()))
    }
    /// Detects and removes the files that are not used by the index anymore.
    ///
    /// Raises ValueError if the garbage collection fails.
    fn garbage_collect_files(&mut self) -> PyResult<()> {
        self.inner
            .garbage_collect_files()
            .map(|_| ())
            .map_err(|e| exceptions::ValueError::py_err(e.to_string()))
    }
    /// The opstamp of the last successful commit.
    ///
    /// This is the opstamp the index will rollback to if there is a failure
    /// like a power surge.
    ///
    /// This is also the opstamp of the commit that is currently available
    /// for searchers.
    #[getter]
    fn commit_opstamp(&self) -> u64 {
        self.inner.commit_opstamp()
    }
 }
 /// Create a new index object.
 ///
 /// Args:
 ///     schema (Schema): The schema of the index.
 ///     path (str, optional): The path where the index should be stored. If
 ///         no path is provided, the index will be stored in memory.
 ///     reuse (bool, optional): Should we open an existing index if one exists
 ///         or always create a new one. Defaults to True.
 ///
 /// With `reuse` set, an index that already exists at `path` is opened and
 /// reused. Raises OSError if there was a problem during the opening or
 /// creation of the index.
 #[pyclass]
 pub(crate) struct Index {
    // The wrapped tantivy index.
    pub(crate) inner: tv::Index,
 }
 #[pymethods]
 impl Index {
    #[new]
    #[args(reuse = true)]
    fn new(
        obj: &PyRawObject,
        schema: &Schema,
        path: Option<&str>,
        reuse: bool,
    ) -> PyResult<()> {
        let index = match path {
            Some(p) => {
                let directory = MmapDirectory::open(p);
                let dir = match directory {
                    Ok(d) => d,
                    Err(e) => {
                        return Err(exceptions::OSError::py_err(e.to_string()))
                    }
                };
                let i = if reuse {
                    tv::Index::open_or_create(dir, schema.inner.clone())
                } else {
                    tv::Index::create(dir, schema.inner.clone())
                };
                match i {
                    Ok(index) => index,
                    Err(e) => {
                        return Err(exceptions::OSError::py_err(e.to_string()))
                    }
                }
            }
            None => tv::Index::create_in_ram(schema.inner.clone()),
        };
        obj.init(Index { inner: index });
        Ok(())
    }
    /// Create a `IndexWriter` for the index.
    ///
    /// The writer will be multithreaded and the provided heap size will be
    /// split between the given number of threads.
    ///
    /// Args:
    ///     overall_heap_size (int, optional): The total target memory usage of
    ///         the writer, can't be less than 3000000.
    ///     num_threads (int, optional): The number of threads that the writer
    ///         should use. If this value is 0, tantivy will choose
    ///         automatically the number of threads.
    ///
    /// Raises ValueError if there was an error while creating the writer.
    #[args(heap_size = 3000000, num_threads = 0)]
    fn writer(
        &self,
        heap_size: usize,
        num_threads: usize,
    ) -> PyResult<IndexWriter> {
        let writer = match num_threads {
            0 => self.inner.writer(heap_size),
            _ => self.inner.writer_with_num_threads(num_threads, heap_size),
        };
        match writer {
            Ok(w) => Ok(IndexWriter { inner: w }),
            Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
        }
    }
    /// Create an IndexReader for the index.
    ///
    /// Args:
    ///     reload_policy (str, optional): The reload policy that the
    ///         IndexReader should use. Can be manual or OnCommit.
    ///     num_searchers (int, optional): The number of searchers that the
    ///         reader should create.
    ///
    /// Returns the IndexReader on success, raises ValueError if a IndexReader
    /// couldn't be created.
    #[args(reload_policy = "RELOAD_POLICY", num_searchers = 0)]
    fn reader(
        &self,
        reload_policy: &str,
        num_searchers: usize,
    ) -> PyResult<IndexReader> {
        let reload_policy = reload_policy.to_lowercase();
        let reload_policy = match reload_policy.as_ref() {
            "commit" => tv::ReloadPolicy::OnCommit,
            "on-commit" => tv::ReloadPolicy::OnCommit,
            "oncommit" => tv::ReloadPolicy::OnCommit,
            "manual" => tv::ReloadPolicy::Manual,
            _ => return Err(exceptions::ValueError::py_err(
                "Invalid reload policy, valid choices are: 'manual' and 'OnCommit'"
            ))
        };
        let builder = self.inner.reader_builder();
        let builder = builder.reload_policy(reload_policy);
        let builder = if num_searchers > 0 {
            builder.num_searchers(num_searchers)
        } else {
            builder
        };
        let reader = builder.try_into();
        match reader {
            Ok(r) => Ok(IndexReader { inner: r }),
            Err(e) => Err(exceptions::ValueError::py_err(e.to_string())),
        }
    }
    /// Check if the given path contains an existing index.
    /// Args:
    ///     path: The path where tantivy will search for an index.
    ///
    /// Returns True if an index exists at the given path, False otherwise.
    ///
    /// Raises OSError if the directory cannot be opened.
    #[staticmethod]
    fn exists(path: &str) -> PyResult<bool> {
        let directory = MmapDirectory::open(path);
        let dir = match directory {
            Ok(d) => d,
            Err(e) => return Err(exceptions::OSError::py_err(e.to_string())),
        };
        Ok(tv::Index::exists(&dir))
    }
    /// The schema of the current index.
    #[getter]
    fn schema(&self) -> Schema {
        let schema = self.inner.schema();
        Schema { inner: schema }
    }
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@ -0,0 +1,88 @@
 use pyo3::prelude::*;
 mod document;
 mod facet;
 mod field;
 mod index;
 mod query;
 mod schema;
 mod schemabuilder;
 mod searcher;
 use document::Document;
 use facet::Facet;
 use field::{Field, FieldValue};
 use index::Index;
 use query::QueryParser;
 use schema::Schema;
 use schemabuilder::SchemaBuilder;
 use searcher::{DocAddress, Searcher, TopDocs};
 /// Python bindings for the search engine library Tantivy.
 ///
 /// Tantivy is a full text search engine library written in rust.
 ///
 /// It is closer to Apache Lucene than to Elasticsearch and Apache Solr in
 /// the sense it is not an off-the-shelf search engine server, but rather
 /// a library that can be used to build such a search engine.
 /// Tantivy is, in fact, strongly inspired by Lucene's design.
 ///
 /// Example:
 ///     >>> import json
 ///     >>> import tantivy
 ///
 ///     >>> builder = tantivy.SchemaBuilder()
 ///
 ///     >>> title = builder.add_text_field("title", stored=True)
 ///     >>> body = builder.add_text_field("body")
 ///
 ///     >>> schema = builder.build()
 ///     >>> index = tantivy.Index(schema)
 ///     >>> writer = index.writer()
 ///     >>> doc = tantivy.Document()
 ///     >>> doc.add_text(title, "The Old Man and the Sea")
 ///     >>> doc.add_text(body, ("He was an old man who fished alone in a "
 ///                             "skiff in the Gulf Stream and he had gone "
 ///                             "eighty-four days now without taking a fish."))
 ///
 ///     >>> writer.add_document(doc)
 ///
 ///     >>> doc = schema.parse_document(json.dumps({
 ///            "title": ["Frankenstein", "The Modern Prometheus"],
 ///            "body": ("You will rejoice to hear that no disaster has "
 ///                     "accompanied the commencement of an enterprise which "
 ///                     "you have regarded with such evil forebodings.  "
 ///                     "I arrived here yesterday, and my first task is to "
 ///                     "assure my dear sister of my welfare and increasing "
 ///                     "confidence in the success of my undertaking.")
 ///     }))
 ///
 ///     >>> writer.add_document(doc)
 ///     >>> writer.commit()
 ///
 ///     >>> reader = index.reader()
 ///     >>> searcher = reader.searcher()
 ///
 ///     >>> query_parser = tantivy.QueryParser.for_index(index, [title, body])
 ///     >>> query = query_parser.parse_query("sea whale")
 ///
 ///     >>> top_docs = tantivy.TopDocs.with_limit(10)
 ///     >>> result = searcher.search(query, top_docs)
 ///
 ///     >>> assert len(result) == 1
 ///
 #[pymodule]
 fn tantivy(_py: Python, m: &PyModule) -> PyResult<()> {
    // Register every class that is exposed on the `tantivy` Python module.
    m.add_class::<Schema>()?;
    m.add_class::<SchemaBuilder>()?;
    m.add_class::<Searcher>()?;
    m.add_class::<Index>()?;
    m.add_class::<QueryParser>()?;
    m.add_class::<Document>()?;
    m.add_class::<DocAddress>()?;
    m.add_class::<TopDocs>()?;
    m.add_class::<Field>()?;
    m.add_class::<FieldValue>()?;
    m.add_class::<Facet>()?;
    Ok(())
 }
--- a/src/query.rs
+++ b/src/query.rs
@ -0,0 +1,70 @@
 use pyo3::exceptions;
 use pyo3::prelude::*;
 use pyo3::types::PyType;
 use tantivy as tv;
 use crate::field::Field;
 use crate::index::Index;
 /// Tantivy's Query
 ///
 /// Instances are produced by `QueryParser.parse_query`.
 #[pyclass]
 pub(crate) struct Query {
    // The wrapped, type-erased tantivy query.
    pub(crate) inner: Box<dyn tv::query::Query>,
 }
 /// Tantivy's Query parser
 ///
 /// Use `QueryParser.for_index` to create one for an Index.
 #[pyclass]
 pub(crate) struct QueryParser {
    // The wrapped tantivy query parser.
    inner: tv::query::QueryParser,
 }
 #[pymethods]
 impl QueryParser {
    /// Build a QueryParser for an Index.
    ///
    /// Args:
    ///     index (Index): The index for which the query will be created.
    ///     default_fields (List[Field]): The fields that are searched when
    ///         the query itself does not name a field.
    ///
    /// Returns the QueryParser.
    #[classmethod]
    fn for_index(
        _cls: &PyType,
        index: &Index,
        default_fields: Vec<&Field>,
    ) -> PyResult<QueryParser> {
        // Unwrap the Python-side handles into tantivy fields.
        let mut fields: Vec<tv::schema::Field> =
            Vec::with_capacity(default_fields.len());
        for field in &default_fields {
            fields.push(field.inner.clone());
        }
        Ok(QueryParser {
            inner: tv::query::QueryParser::for_index(&index.inner, fields),
        })
    }
    /// Turn a query string into a Query that can be handed to a searcher.
    ///
    /// Args:
    ///     query (str): A query string that should be parsed into a query.
    ///
    /// Returns the parsed Query object. Raises ValueError if there was an
    /// error with the query string.
    fn parse_query(&self, query: &str) -> PyResult<Query> {
        self.inner
            .parse_query(query)
            .map(|parsed| Query { inner: parsed })
            .map_err(|err| exceptions::ValueError::py_err(err.to_string()))
    }
    /// Make conjunction the default way to compose queries.
    ///
    /// By default, the query happy tax payer is equivalent to the query happy
    /// OR tax OR payer. After calling .set_conjunction_by_default() happy tax
    /// payer will be interpreted by the parser as happy AND tax AND payer.
    fn set_conjunction_by_default(&mut self) {
        let parser = &mut self.inner;
        parser.set_conjunction_by_default();
    }
 }
--- a/src/schema.rs
+++ b/src/schema.rs
@ -0,0 +1,65 @@
 use pyo3::exceptions;
 use pyo3::prelude::*;
 use tantivy::schema;
 use crate::document::Document;
 use crate::field::Field;
 /// Tantivy schema.
 ///
 /// The schema is very strict. To build the schema the `SchemaBuilder` class is
 /// provided.
 #[pyclass]
 pub(crate) struct Schema {
    // The wrapped tantivy schema.
    pub(crate) inner: schema::Schema,
 }
 #[pymethods]
 impl Schema {
    /// Parse a json string into a document object.
    ///
    /// Args:
    ///     doc_json (str) - A string containing json that should be parsed
    ///         into a `Document`
    ///
    /// Returns the parsed document, raises a ValueError if the parsing failed.
    fn parse_document(&self, doc_json: &str) -> PyResult<Document> {
        self.inner
            .parse_document(doc_json)
            .map(|inner| Document { inner })
            .map_err(|err| exceptions::ValueError::py_err(err.to_string()))
    }
    /// Serialize a `Document` object into a json string.
    ///
    /// Args:
    ///     doc (Document): The document that will be converted into a json
    ///         string.
    fn to_json(&self, doc: &Document) -> String {
        let document = &doc.inner;
        self.inner.to_json(document)
    }
    /// Look up the name of a given `Field`.
    ///
    /// Args:
    ///     field (Field): The field for which the name will be returned.
    fn get_field_name(&self, field: &Field) -> &str {
        let handle = field.inner;
        self.inner.get_field_name(handle)
    }
    /// Look up the field that is registered under a given name.
    ///
    /// Args:
    ///     name (str): The name of the field that we want to retrieve.
    ///
    /// Returns the Field if one is found, None otherwise.
    fn get_field(&self, name: &str) -> Option<Field> {
        self.inner.get_field(name).map(|inner| Field { inner })
    }
 }
--- a/src/schemabuilder.rs
+++ b/src/schemabuilder.rs
@ -0,0 +1,318 @@
 use pyo3::exceptions;
 use pyo3::prelude::*;
 use tantivy::schema;
 use crate::field::Field;
 use crate::schema::Schema;
 /// Tantivy has a very strict schema.
 /// You need to specify in advance whether a field is indexed or not,
 /// stored or not.
 ///
 /// This is done by creating a schema object, and
 /// setting up the fields one by one.
 ///
 /// Once `build()` has been called the builder is consumed; every later call
 /// on the same object raises a ValueError.
 ///
 /// Examples:
 ///
 ///     >>> builder = tantivy.SchemaBuilder()
 ///
 ///     >>> title = builder.add_text_field("title", stored=True)
 ///     >>> body = builder.add_text_field("body")
 ///
 ///     >>> schema = builder.build()
 #[pyclass]
 pub(crate) struct SchemaBuilder {
    /// The wrapped tantivy schema builder. It is `Some` until `build()` takes
    /// it out, after which it stays `None` and the builder is unusable.
    pub(crate) builder: Option<schema::SchemaBuilder>,
 }
 /// Default value of the `tokenizer_name` argument of `add_text_field`.
 const TOKENIZER: &str = "default";
 /// Default value of the `index_option` argument of `add_text_field`.
 const RECORD: &str = "position";
 #[pymethods]
 impl SchemaBuilder {
    /// Create an empty schema builder that fields can be added to.
    #[new]
    fn new(obj: &PyRawObject) {
        let builder = Some(schema::Schema::builder());
        obj.init(SchemaBuilder { builder });
    }
    /// Register a text field on the schema that is being built.
    ///
    /// Args:
    ///     name (str): The name of the field.
    ///     stored (bool, optional): Whether the field content is stored so
    ///         that it can later be restored from a Searcher. Defaults to
    ///         False.
    ///     tokenizer_name (str, optional): The name of the tokenizer that
    ///         should be used to process the field. Defaults to 'default'.
    ///     index_option (str, optional): Which information is indexed along
    ///         with the tokens; one of 'basic', 'freq' or 'position'. 'basic'
    ///         records only the document id, 'freq' records the document id
    ///         and the term frequency, 'position' records the document id,
    ///         the term frequency and the positions of the term occurrences
    ///         in the document. Defaults to 'position'.
    ///
    /// Returns the associated field handle.
    /// Raises a ValueError if `index_option` is not a valid choice or if the
    /// builder was already consumed by `build()`.
    #[args(
        stored = false,
        tokenizer_name = "TOKENIZER",
        index_option = "RECORD"
    )]
    fn add_text_field(
        &mut self,
        name: &str,
        stored: bool,
        tokenizer_name: &str,
        index_option: &str,
    ) -> PyResult<Field> {
        // Translate the Python-facing option name into tantivy's enum; an
        // unknown name is rejected before the builder is touched.
        let record_option = match index_option {
            "position" => schema::IndexRecordOption::WithFreqsAndPositions,
            "freq" => schema::IndexRecordOption::WithFreqs,
            "basic" => schema::IndexRecordOption::Basic,
            _ => {
                return Err(exceptions::ValueError::py_err(
                    "Invalid index option, valid choices are: 'basic', 'freq' and 'position'",
                ))
            }
        };
        let mut text_options = schema::TextOptions::default().set_indexing_options(
            schema::TextFieldIndexing::default()
                .set_tokenizer(tokenizer_name)
                .set_index_option(record_option),
        );
        if stored {
            text_options = text_options.set_stored();
        }
        // `builder` is `None` once `build()` has consumed it.
        let schema_builder = self.builder.as_mut().ok_or_else(|| {
            exceptions::ValueError::py_err("Schema builder object isn't valid anymore.")
        })?;
        Ok(Field {
            inner: schema_builder.add_text_field(name, text_options),
        })
    }
    /// Add a new signed integer field to the schema.
    ///
    /// Args:
    ///     name (str): The name of the field.
    ///     stored (bool, optional): If true sets the field as stored, the
    ///         content of the field can be later restored from a Searcher.
    ///         Defaults to False.
    ///     indexed (bool, optional): If true sets the field to be indexed.
    ///     fast (str, optional): Set the u64 options as a single-valued fast
    ///         field. Fast fields are designed for random access. Access time
    ///         are similar to a random lookup in an array. If more than one
    ///         value is associated to a fast field, only the last one is kept.
    ///         Can be one of 'single' or 'multi'. If this is set to 'single,
    ///         the document must have exactly one value associated to the
    ///         document. If this is set to 'multi', the document can have any
    ///         number of values associated to the document. Defaults to None,
    ///         which disables this option.
    ///
    /// Returns the associated field handle.
    /// Raises a ValueError if there was an error with the field creation.
    #[args(stored = false, indexed = false)]
    fn add_integer_field(
        &mut self,
        name: &str,
        stored: bool,
        indexed: bool,
        fast: Option<&str>,
    ) -> PyResult<Field> {
        let builder = &mut self.builder;
        let opts = SchemaBuilder::build_int_option(stored, indexed, fast)?;
        if let Some(builder) = builder {
            let field = builder.add_i64_field(name, opts);
            Ok(Field { inner: field })
        } else {
            Err(exceptions::ValueError::py_err(
                "Schema builder object isn't valid anymore.",
            ))
        }
    }
    /// Add a new unsigned integer field to the schema.
    ///
    /// Args:
    ///     name (str): The name of the field.
    ///     stored (bool, optional): If true sets the field as stored, the
    ///         content of the field can be later restored from a Searcher.
    ///         Defaults to False.
    ///     indexed (bool, optional): If true sets the field to be indexed.
    ///     fast (str, optional): Set the u64 options as a single-valued fast
    ///         field. Fast fields are designed for random access. Access time
    ///         are similar to a random lookup in an array. If more than one
    ///         value is associated to a fast field, only the last one is kept.
    ///         Can be one of 'single' or 'multi'. If this is set to 'single,
    ///         the document must have exactly one value associated to the
    ///         document. If this is set to 'multi', the document can have any
    ///         number of values associated to the document. Defaults to None,
    ///         which disables this option.
    ///
    /// Returns the associated field handle.
    /// Raises a ValueError if there was an error with the field creation.
    #[args(stored = false, indexed = false)]
    fn add_unsigned_field(
        &mut self,
        name: &str,
        stored: bool,
        indexed: bool,
        fast: Option<&str>,
    ) -> PyResult<Field> {
        let builder = &mut self.builder;
        let opts = SchemaBuilder::build_int_option(stored, indexed, fast)?;
        if let Some(builder) = builder {
            let field = builder.add_u64_field(name, opts);
            Ok(Field { inner: field })
        } else {
            Err(exceptions::ValueError::py_err(
                "Schema builder object isn't valid anymore.",
            ))
        }
    }
    /// Add a new date field to the schema.
    ///
    /// Args:
    ///     name (str): The name of the field.
    ///     stored (bool, optional): If true sets the field as stored, the
    ///         content of the field can be later restored from a Searcher.
    ///         Defaults to False.
    ///     indexed (bool, optional): If true sets the field to be indexed.
    ///     fast (str, optional): Set the u64 options as a single-valued fast
    ///         field. Fast fields are designed for random access. Access time
    ///         are similar to a random lookup in an array. If more than one
    ///         value is associated to a fast field, only the last one is kept.
    ///         Can be one of 'single' or 'multi'. If this is set to 'single,
    ///         the document must have exactly one value associated to the
    ///         document. If this is set to 'multi', the document can have any
    ///         number of values associated to the document. Defaults to None,
    ///         which disables this option.
    ///
    /// Returns the associated field handle.
    /// Raises a ValueError if there was an error with the field creation.
    #[args(stored = false, indexed = false)]
    fn add_date_field(
        &mut self,
        name: &str,
        stored: bool,
        indexed: bool,
        fast: Option<&str>,
    ) -> PyResult<Field> {
        let builder = &mut self.builder;
        let opts = SchemaBuilder::build_int_option(stored, indexed, fast)?;
        if let Some(builder) = builder {
            let field = builder.add_date_field(name, opts);
            Ok(Field { inner: field })
        } else {
            Err(exceptions::ValueError::py_err(
                "Schema builder object isn't valid anymore.",
            ))
        }
    }
    /// Register a Facet field on the schema that is being built.
    ///
    /// Args:
    ///     name (str): The name of the field.
    ///
    /// Returns the associated field handle.
    /// Raises a ValueError if the builder was already consumed by `build()`.
    fn add_facet_field(&mut self, name: &str) -> PyResult<Field> {
        match self.builder.as_mut() {
            Some(schema_builder) => Ok(Field {
                inner: schema_builder.add_facet_field(name),
            }),
            None => Err(exceptions::ValueError::py_err(
                "Schema builder object isn't valid anymore.",
            )),
        }
    }
    /// Register a fast bytes field on the schema that is being built.
    ///
    /// Bytes fields are not searchable; they are only used as fast fields to
    /// associate any kind of payload with a document.
    ///
    /// Args:
    ///     name (str): The name of the field.
    ///
    /// Returns the associated field handle.
    /// Raises a ValueError if the builder was already consumed by `build()`.
    fn add_bytes_field(&mut self, name: &str) -> PyResult<Field> {
        match self.builder.as_mut() {
            Some(schema_builder) => Ok(Field {
                inner: schema_builder.add_bytes_field(name),
            }),
            None => Err(exceptions::ValueError::py_err(
                "Schema builder object isn't valid anymore.",
            )),
        }
    }
    /// Finish the schema definition and produce the `Schema`.
    ///
    /// Returns a Schema object. The builder is consumed by this call, so any
    /// later call on it (including a second `build()`) raises a ValueError.
    fn build(&mut self) -> PyResult<Schema> {
        // `take()` leaves `None` behind, which is what marks the builder as
        // no longer usable.
        self.builder
            .take()
            .map(|schema_builder| Schema {
                inner: schema_builder.build(),
            })
            .ok_or_else(|| {
                exceptions::ValueError::py_err("Schema builder object isn't valid anymore.")
            })
    }
 }
 impl SchemaBuilder {
    /// Translate the Python-facing integer field arguments into tantivy
    /// `IntOptions`.
    ///
    /// `stored` and `indexed` switch the corresponding flags on. `fast` is
    /// matched case-insensitively: `"single"` selects a single-valued fast
    /// field, `"multi"` a multi-valued one, and `None` leaves the fast option
    /// disabled.
    ///
    /// Returns a ValueError for any other `fast` value.
    fn build_int_option(
        stored: bool,
        indexed: bool,
        fast: Option<&str>,
    ) -> PyResult<schema::IntOptions> {
        let opts = schema::IntOptions::default();
        let opts = if stored { opts.set_stored() } else { opts };
        let opts = if indexed { opts.set_indexed() } else { opts };
        let opts = match fast {
            None => opts,
            Some(f) => match f.to_lowercase().as_str() {
                "single" => opts.set_fast(schema::Cardinality::SingleValue),
                "multi" => opts.set_fast(schema::Cardinality::MultiValues),
                // The message lists the values that are actually accepted.
                _ => {
                    return Err(exceptions::ValueError::py_err(
                        "Invalid fast option, valid choices are: 'single' and 'multi'",
                    ))
                }
            },
        };
        Ok(opts)
    }
 }
--- a/src/searcher.rs
+++ b/src/searcher.rs
@ -0,0 +1,134 @@
 use pyo3::exceptions;
 use pyo3::prelude::*;
 use tantivy as tv;
 use crate::document::Document;
 use crate::query::Query;
 /// Tantivy's Searcher class
 ///
 /// A Searcher is used to search the index given a prepared Query.
 #[pyclass]
 pub(crate) struct Searcher {
    /// The wrapped tantivy searcher, held through a `tv::LeasedItem`.
    pub(crate) inner: tv::LeasedItem<tv::Searcher>,
 }
 #[pymethods]
 impl Searcher {
    /// Search the index with the given query and collect results.
    ///
    /// Args:
    ///     query (Query): The query that will be used for the search.
    ///     collector (Collector): A collector that determines how the search
    ///         results will be collected. Only the TopDocs collector is
    ///         supported for now.
    ///
    /// Returns a list of tuples that contains the scores and DocAddress of the
    /// search results.
    ///
    /// Raises a ValueError if there was an error with the search.
    fn search(
        &self,
        query: &Query,
        collector: &mut TopDocs,
    ) -> PyResult<Vec<(f32, DocAddress)>> {
        let hits = self
            .inner
            .search(&query.inner, &collector.inner)
            .map_err(|e| exceptions::ValueError::py_err(e.to_string()))?;
        // Convert tantivy's address type into the Python-visible one; the
        // score is `Copy`, so it is dereferenced rather than cloned.
        Ok(hits
            .iter()
            .map(|(score, address)| (*score, DocAddress::from(address)))
            .collect())
    }
    /// Returns the overall number of documents in the index.
    #[getter]
    fn num_docs(&self) -> u64 {
        self.inner.num_docs()
    }
    /// Fetches a document from Tantivy's store given a DocAddress.
    ///
    /// Args:
    ///     doc_address (DocAddress): The DocAddress that is associated with
    ///         the document that we wish to fetch.
    ///
    /// Returns the Document, raises ValueError if the document can't be found.
    fn doc(&self, doc_address: &DocAddress) -> PyResult<Document> {
        self.inner
            .doc(doc_address.into())
            .map(|inner| Document { inner })
            .map_err(|e| exceptions::ValueError::py_err(e.to_string()))
    }
 }
 /// DocAddress contains all the necessary information to identify a document
 /// given a Searcher object.
 ///
 /// It consists of an id identifying its segment, and its segment-local DocId.
 /// The id used for the segment is actually an ordinal in the list of segments
 /// held by a Searcher.
 #[pyclass]
 pub(crate) struct DocAddress {
    /// Ordinal of the segment that hosts the document.
    pub(crate) segment_ord: tv::SegmentLocalId,
    /// Segment-local id of the document.
    pub(crate) doc: tv::DocId,
 }
 #[pymethods]
 impl DocAddress {
    /// The segment ordinal is an id identifying the segment hosting the
    /// document. It is only meaningful in the context of a searcher.
    #[getter]
    fn segment_ord(&self) -> u32 {
        self.segment_ord
    }
    /// The segment-local DocId of the document.
    #[getter]
    fn doc(&self) -> u32 {
        self.doc
    }
 }
 /// Builds the Python-visible address by copying the segment ordinal and the
 /// document id out of a tantivy `DocAddress`.
 impl From<&tv::DocAddress> for DocAddress {
    fn from(address: &tv::DocAddress) -> Self {
        let segment_ord = address.segment_ord();
        let doc = address.doc();
        Self { segment_ord, doc }
    }
 }
 /// Converts the Python-visible address back into a tantivy `DocAddress`.
 ///
 /// `From` is implemented rather than `Into`: the standard library's blanket
 /// impl then supplies `Into<tv::DocAddress> for &DocAddress`, so existing
 /// `.into()` call sites keep working unchanged.
 impl From<&DocAddress> for tv::DocAddress {
    fn from(doc_address: &DocAddress) -> Self {
        tv::DocAddress(doc_address.segment_ord, doc_address.doc)
    }
 }
 /// The Top Score Collector keeps track of the K documents sorted by their
 /// score.
 ///
 /// Args:
 ///     limit (int, optional): The number of documents that the top scorer will
 ///         retrieve. Must be a positive integer larger than 0. Defaults to 10.
 #[pyclass]
 pub(crate) struct TopDocs {
    /// The wrapped tantivy collector; `Searcher::search` passes it to tantivy
    /// by reference.
    inner: tv::collector::TopDocs,
 }
 #[pymethods]
 impl TopDocs {
    #[new]
    #[args(limit = 10)]
    fn new(obj: &PyRawObject, limit: usize) -> PyResult<()> {
        let top = tv::collector::TopDocs::with_limit(limit);
        obj.init(TopDocs { inner: top });
        Ok(())
    }
 }
--- a/tantivy/init.py
+++ b/tantivy/init.py
@ -0,0 +1 @@
 from .tantivy import *
--- a/tests/tantivy_test.py
+++ b/tests/tantivy_test.py
@ -0,0 +1,85 @@
 import json
 import tantivy
 class TestClass(object):
    """End-to-end tests for the tantivy Python bindings."""
    def test_simple_search(self):
        """Index three documents and check that a query finds the right one."""
        builder = tantivy.SchemaBuilder()
        title = builder.add_text_field("title", stored=True)
        body = builder.add_text_field("body")
        schema = builder.build()
        index = tantivy.Index(schema)
        writer = index.writer()
        # First document: built field by field through the Document API.
        doc = tantivy.Document()
        doc.add_text(title, "The Old Man and the Sea")
        doc.add_text(body, ("He was an old man who fished alone in a skiff in "
                            "the Gulf Stream and he had gone eighty-four days "
                            "now without taking a fish."))
        writer.add_document(doc)
        # Second document: parsed from json through the schema.
        doc = schema.parse_document(json.dumps({
            "title": "Of Mice and Men",
            "body": ("A few miles south of Soledad, the Salinas River drops "
                     "in close to the hillside bank and runs deep and "
                     "green. The water is warm too, for it has slipped "
                     "twinkling over the yellow sands in the sunlight "
                     "before reaching the narrow pool. On one side of the "
                     "river the golden foothill slopes curve up to the "
                     "strong and rocky Gabilan Mountains, but on the valley "
                     "side the water is lined with trees—willows fresh and "
                     "green with every spring, carrying in their lower leaf "
                     "junctures the debris of the winter’s flooding; and "
                     "sycamores with mottled, white, recumbent limbs and "
                     "branches that arch over the pool")
        }))
        writer.add_document(doc)
        # Third document: a json document with a multi-valued title.
        doc = schema.parse_document(json.dumps({
            "title": ["Frankenstein", "The Modern Prometheus"],
            "body": ("You will rejoice to hear that no disaster has "
                     "accompanied the commencement of an enterprise which you "
                     "have regarded with such evil forebodings.  I arrived "
                     "here yesterday, and my first task is to assure my dear "
                     "sister of my welfare and increasing confidence in the "
                     "success of my undertaking.")
        }))
        writer.add_document(doc)
        writer.commit()
        reader = index.reader()
        searcher = reader.searcher()
        query_parser = tantivy.QueryParser.for_index(index, [title, body])
        query = query_parser.parse_query("sea whale")
        top_docs = tantivy.TopDocs(10)
        result = searcher.search(query, top_docs)
        print(result)
        assert len(result) == 1
        _, doc_address = result[0]
        searched_doc = searcher.doc(doc_address)
        assert searched_doc.get_first(title) == "The Old Man and the Sea"
    def test_doc(self):
        """Check the basic accessors of a Document built by hand."""
        builder = tantivy.SchemaBuilder()
        title = builder.add_text_field("title", stored=True)
        doc = tantivy.Document()
        assert doc.is_empty
        doc.add_text(title, "The Old Man and the Sea")
        assert doc.get_first(title) == "The Old Man and the Sea"
        assert doc.len == 1
        assert not doc.is_empty
		`@ -0,0 +1,2 @@`
							`[build-system]`
							`requires = ["setuptools", "wheel", "setuptools-rust"]`