tantivy-py/src/index.rs

#![allow(clippy::new_ret_no_self)]

use pyo3::exceptions;
use pyo3::prelude::*;
use pyo3::types::PyAny;

use crate::document::{extract_value, Document};
use crate::query::Query;
use crate::schema::Schema;
use crate::searcher::Searcher;
use crate::{get_field, to_pyerr};
use tantivy as tv;
use tantivy::directory::MmapDirectory;
use tantivy::schema::{NamedFieldDocument, Term, Value};

// Default value of the `reload_policy` argument of `Index::config_reader`
// (referenced by name from its `#[args(...)]` attribute).
const RELOAD_POLICY: &str = "commit";

/// IndexWriter is the user entry-point to add documents to the index.
///
/// To create an IndexWriter first create an Index and call the writer() method
/// on the index object.
#[pyclass]
pub(crate) struct IndexWriter {
    // The tantivy writer that the Python-facing methods delegate to.
    inner_index_writer: tv::IndexWriter,
    // Schema used to convert/parse incoming documents and to resolve field
    // names when deleting documents.
    schema: tv::schema::Schema,
}

#[pymethods]
impl IndexWriter {
    /// Add a document to the index.
    ///
    /// The field values of `doc` are copied and converted into a tantivy
    /// document using the schema held by this writer; a conversion failure
    /// is turned into a Python exception through `to_pyerr`.
    ///
    /// If the indexing pipeline is full, this call may block.
    ///
    /// Returns an `opstamp`, which is an increasing integer that can be used
    /// by the client to align commits with its own document queue.
    /// The `opstamp` represents the number of documents that have been added
    /// since the creation of the index.
    pub fn add_document(&mut self, doc: &Document) -> PyResult<u64> {
        let field_values = doc.field_values.clone();
        let tantivy_doc = self
            .schema
            .convert_named_doc(NamedFieldDocument(field_values))
            .map_err(to_pyerr)?;
        let opstamp = self.inner_index_writer.add_document(tantivy_doc);
        Ok(opstamp)
    }

    /// Same as `add_document`, but the document is given as a json string.
    ///
    /// The string is parsed with the schema held by this writer; a parsing
    /// failure is turned into a Python exception through `to_pyerr`.
    ///
    /// If the indexing pipeline is full, this call may block.
    ///
    /// Returns an `opstamp`, which is an increasing integer that can be used
    /// by the client to align commits with its own document queue.
    /// The `opstamp` represents the number of documents that have been added
    /// since the creation of the index.
    pub fn add_json(&mut self, json: &str) -> PyResult<u64> {
        let parsed = self.schema.parse_document(json).map_err(to_pyerr)?;
        Ok(self.inner_index_writer.add_document(parsed))
    }

    /// Commit all of the pending changes.
    ///
    /// A call to commit blocks. After it returns, all of the documents that
    /// were added since the last commit are published and persisted.
    ///
    /// In case of a crash or a hardware failure (as long as the hard disk is
    /// spared), it will be possible to resume indexing from this point.
    ///
    /// Returns the `opstamp` of the last document that made it in the commit.
    fn commit(&mut self) -> PyResult<u64> {
        let outcome = self.inner_index_writer.commit();
        outcome.map_err(to_pyerr)
    }

    /// Roll back to the last commit.
    ///
    /// This cancels all of the updates that happened after the last commit.
    /// After calling rollback, the index is in the same state as it was
    /// after the last commit.
    fn rollback(&mut self) -> PyResult<u64> {
        let outcome = self.inner_index_writer.rollback();
        outcome.map_err(to_pyerr)
    }

    /// Detect and removes the files that are not used by the index anymore.
    fn garbage_collect_files(&mut self) -> PyResult<()> {
        use futures::executor::block_on;
        block_on(self.inner_index_writer.garbage_collect_files())
            .map_err(to_pyerr)?;
        Ok(())
    }

    /// The opstamp of the last successful commit.
    ///
    /// This is the opstamp the index will roll back to if there is a failure
    /// like a power surge.
    ///
    /// It is also the opstamp of the commit that is currently available
    /// for searchers.
    #[getter]
    fn commit_opstamp(&self) -> u64 {
        let writer = &self.inner_index_writer;
        writer.commit_opstamp()
    }

    /// Delete all documents containing a given term.
    ///
    /// Args:
    ///     field_name (str): The field name for which we want to filter deleted docs.
    ///     field_value (PyAny): Python object with the value we want to filter.
    ///
    /// If the field_name is not on the schema raises ValueError exception.
    /// If the field_value is a bytes or pretokenized value raises ValueError;
    /// errors from converting the Python object are propagated unchanged.
    ///
    /// Returns the `opstamp` produced by the underlying delete operation.
    fn delete_documents(
        &mut self,
        field_name: &str,
        field_value: &PyAny,
    ) -> PyResult<u64> {
        // The field is resolved before the value is converted, so an unknown
        // field is reported first.
        let field = get_field(&self.schema, field_name)?;
        // Either the term to delete, or the reason this kind of value cannot
        // be used for a delete.
        let term_or_reason = match extract_value(field_value)? {
            Value::Bytes(_) => Err(format!(
                "Field `{}` is bytes type not deletable.",
                field_name
            )),
            Value::PreTokStr(_) => Err(format!(
                "Field `{}` is pretokenized. This is not authorized for delete.",
                field_name
            )),
            Value::Str(text) => Ok(Term::from_field_text(field, &text)),
            Value::U64(number) => Ok(Term::from_field_u64(field, number)),
            Value::I64(number) => Ok(Term::from_field_i64(field, number)),
            Value::F64(number) => Ok(Term::from_field_f64(field, number)),
            Value::Date(date) => Ok(Term::from_field_date(field, &date)),
            Value::Facet(facet) => Ok(Term::from_facet(field, &facet)),
        };
        let term = term_or_reason
            .map_err(|reason| exceptions::ValueError::py_err(reason))?;
        Ok(self.inner_index_writer.delete_term(term))
    }
}

/// Create a new index object.
///
/// Args:
///     schema (Schema): The schema of the index.
///     path (str, optional): The path where the index should be stored. If
///         no path is provided, the index will be stored in memory.
///     reuse (bool, optional): Should we open an existing index if one exists
///         or always create a new one. Defaults to True.
///
/// If `reuse` is true and an index already exists at `path` it will be opened
/// and reused. Raises OSError if there was a problem during the opening or
/// creation of the index.
// NOTE(review): every error in the constructor is converted with `to_pyerr`;
// confirm that it really raises OSError as documented above.
#[pyclass]
pub(crate) struct Index {
    // The underlying tantivy index.
    pub(crate) index: tv::Index,
    // Reader used to hand out searchers and to reload; it is replaced when
    // `config_reader` is called.
    reader: tv::IndexReader,
}

#[pymethods]
impl Index {
    /// Open the index stored in the directory at `path`.
    ///
    /// Args:
    ///     path (str): The directory containing the index.
    ///
    /// A reader is created for the opened index right away. Failures while
    /// opening the index or creating the reader are turned into a Python
    /// exception through `to_pyerr`.
    #[staticmethod]
    fn open(path: &str) -> PyResult<Index> {
        let opened = tv::Index::open_in_dir(path).map_err(to_pyerr)?;
        let index_reader = opened.reader().map_err(to_pyerr)?;
        Ok(Index {
            index: opened,
            reader: index_reader,
        })
    }

    // Constructor exposed to Python.
    //
    // With a `path`, the index lives in that directory: it is opened or
    // created when `reuse` is true and unconditionally created otherwise.
    // Without a `path`, the index is created in RAM. A reader is created for
    // the resulting index in every case.
    #[new]
    #[args(reuse = true)]
    fn new(
        schema: &Schema,
        path: Option<&str>,
        reuse: bool,
    ) -> PyResult<Self> {
        let index = if let Some(dir_path) = path {
            let directory =
                MmapDirectory::open(dir_path).map_err(to_pyerr)?;
            let index_schema = schema.inner.clone();
            let on_disk = if reuse {
                tv::Index::open_or_create(directory, index_schema)
            } else {
                tv::Index::create(directory, index_schema)
            };
            on_disk.map_err(to_pyerr)?
        } else {
            tv::Index::create_in_ram(schema.inner.clone())
        };

        let reader = index.reader().map_err(to_pyerr)?;
        Ok(Index { index, reader })
    }

    /// Create an `IndexWriter` for the index.
    ///
    /// The writer will be multithreaded and the provided heap size will be
    /// split between the given number of threads.
    ///
    /// Args:
    ///     heap_size (int, optional): The total target memory usage of
    ///         the writer, can't be less than 3000000. Defaults to 3000000.
    ///     num_threads (int, optional): The number of threads that the writer
    ///         should use. If this value is 0 (the default), tantivy will
    ///         choose automatically the number of threads.
    ///
    /// Raises ValueError if there was an error while creating the writer.
    #[args(heap_size = 3000000, num_threads = 0)]
    fn writer(
        &self,
        heap_size: usize,
        num_threads: usize,
    ) -> PyResult<IndexWriter> {
        // Zero threads means "let tantivy decide".
        let created = if num_threads == 0 {
            self.index.writer(heap_size)
        } else {
            self.index.writer_with_num_threads(num_threads, heap_size)
        };
        Ok(IndexWriter {
            inner_index_writer: created.map_err(to_pyerr)?,
            schema: self.index.schema(),
        })
    }

    /// Configure the index reader.
    ///
    /// Builds a new reader with the requested settings and replaces the
    /// reader currently held by this index.
    ///
    /// Args:
    ///     reload_policy (str, optional): The reload policy that the
    ///         IndexReader should use. Can be `Manual` or `OnCommit`
    ///         (matched case-insensitively; `commit` and `on-commit` are
    ///         accepted as aliases of `OnCommit`).
    ///     num_searchers (int, optional): The number of searchers that the
    ///         reader should create. With 0 (the default) the number of
    ///         searchers is left to the reader builder.
    ///
    /// Raises ValueError if the reload policy is not recognised.
    #[args(reload_policy = "RELOAD_POLICY", num_searchers = 0)]
    fn config_reader(
        &mut self,
        reload_policy: &str,
        num_searchers: usize,
    ) -> Result<(), PyErr> {
        let policy = match reload_policy.to_lowercase().as_str() {
            "commit" | "on-commit" | "oncommit" => tv::ReloadPolicy::OnCommit,
            "manual" => tv::ReloadPolicy::Manual,
            _ => {
                return Err(exceptions::ValueError::py_err(
                    "Invalid reload policy, valid choices are: 'manual' and 'OnCommit'",
                ))
            }
        };

        let mut builder = self.index.reader_builder().reload_policy(policy);
        // Only override the searcher count when the caller asked for one.
        if num_searchers > 0 {
            builder = builder.num_searchers(num_searchers);
        }

        self.reader = builder.try_into().map_err(to_pyerr)?;
        Ok(())
    }

    /// Acquire a Searcher from the searcher pool.
    ///
    /// If no searcher is available at the time of the call, the call blocks
    /// until one is made available.
    ///
    /// Searchers are automatically released back into the pool when they
    /// are dropped. If you observe this function blocking forever, you
    /// should probably configure the Index to have a larger searcher pool,
    /// or you are holding on to references to previous searchers forever.
    fn searcher(&self) -> Searcher {
        let inner = self.reader.searcher();
        Searcher { inner }
    }

    /// Check if the given path contains an existing index.
    ///
    /// Args:
    ///     path: The path where tantivy will search for an index.
    ///
    /// Returns True if an index exists at the given path, False otherwise.
    ///
    /// Raises OSError if the directory cannot be opened.
    #[staticmethod]
    fn exists(path: &str) -> PyResult<bool> {
        MmapDirectory::open(path)
            .map(|directory| tv::Index::exists(&directory))
            .map_err(to_pyerr)
    }

    /// The schema of the current index, wrapped in the Python `Schema` class.
    #[getter]
    fn schema(&self) -> Schema {
        Schema {
            inner: self.index.schema(),
        }
    }

    /// Update searchers so that they reflect the state of the last .commit().
    ///
    /// If you set up the reload policy to be on 'commit' (which is the
    /// default) every commit should be rapidly reflected on your IndexReader
    /// and you should not need to call reload() at all.
    fn reload(&self) -> PyResult<()> {
        let outcome = self.reader.reload();
        outcome.map_err(to_pyerr)
    }

    /// Parse a query
    ///
    /// Args:
    ///     query (str): the query, following the tantivy query language.
    ///     default_field_names (List[str], optional): Names of the fields
    ///         used to search if no field is specified in the query. If
    ///         omitted, every indexed field of the schema is used.
    ///
    /// Raises ValueError if one of the given field names is not defined in
    /// the schema or is not set as indexed. Errors reported by the query
    /// parser are turned into a Python exception through `to_pyerr`.
    pub fn parse_query(
        &self,
        query: &str,
        default_field_names: Option<Vec<String>>,
    ) -> PyResult<Query> {
        // Fetched once and shared by both branches below.
        let schema = self.index.schema();
        let default_fields = match default_field_names {
            Some(names) => {
                let mut fields = Vec::with_capacity(names.len());
                for name in &names {
                    let field = schema.get_field(name).ok_or_else(|| {
                        exceptions::ValueError::py_err(format!(
                            "Field `{}` is not defined in the schema.",
                            name
                        ))
                    })?;
                    if !schema.get_field_entry(field).is_indexed() {
                        return Err(exceptions::ValueError::py_err(format!(
                            "Field `{}` is not set as indexed in the schema.",
                            name
                        )));
                    }
                    fields.push(field);
                }
                fields
            }
            None => {
                // No explicit choice: fall back to every indexed field.
                let mut fields = vec![];
                for (field, field_entry) in schema.fields() {
                    if field_entry.is_indexed() {
                        fields.push(field);
                    }
                }
                fields
            }
        };
        let parser =
            tv::query::QueryParser::for_index(&self.index, default_fields);
        let parsed = parser.parse_query(query).map_err(to_pyerr)?;
        Ok(Query { inner: parsed })
    }
}
tantivy: Disable a clippy warning about new not returning self. The new() method using pyo3 does not return self, pyo3 handles some python magic for us instead. Disable the clippy warning about not returning self in new since it does not apply to us. 2019-08-08 11:55:40 +00:00			`#![allow(clippy::new_ret_no_self)]`

Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`use pyo3::exceptions;`
			`use pyo3::prelude::*;`
Adding delete_document function to index (#12) * Adding Term, Field and delete_term API mapping * Fixing test * Fixing code doc * Removing Term and Field and nesting code on delete_term * Fixing lint * Delete_documents and documentation * Fixing style and testing exceptions * Fixing missing return * Fixing fmt 2019-09-09 12:30:50 +00:00			`use pyo3::types::PyAny;`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00
Adding delete_document function to index (#12) * Adding Term, Field and delete_term API mapping * Fixing test * Fixing code doc * Removing Term and Field and nesting code on delete_term * Fixing lint * Delete_documents and documentation * Fixing style and testing exceptions * Fixing missing return * Fixing fmt 2019-09-09 12:30:50 +00:00			`use crate::document::{extract_value, Document};`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`use crate::query::Query;`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`use crate::schema::Schema;`
			`use crate::searcher::Searcher;`
Format the repo. 2020-01-05 14:59:43 +00:00			`use crate::{get_field, to_pyerr};`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`use tantivy as tv;`
			`use tantivy::directory::MmapDirectory;`
index: Fix some lint issues. 2020-01-05 11:39:10 +00:00			`use tantivy::schema::{NamedFieldDocument, Term, Value};`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00
			`const RELOAD_POLICY: &str = "commit";`

			`/// IndexWriter is the user entry-point to add documents to the index.`
			`///`
			`/// To create an IndexWriter first create an Index and call the writer() method`
			`/// on the index object.`
			`#[pyclass]`
			`pub(crate) struct IndexWriter {`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`inner_index_writer: tv::IndexWriter,`
			`schema: tv::schema::Schema,`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`}`

			`#[pymethods]`
			`impl IndexWriter {`
			`/// Add a document to the index.`
			`///`
			`/// If the indexing pipeline is full, this call may block.`
			`///`
			/// Returns an `opstamp`, which is an increasing integer that can be used
			`/// by the client to align commits with its own document queue.`
			/// The `opstamp` represents the number of documents that have been added
			`/// since the creation of the index.`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`pub fn add_document(&mut self, doc: &Document) -> PyResult<u64> {`
			`let named_doc = NamedFieldDocument(doc.field_values.clone());`
			`let doc = self.schema.convert_named_doc(named_doc).map_err(to_pyerr)?;`
			`Ok(self.inner_index_writer.add_document(doc))`
			`}`

			/// Helper for the `add_document` method, but passing a json string.
			`///`
			`/// If the indexing pipeline is full, this call may block.`
			`///`
			/// Returns an `opstamp`, which is an increasing integer that can be used
			`/// by the client to align commits with its own document queue.`
			/// The `opstamp` represents the number of documents that have been added
			`/// since the creation of the index.`
			`pub fn add_json(&mut self, json: &str) -> PyResult<u64> {`
			`let doc = self.schema.parse_document(json).map_err(to_pyerr)?;`
			`let opstamp = self.inner_index_writer.add_document(doc);`
			`Ok(opstamp)`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`}`

			`/// Commits all of the pending changes`
			`///`
			`/// A call to commit blocks. After it returns, all of the document that`
			`/// were added since the last commit are published and persisted.`
			`///`
			`/// In case of a crash or an hardware failure (as long as the hard disk is`
			`/// spared), it will be possible to resume indexing from this point.`
			`///`
			/// Returns the `opstamp` of the last document that made it in the commit.
API simplification. See #9 2019-08-02 11:23:10 +00:00			`fn commit(&mut self) -> PyResult<u64> {`
			`self.inner_index_writer.commit().map_err(to_pyerr)`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`}`

			`/// Rollback to the last commit`
			`///`
			`/// This cancels all of the update that happened before after the last`
			`/// commit. After calling rollback, the index is in the same state as it`
			`/// was after the last commit.`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`fn rollback(&mut self) -> PyResult<u64> {`
			`self.inner_index_writer.rollback().map_err(to_pyerr)`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`}`

			`/// Detect and removes the files that are not used by the index anymore.`
			`fn garbage_collect_files(&mut self) -> PyResult<()> {`
Using tantivy 0.11 2019-12-17 14:23:53 +00:00			`use futures::executor::block_on;`
Format the repo. 2020-01-05 14:59:43 +00:00			`block_on(self.inner_index_writer.garbage_collect_files())`
Using tantivy 0.11 2019-12-17 14:23:53 +00:00			`.map_err(to_pyerr)?;`
			`Ok(())`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`}`

			`/// The opstamp of the last successful commit.`
			`///`
			`/// This is the opstamp the index will rollback to if there is a failure`
			`/// like a power surge.`
			`///`
			`/// This is also the opstamp of the commit that is currently available`
			`/// for searchers.`
			`#[getter]`
			`fn commit_opstamp(&self) -> u64 {`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`self.inner_index_writer.commit_opstamp()`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`}`
Adding delete_document function to index (#12) * Adding Term, Field and delete_term API mapping * Fixing test * Fixing code doc * Removing Term and Field and nesting code on delete_term * Fixing lint * Delete_documents and documentation * Fixing style and testing exceptions * Fixing missing return * Fixing fmt 2019-09-09 12:30:50 +00:00
			`/// Delete all documents containing a given term.`
			`///`
			`/// Args:`
			`/// field_name (str): The field name for which we want to filter deleted docs.`
			`/// field_value (PyAny): Python object with the value we want to filter.`
			`///`
			`/// If the field_name is not on the schema raises ValueError exception.`
			`/// If the field_value is not supported raises Exception.`
			`fn delete_documents(`
			`&mut self,`
			`field_name: &str,`
			`field_value: &PyAny,`
			`) -> PyResult<u64> {`
searcher: Allow the search to be sorted by an unsigned field. 2019-10-01 18:56:42 +00:00			`let field = get_field(&self.schema, field_name)?;`
Adding delete_document function to index (#12) * Adding Term, Field and delete_term API mapping * Fixing test * Fixing code doc * Removing Term and Field and nesting code on delete_term * Fixing lint * Delete_documents and documentation * Fixing style and testing exceptions * Fixing missing return * Fixing fmt 2019-09-09 12:30:50 +00:00			`let value = extract_value(field_value)?;`
			`let term = match value {`
			`Value::Str(text) => Term::from_field_text(field, &text),`
			`Value::U64(num) => Term::from_field_u64(field, num),`
			`Value::I64(num) => Term::from_field_i64(field, num),`
			`Value::F64(num) => Term::from_field_f64(field, num),`
			`Value::Date(d) => Term::from_field_date(field, &d),`
			`Value::Facet(facet) => Term::from_facet(field, &facet),`
			`Value::Bytes(_) => {`
			`return Err(exceptions::ValueError::py_err(format!(`
			"Field `{}` is bytes type not deletable.",
			`field_name`
			`)))`
			`}`
index: Fix some lint issues. 2020-01-05 11:39:10 +00:00			`Value::PreTokStr(_pretok) => {`
Using tantivy 0.11 2019-12-17 14:23:53 +00:00			`return Err(exceptions::ValueError::py_err(format!(`
			"Field `{}` is pretokenized. This is not authorized for delete.",
			`field_name`
			`)))`
			`}`
Adding delete_document function to index (#12) * Adding Term, Field and delete_term API mapping * Fixing test * Fixing code doc * Removing Term and Field and nesting code on delete_term * Fixing lint * Delete_documents and documentation * Fixing style and testing exceptions * Fixing missing return * Fixing fmt 2019-09-09 12:30:50 +00:00			`};`
index: Remove a redundant clone. 2020-01-05 15:00:02 +00:00			`Ok(self.inner_index_writer.delete_term(term))`
Adding delete_document function to index (#12) * Adding Term, Field and delete_term API mapping * Fixing test * Fixing code doc * Removing Term and Field and nesting code on delete_term * Fixing lint * Delete_documents and documentation * Fixing style and testing exceptions * Fixing missing return * Fixing fmt 2019-09-09 12:30:50 +00:00			`}`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`}`

			`/// Create a new index object.`
			`///`
			`/// Args:`
			`/// schema (Schema): The schema of the index.`
			`/// path (str, optional): The path where the index should be stored. If`
			`/// no path is provided, the index will be stored in memory.`
			`/// reuse (bool, optional): Should we open an existing index if one exists`
			`/// or always create a new one.`
			`///`
			`/// If an index already exists it will be opened and reused. Raises OSError`
			`/// if there was a problem during the opening or creation of the index.`
			`#[pyclass]`
			`pub(crate) struct Index {`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`pub(crate) index: tv::Index,`
			`reader: tv::IndexReader,`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`}`

			`#[pymethods]`
			`impl Index {`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`#[staticmethod]`
			`fn open(path: &str) -> PyResult<Index> {`
			`let index = tv::Index::open_in_dir(path).map_err(to_pyerr)?;`
			`let reader = index.reader().map_err(to_pyerr)?;`
			`Ok(Index { index, reader })`
			`}`

Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`#[new]`
			`#[args(reuse = true)]`
			`fn new(`
			`schema: &Schema,`
			`path: Option<&str>,`
			`reuse: bool,`
tantivy-py: Upgrade to PyO3 0.9. 2020-04-19 11:35:14 +00:00			`) -> PyResult<Self> {`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`let index = match path {`
			`Some(p) => {`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`let directory = MmapDirectory::open(p).map_err(to_pyerr)?;`
			`if reuse {`
			`tv::Index::open_or_create(directory, schema.inner.clone())`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`} else {`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`tv::Index::create(directory, schema.inner.clone())`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`}`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`.map_err(to_pyerr)?`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`}`
			`None => tv::Index::create_in_ram(schema.inner.clone()),`
			`};`

API simplification. See #9 2019-08-02 11:23:10 +00:00			`let reader = index.reader().map_err(to_pyerr)?;`
tantivy-py: Upgrade to PyO3 0.9. 2020-04-19 11:35:14 +00:00			`Ok(Index { index, reader })`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`}`

			/// Create a `IndexWriter` for the index.
			`///`
			`/// The writer will be multithreaded and the provided heap size will be`
			`/// split between the given number of threads.`
			`///`
			`/// Args:`
			`/// overall_heap_size (int, optional): The total target memory usage of`
			`/// the writer, can't be less than 3000000.`
			`/// num_threads (int, optional): The number of threads that the writer`
			`/// should use. If this value is 0, tantivy will choose`
			`/// automatically the number of threads.`
			`///`
			`/// Raises ValueError if there was an error while creating the writer.`
			`#[args(heap_size = 3000000, num_threads = 0)]`
			`fn writer(`
			`&self,`
			`heap_size: usize,`
			`num_threads: usize,`
			`) -> PyResult<IndexWriter> {`
			`let writer = match num_threads {`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`0 => self.index.writer(heap_size),`
			`_ => self.index.writer_with_num_threads(num_threads, heap_size),`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`}`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`.map_err(to_pyerr)?;`
			`let schema = self.index.schema();`
			`Ok(IndexWriter {`
			`inner_index_writer: writer,`
			`schema,`
			`})`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`}`

API simplification. See #9 2019-08-02 11:23:10 +00:00			`/// Configure the index reader.`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`///`
			`/// Args:`
			`/// reload_policy (str, optional): The reload policy that the`
API simplification. See #9 2019-08-02 11:23:10 +00:00			/// IndexReader should use. Can be `Manual` or `OnCommit`.
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`/// num_searchers (int, optional): The number of searchers that the`
			`/// reader should create.`
			`#[args(reload_policy = "RELOAD_POLICY", num_searchers = 0)]`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`fn config_reader(`
			`&mut self,`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`reload_policy: &str,`
			`num_searchers: usize,`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`) -> Result<(), PyErr> {`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`let reload_policy = reload_policy.to_lowercase();`
			`let reload_policy = match reload_policy.as_ref() {`
			`"commit" => tv::ReloadPolicy::OnCommit,`
			`"on-commit" => tv::ReloadPolicy::OnCommit,`
			`"oncommit" => tv::ReloadPolicy::OnCommit,`
			`"manual" => tv::ReloadPolicy::Manual,`
			`_ => return Err(exceptions::ValueError::py_err(`
			`"Invalid reload policy, valid choices are: 'manual' and 'OnCommit'"`
			`))`
			`};`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`let builder = self.index.reader_builder();`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`let builder = builder.reload_policy(reload_policy);`
			`let builder = if num_searchers > 0 {`
			`builder.num_searchers(num_searchers)`
			`} else {`
			`builder`
			`};`

API simplification. See #9 2019-08-02 11:23:10 +00:00			`self.reader = builder.try_into().map_err(to_pyerr)?;`
			`Ok(())`
			`}`

CR: Adding trailing lines and removing some trailing spaces. 2019-08-29 00:55:36 +00:00			`/// Acquires a Searcher from the searcher pool.`
			`///`
			`/// If no searcher is available during the call, note that`
			`/// this call will block until one is made available.`
			`///`
			`/// Searcher are automatically released back into the pool when`
			`/// they are dropped. If you observe this function to block forever`
			`/// you probably should configure the Index to have a larger`
			`/// searcher pool, or you are holding references to previous searcher`
			`/// for ever.`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`fn searcher(&self) -> Searcher {`
			`Searcher {`
			`inner: self.reader.searcher(),`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`}`
			`}`

			`/// Check if the given path contains an existing index.`
			`/// Args:`
			`/// path: The path where tantivy will search for an index.`
			`///`
			`/// Returns True if an index exists at the given path, False otherwise.`
			`///`
			`/// Raises OSError if the directory cannot be opened.`
			`#[staticmethod]`
			`fn exists(path: &str) -> PyResult<bool> {`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`let directory = MmapDirectory::open(path).map_err(to_pyerr)?;`
			`Ok(tv::Index::exists(&directory))`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`}`

			`/// The schema of the current index.`
			`#[getter]`
			`fn schema(&self) -> Schema {`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`let schema = self.index.schema();`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`Schema { inner: schema }`
			`}`
API simplification. See #9 2019-08-02 11:23:10 +00:00
			`/// Update searchers so that they reflect the state of the last .commit().`
			`///`
			`/// If you set up the the reload policy to be on 'commit' (which is the`
			`/// default) every commit should be rapidly reflected on your IndexReader`
			`/// and you should not need to call reload() at all.`
			`fn reload(&self) -> PyResult<()> {`
			`self.reader.reload().map_err(to_pyerr)`
			`}`

			`/// Parse a query`
			`///`
			`/// Args:`
			`/// query: the query, following the tantivy query language.`
			`/// default_fields (List[Field]): A list of fields used to search if no`
			`/// field is specified in the query.`
			`///`
			`#[args(reload_policy = "RELOAD_POLICY")]`
			`pub fn parse_query(`
			`&self,`
			`query: &str,`
			`default_field_names: Option<Vec<String>>,`
			`) -> PyResult<Query> {`
			`let mut default_fields = vec![];`
			`let schema = self.index.schema();`
			`if let Some(default_field_names_vec) = default_field_names {`
			`for default_field_name in &default_field_names_vec {`
			`if let Some(field) = schema.get_field(default_field_name) {`
			`let field_entry = schema.get_field_entry(field);`
			`if !field_entry.is_indexed() {`
			`return Err(exceptions::ValueError::py_err(format!(`
			"Field `{}` is not set as indexed in the schema.",
			`default_field_name`
			`)));`
			`}`
			`default_fields.push(field);`
			`} else {`
			`return Err(exceptions::ValueError::py_err(format!(`
			"Field `{}` is not defined in the schema.",
			`default_field_name`
			`)));`
			`}`
			`}`
			`} else {`
Format the repo. 2020-01-05 14:59:43 +00:00			`for (field, field_entry) in self.index.schema().fields() {`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`if field_entry.is_indexed() {`
Using tantivy 0.11 2019-12-17 14:23:53 +00:00			`default_fields.push(field);`
API simplification. See #9 2019-08-02 11:23:10 +00:00			`}`
			`}`
			`}`
			`let parser =`
			`tv::query::QueryParser::for_index(&self.index, default_fields);`
			`let query = parser.parse_query(query).map_err(to_pyerr)?;`
			`Ok(Query { inner: query })`
			`}`
Initial python bindings implementation. 2019-06-04 09:09:58 +00:00			`}`