Tantivy 0.19.2 (#67)

* Adding __init__.py file to the tantivy folder to make maturin happy
  Add Cargo.lock to the repo
  Set the git-fetch-with-cli cargo flag so that we can override fetch settings
  Renaming .cargo/config to .cargo/config.toml
  Adding github-quiq-sh cargo registry
  Point dependencies at our github-quiq-sh registry
  Trying to resolve this build issue, pointing pyo3-build-config at our github-quiq-sh registry
  SER-21487: Enable support for all standard Tantivy languages plus Chinese + Japanese in tantivy-py
  SER-21487: Use uname rather than UNAME in the Makefile
  SER-21487: Fix document date handling
  SER-23013: Upgrade Tantivy and other dependencies
* Upgrade to Tantivy 0.19.1
* Apply rustfmt and fix bug when fast option = None
* Upgrade to tantivy-0.19.2
* Standardize around using 'cargo fmt' rather than 'rustfmt'
* Reverting to old style dependencies
* Linting with clippy
* Switching out hashmap for defining tokenizers for an array, and adding test for Spanish indexing
* Use cargo fmt instead of rustfmt on the Lint ci step
2023-02-14 08:20:59 -05:00 · 2023-02-14 08:20:59 -05:00 · 164adc87e1
parent b2043793ee
commit 164adc87e1
15 changed files with 1820 additions and 106 deletions
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@ -2,4 +2,4 @@
 rustflags = [
  "-C", "link-arg=-undefined",
  "-C", "link-arg=dynamic_lookup",
-]
+]
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -18,7 +18,7 @@ jobs:
          toolchain: stable
          components: rustfmt
      - name: Check Formatting
-        run: rustfmt --check src/*rs
+        run: cargo fmt --check
  Test:
    strategy:
--- a/.gitignore
+++ b/.gitignore
@ -3,9 +3,9 @@
 build
 /target
 **/*.rs.bk
 Cargo.lock
 dist/
 __pycache__/
 tantivy.so
 tantivy.dylib
 tantivy/tantivy.cpython*.so
 tantivy.egg-info/
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,6 +1,6 @@
 [package]
 name = "tantivy"
-version = "0.17.0"
+version = "0.19.2"
 readme = "README.md"
 authors = ["Damir Jelić <poljar@termina.org.uk>"]
 edition = "2018"
@ -11,15 +11,15 @@ name = "tantivy"
 crate-type = ["cdylib"]
 [build-dependencies]
-pyo3-build-config = "0.16.3"
+pyo3-build-config = "0.18.0"
 [dependencies]
-chrono = "0.4.19"
+chrono = "0.4.23"
-tantivy = "0.17"
+tantivy = "0.19.2"
-itertools = "0.10.3"
+itertools = "0.10.5"
-futures = "0.3.21"
+futures = "0.3.26"
-serde_json = "1.0.64"
+serde_json = "1.0.91"
 [dependencies.pyo3]
-version = "0.16.3"
+version = "0.18.0"
-features = ["extension-module"]
+features = ["extension-module"]
--- a/Makefile
+++ b/Makefile
@ -1,4 +1,4 @@
-ifeq ($(shell UNAME),Darwin)
+ifeq ($(shell uname),Darwin)
  EXT := dylib
 else
  EXT := so
@ -6,15 +6,21 @@ endif
 source_files := $(wildcard src/*.rs)
-all: tantivy/tantivy.$(EXT)
+all: format lint build test
 PHONY: test format
 lint:
 	cargo clippy
 test: tantivy/tantivy.$(EXT)
 	python3 -m pytest
 format:
-	rustfmt src/*.rs
+	cargo fmt
 build:
 	maturin build --interpreter python3.7 python3.8 python3.9 python3.10 python3.11
 tantivy/tantivy.$(EXT): target/debug/libtantivy.$(EXT)
 	cp target/debug/libtantivy.$(EXT) tantivy/tantivy.so
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,5 +1,5 @@
 [build-system]
-requires = ["maturin"]
+requires = ["maturin>=0.13,<0.14"]
 build-backend = "maturin"
 [project]
--- a/rustfmt.toml
+++ b/rustfmt.toml
@ -1 +1 @@
-max_width = 80
+max_width = 80
--- a/src/document.rs
+++ b/src/document.rs
@ -9,7 +9,7 @@ use pyo3::{
    },
 };
-use chrono::{offset::TimeZone, Datelike, Timelike, Utc};
+use chrono::{offset::TimeZone, Utc};
 use tantivy as tv;
@ -56,37 +56,42 @@ fn value_to_py(py: Python, value: &Value) -> PyResult<PyObject> {
            // TODO implement me
            unimplemented!();
        }
-        Value::Date(d) => PyDateTime::new(
+        Value::Date(d) => {
-            py,
+            let utc = d.into_utc();
-            d.year(),
+            PyDateTime::new(
-            d.month() as u8,
+                py,
-            d.day() as u8,
+                utc.year(),
-            d.hour() as u8,
+                utc.month() as u8,
-            d.minute() as u8,
+                utc.day(),
-            d.second() as u8,
+                utc.hour(),
-            d.timestamp_subsec_micros(),
+                utc.minute(),
-            None,
+                utc.second(),
-        )?
+                utc.microsecond(),
-        .into_py(py),
+                None,
            )?
            .into_py(py)
        }
        Value::Facet(f) => Facet { inner: f.clone() }.into_py(py),
        Value::JsonObject(json_object) => {
            let inner: HashMap<_, _> = json_object
                .iter()
-                .map(|(k, v)| (k, value_to_object(&v, py)))
+                .map(|(k, v)| (k, value_to_object(v, py)))
                .collect();
            inner.to_object(py)
        }
        Value::Bool(b) => b.into_py(py),
        Value::IpAddr(i) => (*i).to_string().into_py(py),
    })
 }
 fn value_to_string(value: &Value) -> String {
    match value {
        Value::Str(text) => text.clone(),
-        Value::U64(num) => format!("{}", num),
+        Value::U64(num) => format!("{num}"),
-        Value::I64(num) => format!("{}", num),
+        Value::I64(num) => format!("{num}"),
-        Value::F64(num) => format!("{}", num),
+        Value::F64(num) => format!("{num}"),
-        Value::Bytes(bytes) => format!("{:?}", bytes),
+        Value::Bytes(bytes) => format!("{bytes:?}"),
-        Value::Date(d) => format!("{:?}", d),
+        Value::Date(d) => format!("{d:?}"),
        Value::Facet(facet) => facet.to_string(),
        Value::PreTokStr(_pretok) => {
            // TODO implement me
@ -95,6 +100,8 @@ fn value_to_string(value: &Value) -> String {
        Value::JsonObject(json_object) => {
            serde_json::to_string(&json_object).unwrap()
        }
        Value::Bool(b) => format!("{b}"),
        Value::IpAddr(i) => format!("{}", *i),
    }
 }
@ -141,10 +148,10 @@ impl fmt::Debug for Document {
                    .chars()
                    .take(10)
                    .collect();
-                format!("{}=[{}]", field_name, values_str)
+                format!("{field_name}=[{values_str}]")
            })
            .join(",");
-        write!(f, "Document({})", doc_str)
+        write!(f, "Document({doc_str})")
    }
 }
@ -170,23 +177,24 @@ pub(crate) fn extract_value(any: &PyAny) -> PyResult<Value> {
    }
    if let Ok(py_datetime) = any.downcast::<PyDateTime>() {
        let datetime = Utc
-            .ymd(
+            .with_ymd_and_hms(
                py_datetime.get_year(),
                py_datetime.get_month().into(),
                py_datetime.get_day().into(),
            )
            .and_hms_micro(
                py_datetime.get_hour().into(),
                py_datetime.get_minute().into(),
                py_datetime.get_second().into(),
-                py_datetime.get_microsecond(),
+            )
-            );
+            .single()
-        return Ok(Value::Date(datetime));
+            .unwrap();
        return Ok(Value::Date(tv::DateTime::from_timestamp_secs(
            datetime.timestamp(),
        )));
    }
    if let Ok(facet) = any.extract::<Facet>() {
-        return Ok(Value::Facet(facet.inner.clone()));
+        return Ok(Value::Facet(facet.inner));
    }
-    Err(to_pyerr(format!("Value unsupported {:?}", any)))
+    Err(to_pyerr(format!("Value unsupported {any:?}")))
 }
 fn extract_value_single_or_list(any: &PyAny) -> PyResult<Vec<Value>> {
@ -200,7 +208,7 @@ fn extract_value_single_or_list(any: &PyAny) -> PyResult<Vec<Value>> {
 #[pymethods]
 impl Document {
    #[new]
-    #[args(kwargs = "**")]
+    #[pyo3(signature = (**kwargs))]
    fn new(kwargs: Option<&PyDict>) -> PyResult<Self> {
        let mut document = Document::default();
        if let Some(field_dict) = kwargs {
@ -299,18 +307,21 @@ impl Document {
    ///     value (datetime): The date that will be added to the document.
    fn add_date(&mut self, field_name: String, value: &PyDateTime) {
        let datetime = Utc
-            .ymd(
+            .with_ymd_and_hms(
                value.get_year(),
                value.get_month().into(),
                value.get_day().into(),
            )
            .and_hms_micro(
                value.get_hour().into(),
                value.get_minute().into(),
                value.get_second().into(),
-                value.get_microsecond(),
+            )
-            );
+            .single()
-        add_value(self, field_name, datetime);
+            .unwrap();
        add_value(
            self,
            field_name,
            tv::DateTime::from_timestamp_secs(datetime.timestamp()),
        );
    }
    /// Add a facet value to the document.
@ -387,13 +398,13 @@ impl Document {
    }
    fn __getitem__(&self, field_name: &str) -> PyResult<Vec<PyObject>> {
-        let gil = Python::acquire_gil();
+        Python::with_gil(|py| -> PyResult<Vec<PyObject>> {
-        let py = gil.python();
+            self.get_all(py, field_name)
-        self.get_all(py, field_name)
+        })
    }
    fn __repr__(&self) -> PyResult<String> {
-        Ok(format!("{:?}", self))
+        Ok(format!("{self:?}"))
    }
 }
--- a/src/index.rs
+++ b/src/index.rs
@ -14,6 +14,10 @@ use tantivy as tv;
 use tantivy::{
    directory::MmapDirectory,
    schema::{NamedFieldDocument, Term, Value},
    tokenizer::{
        Language, LowerCaser, RemoveLongFilter, SimpleTokenizer, Stemmer,
        TextAnalyzer,
    },
 };
 const RELOAD_POLICY: &str = "commit";
@ -120,26 +124,25 @@ impl IndexWriter {
            Value::U64(num) => Term::from_field_u64(field, num),
            Value::I64(num) => Term::from_field_i64(field, num),
            Value::F64(num) => Term::from_field_f64(field, num),
-            Value::Date(d) => Term::from_field_date(field, &d),
+            Value::Date(d) => Term::from_field_date(field, d),
            Value::Facet(facet) => Term::from_facet(field, &facet),
            Value::Bytes(_) => {
                return Err(exceptions::PyValueError::new_err(format!(
-                    "Field `{}` is bytes type not deletable.",
+                    "Field `{field_name}` is bytes type not deletable."
                    field_name
                )))
            }
            Value::PreTokStr(_pretok) => {
                return Err(exceptions::PyValueError::new_err(format!(
-                    "Field `{}` is pretokenized. This is not authorized for delete.",
+                    "Field `{field_name}` is pretokenized. This is not authorized for delete."
                    field_name
                )))
            }
            Value::JsonObject(_) => {
                return Err(exceptions::PyValueError::new_err(format!(
-                    "Field `{}` is json object type not deletable.",
+                    "Field `{field_name}` is json object type not deletable."
                    field_name
                )))
-            }
+            },
            Value::Bool(b) => Term::from_field_bool(field, b),
            Value::IpAddr(i) => Term::from_field_ip_addr(field, i)
        };
        Ok(self.inner_index_writer.delete_term(term))
    }
@ -167,12 +170,15 @@ impl Index {
    #[staticmethod]
    fn open(path: &str) -> PyResult<Index> {
        let index = tv::Index::open_in_dir(path).map_err(to_pyerr)?;
        Index::register_custom_text_analyzers(&index);
        let reader = index.reader().map_err(to_pyerr)?;
        Ok(Index { index, reader })
    }
    #[new]
-    #[args(reuse = true)]
+    #[pyo3(signature = (schema, path = None, reuse = true))]
    fn new(schema: &Schema, path: Option<&str>, reuse: bool) -> PyResult<Self> {
        let index = match path {
            Some(p) => {
@ -191,6 +197,8 @@ impl Index {
            None => tv::Index::create_in_ram(schema.inner.clone()),
        };
        Index::register_custom_text_analyzers(&index);
        let reader = index.reader().map_err(to_pyerr)?;
        Ok(Index { index, reader })
    }
@ -208,7 +216,7 @@ impl Index {
    ///         automatically the number of threads.
    ///
    /// Raises ValueError if there was an error while creating the writer.
-    #[args(heap_size = 3000000, num_threads = 0)]
+    #[pyo3(signature = (heap_size = 3000000, num_threads = 0))]
    fn writer(
        &self,
        heap_size: usize,
@ -231,13 +239,13 @@ impl Index {
    /// Args:
    ///     reload_policy (str, optional): The reload policy that the
    ///         IndexReader should use. Can be `Manual` or `OnCommit`.
-    ///     num_searchers (int, optional): The number of searchers that the
+    ///     num_warmers (int, optional): The number of warming threads that the
    ///         reader should create.
-    #[args(reload_policy = "RELOAD_POLICY", num_searchers = 0)]
+    #[pyo3(signature = (reload_policy = RELOAD_POLICY, num_warmers = 0))]
    fn config_reader(
        &mut self,
        reload_policy: &str,
-        num_searchers: usize,
+        num_warmers: usize,
    ) -> Result<(), PyErr> {
        let reload_policy = reload_policy.to_lowercase();
        let reload_policy = match reload_policy.as_ref() {
@ -251,8 +259,8 @@ impl Index {
        };
        let builder = self.index.reader_builder();
        let builder = builder.reload_policy(reload_policy);
-        let builder = if num_searchers > 0 {
+        let builder = if num_warmers > 0 {
-            builder.num_searchers(num_searchers)
+            builder.num_warming_threads(num_warmers)
        } else {
            builder
        };
@ -313,7 +321,7 @@ impl Index {
    ///     default_field_names (List[Field]): A list of fields used to search if no
    ///         field is specified in the query.
    ///
-    #[args(reload_policy = "RELOAD_POLICY")]
+    #[pyo3(signature = (query, default_field_names = None))]
    pub fn parse_query(
        &self,
        query: &str,
@ -328,16 +336,14 @@ impl Index {
                    if !field_entry.is_indexed() {
                        return Err(exceptions::PyValueError::new_err(
                            format!(
-                            "Field `{}` is not set as indexed in the schema.",
+                            "Field `{default_field_name}` is not set as indexed in the schema."
                            default_field_name
                        ),
                        ));
                    }
                    default_fields.push(field);
                } else {
                    return Err(exceptions::PyValueError::new_err(format!(
-                        "Field `{}` is not defined in the schema.",
+                        "Field `{default_field_name}` is not defined in the schema."
                        default_field_name
                    )));
                }
            }
@ -355,3 +361,35 @@ impl Index {
        Ok(Query { inner: query })
    }
 }
 impl Index {
    fn register_custom_text_analyzers(index: &tv::Index) {
        let analyzers = [
            ("ar_stem", Language::Arabic),
            ("da_stem", Language::Danish),
            ("nl_stem", Language::Dutch),
            ("fi_stem", Language::Finnish),
            ("fr_stem", Language::French),
            ("de_stem", Language::German),
            ("el_stem", Language::Greek),
            ("hu_stem", Language::Hungarian),
            ("it_stem", Language::Italian),
            ("no_stem", Language::Norwegian),
            ("pt_stem", Language::Portuguese),
            ("ro_stem", Language::Romanian),
            ("ru_stem", Language::Russian),
            ("es_stem", Language::Spanish),
            ("sv_stem", Language::Swedish),
            ("ta_stem", Language::Tamil),
            ("tr_stem", Language::Turkish),
        ];
        for (name, lang) in &analyzers {
            let an = TextAnalyzer::from(SimpleTokenizer)
                .filter(RemoveLongFilter::limit(40))
                .filter(LowerCaser)
                .filter(Stemmer::new(*lang));
            index.tokenizers().register(name, an);
        }
    }
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,5 +1,5 @@
 use ::tantivy as tv;
 use pyo3::{exceptions, prelude::*};
 use tantivy as tv;
 mod document;
 mod facet;
@ -88,8 +88,7 @@ pub(crate) fn get_field(
 ) -> PyResult<tv::schema::Field> {
    let field = schema.get_field(field_name).ok_or_else(|| {
        exceptions::PyValueError::new_err(format!(
-            "Field `{}` is not defined in the schema.",
+            "Field `{field_name}` is not defined in the schema."
            field_name
        ))
    })?;
--- a/src/schemabuilder.rs
+++ b/src/schemabuilder.rs
@ -6,7 +6,7 @@ use tantivy::schema;
 use crate::schema::Schema;
 use std::sync::{Arc, RwLock};
-use tantivy::schema::INDEXED;
+use tantivy::schema::{DateOptions, INDEXED};
 /// Tantivy has a very strict schema.
 /// You need to specify in advance whether a field is indexed or not,
@ -60,11 +60,12 @@ impl SchemaBuilder {
    ///
    /// Returns the associated field handle.
    /// Raises a ValueError if there was an error with the field creation.
-    #[args(
+    #[pyo3(signature = (
        name,
        stored = false,
-        tokenizer_name = "TOKENIZER",
+        tokenizer_name = TOKENIZER,
-        index_option = "RECORD"
+        index_option = RECORD
-    )]
+    ))]
    fn add_text_field(
        &mut self,
        name: &str,
@ -109,7 +110,7 @@ impl SchemaBuilder {
    ///
    /// Returns the associated field handle.
    /// Raises a ValueError if there was an error with the field creation.
-    #[args(stored = false, indexed = false)]
+    #[pyo3(signature = (name, stored = false, indexed = false, fast = None))]
    fn add_integer_field(
        &mut self,
        name: &str,
@ -151,7 +152,7 @@ impl SchemaBuilder {
    ///
    /// Returns the associated field handle.
    /// Raises a ValueError if there was an error with the field creation.
-    #[args(stored = false, indexed = false)]
+    #[pyo3(signature = (name, stored = false, indexed = false, fast = None))]
    fn add_unsigned_field(
        &mut self,
        name: &str,
@ -185,7 +186,7 @@ impl SchemaBuilder {
    ///         field. Fast fields are designed for random access. Access time
    ///         are similar to a random lookup in an array. If more than one
    ///         value is associated to a fast field, only the last one is kept.
-    ///         Can be one of 'single' or 'multi'. If this is set to 'single,
+    ///         Can be one of 'single' or 'multi'. If this is set to 'single',
    ///         the document must have exactly one value associated to the
    ///         document. If this is set to 'multi', the document can have any
    ///         number of values associated to the document. Defaults to None,
@ -193,7 +194,7 @@ impl SchemaBuilder {
    ///
    /// Returns the associated field handle.
    /// Raises a ValueError if there was an error with the field creation.
-    #[args(stored = false, indexed = false)]
+    #[pyo3(signature = (name, stored = false, indexed = false, fast = None))]
    fn add_date_field(
        &mut self,
        name: &str,
@ -203,7 +204,29 @@ impl SchemaBuilder {
    ) -> PyResult<Self> {
        let builder = &mut self.builder;
-        let opts = SchemaBuilder::build_int_option(stored, indexed, fast)?;
+        let mut opts = DateOptions::default();
        if stored {
            opts = opts.set_stored();
        }
        if indexed {
            opts = opts.set_indexed();
        }
        let fast = match fast {
            Some(f) => {
                let f = f.to_lowercase();
                match f.as_ref() {
                    "single" => Some(schema::Cardinality::SingleValue),
                    "multi" => Some(schema::Cardinality::MultiValues),
                    _ => return Err(exceptions::PyValueError::new_err(
                        "Invalid index option, valid choices are: 'multi' and 'single'"
                    )),
                }
            }
            None => None,
        };
        if let Some(f) = fast {
            opts = opts.set_fast(f);
        }
        if let Some(builder) = builder.write().unwrap().as_mut() {
            builder.add_date_field(name, opts);
@ -234,11 +257,12 @@ impl SchemaBuilder {
    ///
    /// Returns the associated field handle.
    /// Raises a ValueError if there was an error with the field creation.
-    #[args(
+    #[pyo3(signature = (
        name,
        stored = false,
-        tokenizer_name = "TOKENIZER",
+        tokenizer_name = TOKENIZER,
-        index_option = "RECORD"
+        index_option = RECORD
-    )]
+    ))]
    fn add_json_field(
        &mut self,
        name: &str,
--- a/src/searcher.rs
+++ b/src/searcher.rs
@ -10,7 +10,7 @@ use tantivy::collector::{Count, MultiCollector, TopDocs};
 /// A Searcher is used to search the index given a prepared Query.
 #[pyclass]
 pub(crate) struct Searcher {
-    pub(crate) inner: tv::LeasedItem<tv::Searcher>,
+    pub(crate) inner: tv::Searcher,
 }
 #[derive(Clone)]
@ -22,8 +22,8 @@ enum Fruit {
 impl std::fmt::Debug for Fruit {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
-            Fruit::Score(s) => f.write_str(&format!("{}", s)),
+            Fruit::Score(s) => f.write_str(&format!("{s}")),
-            Fruit::Order(o) => f.write_str(&format!("{}", o)),
+            Fruit::Order(o) => f.write_str(&format!("{o}")),
        }
    }
 }
@ -93,7 +93,7 @@ impl Searcher {
    /// Returns `SearchResult` object.
    ///
    /// Raises a ValueError if there was an error with the search.
-    #[args(limit = 10, offset = 0, count = true)]
+    #[pyo3(signature = (query, limit = 10, count = true, order_by_field = None, offset = 0))]
    fn search(
        &self,
        _py: Python,
@ -154,10 +154,7 @@ impl Searcher {
            }
        };
-        let count = match count_handle {
+        let count = count_handle.map(|h| h.extract(&mut multifruit));
            Some(h) => Some(h.extract(&mut multifruit)),
            None => None,
        };
        Ok(SearchResult { hits, count })
    }
@ -230,11 +227,11 @@ impl From<&tv::DocAddress> for DocAddress {
    }
 }
-impl Into<tv::DocAddress> for &DocAddress {
+impl From<&DocAddress> for tv::DocAddress {
-    fn into(self) -> tv::DocAddress {
+    fn from(val: &DocAddress) -> Self {
        tv::DocAddress {
-            segment_ord: self.segment_ord(),
+            segment_ord: val.segment_ord(),
-            doc_id: self.doc(),
+            doc_id: val.doc(),
        }
    }
 }
--- a/tantivy/__init__.py
+++ b/tantivy/__init__.py
@ -0,0 +1 @@
 from .tantivy import *
--- a/tests/tantivy_test.py
+++ b/tests/tantivy_test.py
@ -67,6 +67,56 @@ def create_index(dir=None):
    return index
 def spanish_schema():
    """Build the schema used by the Spanish tests.

    It has a stored ``title`` text field and a ``body`` text field, both
    using the ``es_stem`` tokenizer.
    """
    builder = SchemaBuilder()
    builder = builder.add_text_field(
        "title", stored=True, tokenizer_name="es_stem"
    )
    builder = builder.add_text_field("body", tokenizer_name="es_stem")
    return builder.build()
 def create_spanish_index():
    """Create an index over ``spanish_schema()`` holding three Spanish documents.

    The documents are added in three different ways (field by field, from a
    dict, and from raw JSON), then the writer is committed and the index is
    reloaded before being returned.
    """
    # No path is passed, so the index is not bound to a directory.
    index = Index(spanish_schema(), None)
    writer = index.writer()

    # Document 1: start from an empty Document and add each field by hand.
    old_man = Document()
    old_man.add_text("title", "El viejo y el mar")
    old_man.add_text(
        "body",
        "Era un viejo que pescaba solo en un bote en el Gulf Stream y hacía ochenta y cuatro días que no cogía un pez. ",
    )
    writer.add_document(old_man)

    # Document 2: build it from a dict whose keys are the field names.
    mice_and_men = {
        "title": "De ratones y hombres",
        "body": "Unas millas al sur de Soledad, el río Salinas se ahonda junto al margen de la ladera y fluye profundo y verde. Es tibia el agua, porque se ha deslizado chispeante sobre la arena amarilla y al calor del sol antes de llegar a la angosta laguna. A un lado del río, la dorada falda de la ladera se curva hacia arriba trepando hasta las montañas Gabilán, fuertes y rocosas, pero del lado del valle los árboles bordean la orilla: sauces frescos y verdes cada primavera, que en la s junturas más bajas de sus hojas muestran las consecuencias de la crecida invernal; y sicomoros de troncos veteados, blancos, recostados, y ramas quesear quean sobre el estanque",
    }
    writer.add_document(Document.from_dict(mice_and_men))

    # Document 3: hand raw JSON straight to the writer.
    frankenstein_json = """{
            "title": ["Frankenstein", "El moderno Prometeo"],
            "body": "Te alegrará saber que no ha ocurrido ningún percance al principio de una aventura que siempre consideraste cargada de malos presagios. Llegué aquí ayer, y mi primera tarea es asegurarle a mi querida hermana que me hallo perfectamente y que tengo una gran confianza en el éxito de mi empresa."
        }"""
    writer.add_json(frankenstein_json)

    writer.commit()
    index.reload()
    return index
@pytest.fixture()
 def dir_index(tmpdir):
    return (tmpdir, create_index(str(tmpdir)))
@ -77,6 +127,11 @@ def ram_index():
    return create_index()
@pytest.fixture(scope="class")
 def spanish_index():
    return create_spanish_index()
 class TestClass(object):
    def test_simple_search_in_dir(self, dir_index):
        _, index = dir_index
@ -103,6 +158,16 @@ class TestClass(object):
        searched_doc = index.searcher().doc(doc_address)
        assert searched_doc["title"] == ["The Old Man and the Sea"]
    def test_simple_search_in_spanish(self, spanish_index):
        """Searching "vieja" in title and body must yield exactly one hit,
        the document titled "El viejo y el mar"."""
        parsed = spanish_index.parse_query("vieja", ["title", "body"])
        hits = spanish_index.searcher().search(parsed, 10).hits
        assert len(hits) == 1

        # Each hit is a two-element pair; only the document address is used.
        _ignored, address = hits[0]
        matched = spanish_index.searcher().doc(address)
        assert matched["title"] == ["El viejo y el mar"]
    def test_and_query(self, ram_index):
        index = ram_index
        query = index.parse_query(