From 75f2f0296de05d2afd83d1ae782d6f8ca76040f4 Mon Sep 17 00:00:00 2001 From: Sidhant29 Date: Tue, 17 Jan 2023 10:43:39 +1000 Subject: [PATCH] Added float support --- src/schemabuilder.rs | 30 ++++++++++++++--- tests/tantivy_test.py | 75 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 100 insertions(+), 5 deletions(-) diff --git a/src/schemabuilder.rs b/src/schemabuilder.rs index 30cbd29..d7a18ae 100644 --- a/src/schemabuilder.rs +++ b/src/schemabuilder.rs @@ -119,7 +119,7 @@ impl SchemaBuilder { ) -> PyResult { let builder = &mut self.builder; - let opts = SchemaBuilder::build_int_option(stored, indexed, fast)?; + let opts = SchemaBuilder::build_numeric_option(stored, indexed, fast)?; if let Some(builder) = builder.write().unwrap().as_mut() { builder.add_i64_field(name, opts); @@ -131,6 +131,28 @@ impl SchemaBuilder { Ok(self.clone()) } + #[args(stored = false, indexed = false)] + fn add_float_field( + &mut self, + name: &str, + stored: bool, + indexed: bool, + fast: Option<&str>, + ) -> PyResult { + let builder = &mut self.builder; + + let opts = SchemaBuilder::build_numeric_option(stored, indexed, fast)?; + + if let Some(builder) = builder.write().unwrap().as_mut() { + builder.add_f64_field(name, opts); + } else { + return Err(exceptions::PyValueError::new_err( + "Schema builder object isn't valid anymore.", + )); + } + Ok(self.clone()) + } + /// Add a new unsigned integer field to the schema. /// /// Args: @@ -161,7 +183,7 @@ impl SchemaBuilder { ) -> PyResult { let builder = &mut self.builder; - let opts = SchemaBuilder::build_int_option(stored, indexed, fast)?; + let opts = SchemaBuilder::build_numeric_option(stored, indexed, fast)?; if let Some(builder) = builder.write().unwrap().as_mut() { builder.add_u64_field(name, opts); @@ -203,7 +225,7 @@ impl SchemaBuilder { ) -> PyResult { let builder = &mut self.builder; - let opts = SchemaBuilder::build_int_option(stored, indexed, fast)?; + let opts = SchemaBuilder::build_numeric_option(stored, indexed, fast)?; if let Some(builder) = builder.write().unwrap().as_mut() { builder.add_date_field(name, opts); @@ -319,7 +341,7 @@ impl SchemaBuilder { } impl SchemaBuilder { - fn build_int_option( + fn build_numeric_option( stored: bool, indexed: bool, fast: Option<&str>, diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index 0d6d898..293f086 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -12,6 +12,14 @@ def schema(): .build() ) +def schema_numeric_fields(): + return ( + SchemaBuilder() + .add_integer_field("id", stored=True, indexed=True) + .add_float_field("rating", stored=True, indexed=True) + .add_text_field("body", stored=True) + .build() + ) def create_index(dir=None): # assume all tests will use the same documents for now @@ -66,6 +74,46 @@ def create_index(dir=None): index.reload() return index +def create_index_with_numeric_fields(dir=None): + index = Index(schema_numeric_fields(), dir) + writer = index.writer() + + doc = Document() + doc.add_integer("id", 1) + doc.add_float("rating", 3.5) + doc.add_text( + "body", + ( + "He was an old man who fished alone in a skiff in" + "the Gulf Stream and he had gone eighty-four days " + "now without taking a fish." + ), + ) + writer.add_document(doc) + doc = Document.from_dict( + { + "id": 2, + "rating": 4.5, + "body": ( + "A few miles south of Soledad, the Salinas River drops " + "in close to the hillside bank and runs deep and " + "green. The water is warm too, for it has slipped " + "twinkling over the yellow sands in the sunlight " + "before reaching the narrow pool. On one side of the " + "river the golden foothill slopes curve up to the " + "strong and rocky Gabilan Mountains, but on the valley " + "side the water is lined with trees—willows fresh and " + "green with every spring, carrying in their lower leaf " + "junctures the debris of the winter’s flooding; and " + "sycamores with mottled, white, recumbent limbs and " + "branches that arch over the pool" + ), + } + ) + writer.add_document(doc) + writer.commit() + index.reload() + return index @pytest.fixture() def dir_index(tmpdir): @@ -77,6 +125,11 @@ def ram_index(): return create_index() +@pytest.fixture(scope="class") +def ram_index_numeric_fields(): + return create_index_with_numeric_fields() + + class TestClass(object): def test_simple_search_in_dir(self, dir_index): _, index = dir_index @@ -120,6 +173,25 @@ class TestClass(object): assert len(result.hits) == 1 + def test_and_query_numeric_fields(self, ram_index_numeric_fields): + index = ram_index_numeric_fields + searcher = index.searcher() + + # 1 result + float_query = index.parse_query("3.5", ["rating"]) + result = searcher.search(float_query) + assert len(result.hits) == 1 + assert searcher.doc(result.hits[0][1])['rating'][0] == 3.5 + + integer_query = index.parse_query("1", ["id"]) + result = searcher.search(integer_query) + assert len(result.hits) == 1 + + # 0 result + integer_query = index.parse_query("10", ["id"]) + result = searcher.search(integer_query) + assert len(result.hits) == 0 + def test_and_query_parser_default_fields(self, ram_index): query = ram_index.parse_query("winter", default_field_names=["title"]) assert repr(query) == """Query(TermQuery(Term(type=Str, field=0, "winter")))""" @@ -279,8 +351,9 @@ class TestFromDiskClass(object): class TestSearcher(object): - def test_searcher_repr(self, ram_index): + def test_searcher_repr(self, ram_index, ram_index_numeric_fields): assert repr(ram_index.searcher()) == "Searcher(num_docs=3, num_segments=1)" + assert repr(ram_index_numeric_fields.searcher()) == "Searcher(num_docs=2, num_segments=1)" class TestDocument(object):