From 32d6b6ced9724fdd1e2c5ca7d1a5a3e4b7558978 Mon Sep 17 00:00:00 2001
From: Caleb Hattingh
Date: Mon, 23 Sep 2024 10:41:38 +0200
Subject: [PATCH] Fix json bug - fixes #353 (#354)

---
 src/document.rs        | 15 ++++++++++++---
 tests/test_json_bug.py | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_json_bug.py

diff --git a/src/document.rs b/src/document.rs
index 8e2ba6c..89f6c39 100644
--- a/src/document.rs
+++ b/src/document.rs
@@ -239,9 +239,18 @@ fn value_to_py(py: Python, value: &Value) -> PyResult {
                 .into_py(py)
         }
         Value::Facet(f) => Facet { inner: f.clone() }.into_py(py),
-        Value::Array(_arr) => {
-            // TODO implement me
-            unimplemented!();
+        Value::Array(arr) => {
+            let list = PyList::empty_bound(py);
+            // Because `value_to_py` can return an error, we need to be able
+            // to handle those errors on demand. Also, we want to avoid
+            // collecting all the values into an intermediate `Vec` before
+            // creating the `PyList`. So, the loop below is the simplest
+            // solution. Another option might have been
+            // `arr.iter().try_for_each(...)` but it just looks more complex.
+            for v in arr {
+                list.append(value_to_py(py, v)?)?;
+            }
+            list.into()
         }
         Value::Object(obj) => object_to_py(py, obj)?,
         Value::Bool(b) => b.into_py(py),
diff --git a/tests/test_json_bug.py b/tests/test_json_bug.py
new file mode 100644
index 0000000..09f67d8
--- /dev/null
+++ b/tests/test_json_bug.py
@@ -0,0 +1,43 @@
+import json
+
+import tantivy
+
+
+def test_json_bug():
+    """Regression test for #353: stored JSON fields that contain arrays.
+
+    Indexes one document whose JSON payload includes a list and checks
+    that reading the stored field back returns the same structure.
+    """
+    schema_builder = tantivy.SchemaBuilder()
+    schema_builder.add_json_field("data", stored=True)
+    schema = schema_builder.build()
+
+    index = tantivy.Index(schema)
+    index_writer = index.writer()
+
+    data = {
+        "name": "John Doe",
+        "age": 30,
+        "email": "john.doe@example.com",
+        "interests": ["reading", "hiking", "coding"],
+    }
+
+    doc = tantivy.Document()
+    doc.add_json("data", json.dumps(data))
+    index_writer.add_document(doc)
+    index_writer.commit()
+    index_writer.wait_merging_threads()
+    index.reload()
+
+    searcher = index.searcher()
+    top_docs = searcher.search(index.parse_query("*"), limit=10)
+
+    # Exactly one document was indexed. Checking the hit count first keeps
+    # the test from passing vacuously when the search returns nothing.
+    assert len(top_docs.hits) == 1
+
+    for _score, hit in top_docs.hits:
+        stored = searcher.doc(hit)
+        # The expected value is a one-element list wrapping the payload.
+        assert stored["data"] == [data]