Fix json bug - fixes #353 (#354)

master
Caleb Hattingh 2024-09-23 10:41:38 +02:00 committed by GitHub
parent 20d470736c
commit 32d6b6ced9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 54 additions and 3 deletions

View File

@ -239,9 +239,18 @@ fn value_to_py(py: Python, value: &Value) -> PyResult<PyObject> {
.into_py(py)
}
Value::Facet(f) => Facet { inner: f.clone() }.into_py(py),
Value::Array(_arr) => {
// TODO implement me
unimplemented!();
Value::Array(arr) => {
let mut list = PyList::empty_bound(py);
// Because `value_to_py` can return an error, we need to be able
// to handle those errors on demand. Also, we want to avoid
// collecting all the values into an intermediate `Vec` before
// creating the `PyList`. So, the loop below is the simplest
// solution. Another option might have been
// `arr.iter().try_for_each(...)` but it just looks more complex.
for v in arr {
list.append(value_to_py(py, v)?)?;
}
list.into()
}
Value::Object(obj) => object_to_py(py, obj)?,
Value::Bool(b) => b.into_py(py),

42
tests/test_json_bug.py Normal file
View File

@ -0,0 +1,42 @@
def test_json_bug():
    """Regression test for issue #353: JSON fields that contain arrays.

    Indexes a single document whose stored JSON field includes a list
    (``"interests"``), searches for it, and checks that reading the stored
    field back yields the original object, arrays included.
    """
    import json

    import tantivy

    schema_builder = tantivy.SchemaBuilder()
    schema_builder.add_json_field("data", stored=True)
    schema = schema_builder.build()
    index = tantivy.Index(schema)
    index_writer = index.writer()

    data = {
        "name": "John Doe",
        "age": 30,
        "email": "john.doe@example.com",
        # The list value is the part that exercises the array conversion.
        "interests": ["reading", "hiking", "coding"],
    }
    json_data = json.dumps(data)

    doc = tantivy.Document()
    doc.add_json("data", json_data)
    index_writer.add_document(doc)
    index_writer.commit()
    index_writer.wait_merging_threads()
    index.reload()

    searcher = index.searcher()
    query = "*"
    q = index.parse_query(query)
    top_docs = searcher.search(q, limit=10)
    print(f"Total hits: {top_docs}")

    # Exactly one document was indexed. Without this check the loop below
    # would run zero times on an empty result and the test would pass
    # without asserting anything.
    assert len(top_docs.hits) == 1

    # The field is compared against a one-element list wrapping the object.
    expected = [
        {
            "age": 30,
            "email": "john.doe@example.com",
            "interests": ["reading", "hiking", "coding"],
            "name": "John Doe",
        }
    ]
    for _score, hit in top_docs.hits:
        stored_doc = searcher.doc(hit)
        print(stored_doc["data"])
        assert stored_doc["data"] == expected