From c86f0fc1d6ceb9a39345cd8ef84286da2480d47c Mon Sep 17 00:00:00 2001
From: Paul Masurel
Date: Thu, 1 Oct 2020 22:35:36 +0900
Subject: [PATCH] Avoid truncating utf-8 strings in the middle of a codepoint in `Document::__repr__`

Closes #24
---
 src/document.rs       | 10 +++++++---
 tests/tantivy_test.py |  4 ++++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/document.rs b/src/document.rs
index 8d831c0..df53773 100644
--- a/src/document.rs
+++ b/src/document.rs
@@ -97,9 +97,13 @@ impl fmt::Debug for Document {
             .field_values
             .iter()
             .map(|(field_name, field_values)| {
-                let mut values_str =
-                    field_values.iter().map(value_to_string).join(",");
-                values_str.truncate(10);
+                let values_str: String = field_values
+                    .iter()
+                    .map(value_to_string)
+                    .join(",")
+                    .chars()
+                    .take(10)
+                    .collect();
                 format!("{}=[{}]", field_name, values_str)
             })
             .join(",");
diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py
index 08b5b2c..8c3b636 100644
--- a/tests/tantivy_test.py
+++ b/tests/tantivy_test.py
@@ -301,6 +301,10 @@ class TestDocument(object):
         doc = tantivy.Document(name="Bill", reference=[1, 2])
         assert repr(doc) == "Document(name=[Bill],reference=[1,2])"
 
+    def test_document_repr_utf8(self):
+        doc = tantivy.Document(name="野菜食べないとやばい", reference=[1, 2])
+        assert repr(doc) == "Document(name=[野菜食べないとやばい],reference=[1,2])"
+
     def test_document_with_facet(self):
         doc = tantivy.Document()
         facet = tantivy.Facet.from_string("/europe/france")