Avoid truncating UTF-8 strings in the middle of a codepoint in `Document::__repr__`

Closes #24
master
Paul Masurel 2020-10-01 22:35:36 +09:00
parent 4ecf7119ea
commit c86f0fc1d6
2 changed files with 11 additions and 3 deletions

View File

@ -97,9 +97,13 @@ impl fmt::Debug for Document {
.field_values
.iter()
.map(|(field_name, field_values)| {
let mut values_str =
field_values.iter().map(value_to_string).join(",");
values_str.truncate(10);
let values_str: String = field_values
.iter()
.map(value_to_string)
.join(",")
.chars()
.take(10)
.collect();
format!("{}=[{}]", field_name, values_str)
})
.join(",");

View File

@ -301,6 +301,10 @@ class TestDocument(object):
doc = tantivy.Document(name="Bill", reference=[1, 2])
assert repr(doc) == "Document(name=[Bill],reference=[1,2])"
def test_document_repr_utf8(self):
doc = tantivy.Document(name="野菜食べないとやばい", reference=[1, 2])
assert repr(doc) == "Document(name=[野菜食べないとやばい],reference=[1,2])"
def test_document_with_facet(self):
doc = tantivy.Document()
facet = tantivy.Facet.from_string("/europe/france")