use meta.json to programmatically check last opstamp and num_docs

master
petr-tik 2019-08-03 00:55:31 +01:00
parent b2a769f1d7
commit bd93e4ca75
1 changed file with 16 additions and 10 deletions

View File

@ -136,34 +136,39 @@ class TestFromDiskClass(object):
schema = builder.build()
cls.schema = schema
cls.index = tantivy.Index(schema)
cls.path_to_index = "tests/test_index/"
def test_exists(self):
# prefer to keep it separate in case anyone deletes this
# runs from the root directory
path_to_index = "tests/test_index/"
assert self.index.exists(path_to_index)
assert self.index.exists(self.path_to_index)
def test_opens_from_dir(self):
path_to_index = "tests/test_index/"
tantivy.Index(self.schema, path_to_index)
tantivy.Index(self.schema, self.path_to_index)
def test_create_readers(self):
path_to_index = "tests/test_index/"
idx = tantivy.Index(self.schema, path_to_index)
idx = tantivy.Index(self.schema, self.path_to_index)
reload_policy = "OnCommit" # or "Manual"
assert idx.reader(reload_policy, 4)
assert idx.reader("Manual", 4)
def test_create_writer_and_reader(self):
path_to_index = "tests/test_index/"
idx = tantivy.Index(self.schema, path_to_index)
idx = tantivy.Index(self.schema, self.path_to_index)
writer = idx.writer()
reload_policy = "OnCommit" # or "Manual"
reader = idx.reader(reload_policy, 4)
# check against the opstamp in the meta file
with open("tests/test_index/meta.json") as f:
expected_last_opstamp = json.load(f)["opstamp"]
meta_fname = "meta.json"
with open("{}{}".format(self.path_to_index, meta_fname)) as f:
json_file = json.load(f)
expected_last_opstamp = json_file["opstamp"]
# ASSUMPTION
# We haven't had any deletes in the index,
# so each segment's `max_doc` coincides with its `num_docs`;
# summing `max_doc` over all segments gives the number of documents
expected_num_docs = sum([segment["max_doc"]
for segment in json_file["segments"]])
assert writer.commit_opstamp == expected_last_opstamp
q_parser = tantivy.QueryParser.for_index(idx, self.default_args)
@ -174,3 +179,4 @@ class TestFromDiskClass(object):
docs = reader.searcher().search(query, top_docs)
for (_score, doc_addr) in docs:
print(reader.searcher().doc(doc_addr))
assert expected_num_docs == len(docs)