Merge branch 'more_tests'

master
Damir Jelić 2019-08-03 13:56:36 +02:00
commit 41a6bb3109
26 changed files with 156 additions and 4 deletions

View File

@@ -1,9 +1,13 @@
 import json
 import tantivy
+import pytest


-class TestClass(object):
-    def test_simple_search(self):
+@pytest.fixture(scope="class")
+def ram_index():
+    # assume all tests will use the same documents for now
+    # other methods may set up function-local indexes
     builder = tantivy.SchemaBuilder()

     title = builder.add_text_field("title", stored=True)
@@ -14,13 +18,18 @@ class TestClass(object):
     writer = index.writer()

+    # 2 ways of adding documents
+    # 1
     doc = tantivy.Document()
+    # create a document instance
+    # add field-value pairs
     doc.add_text(title, "The Old Man and the Sea")
     doc.add_text(body, ("He was an old man who fished alone in a skiff in"
                         "the Gulf Stream and he had gone eighty-four days "
                         "now without taking a fish."))
     writer.add_document(doc)
+    # 2 use the built-in json support
+    # keys need to coincide with field names
     doc = schema.parse_document(json.dumps({
         "title": "Of Mice and Men",
         "body": ("A few miles south of Soledad, the Salinas River drops "
@@ -54,8 +63,18 @@ class TestClass(object):
     reader = index.reader()
     searcher = reader.searcher()

+    index = index
+    schema = schema
+    default_args = [title, body]
+    ret = (index, searcher, schema, default_args, title, body)
+    return ret
-        query_parser = tantivy.QueryParser.for_index(index, [title, body])
+
+
+class TestClass(object):
+    def test_simple_search(self, ram_index):
+        index, searcher, schema, default_args, title, body = ram_index
+        query_parser = tantivy.QueryParser.for_index(index, default_args)
         query = query_parser.parse_query("sea whale")

         top_docs = tantivy.TopDocs(10)
@@ -83,3 +102,91 @@ class TestClass(object):
         assert doc.len == 1
         assert not doc.is_empty
+
+    def test_and_query(self, ram_index):
+        index, searcher, schema, default_args, title, body = ram_index
+        q_parser = tantivy.QueryParser.for_index(index, default_args)
+        # look for an intersection of documents
+        query = q_parser.parse_query("title:men AND body:summer")
+        top_docs = tantivy.TopDocs(10)
+        result = searcher.search(query, top_docs)
+        print(result)
+        # summer isn't present
+        assert len(result) == 0
+
+        query = q_parser.parse_query("title:men AND body:winter")
+        result = searcher.search(query, top_docs)
+        assert len(result) == 1
+
+    def test_query_errors(self, ram_index):
+        index, searcher, schema, default_args, title, body = ram_index
+        q_parser = tantivy.QueryParser.for_index(index, default_args)
+        # no "bod" field
+        with pytest.raises(ValueError):
+            q_parser.parse_query("bod:title")
+
+
+@pytest.fixture(scope="class")
+def disk_index():
+    builder = tantivy.SchemaBuilder()
+    title = builder.add_text_field("title", stored=True)
+    body = builder.add_text_field("body")
+    default_args = [title, body]
+    schema = builder.build()
+    schema = schema
+    index = tantivy.Index(schema)
+    path_to_index = "tests/test_index/"
+    return index, path_to_index, schema, default_args, title, body
+
+
+class TestFromDiskClass(object):
+    def test_exists(self, disk_index):
+        # prefer to keep it separate in case anyone deletes this
+        # runs from the root directory
+        index, path_to_index, _, _, _, _ = disk_index
+        assert index.exists(path_to_index)
+
+    def test_opens_from_dir(self, disk_index):
+        _, path_to_index, schema, _, _, _ = disk_index
+        tantivy.Index(schema, path_to_index)
+
+    def test_create_readers(self, disk_index):
+        _, path_to_index, schema, _, _, _ = disk_index
+        idx = tantivy.Index(schema, path_to_index)
+        reload_policy = "OnCommit"  # or "Manual"
+        assert idx.reader(reload_policy, 4)
+        assert idx.reader("Manual", 4)
+
+    def test_create_writer_and_reader(self, disk_index):
+        _, path_to_index, schema, default_args, title, body = disk_index
+        idx = tantivy.Index(schema, path_to_index)
+        writer = idx.writer()
+        reload_policy = "OnCommit"  # or "Manual"
+        reader = idx.reader(reload_policy, 4)
+
+        # check against the opstamp in the meta file
+        meta_fname = "meta.json"
+        with open("{}{}".format(path_to_index, meta_fname)) as f:
+            json_file = json.load(f)
+            expected_last_opstamp = json_file["opstamp"]
+            # ASSUMPTION
+            # We haven't had any deletes in the index
+            # so max_doc per index coincides with the value of `num_docs`
+            # summing them in all segments, gives the number of documents
+            expected_num_docs = sum([segment["max_doc"]
+                                     for segment in json_file["segments"]])
+
+        assert writer.commit_opstamp == expected_last_opstamp
+
+        q_parser = tantivy.QueryParser.for_index(idx, default_args)
+        # get all documents
+        query = q_parser.parse_query("*")
+        top_docs = tantivy.TopDocs(10)
+        docs = reader.searcher().search(query, top_docs)
+        for (_score, doc_addr) in docs:
+            print(reader.searcher().doc(doc_addr))
+        assert expected_num_docs == len(docs)
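
A note on the pattern the new tests lean on: because ram_index and disk_index are declared with scope="class", pytest builds each fixture once per test class and passes the same return value to every test method that requests it, so the index is constructed and committed only a single time. A minimal, self-contained sketch of that mechanism (the names shared_resource and TestSharing are illustrative, not part of this commit):

    import itertools

    import pytest

    _counter = itertools.count()

    @pytest.fixture(scope="class")
    def shared_resource():
        # built once per test class, not once per test method
        return next(_counter)

    class TestSharing(object):
        def test_first(self, shared_resource):
            assert shared_resource == 0

        def test_second(self, shared_resource):
            # same value as in test_first: the fixture was not rebuilt
            assert shared_resource == 0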

View File

@@ -0,0 +1 @@
+["1fc74026b3954090b69817d58bcce59a.fieldnorm","f79c6d0a68e04210a4d026ecf151a1e7.pos","1fc74026b3954090b69817d58bcce59a.store","meta.json","f79c6d0a68e04210a4d026ecf151a1e7.fieldnorm","8949f79450d349e2bd51d885f44c354f.fieldnorm","f79c6d0a68e04210a4d026ecf151a1e7.posidx","8949f79450d349e2bd51d885f44c354f.store","8949f79450d349e2bd51d885f44c354f.term","8949f79450d349e2bd51d885f44c354f.pos","1fc74026b3954090b69817d58bcce59a.posidx","1fc74026b3954090b69817d58bcce59a.pos","8949f79450d349e2bd51d885f44c354f.posidx","f79c6d0a68e04210a4d026ecf151a1e7.idx","1fc74026b3954090b69817d58bcce59a.fast","f79c6d0a68e04210a4d026ecf151a1e7.fast","f79c6d0a68e04210a4d026ecf151a1e7.term","1fc74026b3954090b69817d58bcce59a.term","8949f79450d349e2bd51d885f44c354f.idx","1fc74026b3954090b69817d58bcce59a.idx","f79c6d0a68e04210a4d026ecf151a1e7.store","8949f79450d349e2bd51d885f44c354f.fast"]

View File

View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,44 @@
+{
+  "segments": [
+    {
+      "segment_id": "1fc74026-b395-4090-b698-17d58bcce59a",
+      "max_doc": 1,
+      "deletes": null
+    },
+    {
+      "segment_id": "8949f794-50d3-49e2-bd51-d885f44c354f",
+      "max_doc": 1,
+      "deletes": null
+    },
+    {
+      "segment_id": "f79c6d0a-68e0-4210-a4d0-26ecf151a1e7",
+      "max_doc": 1,
+      "deletes": null
+    }
+  ],
+  "schema": [
+    {
+      "name": "title",
+      "type": "text",
+      "options": {
+        "indexing": {
+          "record": "position",
+          "tokenizer": "default"
+        },
+        "stored": true
+      }
+    },
+    {
+      "name": "body",
+      "type": "text",
+      "options": {
+        "indexing": {
+          "record": "position",
+          "tokenizer": "default"
+        },
+        "stored": false
+      }
+    }
+  ],
+  "opstamp": 6
+}
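
For reference, plugging this meta.json into the same computation that test_create_writer_and_reader performs gives the values its assertions compare against: the opstamp is 6, and since no segment carries deletes, the document count is the sum of max_doc over the three segments, 1 + 1 + 1 = 3. A small sketch that duplicates the test's logic outside of pytest (run from the repository root, as the tests are):

    import json

    # same file the test opens via path_to_index + meta_fname
    with open("tests/test_index/meta.json") as f:
        meta = json.load(f)

    # opstamp recorded by the last commit of the checked-in index
    assert meta["opstamp"] == 6

    # no deletes, so summing max_doc over the segments counts the documents
    assert sum(segment["max_doc"] for segment in meta["segments"]) == 3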