Merge branch 'more_tests'
commit 41a6bb3109
@@ -1,9 +1,13 @@
 import json
 import tantivy
 
+import pytest
+
 
-class TestClass(object):
-
-    def test_simple_search(self):
-        builder = tantivy.SchemaBuilder()
+@pytest.fixture(scope="class")
+def ram_index():
+    # assume all tests will use the same documents for now
+    # other methods may set up function-local indexes
+    builder = tantivy.SchemaBuilder()
 
-        title = builder.add_text_field("title", stored=True)
+    title = builder.add_text_field("title", stored=True)
@@ -14,13 +18,18 @@ class TestClass(object):
 
     writer = index.writer()
 
+    # 2 ways of adding documents
+    # 1
     doc = tantivy.Document()
+    # create a document instance
+    # add field-value pairs
     doc.add_text(title, "The Old Man and the Sea")
     doc.add_text(body, ("He was an old man who fished alone in a skiff in"
                         "the Gulf Stream and he had gone eighty-four days "
                         "now without taking a fish."))
     writer.add_document(doc)
+    # 2 use the built-in json support
+    # keys need to coincide with field names
     doc = schema.parse_document(json.dumps({
         "title": "Of Mice and Men",
         "body": ("A few miles south of Soledad, the Salinas River drops "
@@ -54,8 +63,18 @@ class TestClass(object):
 
     reader = index.reader()
     searcher = reader.searcher()
+    index = index
+    schema = schema
+    default_args = [title, body]
+    ret = (index, searcher, schema, default_args, title, body)
+    return ret
 
-        query_parser = tantivy.QueryParser.for_index(index, [title, body])
+
+class TestClass(object):
+
+    def test_simple_search(self, ram_index):
+        index, searcher, schema, default_args, title, body = ram_index
+        query_parser = tantivy.QueryParser.for_index(index, default_args)
         query = query_parser.parse_query("sea whale")
 
         top_docs = tantivy.TopDocs(10)
@@ -83,3 +102,91 @@ class TestClass(object):
 
         assert doc.len == 1
         assert not doc.is_empty
+
+    def test_and_query(self, ram_index):
+        index, searcher, schema, default_args, title, body = ram_index
+        q_parser = tantivy.QueryParser.for_index(index, default_args)
+        # look for an intersection of documents
+        query = q_parser.parse_query("title:men AND body:summer")
+        top_docs = tantivy.TopDocs(10)
+
+        result = searcher.search(query, top_docs)
+        print(result)
+
+        # summer isn't present
+        assert len(result) == 0
+
+        query = q_parser.parse_query("title:men AND body:winter")
+        result = searcher.search(query, top_docs)
+
+        assert len(result) == 1
+
+    def test_query_errors(self, ram_index):
+        index, searcher, schema, default_args, title, body = ram_index
+        q_parser = tantivy.QueryParser.for_index(index, default_args)
+        # no "bod" field
+        with pytest.raises(ValueError):
+            q_parser.parse_query("bod:title")
+
+
+@pytest.fixture(scope="class")
+def disk_index():
+    builder = tantivy.SchemaBuilder()
+    title = builder.add_text_field("title", stored=True)
+    body = builder.add_text_field("body")
+    default_args = [title, body]
+    schema = builder.build()
+    schema = schema
+    index = tantivy.Index(schema)
+    path_to_index = "tests/test_index/"
+    return index, path_to_index, schema, default_args, title, body
+
+
+class TestFromDiskClass(object):
+
+    def test_exists(self, disk_index):
+        # prefer to keep it separate in case anyone deletes this
+        # runs from the root directory
+        index, path_to_index, _, _, _, _ = disk_index
+        assert index.exists(path_to_index)
+
+    def test_opens_from_dir(self, disk_index):
+        _, path_to_index, schema, _, _, _ = disk_index
+        tantivy.Index(schema, path_to_index)
+
+    def test_create_readers(self, disk_index):
+        _, path_to_index, schema, _, _, _ = disk_index
+        idx = tantivy.Index(schema, path_to_index)
+        reload_policy = "OnCommit" # or "Manual"
+        assert idx.reader(reload_policy, 4)
+        assert idx.reader("Manual", 4)
+
+    def test_create_writer_and_reader(self, disk_index):
+        _, path_to_index, schema, default_args, title, body = disk_index
+        idx = tantivy.Index(schema, path_to_index)
+        writer = idx.writer()
+        reload_policy = "OnCommit" # or "Manual"
+        reader = idx.reader(reload_policy, 4)
+
+        # check against the opstamp in the meta file
+        meta_fname = "meta.json"
+        with open("{}{}".format(path_to_index, meta_fname)) as f:
+            json_file = json.load(f)
+            expected_last_opstamp = json_file["opstamp"]
+            # ASSUMPTION
+            # We haven't had any deletes in the index
+            # so max_doc per index coincides with the value of `num_docs`
+            # summing them in all segments, gives the number of documents
+            expected_num_docs = sum([segment["max_doc"]
+                                     for segment in json_file["segments"]])
+        assert writer.commit_opstamp == expected_last_opstamp
+
+        q_parser = tantivy.QueryParser.for_index(idx, default_args)
+        # get all documents
+        query = q_parser.parse_query("*")
+        top_docs = tantivy.TopDocs(10)
+
+        docs = reader.searcher().search(query, top_docs)
+        for (_score, doc_addr) in docs:
+            print(reader.searcher().doc(doc_addr))
+        assert expected_num_docs == len(docs)
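An aside for readers of this diff (illustration only, not part of the commit): a pytest fixture declared with scope="class" runs once per test class and hands the same object to every test method that requests it, which is why each test above unpacks the identical tuple instead of rebuilding the index. A minimal, self-contained sketch of the mechanism, using hypothetical names:

import pytest

constructions = []


@pytest.fixture(scope="class")
def shared_resource():
    # executed once for the whole class below, not once per test method
    constructions.append(1)
    return len(constructions)


class TestSharesResource(object):

    def test_a(self, shared_resource):
        assert shared_resource == 1

    def test_b(self, shared_resource):
        # still 1: the fixture body did not run a second time
        assert shared_resource == 1

The disk-based tests additionally assume the suite is launched from the repository root (see the comment in test_exists), since path_to_index is the relative path "tests/test_index/".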
@@ -0,0 +1 @@
+["1fc74026b3954090b69817d58bcce59a.fieldnorm","f79c6d0a68e04210a4d026ecf151a1e7.pos","1fc74026b3954090b69817d58bcce59a.store","meta.json","f79c6d0a68e04210a4d026ecf151a1e7.fieldnorm","8949f79450d349e2bd51d885f44c354f.fieldnorm","f79c6d0a68e04210a4d026ecf151a1e7.posidx","8949f79450d349e2bd51d885f44c354f.store","8949f79450d349e2bd51d885f44c354f.term","8949f79450d349e2bd51d885f44c354f.pos","1fc74026b3954090b69817d58bcce59a.posidx","1fc74026b3954090b69817d58bcce59a.pos","8949f79450d349e2bd51d885f44c354f.posidx","f79c6d0a68e04210a4d026ecf151a1e7.idx","1fc74026b3954090b69817d58bcce59a.fast","f79c6d0a68e04210a4d026ecf151a1e7.fast","f79c6d0a68e04210a4d026ecf151a1e7.term","1fc74026b3954090b69817d58bcce59a.term","8949f79450d349e2bd51d885f44c354f.idx","1fc74026b3954090b69817d58bcce59a.idx","f79c6d0a68e04210a4d026ecf151a1e7.store","8949f79450d349e2bd51d885f44c354f.fast"]
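The one-line file added above is a JSON array naming every file in the checked-in test index. This looks like tantivy's ledger of managed files, conventionally stored as .managed.json in the index directory (an assumption here, since the rendered diff dropped the filename). Each entry is a segment UUID with the dashes stripped plus one extension per index component (.store, .term, .idx, .pos, .posidx, .fast, .fieldnorm), and the UUIDs match the segment_id values in the meta.json hunk below. A small sketch of that correspondence, assuming the paths the tests use:

import json

with open("tests/test_index/meta.json") as f:
    meta = json.load(f)
with open("tests/test_index/.managed.json") as f:  # assumed filename
    managed = set(json.load(f))

for segment in meta["segments"]:
    stem = segment["segment_id"].replace("-", "")
    # "1fc74026-b395-4090-b698-17d58bcce59a" -> "1fc74026b3954090b69817d58bcce59a.store"
    assert stem + ".store" in managed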
Binary files not shown (21 new index segment files under tests/test_index/).
@@ -0,0 +1,44 @@
+{
+  "segments": [
+    {
+      "segment_id": "1fc74026-b395-4090-b698-17d58bcce59a",
+      "max_doc": 1,
+      "deletes": null
+    },
+    {
+      "segment_id": "8949f794-50d3-49e2-bd51-d885f44c354f",
+      "max_doc": 1,
+      "deletes": null
+    },
+    {
+      "segment_id": "f79c6d0a-68e0-4210-a4d0-26ecf151a1e7",
+      "max_doc": 1,
+      "deletes": null
+    }
+  ],
+  "schema": [
+    {
+      "name": "title",
+      "type": "text",
+      "options": {
+        "indexing": {
+          "record": "position",
+          "tokenizer": "default"
+        },
+        "stored": true
+      }
+    },
+    {
+      "name": "body",
+      "type": "text",
+      "options": {
+        "indexing": {
+          "record": "position",
+          "tokenizer": "default"
+        },
+        "stored": false
+      }
+    }
+  ],
+  "opstamp": 6
+}
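To tie test_create_writer_and_reader back to this data: the index holds three segments with max_doc of 1 each and no deletes, so expected_num_docs evaluates to 3, and writer.commit_opstamp is compared against the stored opstamp of 6. The same arithmetic as a standalone check, assuming only the meta.json shown above:

import json

with open("tests/test_index/meta.json") as f:
    meta = json.load(f)

# with no deletes recorded, max_doc counts the live documents per segment
assert all(seg["deletes"] is None for seg in meta["segments"])
assert sum(seg["max_doc"] for seg in meta["segments"]) == 3  # 1 + 1 + 1
assert meta["opstamp"] == 6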