Merge branch 'more_tests'
This commit is contained in:
commit
41a6bb3109
@ -1,9 +1,13 @@
|
|||||||
import json
|
import json
|
||||||
import tantivy
|
import tantivy
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
class TestClass(object):
|
|
||||||
def test_simple_search(self):
|
@pytest.fixture(scope="class")
|
||||||
|
def ram_index():
|
||||||
|
# assume all tests will use the same documents for now
|
||||||
|
# other methods may set up function-local indexes
|
||||||
builder = tantivy.SchemaBuilder()
|
builder = tantivy.SchemaBuilder()
|
||||||
|
|
||||||
title = builder.add_text_field("title", stored=True)
|
title = builder.add_text_field("title", stored=True)
|
||||||
@ -14,13 +18,18 @@ class TestClass(object):
|
|||||||
|
|
||||||
writer = index.writer()
|
writer = index.writer()
|
||||||
|
|
||||||
|
# 2 ways of adding documents
|
||||||
|
# 1
|
||||||
doc = tantivy.Document()
|
doc = tantivy.Document()
|
||||||
|
# create a document instance
|
||||||
|
# add field-value pairs
|
||||||
doc.add_text(title, "The Old Man and the Sea")
|
doc.add_text(title, "The Old Man and the Sea")
|
||||||
doc.add_text(body, ("He was an old man who fished alone in a skiff in"
|
doc.add_text(body, ("He was an old man who fished alone in a skiff in"
|
||||||
"the Gulf Stream and he had gone eighty-four days "
|
"the Gulf Stream and he had gone eighty-four days "
|
||||||
"now without taking a fish."))
|
"now without taking a fish."))
|
||||||
writer.add_document(doc)
|
writer.add_document(doc)
|
||||||
|
# 2 use the built-in json support
|
||||||
|
# keys need to coincide with field names
|
||||||
doc = schema.parse_document(json.dumps({
|
doc = schema.parse_document(json.dumps({
|
||||||
"title": "Of Mice and Men",
|
"title": "Of Mice and Men",
|
||||||
"body": ("A few miles south of Soledad, the Salinas River drops "
|
"body": ("A few miles south of Soledad, the Salinas River drops "
|
||||||
@ -54,8 +63,18 @@ class TestClass(object):
|
|||||||
|
|
||||||
reader = index.reader()
|
reader = index.reader()
|
||||||
searcher = reader.searcher()
|
searcher = reader.searcher()
|
||||||
|
index = index
|
||||||
|
schema = schema
|
||||||
|
default_args = [title, body]
|
||||||
|
ret = (index, searcher, schema, default_args, title, body)
|
||||||
|
return ret
|
||||||
|
|
||||||
query_parser = tantivy.QueryParser.for_index(index, [title, body])
|
|
||||||
|
class TestClass(object):
|
||||||
|
|
||||||
|
def test_simple_search(self, ram_index):
|
||||||
|
index, searcher, schema, default_args, title, body = ram_index
|
||||||
|
query_parser = tantivy.QueryParser.for_index(index, default_args)
|
||||||
query = query_parser.parse_query("sea whale")
|
query = query_parser.parse_query("sea whale")
|
||||||
|
|
||||||
top_docs = tantivy.TopDocs(10)
|
top_docs = tantivy.TopDocs(10)
|
||||||
@ -83,3 +102,91 @@ class TestClass(object):
|
|||||||
|
|
||||||
assert doc.len == 1
|
assert doc.len == 1
|
||||||
assert not doc.is_empty
|
assert not doc.is_empty
|
||||||
|
|
||||||
|
def test_and_query(self, ram_index):
    """Check that an ``AND`` query only returns the intersection of its clauses.

    Two conjunctive queries are run against the shared ``ram_index``
    fixture; the first is expected to match nothing and the second to
    match exactly one document.
    """
    index, searcher, _schema, default_args, _title, _body = ram_index
    parser = tantivy.QueryParser.for_index(index, default_args)
    collector = tantivy.TopDocs(10)

    # look for an intersection of documents; summer isn't present
    no_hits = searcher.search(
        parser.parse_query("title:men AND body:summer"), collector
    )
    print(no_hits)
    assert len(no_hits) == 0

    # same title clause, but with a body term the test expects to match
    one_hit = searcher.search(
        parser.parse_query("title:men AND body:winter"), collector
    )
    assert len(one_hit) == 1
|
||||||
|
|
||||||
|
def test_query_errors(self, ram_index):
    """Check that parsing a query on an unknown field raises ``ValueError``."""
    index, _searcher, _schema, default_args, _title, _body = ram_index
    parser = tantivy.QueryParser.for_index(index, default_args)

    # no "bod" field
    with pytest.raises(ValueError):
        parser.parse_query("bod:title")
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="class")
def disk_index():
    """Provide what the tests need to open the index under ``tests/test_index/``.

    Builds a schema with a stored ``title`` text field and a ``body`` text
    field, plus an index constructed from that schema alone (no path).

    Returns:
        A 6-tuple ``(index, path_to_index, schema, default_args, title,
        body)`` where ``default_args`` is ``[title, body]``, the field list
        the tests pass to ``tantivy.QueryParser.for_index``.
    """
    builder = tantivy.SchemaBuilder()
    title = builder.add_text_field("title", stored=True)
    body = builder.add_text_field("body")
    default_args = [title, body]
    schema = builder.build()
    index = tantivy.Index(schema)
    # relative path: the tests are expected to run from the root directory
    path_to_index = "tests/test_index/"
    return index, path_to_index, schema, default_args, title, body
|
||||||
|
|
||||||
|
|
||||||
|
class TestFromDiskClass(object):
    """Tests that exercise the index stored on disk via the ``disk_index`` fixture."""

    def test_exists(self, disk_index):
        """The fixture path is reported as containing an index."""
        # Kept as a separate test so that a deleted index directory shows up
        # as its own failure. The path is relative, so this assumes the
        # tests run from the root directory.
        index, path_to_index, _, _, _, _ = disk_index
        assert index.exists(path_to_index)

    def test_opens_from_dir(self, disk_index):
        """Constructing an index from the schema and the path does not raise."""
        _, path_to_index, schema, _, _, _ = disk_index
        tantivy.Index(schema, path_to_index)

    def test_create_readers(self, disk_index):
        """A reader can be created with either reload policy."""
        _, path_to_index, schema, _, _, _ = disk_index
        idx = tantivy.Index(schema, path_to_index)
        reload_policy = "OnCommit"  # or "Manual"
        assert idx.reader(reload_policy, 4)
        assert idx.reader("Manual", 4)

    def test_create_writer_and_reader(self, disk_index):
        """Writer opstamp and document count agree with ``meta.json``.

        Reads the expected opstamp and per-segment ``max_doc`` values from
        the index's ``meta.json`` and compares them with what the writer
        and a match-all search report.
        """
        import os

        _, path_to_index, schema, default_args, _, _ = disk_index
        idx = tantivy.Index(schema, path_to_index)
        writer = idx.writer()
        reload_policy = "OnCommit"  # or "Manual"
        reader = idx.reader(reload_policy, 4)

        # check against the opstamp in the meta file; os.path.join works
        # whether or not the fixture path ends with a separator
        with open(os.path.join(path_to_index, "meta.json")) as f:
            meta = json.load(f)
        expected_last_opstamp = meta["opstamp"]
        # ASSUMPTION
        # We haven't had any deletes in the index, so max_doc per segment
        # coincides with the value of `num_docs`; summing it over all
        # segments gives the number of documents.
        expected_num_docs = sum(
            segment["max_doc"] for segment in meta["segments"]
        )
        assert writer.commit_opstamp == expected_last_opstamp

        q_parser = tantivy.QueryParser.for_index(idx, default_args)
        # get all documents
        query = q_parser.parse_query("*")
        top_docs = tantivy.TopDocs(10)

        # Take one searcher and reuse it, so every document address is
        # resolved by the same searcher that returned it.
        searcher = reader.searcher()
        docs = searcher.search(query, top_docs)
        for (_score, doc_addr) in docs:
            print(searcher.doc(doc_addr))
        assert expected_num_docs == len(docs)
|
||||||
|
|||||||
1
tests/test_index/.managed.json
Normal file
1
tests/test_index/.managed.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
["1fc74026b3954090b69817d58bcce59a.fieldnorm","f79c6d0a68e04210a4d026ecf151a1e7.pos","1fc74026b3954090b69817d58bcce59a.store","meta.json","f79c6d0a68e04210a4d026ecf151a1e7.fieldnorm","8949f79450d349e2bd51d885f44c354f.fieldnorm","f79c6d0a68e04210a4d026ecf151a1e7.posidx","8949f79450d349e2bd51d885f44c354f.store","8949f79450d349e2bd51d885f44c354f.term","8949f79450d349e2bd51d885f44c354f.pos","1fc74026b3954090b69817d58bcce59a.posidx","1fc74026b3954090b69817d58bcce59a.pos","8949f79450d349e2bd51d885f44c354f.posidx","f79c6d0a68e04210a4d026ecf151a1e7.idx","1fc74026b3954090b69817d58bcce59a.fast","f79c6d0a68e04210a4d026ecf151a1e7.fast","f79c6d0a68e04210a4d026ecf151a1e7.term","1fc74026b3954090b69817d58bcce59a.term","8949f79450d349e2bd51d885f44c354f.idx","1fc74026b3954090b69817d58bcce59a.idx","f79c6d0a68e04210a4d026ecf151a1e7.store","8949f79450d349e2bd51d885f44c354f.fast"]
|
||||||
0
tests/test_index/.tantivy-meta.lock
Normal file
0
tests/test_index/.tantivy-meta.lock
Normal file
0
tests/test_index/.tantivy-writer.lock
Normal file
0
tests/test_index/.tantivy-writer.lock
Normal file
BIN
tests/test_index/1fc74026b3954090b69817d58bcce59a.fast
Normal file
BIN
tests/test_index/1fc74026b3954090b69817d58bcce59a.fast
Normal file
Binary file not shown.
BIN
tests/test_index/1fc74026b3954090b69817d58bcce59a.fieldnorm
Normal file
BIN
tests/test_index/1fc74026b3954090b69817d58bcce59a.fieldnorm
Normal file
Binary file not shown.
BIN
tests/test_index/1fc74026b3954090b69817d58bcce59a.idx
Normal file
BIN
tests/test_index/1fc74026b3954090b69817d58bcce59a.idx
Normal file
Binary file not shown.
BIN
tests/test_index/1fc74026b3954090b69817d58bcce59a.pos
Normal file
BIN
tests/test_index/1fc74026b3954090b69817d58bcce59a.pos
Normal file
Binary file not shown.
BIN
tests/test_index/1fc74026b3954090b69817d58bcce59a.posidx
Normal file
BIN
tests/test_index/1fc74026b3954090b69817d58bcce59a.posidx
Normal file
Binary file not shown.
BIN
tests/test_index/1fc74026b3954090b69817d58bcce59a.store
Normal file
BIN
tests/test_index/1fc74026b3954090b69817d58bcce59a.store
Normal file
Binary file not shown.
BIN
tests/test_index/1fc74026b3954090b69817d58bcce59a.term
Normal file
BIN
tests/test_index/1fc74026b3954090b69817d58bcce59a.term
Normal file
Binary file not shown.
BIN
tests/test_index/8949f79450d349e2bd51d885f44c354f.fast
Normal file
BIN
tests/test_index/8949f79450d349e2bd51d885f44c354f.fast
Normal file
Binary file not shown.
BIN
tests/test_index/8949f79450d349e2bd51d885f44c354f.fieldnorm
Normal file
BIN
tests/test_index/8949f79450d349e2bd51d885f44c354f.fieldnorm
Normal file
Binary file not shown.
BIN
tests/test_index/8949f79450d349e2bd51d885f44c354f.idx
Normal file
BIN
tests/test_index/8949f79450d349e2bd51d885f44c354f.idx
Normal file
Binary file not shown.
BIN
tests/test_index/8949f79450d349e2bd51d885f44c354f.pos
Normal file
BIN
tests/test_index/8949f79450d349e2bd51d885f44c354f.pos
Normal file
Binary file not shown.
BIN
tests/test_index/8949f79450d349e2bd51d885f44c354f.posidx
Normal file
BIN
tests/test_index/8949f79450d349e2bd51d885f44c354f.posidx
Normal file
Binary file not shown.
BIN
tests/test_index/8949f79450d349e2bd51d885f44c354f.store
Normal file
BIN
tests/test_index/8949f79450d349e2bd51d885f44c354f.store
Normal file
Binary file not shown.
BIN
tests/test_index/8949f79450d349e2bd51d885f44c354f.term
Normal file
BIN
tests/test_index/8949f79450d349e2bd51d885f44c354f.term
Normal file
Binary file not shown.
BIN
tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fast
Normal file
BIN
tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fast
Normal file
Binary file not shown.
BIN
tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fieldnorm
Normal file
BIN
tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fieldnorm
Normal file
Binary file not shown.
BIN
tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.idx
Normal file
BIN
tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.idx
Normal file
Binary file not shown.
BIN
tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.pos
Normal file
BIN
tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.pos
Normal file
Binary file not shown.
BIN
tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.posidx
Normal file
BIN
tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.posidx
Normal file
Binary file not shown.
BIN
tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.store
Normal file
BIN
tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.store
Normal file
Binary file not shown.
BIN
tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.term
Normal file
BIN
tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.term
Normal file
Binary file not shown.
44
tests/test_index/meta.json
Normal file
44
tests/test_index/meta.json
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
{
|
||||||
|
"segments": [
|
||||||
|
{
|
||||||
|
"segment_id": "1fc74026-b395-4090-b698-17d58bcce59a",
|
||||||
|
"max_doc": 1,
|
||||||
|
"deletes": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"segment_id": "8949f794-50d3-49e2-bd51-d885f44c354f",
|
||||||
|
"max_doc": 1,
|
||||||
|
"deletes": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"segment_id": "f79c6d0a-68e0-4210-a4d0-26ecf151a1e7",
|
||||||
|
"max_doc": 1,
|
||||||
|
"deletes": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"schema": [
|
||||||
|
{
|
||||||
|
"name": "title",
|
||||||
|
"type": "text",
|
||||||
|
"options": {
|
||||||
|
"indexing": {
|
||||||
|
"record": "position",
|
||||||
|
"tokenizer": "default"
|
||||||
|
},
|
||||||
|
"stored": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "body",
|
||||||
|
"type": "text",
|
||||||
|
"options": {
|
||||||
|
"indexing": {
|
||||||
|
"record": "position",
|
||||||
|
"tokenizer": "default"
|
||||||
|
},
|
||||||
|
"stored": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"opstamp": 6
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue
Block a user