diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index 5a72026..6da61aa 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -1,9 +1,13 @@ import json import tantivy +import pytest -class TestClass(object): - def test_simple_search(self): + +@pytest.fixture(scope="class") +def ram_index(): + # assume all tests will use the same documents for now + # other methods may set up function-local indexes builder = tantivy.SchemaBuilder() title = builder.add_text_field("title", stored=True) @@ -14,13 +18,18 @@ class TestClass(object): writer = index.writer() + # 2 ways of adding documents + # 1 doc = tantivy.Document() + # create a document instance + # add field-value pairs doc.add_text(title, "The Old Man and the Sea") doc.add_text(body, ("He was an old man who fished alone in a skiff in" "the Gulf Stream and he had gone eighty-four days " "now without taking a fish.")) writer.add_document(doc) - + # 2 use the built-in json support + # keys need to coincide with field names doc = schema.parse_document(json.dumps({ "title": "Of Mice and Men", "body": ("A few miles south of Soledad, the Salinas River drops " @@ -54,8 +63,18 @@ class TestClass(object): reader = index.reader() searcher = reader.searcher() + index = index + schema = schema + default_args = [title, body] + ret = (index, searcher, schema, default_args, title, body) + return ret - query_parser = tantivy.QueryParser.for_index(index, [title, body]) + +class TestClass(object): + + def test_simple_search(self, ram_index): + index, searcher, schema, default_args, title, body = ram_index + query_parser = tantivy.QueryParser.for_index(index, default_args) query = query_parser.parse_query("sea whale") top_docs = tantivy.TopDocs(10) @@ -83,3 +102,91 @@ class TestClass(object): assert doc.len == 1 assert not doc.is_empty + + def test_and_query(self, ram_index): + index, searcher, schema, default_args, title, body = ram_index + q_parser = tantivy.QueryParser.for_index(index, default_args) + # look for an intersection of documents + query = q_parser.parse_query("title:men AND body:summer") + top_docs = tantivy.TopDocs(10) + + result = searcher.search(query, top_docs) + print(result) + + # summer isn't present + assert len(result) == 0 + + query = q_parser.parse_query("title:men AND body:winter") + result = searcher.search(query, top_docs) + + assert len(result) == 1 + + def test_query_errors(self, ram_index): + index, searcher, schema, default_args, title, body = ram_index + q_parser = tantivy.QueryParser.for_index(index, default_args) + # no "bod" field + with pytest.raises(ValueError): + q_parser.parse_query("bod:title") + + +@pytest.fixture(scope="class") +def disk_index(): + builder = tantivy.SchemaBuilder() + title = builder.add_text_field("title", stored=True) + body = builder.add_text_field("body") + default_args = [title, body] + schema = builder.build() + schema = schema + index = tantivy.Index(schema) + path_to_index = "tests/test_index/" + return index, path_to_index, schema, default_args, title, body + + +class TestFromDiskClass(object): + + def test_exists(self, disk_index): + # prefer to keep it separate in case anyone deletes this + # runs from the root directory + index, path_to_index, _, _, _, _ = disk_index + assert index.exists(path_to_index) + + def test_opens_from_dir(self, disk_index): + _, path_to_index, schema, _, _, _ = disk_index + tantivy.Index(schema, path_to_index) + + def test_create_readers(self, disk_index): + _, path_to_index, schema, _, _, _ = disk_index + idx = tantivy.Index(schema, path_to_index) + reload_policy = "OnCommit" # or "Manual" + assert idx.reader(reload_policy, 4) + assert idx.reader("Manual", 4) + + def test_create_writer_and_reader(self, disk_index): + _, path_to_index, schema, default_args, title, body = disk_index + idx = tantivy.Index(schema, path_to_index) + writer = idx.writer() + reload_policy = "OnCommit" # or "Manual" + reader = idx.reader(reload_policy, 4) + + # check against the opstamp in the meta file + meta_fname = "meta.json" + with open("{}{}".format(path_to_index, meta_fname)) as f: + json_file = json.load(f) + expected_last_opstamp = json_file["opstamp"] + # ASSUMPTION + # We haven't had any deletes in the index + # so max_doc per index coincides with the value of `num_docs` + # summing them in all segments, gives the number of documents + expected_num_docs = sum([segment["max_doc"] + for segment in json_file["segments"]]) + assert writer.commit_opstamp == expected_last_opstamp + + q_parser = tantivy.QueryParser.for_index(idx, default_args) + # get all documents + query = q_parser.parse_query("*") + top_docs = tantivy.TopDocs(10) + + docs = reader.searcher().search(query, top_docs) + for (_score, doc_addr) in docs: + print(reader.searcher().doc(doc_addr)) + assert expected_num_docs == len(docs) diff --git a/tests/test_index/.managed.json b/tests/test_index/.managed.json new file mode 100644 index 0000000..029c8e7 --- /dev/null +++ b/tests/test_index/.managed.json @@ -0,0 +1 @@ +["1fc74026b3954090b69817d58bcce59a.fieldnorm","f79c6d0a68e04210a4d026ecf151a1e7.pos","1fc74026b3954090b69817d58bcce59a.store","meta.json","f79c6d0a68e04210a4d026ecf151a1e7.fieldnorm","8949f79450d349e2bd51d885f44c354f.fieldnorm","f79c6d0a68e04210a4d026ecf151a1e7.posidx","8949f79450d349e2bd51d885f44c354f.store","8949f79450d349e2bd51d885f44c354f.term","8949f79450d349e2bd51d885f44c354f.pos","1fc74026b3954090b69817d58bcce59a.posidx","1fc74026b3954090b69817d58bcce59a.pos","8949f79450d349e2bd51d885f44c354f.posidx","f79c6d0a68e04210a4d026ecf151a1e7.idx","1fc74026b3954090b69817d58bcce59a.fast","f79c6d0a68e04210a4d026ecf151a1e7.fast","f79c6d0a68e04210a4d026ecf151a1e7.term","1fc74026b3954090b69817d58bcce59a.term","8949f79450d349e2bd51d885f44c354f.idx","1fc74026b3954090b69817d58bcce59a.idx","f79c6d0a68e04210a4d026ecf151a1e7.store","8949f79450d349e2bd51d885f44c354f.fast"] diff --git a/tests/test_index/.tantivy-meta.lock b/tests/test_index/.tantivy-meta.lock new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_index/.tantivy-writer.lock b/tests/test_index/.tantivy-writer.lock new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_index/1fc74026b3954090b69817d58bcce59a.fast b/tests/test_index/1fc74026b3954090b69817d58bcce59a.fast new file mode 100644 index 0000000..707c720 Binary files /dev/null and b/tests/test_index/1fc74026b3954090b69817d58bcce59a.fast differ diff --git a/tests/test_index/1fc74026b3954090b69817d58bcce59a.fieldnorm b/tests/test_index/1fc74026b3954090b69817d58bcce59a.fieldnorm new file mode 100644 index 0000000..fbf2687 Binary files /dev/null and b/tests/test_index/1fc74026b3954090b69817d58bcce59a.fieldnorm differ diff --git a/tests/test_index/1fc74026b3954090b69817d58bcce59a.idx b/tests/test_index/1fc74026b3954090b69817d58bcce59a.idx new file mode 100644 index 0000000..2c13ad6 Binary files /dev/null and b/tests/test_index/1fc74026b3954090b69817d58bcce59a.idx differ diff --git a/tests/test_index/1fc74026b3954090b69817d58bcce59a.pos b/tests/test_index/1fc74026b3954090b69817d58bcce59a.pos new file mode 100644 index 0000000..32c6395 Binary files /dev/null and b/tests/test_index/1fc74026b3954090b69817d58bcce59a.pos differ diff --git a/tests/test_index/1fc74026b3954090b69817d58bcce59a.posidx b/tests/test_index/1fc74026b3954090b69817d58bcce59a.posidx new file mode 100644 index 0000000..67ae764 Binary files /dev/null and b/tests/test_index/1fc74026b3954090b69817d58bcce59a.posidx differ diff --git a/tests/test_index/1fc74026b3954090b69817d58bcce59a.store b/tests/test_index/1fc74026b3954090b69817d58bcce59a.store new file mode 100644 index 0000000..ffa0b21 Binary files /dev/null and b/tests/test_index/1fc74026b3954090b69817d58bcce59a.store differ diff --git a/tests/test_index/1fc74026b3954090b69817d58bcce59a.term b/tests/test_index/1fc74026b3954090b69817d58bcce59a.term new file mode 100644 index 0000000..fe21ca9 Binary files /dev/null and b/tests/test_index/1fc74026b3954090b69817d58bcce59a.term differ diff --git a/tests/test_index/8949f79450d349e2bd51d885f44c354f.fast b/tests/test_index/8949f79450d349e2bd51d885f44c354f.fast new file mode 100644 index 0000000..707c720 Binary files /dev/null and b/tests/test_index/8949f79450d349e2bd51d885f44c354f.fast differ diff --git a/tests/test_index/8949f79450d349e2bd51d885f44c354f.fieldnorm b/tests/test_index/8949f79450d349e2bd51d885f44c354f.fieldnorm new file mode 100644 index 0000000..5610bf0 Binary files /dev/null and b/tests/test_index/8949f79450d349e2bd51d885f44c354f.fieldnorm differ diff --git a/tests/test_index/8949f79450d349e2bd51d885f44c354f.idx b/tests/test_index/8949f79450d349e2bd51d885f44c354f.idx new file mode 100644 index 0000000..f63fec0 Binary files /dev/null and b/tests/test_index/8949f79450d349e2bd51d885f44c354f.idx differ diff --git a/tests/test_index/8949f79450d349e2bd51d885f44c354f.pos b/tests/test_index/8949f79450d349e2bd51d885f44c354f.pos new file mode 100644 index 0000000..69726f8 Binary files /dev/null and b/tests/test_index/8949f79450d349e2bd51d885f44c354f.pos differ diff --git a/tests/test_index/8949f79450d349e2bd51d885f44c354f.posidx b/tests/test_index/8949f79450d349e2bd51d885f44c354f.posidx new file mode 100644 index 0000000..c4716ec Binary files /dev/null and b/tests/test_index/8949f79450d349e2bd51d885f44c354f.posidx differ diff --git a/tests/test_index/8949f79450d349e2bd51d885f44c354f.store b/tests/test_index/8949f79450d349e2bd51d885f44c354f.store new file mode 100644 index 0000000..6a2f543 Binary files /dev/null and b/tests/test_index/8949f79450d349e2bd51d885f44c354f.store differ diff --git a/tests/test_index/8949f79450d349e2bd51d885f44c354f.term b/tests/test_index/8949f79450d349e2bd51d885f44c354f.term new file mode 100644 index 0000000..6f2c110 Binary files /dev/null and b/tests/test_index/8949f79450d349e2bd51d885f44c354f.term differ diff --git a/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fast b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fast new file mode 100644 index 0000000..707c720 Binary files /dev/null and b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fast differ diff --git a/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fieldnorm b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fieldnorm new file mode 100644 index 0000000..c9dc5e3 Binary files /dev/null and b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fieldnorm differ diff --git a/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.idx b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.idx new file mode 100644 index 0000000..14ba696 Binary files /dev/null and b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.idx differ diff --git a/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.pos b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.pos new file mode 100644 index 0000000..f94ad12 Binary files /dev/null and b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.pos differ diff --git a/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.posidx b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.posidx new file mode 100644 index 0000000..f826f16 Binary files /dev/null and b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.posidx differ diff --git a/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.store b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.store new file mode 100644 index 0000000..9969480 Binary files /dev/null and b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.store differ diff --git a/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.term b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.term new file mode 100644 index 0000000..7d994f8 Binary files /dev/null and b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.term differ diff --git a/tests/test_index/meta.json b/tests/test_index/meta.json new file mode 100644 index 0000000..a8d0b15 --- /dev/null +++ b/tests/test_index/meta.json @@ -0,0 +1,44 @@ +{ + "segments": [ + { + "segment_id": "1fc74026-b395-4090-b698-17d58bcce59a", + "max_doc": 1, + "deletes": null + }, + { + "segment_id": "8949f794-50d3-49e2-bd51-d885f44c354f", + "max_doc": 1, + "deletes": null + }, + { + "segment_id": "f79c6d0a-68e0-4210-a4d0-26ecf151a1e7", + "max_doc": 1, + "deletes": null + } + ], + "schema": [ + { + "name": "title", + "type": "text", + "options": { + "indexing": { + "record": "position", + "tokenizer": "default" + }, + "stored": true + } + }, + { + "name": "body", + "type": "text", + "options": { + "indexing": { + "record": "position", + "tokenizer": "default" + }, + "stored": false + } + } + ], + "opstamp": 6 +}