Merge branch 'more_tests'

2019-08-03 13:56:36 +02:00 · 2019-08-03 13:56:36 +02:00 · 41a6bb3109
commit 41a6bb3109
parent d785822225 b911c2f353
26 changed files with 156 additions and 4 deletions
--- a/tests/tantivy_test.py
+++ b/tests/tantivy_test.py
@ -1,9 +1,13 @@
 import json
 import tantivy

+import pytest

-class TestClass(object):
-    def test_simple_search(self):
+
+@pytest.fixture(scope="class")
+def ram_index():
+        # assume all tests will use the same documents for now
+        # other methods may set up function-local indexes
        builder = tantivy.SchemaBuilder()

        title = builder.add_text_field("title", stored=True)
@ -14,13 +18,18 @@ class TestClass(object):

        writer = index.writer()

+        # 2 ways of adding documents
+        # 1
        doc = tantivy.Document()
+        # create a document instance
+        # add field-value pairs
        doc.add_text(title, "The Old Man and the Sea")
        doc.add_text(body, ("He was an old man who fished alone in a skiff in"
                            "the Gulf Stream and he had gone eighty-four days "
                            "now without taking a fish."))
        writer.add_document(doc)
-
+        # 2 use the built-in json support
+        # keys need to coincide with field names
        doc = schema.parse_document(json.dumps({
            "title": "Of Mice and Men",
            "body": ("A few miles south of Soledad, the Salinas River drops "
@ -54,8 +63,18 @@ class TestClass(object):

        reader = index.reader()
        searcher = reader.searcher()
+        index = index
+        schema = schema
+        default_args = [title, body]
+        ret = (index, searcher, schema, default_args, title, body)
+        return ret

-        query_parser = tantivy.QueryParser.for_index(index, [title, body])
+
+class TestClass(object):
+
+    def test_simple_search(self, ram_index):
+        index, searcher, schema, default_args, title, body = ram_index
+        query_parser = tantivy.QueryParser.for_index(index, default_args)
        query = query_parser.parse_query("sea whale")

        top_docs = tantivy.TopDocs(10)
@ -83,3 +102,91 @@ class TestClass(object):

        assert doc.len == 1
        assert not doc.is_empty
+
+    def test_and_query(self, ram_index):
+        # Boolean AND across fields: a document must satisfy both clauses
+        # to be returned.
+        # ram_index is unpacked as a 6-tuple; only index, searcher and
+        # default_args are used in this test.
+        index, searcher, schema, default_args, title, body = ram_index
+        q_parser = tantivy.QueryParser.for_index(index, default_args)
+        # look for an intersection of documents: title matches "men"
+        # AND body matches "summer"
+        query = q_parser.parse_query("title:men AND body:summer")
+        top_docs = tantivy.TopDocs(10)
+
+        result = searcher.search(query, top_docs)
+        # debugging aid: dump the raw search result
+        print(result)
+
+        # "summer" isn't present, so the intersection is expected to be empty
+        assert len(result) == 0
+
+        # same title clause, different body term: exactly one document
+        # is expected to satisfy both clauses
+        query = q_parser.parse_query("title:men AND body:winter")
+        result = searcher.search(query, top_docs)
+
+        assert len(result) == 1
+
+    def test_query_errors(self, ram_index):
+        # A query that names an unknown field must be rejected with
+        # ValueError instead of being parsed.
+        index, searcher, schema, default_args, title, body = ram_index
+        q_parser = tantivy.QueryParser.for_index(index, default_args)
+        # there is no "bod" field, so parsing is expected to fail
+        with pytest.raises(ValueError):
+            q_parser.parse_query("bod:title")
+
+
+@pytest.fixture(scope="class")
+def disk_index():
+    # Class-scoped fixture for the tests that work with the index stored
+    # under tests/test_index/. It only builds the schema and hands back the
+    # path; tests that need the on-disk data open the directory themselves.
+    builder = tantivy.SchemaBuilder()
+    # "title" is a stored text field; "body" is added with default options
+    title = builder.add_text_field("title", stored=True)
+    body = builder.add_text_field("body")
+    # fields handed to QueryParser.for_index as its second argument
+    default_args = [title, body]
+    schema = builder.build()
+    # NOTE(review): self-assignment, has no effect
+    schema = schema
+    # built from the schema alone, without a path
+    index = tantivy.Index(schema)
+    # relative path: the tests have to run from the repository root
+    path_to_index = "tests/test_index/"
+    # returned in this order; the tests unpack the tuple positionally
+    return index, path_to_index, schema, default_args, title, body
+
+
+class TestFromDiskClass(object):
+    # Tests that operate on the index directory supplied by the
+    # disk_index fixture.
+
+    def test_exists(self, disk_index):
+        # kept as a separate test so that a deleted index directory is
+        # reported on its own
+        # the path is relative: run the tests from the root directory
+        index, path_to_index, _, _, _, _ = disk_index
+        assert index.exists(path_to_index)
+
+    def test_opens_from_dir(self, disk_index):
+        # opening the directory with the fixture's schema must not raise;
+        # nothing else is asserted
+        _, path_to_index, schema, _, _, _ = disk_index
+        tantivy.Index(schema, path_to_index)
+
+    def test_create_readers(self, disk_index):
+        # a reader must be obtainable for both reload policies
+        _, path_to_index, schema, _, _, _ = disk_index
+        idx = tantivy.Index(schema, path_to_index)
+        reload_policy = "OnCommit"  # or "Manual"
+        # NOTE(review): the second argument (4) is presumably the number
+        # of searchers; confirm against Index.reader
+        assert idx.reader(reload_policy, 4)
+        assert idx.reader("Manual", 4)
+
+    def test_create_writer_and_reader(self, disk_index):
+        # cross-checks the writer's opstamp and the number of searchable
+        # documents against the values recorded in meta.json
+        _, path_to_index, schema, default_args, title, body = disk_index
+        idx = tantivy.Index(schema, path_to_index)
+        writer = idx.writer()
+        reload_policy = "OnCommit"  # or "Manual"
+        reader = idx.reader(reload_policy, 4)
+
+        # check against the opstamp in the meta file
+        meta_fname = "meta.json"
+        # plain concatenation: relies on path_to_index ending with "/"
+        with open("{}{}".format(path_to_index, meta_fname)) as f:
+            json_file = json.load(f)
+            expected_last_opstamp = json_file["opstamp"]
+            # ASSUMPTION
+            # There have been no deletes in the index, so each segment's
+            # max_doc coincides with its `num_docs`; summing max_doc over
+            # all segments gives the total number of documents
+            expected_num_docs = sum([segment["max_doc"]
+                                     for segment in json_file["segments"]])
+        assert writer.commit_opstamp == expected_last_opstamp
+
+        q_parser = tantivy.QueryParser.for_index(idx, default_args)
+        # get all documents
+        query = q_parser.parse_query("*")
+        # NOTE(review): TopDocs(10) presumably caps the result at 10 hits,
+        # so the count check below assumes at most 10 documents; confirm
+        top_docs = tantivy.TopDocs(10)
+
+        docs = reader.searcher().search(query, top_docs)
+        # each hit unpacks as (score, document address); print the
+        # document fetched for that address
+        for (_score, doc_addr) in docs:
+            print(reader.searcher().doc(doc_addr))
+        assert expected_num_docs == len(docs)
--- a/tests/test_index/.managed.json
+++ b/tests/test_index/.managed.json
@ -0,0 +1 @@
+["1fc74026b3954090b69817d58bcce59a.fieldnorm","f79c6d0a68e04210a4d026ecf151a1e7.pos","1fc74026b3954090b69817d58bcce59a.store","meta.json","f79c6d0a68e04210a4d026ecf151a1e7.fieldnorm","8949f79450d349e2bd51d885f44c354f.fieldnorm","f79c6d0a68e04210a4d026ecf151a1e7.posidx","8949f79450d349e2bd51d885f44c354f.store","8949f79450d349e2bd51d885f44c354f.term","8949f79450d349e2bd51d885f44c354f.pos","1fc74026b3954090b69817d58bcce59a.posidx","1fc74026b3954090b69817d58bcce59a.pos","8949f79450d349e2bd51d885f44c354f.posidx","f79c6d0a68e04210a4d026ecf151a1e7.idx","1fc74026b3954090b69817d58bcce59a.fast","f79c6d0a68e04210a4d026ecf151a1e7.fast","f79c6d0a68e04210a4d026ecf151a1e7.term","1fc74026b3954090b69817d58bcce59a.term","8949f79450d349e2bd51d885f44c354f.idx","1fc74026b3954090b69817d58bcce59a.idx","f79c6d0a68e04210a4d026ecf151a1e7.store","8949f79450d349e2bd51d885f44c354f.fast"]
--- a/tests/test_index/.tantivy-meta.lock
+++ b/tests/test_index/.tantivy-meta.lock
--- a/tests/test_index/.tantivy-writer.lock
+++ b/tests/test_index/.tantivy-writer.lock
--- a/tests/test_index/1fc74026b3954090b69817d58bcce59a.fast
+++ b/tests/test_index/1fc74026b3954090b69817d58bcce59a.fast
--- a/tests/test_index/1fc74026b3954090b69817d58bcce59a.fieldnorm
+++ b/tests/test_index/1fc74026b3954090b69817d58bcce59a.fieldnorm
--- a/tests/test_index/1fc74026b3954090b69817d58bcce59a.idx
+++ b/tests/test_index/1fc74026b3954090b69817d58bcce59a.idx
--- a/tests/test_index/1fc74026b3954090b69817d58bcce59a.pos
+++ b/tests/test_index/1fc74026b3954090b69817d58bcce59a.pos
--- a/tests/test_index/1fc74026b3954090b69817d58bcce59a.posidx
+++ b/tests/test_index/1fc74026b3954090b69817d58bcce59a.posidx
--- a/tests/test_index/1fc74026b3954090b69817d58bcce59a.store
+++ b/tests/test_index/1fc74026b3954090b69817d58bcce59a.store
--- a/tests/test_index/1fc74026b3954090b69817d58bcce59a.term
+++ b/tests/test_index/1fc74026b3954090b69817d58bcce59a.term
--- a/tests/test_index/8949f79450d349e2bd51d885f44c354f.fast
+++ b/tests/test_index/8949f79450d349e2bd51d885f44c354f.fast
--- a/tests/test_index/8949f79450d349e2bd51d885f44c354f.fieldnorm
+++ b/tests/test_index/8949f79450d349e2bd51d885f44c354f.fieldnorm
--- a/tests/test_index/8949f79450d349e2bd51d885f44c354f.idx
+++ b/tests/test_index/8949f79450d349e2bd51d885f44c354f.idx
--- a/tests/test_index/8949f79450d349e2bd51d885f44c354f.pos
+++ b/tests/test_index/8949f79450d349e2bd51d885f44c354f.pos
--- a/tests/test_index/8949f79450d349e2bd51d885f44c354f.posidx
+++ b/tests/test_index/8949f79450d349e2bd51d885f44c354f.posidx
--- a/tests/test_index/8949f79450d349e2bd51d885f44c354f.store
+++ b/tests/test_index/8949f79450d349e2bd51d885f44c354f.store
--- a/tests/test_index/8949f79450d349e2bd51d885f44c354f.term
+++ b/tests/test_index/8949f79450d349e2bd51d885f44c354f.term
--- a/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fast
+++ b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fast
--- a/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fieldnorm
+++ b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fieldnorm
--- a/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.idx
+++ b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.idx
--- a/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.pos
+++ b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.pos
--- a/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.posidx
+++ b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.posidx
--- a/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.store
+++ b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.store
--- a/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.term
+++ b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.term
--- a/tests/test_index/meta.json
+++ b/tests/test_index/meta.json
@ -0,0 +1,44 @@
+{
+  "segments": [
+    {
+      "segment_id": "1fc74026-b395-4090-b698-17d58bcce59a",
+      "max_doc": 1,
+      "deletes": null
+    },
+    {
+      "segment_id": "8949f794-50d3-49e2-bd51-d885f44c354f",
+      "max_doc": 1,
+      "deletes": null
+    },
+    {
+      "segment_id": "f79c6d0a-68e0-4210-a4d0-26ecf151a1e7",
+      "max_doc": 1,
+      "deletes": null
+    }
+  ],
+  "schema": [
+    {
+      "name": "title",
+      "type": "text",
+      "options": {
+        "indexing": {
+          "record": "position",
+          "tokenizer": "default"
+        },
+        "stored": true
+      }
+    },
+    {
+      "name": "body",
+      "type": "text",
+      "options": {
+        "indexing": {
+          "record": "position",
+          "tokenizer": "default"
+        },
+        "stored": false
+      }
+    }
+  ],
+  "opstamp": 6
+}
				`@ -0,0 +1 @@`
				["1fc74026b3954090b69817d58bcce59a.fieldnorm","f79c6d0a68e04210a4d026ecf151a1e7.pos","1fc74026b3954090b69817d58bcce59a.store","meta.json","f79c6d0a68e04210a4d026ecf151a1e7.fieldnorm","8949f79450d349e2bd51d885f44c354f.fieldnorm","f79c6d0a68e04210a4d026ecf151a1e7.posidx","8949f79450d349e2bd51d885f44c354f.store","8949f79450d349e2bd51d885f44c354f.term","8949f79450d349e2bd51d885f44c354f.pos","1fc74026b3954090b69817d58bcce59a.posidx","1fc74026b3954090b69817d58bcce59a.pos","8949f79450d349e2bd51d885f44c354f.posidx","f79c6d0a68e04210a4d026ecf151a1e7.idx","1fc74026b3954090b69817d58bcce59a.fast","f79c6d0a68e04210a4d026ecf151a1e7.fast","f79c6d0a68e04210a4d026ecf151a1e7.term","1fc74026b3954090b69817d58bcce59a.term","8949f79450d349e2bd51d885f44c354f.idx","1fc74026b3954090b69817d58bcce59a.idx","f79c6d0a68e04210a4d026ecf151a1e7.store","8949f79450d349e2bd51d885f44c354f.fast"]