From 9864053cf8559ccfe2980aa8fa8a71e50001a27d Mon Sep 17 00:00:00 2001 From: petr-tik Date: Fri, 2 Aug 2019 22:32:08 +0100 Subject: [PATCH 1/4] Tests added tests for AND BooleanQuery and ValueError in case of a malformed query Moved document index setup into a classmethod. classmethod will setup once at the start, giving all test methods attributes to access. I think it cuts on boilerplate in each test method --- tests/tantivy_test.py | 56 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 8 deletions(-) diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index 5a72026..c6e95e4 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -1,9 +1,15 @@ import json import tantivy +import pytest + class TestClass(object): - def test_simple_search(self): + + @classmethod + def setup_class(cls): + # assume all tests will use the same documents for now + # other methods may set up function-local indexes builder = tantivy.SchemaBuilder() title = builder.add_text_field("title", stored=True) @@ -14,13 +20,18 @@ class TestClass(object): writer = index.writer() + # 2 ways of adding documents + # 1 doc = tantivy.Document() + # create a document instance + # add field-value pairs doc.add_text(title, "The Old Man and the Sea") doc.add_text(body, ("He was an old man who fished alone in a skiff in" "the Gulf Stream and he had gone eighty-four days " "now without taking a fish.")) writer.add_document(doc) - + # 2 use the built-in json support + # keys need to coincide with field names doc = schema.parse_document(json.dumps({ "title": "Of Mice and Men", "body": ("A few miles south of Soledad, the Salinas River drops " @@ -52,23 +63,29 @@ class TestClass(object): writer.add_document(doc) writer.commit() - reader = index.reader() - searcher = reader.searcher() + cls.reader = index.reader() + cls.searcher = cls.reader.searcher() + cls.index = index + cls.schema = schema + cls.default_args = [title, body] + cls.title = title + cls.body = body - query_parser = tantivy.QueryParser.for_index(index, [title, body]) + def test_simple_search(self): + query_parser = tantivy.QueryParser.for_index(self.index, self.default_args) query = query_parser.parse_query("sea whale") top_docs = tantivy.TopDocs(10) - result = searcher.search(query, top_docs) + result = self.searcher.search(query, top_docs) print(result) assert len(result) == 1 _, doc_address = result[0] - searched_doc = searcher.doc(doc_address) - assert searched_doc.get_first(title) == "The Old Man and the Sea" + searched_doc = self.searcher.doc(doc_address) + assert searched_doc.get_first(self.title) == "The Old Man and the Sea" def test_doc(self): builder = tantivy.SchemaBuilder() @@ -83,3 +100,26 @@ class TestClass(object): assert doc.len == 1 assert not doc.is_empty + + def test_and_query(self): + q_parser = tantivy.QueryParser.for_index(self.index, self.default_args) + # look for an intersection of documents + query = q_parser.parse_query("title:men AND body:summer") + top_docs = tantivy.TopDocs(10) + + result = self.searcher.search(query, top_docs) + print(result) + + # summer isn't present + assert len(result) == 0 + + query = q_parser.parse_query("title:men AND body:winter") + result = self.searcher.search(query, top_docs) + + assert len(result) == 1 + + def test_query_errors(self): + q_parser = tantivy.QueryParser.for_index(self.index, self.default_args) + # no "bod" field + with pytest.raises(ValueError): + q_parser.parse_query("bod:title") From b2a769f1d773c1f80a2aece95af2cdffe53b19bb Mon Sep 17 00:00:00 2001 From: petr-tik Date: Sat, 3 Aug 2019 00:39:02 +0100 Subject: [PATCH 2/4] Test reading an index from dir Created and committed an index dir with 3 documents --- tests/tantivy_test.py | 51 ++++++++++++++++++ tests/test_index/.managed.json | 1 + tests/test_index/.tantivy-meta.lock | 0 tests/test_index/.tantivy-writer.lock | 0 .../1fc74026b3954090b69817d58bcce59a.fast | Bin 0 -> 5 bytes ...1fc74026b3954090b69817d58bcce59a.fieldnorm | Bin 0 -> 19 bytes .../1fc74026b3954090b69817d58bcce59a.idx | Bin 0 -> 189 bytes .../1fc74026b3954090b69817d58bcce59a.pos | Bin 0 -> 161 bytes .../1fc74026b3954090b69817d58bcce59a.posidx | Bin 0 -> 27 bytes .../1fc74026b3954090b69817d58bcce59a.store | Bin 0 -> 68 bytes .../1fc74026b3954090b69817d58bcce59a.term | Bin 0 -> 1022 bytes .../8949f79450d349e2bd51d885f44c354f.fast | Bin 0 -> 5 bytes ...8949f79450d349e2bd51d885f44c354f.fieldnorm | Bin 0 -> 19 bytes .../8949f79450d349e2bd51d885f44c354f.idx | Bin 0 -> 91 bytes .../8949f79450d349e2bd51d885f44c354f.pos | Bin 0 -> 145 bytes .../8949f79450d349e2bd51d885f44c354f.posidx | Bin 0 -> 27 bytes .../8949f79450d349e2bd51d885f44c354f.store | Bin 0 -> 76 bytes .../8949f79450d349e2bd51d885f44c354f.term | Bin 0 -> 446 bytes .../f79c6d0a68e04210a4d026ecf151a1e7.fast | Bin 0 -> 5 bytes ...f79c6d0a68e04210a4d026ecf151a1e7.fieldnorm | Bin 0 -> 19 bytes .../f79c6d0a68e04210a4d026ecf151a1e7.idx | Bin 0 -> 115 bytes .../f79c6d0a68e04210a4d026ecf151a1e7.pos | Bin 0 -> 113 bytes .../f79c6d0a68e04210a4d026ecf151a1e7.posidx | Bin 0 -> 27 bytes .../f79c6d0a68e04210a4d026ecf151a1e7.store | Bin 0 -> 65 bytes .../f79c6d0a68e04210a4d026ecf151a1e7.term | Bin 0 -> 649 bytes tests/test_index/meta.json | 44 +++++++++++++++ 26 files changed, 96 insertions(+) create mode 100644 tests/test_index/.managed.json create mode 100644 tests/test_index/.tantivy-meta.lock create mode 100644 tests/test_index/.tantivy-writer.lock create mode 100644 tests/test_index/1fc74026b3954090b69817d58bcce59a.fast create mode 100644 tests/test_index/1fc74026b3954090b69817d58bcce59a.fieldnorm create mode 100644 tests/test_index/1fc74026b3954090b69817d58bcce59a.idx create mode 100644 tests/test_index/1fc74026b3954090b69817d58bcce59a.pos create mode 100644 tests/test_index/1fc74026b3954090b69817d58bcce59a.posidx create mode 100644 tests/test_index/1fc74026b3954090b69817d58bcce59a.store create mode 100644 tests/test_index/1fc74026b3954090b69817d58bcce59a.term create mode 100644 tests/test_index/8949f79450d349e2bd51d885f44c354f.fast create mode 100644 tests/test_index/8949f79450d349e2bd51d885f44c354f.fieldnorm create mode 100644 tests/test_index/8949f79450d349e2bd51d885f44c354f.idx create mode 100644 tests/test_index/8949f79450d349e2bd51d885f44c354f.pos create mode 100644 tests/test_index/8949f79450d349e2bd51d885f44c354f.posidx create mode 100644 tests/test_index/8949f79450d349e2bd51d885f44c354f.store create mode 100644 tests/test_index/8949f79450d349e2bd51d885f44c354f.term create mode 100644 tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fast create mode 100644 tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fieldnorm create mode 100644 tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.idx create mode 100644 tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.pos create mode 100644 tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.posidx create mode 100644 tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.store create mode 100644 tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.term create mode 100644 tests/test_index/meta.json diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index c6e95e4..d213cfe 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -123,3 +123,54 @@ class TestClass(object): # no "bod" field with pytest.raises(ValueError): q_parser.parse_query("bod:title") + + +class TestFromDiskClass(object): + + @classmethod + def setup_class(cls): + builder = tantivy.SchemaBuilder() + title = builder.add_text_field("title", stored=True) + body = builder.add_text_field("body") + cls.default_args = [title, body] + schema = builder.build() + cls.schema = schema + cls.index = tantivy.Index(schema) + + def test_exists(self): + # prefer to keep it separate in case anyone deletes this + # runs from the root directory + path_to_index = "tests/test_index/" + assert self.index.exists(path_to_index) + + def test_opens_from_dir(self): + path_to_index = "tests/test_index/" + tantivy.Index(self.schema, path_to_index) + + def test_create_readers(self): + path_to_index = "tests/test_index/" + idx = tantivy.Index(self.schema, path_to_index) + reload_policy = "OnCommit" # or "Manual" + assert idx.reader(reload_policy, 4) + assert idx.reader("Manual", 4) + + def test_create_writer_and_reader(self): + path_to_index = "tests/test_index/" + idx = tantivy.Index(self.schema, path_to_index) + writer = idx.writer() + reload_policy = "OnCommit" # or "Manual" + reader = idx.reader(reload_policy, 4) + + # check against the opstamp in the meta file + with open("tests/test_index/meta.json") as f: + expected_last_opstamp = json.load(f)["opstamp"] + assert writer.commit_opstamp == expected_last_opstamp + + q_parser = tantivy.QueryParser.for_index(idx, self.default_args) + # get all documents + query = q_parser.parse_query("*") + top_docs = tantivy.TopDocs(10) + + docs = reader.searcher().search(query, top_docs) + for (_score, doc_addr) in docs: + print(reader.searcher().doc(doc_addr)) diff --git a/tests/test_index/.managed.json b/tests/test_index/.managed.json new file mode 100644 index 0000000..029c8e7 --- /dev/null +++ b/tests/test_index/.managed.json @@ -0,0 +1 @@ +["1fc74026b3954090b69817d58bcce59a.fieldnorm","f79c6d0a68e04210a4d026ecf151a1e7.pos","1fc74026b3954090b69817d58bcce59a.store","meta.json","f79c6d0a68e04210a4d026ecf151a1e7.fieldnorm","8949f79450d349e2bd51d885f44c354f.fieldnorm","f79c6d0a68e04210a4d026ecf151a1e7.posidx","8949f79450d349e2bd51d885f44c354f.store","8949f79450d349e2bd51d885f44c354f.term","8949f79450d349e2bd51d885f44c354f.pos","1fc74026b3954090b69817d58bcce59a.posidx","1fc74026b3954090b69817d58bcce59a.pos","8949f79450d349e2bd51d885f44c354f.posidx","f79c6d0a68e04210a4d026ecf151a1e7.idx","1fc74026b3954090b69817d58bcce59a.fast","f79c6d0a68e04210a4d026ecf151a1e7.fast","f79c6d0a68e04210a4d026ecf151a1e7.term","1fc74026b3954090b69817d58bcce59a.term","8949f79450d349e2bd51d885f44c354f.idx","1fc74026b3954090b69817d58bcce59a.idx","f79c6d0a68e04210a4d026ecf151a1e7.store","8949f79450d349e2bd51d885f44c354f.fast"] diff --git a/tests/test_index/.tantivy-meta.lock b/tests/test_index/.tantivy-meta.lock new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_index/.tantivy-writer.lock b/tests/test_index/.tantivy-writer.lock new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_index/1fc74026b3954090b69817d58bcce59a.fast b/tests/test_index/1fc74026b3954090b69817d58bcce59a.fast new file mode 100644 index 0000000000000000000000000000000000000000..707c720ff0147fbbf08df5514ea03da0c9b28b56 GIT binary patch literal 5 McmZo*WME(b00M~sf&c&j literal 0 HcmV?d00001 diff --git a/tests/test_index/1fc74026b3954090b69817d58bcce59a.fieldnorm b/tests/test_index/1fc74026b3954090b69817d58bcce59a.fieldnorm new file mode 100644 index 0000000000000000000000000000000000000000..fbf2687e9306f645d171f5328efafd61a606423e GIT binary patch literal 19 UcmZRvYHDBrf`&#$5XlQ903uuh&;S4c literal 0 HcmV?d00001 diff --git a/tests/test_index/1fc74026b3954090b69817d58bcce59a.idx b/tests/test_index/1fc74026b3954090b69817d58bcce59a.idx new file mode 100644 index 0000000000000000000000000000000000000000..2c13ad64ffce2ad9c26bfc144eca65bd0c329c6d GIT binary patch literal 189 zcmZQ!fPjWZ2+T(C+hAe@=q9jya|48ingiDZ5pROZBCA1W!*ull^`VNxc+CxsO${K& MHB4Xxk-R_x0L)TRe*gdg literal 0 HcmV?d00001 diff --git a/tests/test_index/1fc74026b3954090b69817d58bcce59a.pos b/tests/test_index/1fc74026b3954090b69817d58bcce59a.pos new file mode 100644 index 0000000000000000000000000000000000000000..32c63959bb879b666a6e147de048556205558247 GIT binary patch literal 161 zcmZQ#U|?VdVn!gw1{zK-_fii(S*)9JQbznrfXKRlM@G`GOtyNh zYH$cNj5x}a?ElrWOL?u6;a)f9tYk&PxXe2k}A)k4nWE28{(0Ym5$VV4OH( b35$(bE=SHfWeh8u8bCq~3qbB^-~|!@^xrIC literal 0 HcmV?d00001 diff --git a/tests/test_index/1fc74026b3954090b69817d58bcce59a.posidx b/tests/test_index/1fc74026b3954090b69817d58bcce59a.posidx new file mode 100644 index 0000000000000000000000000000000000000000..67ae7649281f7ad5965c852fbd4095e787588733 GIT binary patch literal 27 YcmZQ#00MRp(bNDU8(JAbBrlKv03Q4Tp8x;= literal 0 HcmV?d00001 diff --git a/tests/test_index/1fc74026b3954090b69817d58bcce59a.store b/tests/test_index/1fc74026b3954090b69817d58bcce59a.store new file mode 100644 index 0000000000000000000000000000000000000000..ffa0b21f77ee6db22686b90591e224000c843816 GIT binary patch literal 68 zcmdO7U|{&q#=ubQml#kG$tcahz}9x~%CttHFatxsf0}}CW^$@RVqS`ZZ)#p+XQM7y Lf`I`>GXnVlK4}ga literal 0 HcmV?d00001 diff --git a/tests/test_index/1fc74026b3954090b69817d58bcce59a.term b/tests/test_index/1fc74026b3954090b69817d58bcce59a.term new file mode 100644 index 0000000000000000000000000000000000000000..fe21ca9bd00a061346ff0cb796c979c7929f9017 GIT binary patch literal 1022 zcmZWnZAcVr6n@Xlj=Gk1UNzCO?rddbRFo8zd4H^NFMU&yQBu&BYi_JNHtkw=Z=uVk zc{S}W_x3IHK}0_yilPsS*lo8NcdDtR4;6nXL{voF9~X)UIwQLf^};*v;XLPkpXWSt z1op$iu)(mMVK4yUMD(!=r{isAmRpCN2aoI-R z$tzS9uiGMNkDGeGgu!Q~h`|v7c2yycQyA7Cm?m*_dt6)~gqDAokBgVFwKtM5dda)^ z9X!#8K}(3Q&T_Lg`q1(-El(kL#XjsR!mh{=w(IeTLHUe;pIaORbKx9@G%e16xw(@r z%?H-ttKs5R48=Z?S7ShKkM=+5rE>c=Qc#6w2k=6|CJJmhNErc(!B!6P0Yk3Nq+V&bzUS87yMIRZ$D4XWk6NA{eEwHo$6)8s z;n(4}|9m*|-_`%GeZKkiHr$o&*&|u^d3)JVn!fl1!6+r0TKhoy<)nKFFJGzBYQV1Ut#Kt2G&j}RFE literal 0 HcmV?d00001 diff --git a/tests/test_index/8949f79450d349e2bd51d885f44c354f.term b/tests/test_index/8949f79450d349e2bd51d885f44c354f.term new file mode 100644 index 0000000000000000000000000000000000000000..6f2c11069f5ff046c693b622888e7e9f44b8ed31 GIT binary patch literal 446 zcmZQ#Km!7kPBRE}A7vBhXAo#T#30agj)j?tk%5twlSiPWI6pU0kd+mx#TZJ?=y>iJgFyRbCZN0WGX6&K(5#lp`w>D99`-EGWRw$IH#h z#>&FLD5hd&X=~@`8JbgEQ4S1;yxh!;^t9BJL_uB&s2>hPX;^T;#9>~<8XRn_tP>@8 zg)~JcTgr-fsZNd370cc_z1voDj_a&7zS4U`=bVk@4T2ic)Bs{M{0EuTzzZY*Nia&~ literal 0 HcmV?d00001 diff --git a/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fast b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fast new file mode 100644 index 0000000000000000000000000000000000000000..707c720ff0147fbbf08df5514ea03da0c9b28b56 GIT binary patch literal 5 McmZo*WME(b00M~sf&c&j literal 0 HcmV?d00001 diff --git a/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fieldnorm b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.fieldnorm new file mode 100644 index 0000000000000000000000000000000000000000..c9dc5e3ec672c69669cfc38218aea5b3bae34a8d GIT binary patch literal 19 UcmZS7X=-2qf`&#$5XlQ903jy=zW@LL literal 0 HcmV?d00001 diff --git a/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.idx b/tests/test_index/f79c6d0a68e04210a4d026ecf151a1e7.idx new file mode 100644 index 0000000000000000000000000000000000000000..14ba69692de5412a122985f96d74c02d39b8b278 GIT binary patch literal 115 scmZQ%fPjWZLpZCUv7rfw3Bwkknr1Y0O=x0fNBIw1FDNPR^1cmBBLBe)xR?P>i zP*D-lLl5@gNkI_ALx%n9E@<3}KMfwmiwCdXd@H+`&f$IYzM1*vd+%}Bvhc1adrlyK z1-3h2@nJs3JuxPH@^)(;^M_*O$-M+|MS6j~-nV>UFeeHlxazrp9e2Y?DGXR9x1(M& zRFMK~2eL1H3VFN}$j~ECENfH*w*YV296LS7c8J@iYl0Wh^xSn<_5hbXBGU_o;8SRl zw)r*iLTgjHMCYoy;0!9TP>--#td#`kq^l7EJ`Oy`mM$=+iULG_>V}vCddfFB@0L0X@x*=w(ePm$4gC84lnBB2!hW?ZDp2{KT%E<6~lYdp47%#*M;ZI0mO+ z9AMgiqZ1HNdQLj~vie8@A=G4+89*o;>#^Xw+nefSU;^QR!k1R8k zE_C|Gr$@(ToKNkgefv-Np|p-1J^5inNf(Q!!_U*j+1WFXzP#x!Y8NKHjXSrS>hAAn ghn{!!)y=mvuOC)k4F2*)tlDWso7T_huhuYyf4teERR910 literal 0 HcmV?d00001 diff --git a/tests/test_index/meta.json b/tests/test_index/meta.json new file mode 100644 index 0000000..a8d0b15 --- /dev/null +++ b/tests/test_index/meta.json @@ -0,0 +1,44 @@ +{ + "segments": [ + { + "segment_id": "1fc74026-b395-4090-b698-17d58bcce59a", + "max_doc": 1, + "deletes": null + }, + { + "segment_id": "8949f794-50d3-49e2-bd51-d885f44c354f", + "max_doc": 1, + "deletes": null + }, + { + "segment_id": "f79c6d0a-68e0-4210-a4d0-26ecf151a1e7", + "max_doc": 1, + "deletes": null + } + ], + "schema": [ + { + "name": "title", + "type": "text", + "options": { + "indexing": { + "record": "position", + "tokenizer": "default" + }, + "stored": true + } + }, + { + "name": "body", + "type": "text", + "options": { + "indexing": { + "record": "position", + "tokenizer": "default" + }, + "stored": false + } + } + ], + "opstamp": 6 +} From bd93e4ca750223cea3ff52e7c6ac9bd91c06f12e Mon Sep 17 00:00:00 2001 From: petr-tik Date: Sat, 3 Aug 2019 00:55:31 +0100 Subject: [PATCH 3/4] use meta.json to programmatically check last opstamp and num_docs --- tests/tantivy_test.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index d213cfe..2dd2782 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -136,34 +136,39 @@ class TestFromDiskClass(object): schema = builder.build() cls.schema = schema cls.index = tantivy.Index(schema) + cls.path_to_index = "tests/test_index/" def test_exists(self): # prefer to keep it separate in case anyone deletes this # runs from the root directory - path_to_index = "tests/test_index/" - assert self.index.exists(path_to_index) + assert self.index.exists(self.path_to_index) def test_opens_from_dir(self): - path_to_index = "tests/test_index/" - tantivy.Index(self.schema, path_to_index) + tantivy.Index(self.schema, self.path_to_index) def test_create_readers(self): - path_to_index = "tests/test_index/" - idx = tantivy.Index(self.schema, path_to_index) + idx = tantivy.Index(self.schema, self.path_to_index) reload_policy = "OnCommit" # or "Manual" assert idx.reader(reload_policy, 4) assert idx.reader("Manual", 4) def test_create_writer_and_reader(self): - path_to_index = "tests/test_index/" - idx = tantivy.Index(self.schema, path_to_index) + idx = tantivy.Index(self.schema, self.path_to_index) writer = idx.writer() reload_policy = "OnCommit" # or "Manual" reader = idx.reader(reload_policy, 4) # check against the opstamp in the meta file - with open("tests/test_index/meta.json") as f: - expected_last_opstamp = json.load(f)["opstamp"] + meta_fname = "meta.json" + with open("{}{}".format(self.path_to_index, meta_fname)) as f: + json_file = json.load(f) + expected_last_opstamp = json_file["opstamp"] + # ASSUMPTION + # We haven't had any deletes in the index + # so max_doc per index coincides with the value of `num_docs` + # summing them in all segments, gives the number of documents + expected_num_docs = sum([segment["max_doc"] + for segment in json_file["segments"]]) assert writer.commit_opstamp == expected_last_opstamp q_parser = tantivy.QueryParser.for_index(idx, self.default_args) @@ -174,3 +179,4 @@ class TestFromDiskClass(object): docs = reader.searcher().search(query, top_docs) for (_score, doc_addr) in docs: print(reader.searcher().doc(doc_addr)) + assert expected_num_docs == len(docs) From b911c2f353dc7e1cecac568209cecc390eef394e Mon Sep 17 00:00:00 2001 From: petr-tik Date: Sat, 3 Aug 2019 12:28:12 +0100 Subject: [PATCH 4/4] Replace setup_class methods with pytest.fixture --- tests/tantivy_test.py | 96 ++++++++++++++++++++++++------------------- 1 file changed, 53 insertions(+), 43 deletions(-) diff --git a/tests/tantivy_test.py b/tests/tantivy_test.py index 2dd2782..6da61aa 100644 --- a/tests/tantivy_test.py +++ b/tests/tantivy_test.py @@ -4,10 +4,8 @@ import tantivy import pytest -class TestClass(object): - - @classmethod - def setup_class(cls): +@pytest.fixture(scope="class") +def ram_index(): # assume all tests will use the same documents for now # other methods may set up function-local indexes builder = tantivy.SchemaBuilder() @@ -63,29 +61,33 @@ class TestClass(object): writer.add_document(doc) writer.commit() - cls.reader = index.reader() - cls.searcher = cls.reader.searcher() - cls.index = index - cls.schema = schema - cls.default_args = [title, body] - cls.title = title - cls.body = body + reader = index.reader() + searcher = reader.searcher() + index = index + schema = schema + default_args = [title, body] + ret = (index, searcher, schema, default_args, title, body) + return ret - def test_simple_search(self): - query_parser = tantivy.QueryParser.for_index(self.index, self.default_args) + +class TestClass(object): + + def test_simple_search(self, ram_index): + index, searcher, schema, default_args, title, body = ram_index + query_parser = tantivy.QueryParser.for_index(index, default_args) query = query_parser.parse_query("sea whale") top_docs = tantivy.TopDocs(10) - result = self.searcher.search(query, top_docs) + result = searcher.search(query, top_docs) print(result) assert len(result) == 1 _, doc_address = result[0] - searched_doc = self.searcher.doc(doc_address) - assert searched_doc.get_first(self.title) == "The Old Man and the Sea" + searched_doc = searcher.doc(doc_address) + assert searched_doc.get_first(title) == "The Old Man and the Sea" def test_doc(self): builder = tantivy.SchemaBuilder() @@ -101,66 +103,74 @@ class TestClass(object): assert doc.len == 1 assert not doc.is_empty - def test_and_query(self): - q_parser = tantivy.QueryParser.for_index(self.index, self.default_args) + def test_and_query(self, ram_index): + index, searcher, schema, default_args, title, body = ram_index + q_parser = tantivy.QueryParser.for_index(index, default_args) # look for an intersection of documents query = q_parser.parse_query("title:men AND body:summer") top_docs = tantivy.TopDocs(10) - result = self.searcher.search(query, top_docs) + result = searcher.search(query, top_docs) print(result) # summer isn't present assert len(result) == 0 query = q_parser.parse_query("title:men AND body:winter") - result = self.searcher.search(query, top_docs) + result = searcher.search(query, top_docs) assert len(result) == 1 - def test_query_errors(self): - q_parser = tantivy.QueryParser.for_index(self.index, self.default_args) + def test_query_errors(self, ram_index): + index, searcher, schema, default_args, title, body = ram_index + q_parser = tantivy.QueryParser.for_index(index, default_args) # no "bod" field with pytest.raises(ValueError): q_parser.parse_query("bod:title") +@pytest.fixture(scope="class") +def disk_index(): + builder = tantivy.SchemaBuilder() + title = builder.add_text_field("title", stored=True) + body = builder.add_text_field("body") + default_args = [title, body] + schema = builder.build() + schema = schema + index = tantivy.Index(schema) + path_to_index = "tests/test_index/" + return index, path_to_index, schema, default_args, title, body + + class TestFromDiskClass(object): - @classmethod - def setup_class(cls): - builder = tantivy.SchemaBuilder() - title = builder.add_text_field("title", stored=True) - body = builder.add_text_field("body") - cls.default_args = [title, body] - schema = builder.build() - cls.schema = schema - cls.index = tantivy.Index(schema) - cls.path_to_index = "tests/test_index/" - - def test_exists(self): + def test_exists(self, disk_index): # prefer to keep it separate in case anyone deletes this # runs from the root directory - assert self.index.exists(self.path_to_index) + index, path_to_index, _, _, _, _ = disk_index + assert index.exists(path_to_index) - def test_opens_from_dir(self): - tantivy.Index(self.schema, self.path_to_index) + def test_opens_from_dir(self, disk_index): + _, path_to_index, schema, _, _, _ = disk_index + tantivy.Index(schema, path_to_index) - def test_create_readers(self): - idx = tantivy.Index(self.schema, self.path_to_index) + def test_create_readers(self, disk_index): + _, path_to_index, schema, _, _, _ = disk_index + idx = tantivy.Index(schema, path_to_index) reload_policy = "OnCommit" # or "Manual" assert idx.reader(reload_policy, 4) assert idx.reader("Manual", 4) - def test_create_writer_and_reader(self): - idx = tantivy.Index(self.schema, self.path_to_index) + def test_create_writer_and_reader(self, disk_index): + _, path_to_index, schema, default_args, title, body = disk_index + idx = tantivy.Index(schema, path_to_index) writer = idx.writer() reload_policy = "OnCommit" # or "Manual" reader = idx.reader(reload_policy, 4) # check against the opstamp in the meta file meta_fname = "meta.json" - with open("{}{}".format(self.path_to_index, meta_fname)) as f: + with open("{}{}".format(path_to_index, meta_fname)) as f: json_file = json.load(f) expected_last_opstamp = json_file["opstamp"] # ASSUMPTION @@ -171,7 +181,7 @@ class TestFromDiskClass(object): for segment in json_file["segments"]]) assert writer.commit_opstamp == expected_last_opstamp - q_parser = tantivy.QueryParser.for_index(idx, self.default_args) + q_parser = tantivy.QueryParser.for_index(idx, default_args) # get all documents query = q_parser.parse_query("*") top_docs = tantivy.TopDocs(10)