2019-06-04 09:09:58 +00:00
import tantivy
2019-08-02 21:32:08 +00:00
import pytest
2019-08-02 11:23:10 +00:00
from tantivy import Document , Index , SchemaBuilder , Schema
def schema():
    """Build the schema shared by the tests in this module.

    Two text fields are registered: a title field added with
    ``stored=True`` and a body field added with default options.
    """
    builder = SchemaBuilder()
    # Rebind on every step, mirroring the fluent chain the builder exposes.
    builder = builder.add_text_field(" title ", stored=True)
    builder = builder.add_text_field(" body ")
    return builder.build()
2019-06-04 09:09:58 +00:00
2019-08-03 11:28:12 +00:00
@pytest.fixture(scope=" class ")
def ram_index():
    """Return an index built from ``schema()`` and filled with three documents.

    The index is created without a path argument. Each document is added
    through a different API: a hand-built ``Document``,
    ``Document.from_dict`` and ``writer.add_json``. The writer is committed
    and the index reloaded before it is returned.
    """
    # assume all tests will use the same documents for now
    # other methods may set up function-local indexes
    index = Index(schema())
    index_writer = index.writer()

    # Way 1: create an empty document and add field-value pairs one by one.
    old_man_body = (
        " He was an old man who fished alone in a skiff in "
        " the Gulf Stream and he had gone eighty-four days "
        " now without taking a fish. "
    )
    old_man = Document()
    old_man.add_text(" title ", " The Old Man and the Sea ")
    old_man.add_text(" body ", old_man_body)
    index_writer.add_document(old_man)

    # Way 2: build the document from a dict; keys need to coincide with
    # the field names.
    mice_and_men_body = (
        " A few miles south of Soledad, the Salinas River drops "
        " in close to the hillside bank and runs deep and "
        " green. The water is warm too, for it has slipped "
        " twinkling over the yellow sands in the sunlight "
        " before reaching the narrow pool. On one side of the "
        " river the golden foothill slopes curve up to the "
        " strong and rocky Gabilan Mountains, but on the valley "
        " side the water is lined with trees—willows fresh and "
        " green with every spring, carrying in their lower leaf "
        " junctures the debris of the winter’ s flooding; and "
        " sycamores with mottled, white, recumbent limbs and "
        " branches that arch over the pool "
    )
    mice_and_men = Document.from_dict(
        {
            " title ": " Of Mice and Men ",
            " body ": mice_and_men_body,
        }
    )
    index_writer.add_document(mice_and_men)

    # Way 3: hand a raw JSON payload straight to the writer. The title
    # carries two values here.
    frankenstein_json = (
        ' {\n'
        '" title " : [ " Frankenstein " , " The Modern Prometheus " ] ,\n'
        '" body " : " You will rejoice to hear that no disaster has '
        'accompanied the commencement of an enterprise which you have '
        'regarded with such evil forebodings. I arrived here yesterday, '
        'and my first task is to assure my dear sister of my welfare and '
        'increasing confidence in the success of my undertaking. "\n'
        '} '
    )
    index_writer.add_json(frankenstein_json)

    index_writer.commit()
    # Reload after the commit so the returned index sees the new documents.
    index.reload()
    return index
2019-06-04 09:09:58 +00:00
2019-08-03 11:28:12 +00:00
2019-06-04 09:09:58 +00:00
2019-08-03 11:28:12 +00:00
class TestClass(object):
    """Query-parsing and search tests run against the ``ram_index`` fixture."""

    def test_simple_search(self, ram_index):
        """A two-term query over both fields yields exactly one hit.

        The hit's stored title must be the Hemingway book.
        """
        index = ram_index
        query = index.parse_query(" sea whale ", [" title ", " body "])

        top_docs = tantivy.TopDocs(10)
        # Use one searcher for both the search and the document lookup:
        # a doc address should be resolved by the searcher that produced it,
        # not by a second, freshly acquired one.
        searcher = index.searcher()
        result = searcher.search(query, top_docs)
        assert len(result) == 1

        _, doc_address = result[0]
        searched_doc = searcher.doc(doc_address)
        assert searched_doc[" title "] == [" The Old Man and the Sea "]

    def test_and_query(self, ram_index):
        """``AND`` queries only match documents satisfying both clauses."""
        index = ram_index
        query = index.parse_query(
            " title:men AND body:summer ",
            default_field_names=[" title ", " body "],
        )
        # look for an intersection of documents
        top_docs = tantivy.TopDocs(10)
        searcher = index.searcher()
        result = searcher.search(query, top_docs)

        # summer isn't present
        assert len(result) == 0

        query = index.parse_query(
            " title:men AND body:winter ", [" title ", " body "]
        )
        result = searcher.search(query, top_docs)

        assert len(result) == 1

    def test_and_query_parser_default_fields(self, ram_index):
        """With a single default field the parsed query is one ``TermQuery``
        on field 0, as shown by its repr."""
        query = ram_index.parse_query(" winter ", default_field_names=[" title "])
        assert repr(query) == """ Query(TermQuery(Term(field=0,bytes=[119, 105, 110, 116, 101, 114]))) """

    def test_and_query_parser_default_fields_undefined(self, ram_index):
        """Without default fields the repr shows a ``BooleanQuery`` holding
        one ``Should`` term clause for field 0 and one for field 1."""
        query = ram_index.parse_query(" winter ")
        assert repr(query) == (
            " Query(BooleanQuery { subqueries: [ "
            " (Should, TermQuery(Term(field=0,bytes=[119, 105, 110, 116, 101, 114]))), "
            " (Should, TermQuery(Term(field=1,bytes=[119, 105, 110, 116, 101, 114])))] "
            " }) "
        )

    def test_query_errors(self, ram_index):
        """Parsing a query that names an unknown field raises ``ValueError``."""
        index = ram_index
        # no "bod" field
        with pytest.raises(ValueError):
            index.parse_query(" bod:men ", [" title ", " body "])
2019-08-02 23:39:02 +00:00
2019-08-02 11:23:10 +00:00
# Directory holding the on-disk index that TestFromDiskClass checks and opens.
# The path is relative: the tests are expected to run from the root directory.
PATH_TO_INDEX = " tests/test_index/ "
2019-08-02 23:39:02 +00:00
2019-08-03 11:28:12 +00:00
class TestFromDiskClass(object):
    """Tests for the index stored at ``PATH_TO_INDEX`` and for reader reloading."""

    def test_exists(self):
        """``Index.exists`` reports an index at ``PATH_TO_INDEX``."""
        # prefer to keep it separate in case anyone deletes this
        # runs from the root directory
        assert Index.exists(PATH_TO_INDEX)

    def test_opens_from_dir(self):
        """Opening the directory with ``reuse=True`` exposes three documents."""
        index = Index(schema(), PATH_TO_INDEX, reuse=True)
        assert index.searcher().num_docs == 3

    def test_create_readers(self):
        """Check when committed documents become visible to searchers.

        First the default (manual) mode is exercised: a commit is not
        visible until ``reload()`` is called. Then the reader is switched to
        ``OnCommit`` and the test polls until the second document shows up,
        failing explicitly if it never does.
        """
        import time

        idx = Index(schema())
        assert idx.searcher().num_docs == 0
        # by default this is manual mode
        # NOTE(review): the writer arguments are presumably the heap size in
        # bytes and the number of threads; confirm against the tantivy-py API.
        writer = idx.writer(30000000, 1)
        writer.add_document(Document(title=" mytitle ", body=" mybody "))
        writer.commit()
        assert idx.searcher().num_docs == 0
        # Manual is the default setting.
        # In this case, changes are reflected only when
        # the index is manually reloaded.
        idx.reload()
        assert idx.searcher().num_docs == 1
        idx.config_reader(" OnCommit ", 4)
        writer.add_document(Document(title=" mytitle2 ", body=" mybody2 "))
        writer.commit()
        for _ in range(50):
            # The index should be automatically reloaded.
            # Wait for at most 5s for it to happen.
            time.sleep(0.1)
            if idx.searcher().num_docs == 2:
                return
        # An explicit failure survives ``python -O`` (a bare ``assert False``
        # would be stripped) and says what went wrong.
        pytest.fail("index was not reloaded automatically within 5s of the commit")
class TestSearcher(object):
    """Checks on the searcher object handed out by ``ram_index``."""

    def test_searcher_repr(self, ram_index):
        """The searcher's repr is compared against a fixed string reporting
        three documents and one segment."""
        searcher = ram_index.searcher()
        expected = " Searcher(num_docs=3, num_segments=1) "
        assert repr(searcher) == expected
class TestDocument(object):
    """Tests for building and inspecting ``tantivy.Document`` objects."""

    def test_document(self):
        """Keyword construction, item access, ``get_first`` and ``to_dict``."""
        bill = tantivy.Document(name=" Bill ", reference=[1, 2])

        # Item access returns the list of values for a field.
        assert bill[" reference "] == [1, 2]
        assert bill[" name "] == [" Bill "]
        # get_first returns a single value instead of the list.
        assert bill.get_first(" name ") == " Bill "
        assert bill.get_first(" reference ") == 1

        expected_dict = {" name ": [" Bill "], " reference ": [1, 2]}
        assert bill.to_dict() == expected_dict

    def test_document_with_date(self):
        """A ``datetime`` value passed at construction is read back unchanged."""
        import datetime

        when = datetime.datetime(2019, 8, 12, 13, 0, 0)
        bill = tantivy.Document(name=" Bill ", date=when)
        assert bill[" date "][0] == when

    def test_document_repr(self):
        """The document repr is compared against a fixed string."""
        bill = tantivy.Document(name=" Bill ", reference=[1, 2])
        expected = " Document(name=[Bill],reference=[1,2]) "
        assert repr(bill) == expected

    def test_document_with_facet(self):
        """Facets added to a document expose their path segments."""
        # Plain two-segment facet.
        simple_doc = tantivy.Document()
        simple_facet = tantivy.Facet.from_string(" /europe/france ")
        simple_doc.add_facet(" facet ", simple_facet)
        assert simple_doc[" facet "][0].to_path() == [" europe ", " france "]

        # Facet whose source string contains a backslash before a slash.
        escaped_doc = tantivy.Document()
        escaped_facet = tantivy.Facet.from_string(" /asia \\ /oceania/fiji ")
        escaped_doc.add_facet(" facet ", escaped_facet)
        assert escaped_doc[" facet "][0].to_path() == [" asia/oceania ", " fiji "]
        assert escaped_doc[" facet "][0].to_path_str() == " /asia \\ /oceania/fiji "
        assert repr(escaped_doc[" facet "][0]) == " Facet(/asia \\ /oceania/fiji) "

        # The same facet can also be supplied as a constructor keyword.
        keyword_doc = tantivy.Document(facet=escaped_facet)
        assert keyword_doc[" facet "][0].to_path() == [" asia/oceania ", " fiji "]

    def test_document_error(self):
        """Passing a dict as a field value raises ``ValueError``."""
        with pytest.raises(ValueError):
            tantivy.Document(name={})