2024-06-09 23:00:39 +00:00
use crate ::{
get_field , make_term , make_term_for_type , schema ::FieldType , to_pyerr ,
DocAddress , Schema ,
} ;
use core ::ops ::Bound as OpsBound ;
2024-04-22 23:36:48 +00:00
use pyo3 ::{
2024-04-24 12:12:24 +00:00
exceptions ,
prelude ::* ,
types ::{ PyAny , PyFloat , PyString , PyTuple } ,
2024-04-22 23:36:48 +00:00
} ;
2019-06-04 09:09:58 +00:00
use tantivy as tv ;
2024-04-22 23:27:51 +00:00
/// Custom Tuple struct to represent a pair of Occur and Query
/// for the BooleanQuery
struct OccurQueryPair ( Occur , Query ) ;
2024-04-22 23:36:48 +00:00
impl < ' source > FromPyObject < ' source > for OccurQueryPair {
2024-04-22 23:27:51 +00:00
fn extract ( ob : & ' source PyAny ) -> PyResult < Self > {
let tuple = ob . downcast ::< PyTuple > ( ) ? ;
let occur = tuple . get_item ( 0 ) ? . extract ( ) ? ;
let query = tuple . get_item ( 1 ) ? . extract ( ) ? ;
Ok ( OccurQueryPair ( occur , query ) )
}
}
/// Tantivy's Occur
///
/// Python-visible mirror of `tantivy::query::Occur`; each variant converts
/// to the tantivy variant of the same name.
#[pyclass(frozen, module = " tantivy.tantivy ")]
#[derive(Clone)]
pub enum Occur {
    /// Converts to `tantivy::query::Occur::Must`.
    Must,
    /// Converts to `tantivy::query::Occur::Should`.
    Should,
    /// Converts to `tantivy::query::Occur::MustNot`.
    MustNot,
}
impl From < Occur > for tv ::query ::Occur {
fn from ( occur : Occur ) -> tv ::query ::Occur {
match occur {
Occur ::Must = > tv ::query ::Occur ::Must ,
Occur ::Should = > tv ::query ::Occur ::Should ,
Occur ::MustNot = > tv ::query ::Occur ::MustNot ,
}
}
}
2019-06-04 09:09:58 +00:00
/// Tantivy's Query
2024-01-21 20:16:34 +00:00
#[ pyclass(frozen, module = " tantivy.tantivy " ) ]
2019-06-04 09:09:58 +00:00
pub ( crate ) struct Query {
2020-09-19 13:17:07 +00:00
pub ( crate ) inner : Box < dyn tv ::query ::Query > ,
2020-09-19 09:36:31 +00:00
}
2024-04-22 23:27:51 +00:00
impl Clone for Query {
fn clone ( & self ) -> Self {
Query {
inner : self . inner . box_clone ( ) ,
}
}
}
2020-09-19 09:36:31 +00:00
impl Query {
2020-09-19 13:17:07 +00:00
pub ( crate ) fn get ( & self ) -> & dyn tv ::query ::Query {
& self . inner
2020-09-19 09:36:31 +00:00
}
2019-06-04 09:09:58 +00:00
}
2022-04-15 03:50:37 +00:00
#[ pymethods ]
impl Query {
2019-08-02 11:23:10 +00:00
/// Returns the Python `repr` of the query: the `Debug` rendering of the
/// wrapped tantivy query embedded in the format string below.
fn __repr__(&self) -> PyResult<String> {
    let query = self.get();
    let rendered = format!(" Query( {:?} ) ", query);
    Ok(rendered)
}
2023-12-20 09:40:50 +00:00
/// Construct a Tantivy's TermQuery
#[ staticmethod ]
#[ pyo3(signature = (schema, field_name, field_value, index_option = " position " )) ]
pub ( crate ) fn term_query (
schema : & Schema ,
field_name : & str ,
2024-05-07 14:36:46 +00:00
field_value : & Bound < PyAny > ,
2023-12-20 09:40:50 +00:00
index_option : & str ,
) -> PyResult < Query > {
let term = make_term ( & schema . inner , field_name , field_value ) ? ;
let index_option = match index_option {
" position " = > tv ::schema ::IndexRecordOption ::WithFreqsAndPositions ,
" freq " = > tv ::schema ::IndexRecordOption ::WithFreqs ,
" basic " = > tv ::schema ::IndexRecordOption ::Basic ,
_ = > return Err ( exceptions ::PyValueError ::new_err (
" Invalid index option, valid choices are: 'basic', 'freq' and 'position' "
) )
} ;
let inner = tv ::query ::TermQuery ::new ( term , index_option ) ;
Ok ( Query {
inner : Box ::new ( inner ) ,
} )
}
2024-03-31 11:56:22 +00:00
2024-04-26 11:21:46 +00:00
/// Construct a Tantivy's TermSetQuery
#[ staticmethod ]
#[ pyo3(signature = (schema, field_name, field_values)) ]
pub ( crate ) fn term_set_query (
schema : & Schema ,
field_name : & str ,
2024-05-07 14:36:46 +00:00
field_values : Vec < Bound < PyAny > > ,
2024-04-26 11:21:46 +00:00
) -> PyResult < Query > {
let terms = field_values
. into_iter ( )
. map ( | field_value | {
2024-05-07 14:36:46 +00:00
make_term ( & schema . inner , field_name , & field_value )
2024-04-26 11:21:46 +00:00
} )
. collect ::< Result < Vec < _ > , _ > > ( ) ? ;
let inner = tv ::query ::TermSetQuery ::new ( terms ) ;
Ok ( Query {
inner : Box ::new ( inner ) ,
} )
}
2024-03-31 11:56:22 +00:00
/// Construct a Tantivy's AllQuery
///
/// Takes no arguments and always succeeds.
#[staticmethod]
pub(crate) fn all_query() -> PyResult<Query> {
    Ok(Query {
        inner: Box::new(tv::query::AllQuery {}),
    })
}
2024-04-13 09:14:56 +00:00
/// Construct a Tantivy's FuzzyTermQuery
///
/// # Arguments
///
/// * `schema` - Schema of the target index.
/// * `field_name` - Field name to be searched.
/// * `text` - String representation of the query term.
/// * `distance` - (Optional) Edit distance you are going to alow. When not specified, the default is 1.
2024-04-13 10:26:05 +00:00
/// * `transposition_cost_one` - (Optional) If true, a transposition (swapping) cost will be 1; otherwise it will be 2. When not specified, the default is true.
/// * `prefix` - (Optional) If true, prefix levenshtein distance is applied. When not specified, the default is false.
2024-04-13 09:14:56 +00:00
#[ staticmethod ]
#[ pyo3(signature = (schema, field_name, text, distance = 1, transposition_cost_one = true, prefix = false)) ]
pub ( crate ) fn fuzzy_term_query (
schema : & Schema ,
field_name : & str ,
2024-05-07 14:36:46 +00:00
text : & Bound < PyString > ,
2024-04-13 09:14:56 +00:00
distance : u8 ,
transposition_cost_one : bool ,
prefix : bool ,
) -> PyResult < Query > {
2024-05-03 20:15:21 +00:00
let term = make_term ( & schema . inner , field_name , text ) ? ;
2024-04-13 09:14:56 +00:00
let inner = if prefix {
tv ::query ::FuzzyTermQuery ::new_prefix (
term ,
distance ,
transposition_cost_one ,
)
} else {
tv ::query ::FuzzyTermQuery ::new (
term ,
distance ,
transposition_cost_one ,
)
} ;
Ok ( Query {
inner : Box ::new ( inner ) ,
} )
}
2024-04-22 23:27:51 +00:00
2024-05-03 20:10:58 +00:00
/// Construct a Tantivy's PhraseQuery with custom offsets and slop
///
/// # Arguments
///
/// * `schema` - Schema of the target index.
/// * `field_name` - Field name to be searched.
/// * `words` - Word list that constructs the phrase. A word can be a term text or a pair of term text and its offset in the phrase.
/// * `slop` - (Optional) The number of gaps permitted between the words in the query phrase. Default is 0.
#[ staticmethod ]
#[ pyo3(signature = (schema, field_name, words, slop = 0)) ]
pub ( crate ) fn phrase_query (
schema : & Schema ,
field_name : & str ,
2024-05-07 14:36:46 +00:00
words : Vec < Bound < PyAny > > ,
2024-05-03 20:10:58 +00:00
slop : u32 ,
) -> PyResult < Query > {
let mut terms_with_offset = Vec ::with_capacity ( words . len ( ) ) ;
for ( idx , word ) in words . into_iter ( ) . enumerate ( ) {
if let Ok ( ( offset , value ) ) = word . extract ( ) {
// Custom offset is provided.
2024-05-07 14:36:46 +00:00
let term = make_term ( & schema . inner , field_name , & value ) ? ;
2024-05-03 20:10:58 +00:00
terms_with_offset . push ( ( offset , term ) ) ;
} else {
// Custom offset is not provided. Use the list index as the offset.
2024-05-07 14:36:46 +00:00
let term = make_term ( & schema . inner , field_name , & word ) ? ;
2024-05-03 20:10:58 +00:00
terms_with_offset . push ( ( idx , term ) ) ;
} ;
}
if terms_with_offset . is_empty ( ) {
return Err ( exceptions ::PyValueError ::new_err (
" words must not be empty. " ,
) ) ;
}
let inner = tv ::query ::PhraseQuery ::new_with_offset_and_slop (
terms_with_offset ,
slop ,
) ;
Ok ( Query {
inner : Box ::new ( inner ) ,
} )
}
2024-04-28 10:54:21 +00:00
/// Construct a Tantivy's BooleanQuery
2024-04-22 23:27:51 +00:00
#[ staticmethod ]
#[ pyo3(signature = (subqueries)) ]
pub ( crate ) fn boolean_query (
2024-04-22 23:36:48 +00:00
subqueries : Vec < ( Occur , Query ) > ,
2024-04-22 23:27:51 +00:00
) -> PyResult < Query > {
let dyn_subqueries = subqueries
. into_iter ( )
. map ( | ( occur , query ) | ( occur . into ( ) , query . inner . box_clone ( ) ) )
. collect ::< Vec < _ > > ( ) ;
2024-04-22 23:36:48 +00:00
2024-04-22 23:27:51 +00:00
let inner = tv ::query ::BooleanQuery ::from ( dyn_subqueries ) ;
Ok ( Query {
inner : Box ::new ( inner ) ,
} )
}
2024-04-24 12:12:24 +00:00
/// Construct a Tantivy's DisjunctionMaxQuery
#[ staticmethod ]
pub ( crate ) fn disjunction_max_query (
subqueries : Vec < Query > ,
2024-05-07 14:36:46 +00:00
tie_breaker : Option < Bound < PyFloat > > ,
2024-04-24 12:12:24 +00:00
) -> PyResult < Query > {
let inner_queries : Vec < Box < dyn tv ::query ::Query > > = subqueries
. iter ( )
. map ( | query | query . inner . box_clone ( ) )
. collect ( ) ;
let dismax_query = if let Some ( tie_breaker ) = tie_breaker {
tv ::query ::DisjunctionMaxQuery ::with_tie_breaker (
inner_queries ,
tie_breaker . extract ::< f32 > ( ) ? ,
)
} else {
tv ::query ::DisjunctionMaxQuery ::new ( inner_queries )
} ;
Ok ( Query {
inner : Box ::new ( dismax_query ) ,
} )
}
2024-04-24 21:57:16 +00:00
2024-04-28 10:54:21 +00:00
/// Construct a Tantivy's BoostQuery
2024-04-24 21:57:16 +00:00
#[ staticmethod ]
#[ pyo3(signature = (query, boost)) ]
pub ( crate ) fn boost_query ( query : Query , boost : f32 ) -> PyResult < Query > {
let inner = tv ::query ::BoostQuery ::new ( query . inner , boost ) ;
Ok ( Query {
inner : Box ::new ( inner ) ,
} )
}
2024-04-25 02:57:09 +00:00
2024-04-28 10:54:21 +00:00
/// Construct a Tantivy's RegexQuery
2024-04-25 02:57:09 +00:00
#[ staticmethod ]
#[ pyo3(signature = (schema, field_name, regex_pattern)) ]
pub ( crate ) fn regex_query (
schema : & Schema ,
field_name : & str ,
regex_pattern : & str ,
) -> PyResult < Query > {
let field = get_field ( & schema . inner , field_name ) ? ;
let inner_result =
tv ::query ::RegexQuery ::from_pattern ( regex_pattern , field ) ;
match inner_result {
Ok ( inner ) = > Ok ( Query {
inner : Box ::new ( inner ) ,
} ) ,
Err ( e ) = > Err ( to_pyerr ( e ) ) ,
}
}
2024-04-28 10:54:21 +00:00
2024-05-03 20:15:21 +00:00
/// Construct a Tantivy's MoreLikeThisQuery
///
/// Every optional setting is forwarded to the matching builder setter only
/// when it is not `None`. `stop_words` is always forwarded.
///
/// # Arguments
///
/// * `doc_address` - Address of the document the query is built from.
/// * `min_doc_frequency` - (Optional) Forwarded to `with_min_doc_frequency`. Default is 5.
/// * `max_doc_frequency` - (Optional) Forwarded to `with_max_doc_frequency`. Default is None.
/// * `min_term_frequency` - (Optional) Forwarded to `with_min_term_frequency`. Default is 2.
/// * `max_query_terms` - (Optional) Forwarded to `with_max_query_terms`. Default is 25.
/// * `min_word_length` - (Optional) Forwarded to `with_min_word_length`. Default is None.
/// * `max_word_length` - (Optional) Forwarded to `with_max_word_length`. Default is None.
/// * `boost_factor` - (Optional) Forwarded to `with_boost_factor`. Default is 1.0.
/// * `stop_words` - (Optional) Forwarded to `with_stop_words`. Default is an empty list.
#[staticmethod]
#[pyo3(signature = (doc_address, min_doc_frequency = Some(5), max_doc_frequency = None, min_term_frequency = Some(2), max_query_terms = Some(25), min_word_length = None, max_word_length = None, boost_factor = Some(1.0), stop_words = vec![]))]
#[allow(clippy::too_many_arguments)]
pub(crate) fn more_like_this_query(
    doc_address: &DocAddress,
    min_doc_frequency: Option<u64>,
    max_doc_frequency: Option<u64>,
    min_term_frequency: Option<usize>,
    max_query_terms: Option<usize>,
    min_word_length: Option<usize>,
    max_word_length: Option<usize>,
    boost_factor: Option<f32>,
    stop_words: Vec<String>,
) -> PyResult<Query> {
    let builder = tv::query::MoreLikeThisQuery::builder();
    let builder = match min_doc_frequency {
        Some(limit) => builder.with_min_doc_frequency(limit),
        None => builder,
    };
    let builder = match max_doc_frequency {
        Some(limit) => builder.with_max_doc_frequency(limit),
        None => builder,
    };
    let builder = match min_term_frequency {
        Some(limit) => builder.with_min_term_frequency(limit),
        None => builder,
    };
    let builder = match max_query_terms {
        Some(limit) => builder.with_max_query_terms(limit),
        None => builder,
    };
    let builder = match min_word_length {
        Some(limit) => builder.with_min_word_length(limit),
        None => builder,
    };
    let builder = match max_word_length {
        Some(limit) => builder.with_max_word_length(limit),
        None => builder,
    };
    let builder = match boost_factor {
        Some(factor) => builder.with_boost_factor(factor),
        None => builder,
    };
    let more_like_this = builder
        .with_stop_words(stop_words)
        .with_document(tv::DocAddress::from(doc_address));
    Ok(Query {
        inner: Box::new(more_like_this),
    })
}
2024-04-28 10:54:21 +00:00
/// Construct a Tantivy's ConstScoreQuery
///
/// # Arguments
///
/// * `query` - Query to wrap; its inner tantivy query is moved into the
///   new `ConstScoreQuery`.
/// * `score` - Score value passed to `ConstScoreQuery::new`.
#[staticmethod]
#[pyo3(signature = (query, score))]
pub(crate) fn const_score_query(
    query: Query,
    score: f32,
) -> PyResult<Query> {
    let Query { inner: wrapped } = query;
    Ok(Query {
        inner: Box::new(tv::query::ConstScoreQuery::new(wrapped, score)),
    })
}
2024-06-09 23:00:39 +00:00
#[ staticmethod ]
#[ pyo3(signature = (schema, field_name, field_type, lower_bound, upper_bound, include_lower = true, include_upper = true)) ]
pub ( crate ) fn range_query (
schema : & Schema ,
field_name : & str ,
field_type : FieldType ,
lower_bound : & Bound < PyAny > ,
upper_bound : & Bound < PyAny > ,
include_lower : bool ,
include_upper : bool ,
) -> PyResult < Query > {
match field_type {
FieldType ::Text = > {
return Err ( exceptions ::PyValueError ::new_err (
" Text fields are not supported for range queries. " ,
) )
}
FieldType ::Boolean = > {
return Err ( exceptions ::PyValueError ::new_err (
" Boolean fields are not supported for range queries. " ,
) )
}
FieldType ::Facet = > {
return Err ( exceptions ::PyValueError ::new_err (
" Facet fields are not supported for range queries. " ,
) )
}
FieldType ::Bytes = > {
return Err ( exceptions ::PyValueError ::new_err (
" Bytes fields are not supported for range queries. " ,
) )
}
FieldType ::Json = > {
return Err ( exceptions ::PyValueError ::new_err (
" Json fields are not supported for range queries. " ,
) )
}
_ = > { }
}
let lower_bound_term = make_term_for_type (
& schema . inner ,
field_name ,
field_type . clone ( ) ,
lower_bound ,
) ? ;
let upper_bound_term = make_term_for_type (
& schema . inner ,
field_name ,
field_type . clone ( ) ,
upper_bound ,
) ? ;
let lower_bound = if include_lower {
OpsBound ::Included ( lower_bound_term )
} else {
OpsBound ::Excluded ( lower_bound_term )
} ;
let upper_bound = if include_upper {
OpsBound ::Included ( upper_bound_term )
} else {
OpsBound ::Excluded ( upper_bound_term )
} ;
let inner = tv ::query ::RangeQuery ::new_term_bounds (
field_name . to_string ( ) ,
field_type . into ( ) ,
& lower_bound ,
& upper_bound ,
) ;
Ok ( Query {
inner : Box ::new ( inner ) ,
} )
}
2019-06-04 09:09:58 +00:00
}