parent
c74990aeb8
commit
5c3666349b
21
src/query.rs
21
src/query.rs
|
@ -1,4 +1,4 @@
|
||||||
use crate::{make_term, Schema};
|
use crate::{get_field, make_term, to_pyerr, Schema};
|
||||||
use pyo3::{
|
use pyo3::{
|
||||||
exceptions,
|
exceptions,
|
||||||
prelude::*,
|
prelude::*,
|
||||||
|
@ -187,4 +187,23 @@ impl Query {
|
||||||
inner: Box::new(inner),
|
inner: Box::new(inner),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[staticmethod]
|
||||||
|
#[pyo3(signature = (schema, field_name, regex_pattern))]
|
||||||
|
pub(crate) fn regex_query(
|
||||||
|
schema: &Schema,
|
||||||
|
field_name: &str,
|
||||||
|
regex_pattern: &str,
|
||||||
|
) -> PyResult<Query> {
|
||||||
|
let field = get_field(&schema.inner, field_name)?;
|
||||||
|
|
||||||
|
let inner_result =
|
||||||
|
tv::query::RegexQuery::from_pattern(regex_pattern, field);
|
||||||
|
match inner_result {
|
||||||
|
Ok(inner) => Ok(Query {
|
||||||
|
inner: Box::new(inner),
|
||||||
|
}),
|
||||||
|
Err(e) => Err(to_pyerr(e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,108 +2,105 @@ import datetime
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import Any, Optional, Sequence
|
from typing import Any, Optional, Sequence
|
||||||
|
|
||||||
|
|
||||||
class Schema:
|
class Schema:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class SchemaBuilder:
|
class SchemaBuilder:
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_valid_field_name(name: str) -> bool:
|
def is_valid_field_name(name: str) -> bool:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def add_text_field(
|
def add_text_field(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
stored: bool = False,
|
stored: bool = False,
|
||||||
tokenizer_name: str = "default",
|
tokenizer_name: str = "default",
|
||||||
index_option: str = "position",
|
index_option: str = "position",
|
||||||
) -> SchemaBuilder:
|
) -> SchemaBuilder:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def add_integer_field(
|
def add_integer_field(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
stored: bool = False,
|
stored: bool = False,
|
||||||
indexed: bool = False,
|
indexed: bool = False,
|
||||||
fast: bool = False,
|
fast: bool = False,
|
||||||
) -> SchemaBuilder:
|
) -> SchemaBuilder:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def add_float_field(
|
def add_float_field(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
stored: bool = False,
|
stored: bool = False,
|
||||||
indexed: bool = False,
|
indexed: bool = False,
|
||||||
fast: bool = False,
|
fast: bool = False,
|
||||||
) -> SchemaBuilder:
|
) -> SchemaBuilder:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def add_unsigned_field(
|
def add_unsigned_field(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
stored: bool = False,
|
stored: bool = False,
|
||||||
indexed: bool = False,
|
indexed: bool = False,
|
||||||
fast: bool = False,
|
fast: bool = False,
|
||||||
) -> SchemaBuilder:
|
) -> SchemaBuilder:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def add_boolean_field(
|
def add_boolean_field(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
stored: bool = False,
|
stored: bool = False,
|
||||||
indexed: bool = False,
|
indexed: bool = False,
|
||||||
fast: bool = False,
|
fast: bool = False,
|
||||||
) -> SchemaBuilder:
|
) -> SchemaBuilder:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def add_date_field(
|
def add_date_field(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
stored: bool = False,
|
stored: bool = False,
|
||||||
indexed: bool = False,
|
indexed: bool = False,
|
||||||
fast: bool = False,
|
fast: bool = False,
|
||||||
) -> SchemaBuilder:
|
) -> SchemaBuilder:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def add_json_field(
|
def add_json_field(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
stored: bool = False,
|
stored: bool = False,
|
||||||
tokenizer_name: str = "default",
|
tokenizer_name: str = "default",
|
||||||
index_option: str = "position",
|
index_option: str = "position",
|
||||||
) -> SchemaBuilder:
|
) -> SchemaBuilder:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def add_facet_field(
|
def add_facet_field(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
) -> SchemaBuilder:
|
) -> SchemaBuilder:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def add_bytes_field(
|
def add_bytes_field(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
stored: bool = False,
|
stored: bool = False,
|
||||||
indexed: bool = False,
|
indexed: bool = False,
|
||||||
fast: bool = False,
|
fast: bool = False,
|
||||||
index_option: str = "position",
|
index_option: str = "position",
|
||||||
) -> SchemaBuilder:
|
) -> SchemaBuilder:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def add_ip_addr_field(
|
def add_ip_addr_field(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
stored: bool = False,
|
stored: bool = False,
|
||||||
indexed: bool = False,
|
indexed: bool = False,
|
||||||
fast: bool = False,
|
fast: bool = False,
|
||||||
) -> SchemaBuilder:
|
) -> SchemaBuilder:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def build(self) -> Schema:
|
def build(self) -> Schema:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class Facet:
|
class Facet:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def from_encoded(encoded_bytes: bytes) -> Facet:
|
def from_encoded(encoded_bytes: bytes) -> Facet:
|
||||||
|
@ -130,9 +127,7 @@ class Facet:
|
||||||
def to_path_str(self) -> str:
|
def to_path_str(self) -> str:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class Document:
|
class Document:
|
||||||
|
|
||||||
def __new__(cls, **kwargs) -> Document:
|
def __new__(cls, **kwargs) -> Document:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -194,7 +189,12 @@ class Occur(Enum):
|
||||||
|
|
||||||
class Query:
|
class Query:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def term_query(schema: Schema, field_name: str, field_value: Any, index_option: str = "position") -> Query:
|
def term_query(
|
||||||
|
schema: Schema,
|
||||||
|
field_name: str,
|
||||||
|
field_value: Any,
|
||||||
|
index_option: str = "position",
|
||||||
|
) -> Query:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -202,9 +202,16 @@ class Query:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def fuzzy_term_query(schema: Schema, field_name: str, text: str, distance: int = 1, transposition_cost_one: bool = True, prefix = False) -> Query:
|
def fuzzy_term_query(
|
||||||
|
schema: Schema,
|
||||||
|
field_name: str,
|
||||||
|
text: str,
|
||||||
|
distance: int = 1,
|
||||||
|
transposition_cost_one: bool = True,
|
||||||
|
prefix=False,
|
||||||
|
) -> Query:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def boolean_query(subqueries: Sequence[tuple[Occur, Query]]) -> Query:
|
def boolean_query(subqueries: Sequence[tuple[Occur, Query]]) -> Query:
|
||||||
pass
|
pass
|
||||||
|
@ -218,13 +225,15 @@ class Query:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@staticmethod
def regex_query(
    schema: Schema,
    field_name: str,
    regex_pattern: str,
) -> Query:
    # Typing stub only: the body is intentionally empty.
    pass
|
||||||
|
|
||||||
class Order(Enum):
|
class Order(Enum):
|
||||||
Asc = 1
|
Asc = 1
|
||||||
Desc = 2
|
Desc = 2
|
||||||
|
|
||||||
|
|
||||||
class DocAddress:
|
class DocAddress:
|
||||||
|
|
||||||
def __new__(cls, segment_ord: int, doc: int) -> DocAddress:
|
def __new__(cls, segment_ord: int, doc: int) -> DocAddress:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -237,22 +246,19 @@ class DocAddress:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class SearchResult:
|
class SearchResult:
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def hits(self) -> list[tuple[Any, DocAddress]]:
|
def hits(self) -> list[tuple[Any, DocAddress]]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class Searcher:
|
class Searcher:
|
||||||
|
|
||||||
def search(
|
def search(
|
||||||
self,
|
self,
|
||||||
query: Query,
|
query: Query,
|
||||||
limit: int = 10,
|
limit: int = 10,
|
||||||
count: bool = True,
|
count: bool = True,
|
||||||
order_by_field: Optional[str] = None,
|
order_by_field: Optional[str] = None,
|
||||||
offset: int = 0,
|
offset: int = 0,
|
||||||
order: Order = Order.Desc,
|
order: Order = Order.Desc,
|
||||||
) -> SearchResult:
|
) -> SearchResult:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -267,9 +273,7 @@ class Searcher:
|
||||||
def doc(self, doc_address: DocAddress) -> Document:
|
def doc(self, doc_address: DocAddress) -> Document:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class IndexWriter:
|
class IndexWriter:
|
||||||
|
|
||||||
def add_document(self, doc: Document) -> int:
|
def add_document(self, doc: Document) -> int:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -298,10 +302,10 @@ class IndexWriter:
|
||||||
def wait_merging_threads(self) -> None:
|
def wait_merging_threads(self) -> None:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class Index:
|
class Index:
|
||||||
|
def __new__(
|
||||||
def __new__(cls, schema: Schema, path: Optional[str] = None, reuse: bool = True) -> Index:
|
cls, schema: Schema, path: Optional[str] = None, reuse: bool = True
|
||||||
|
) -> Index:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -311,7 +315,9 @@ class Index:
|
||||||
def writer(self, heap_size: int = 128_000_000, num_threads: int = 0) -> IndexWriter:
|
def writer(self, heap_size: int = 128_000_000, num_threads: int = 0) -> IndexWriter:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def config_reader(self, reload_policy: str = "commit", num_warmers: int = 0) -> None:
|
def config_reader(
|
||||||
|
self, reload_policy: str = "commit", num_warmers: int = 0
|
||||||
|
) -> None:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def searcher(self) -> Searcher:
|
def searcher(self) -> Searcher:
|
||||||
|
@ -328,15 +334,17 @@ class Index:
|
||||||
def reload(self) -> None:
|
def reload(self) -> None:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def parse_query(self, query: str, default_field_names: Optional[list[str]] = None) -> Query:
|
def parse_query(
|
||||||
|
self, query: str, default_field_names: Optional[list[str]] = None
|
||||||
|
) -> Query:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def parse_query_lenient(self, query: str, default_field_names: Optional[list[str]] = None) -> Query:
|
def parse_query_lenient(
|
||||||
|
self, query: str, default_field_names: Optional[list[str]] = None
|
||||||
|
) -> Query:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class Range:
|
class Range:
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def start(self) -> int:
|
def start(self) -> int:
|
||||||
pass
|
pass
|
||||||
|
@ -345,24 +353,17 @@ class Range:
|
||||||
def end(self) -> int:
|
def end(self) -> int:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class Snippet:
|
class Snippet:
|
||||||
|
|
||||||
def to_html(self) -> str:
|
def to_html(self) -> str:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def highlighted(self) -> list[Range]:
|
def highlighted(self) -> list[Range]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class SnippetGenerator:
|
class SnippetGenerator:
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def create(
|
def create(
|
||||||
searcher: Searcher,
|
searcher: Searcher, query: Query, schema: Schema, field_name: str
|
||||||
query: Query,
|
|
||||||
schema: Schema,
|
|
||||||
field_name: str
|
|
||||||
) -> SnippetGenerator:
|
) -> SnippetGenerator:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
|
@ -995,3 +995,36 @@ class TestQuery(object):
|
||||||
# no boost type error
|
# no boost type error
|
||||||
with pytest.raises(TypeError, match = r"Query.boost_query\(\) missing 1 required positional argument: 'boost'"):
|
with pytest.raises(TypeError, match = r"Query.boost_query\(\) missing 1 required positional argument: 'boost'"):
|
||||||
Query.boost_query(query1)
|
Query.boost_query(query1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_regex_query(self, ram_index):
    """Check regex_query matches, then its unknown-field and bad-pattern errors."""
    index = ram_index

    # A literal pattern on "body" is expected to hit exactly one document.
    body_query = Query.regex_query(index.schema, "body", "fish")
    body_result = index.searcher().search(body_query, 10)
    assert len(body_result.hits) == 1
    _, body_address = body_result.hits[0]
    body_doc = index.searcher().doc(body_address)
    assert body_doc["title"] == ["The Old Man and the Sea"]

    # An alternation on "title" is expected to hit two documents, in this order.
    title_query = Query.regex_query(index.schema, "title", "(?:man|men)")
    title_result = index.searcher().search(title_query, 10)
    assert len(title_result.hits) == 2
    expected_titles = ("The Old Man and the Sea", "Of Mice and Men")
    for (_, title_address), expected_title in zip(title_result.hits, expected_titles):
        title_doc = index.searcher().doc(title_address)
        assert title_doc["title"] == [expected_title]

    # unknown field in the schema
    with pytest.raises(
        ValueError, match="Field `unknown_field` is not defined in the schema."
    ):
        Query.regex_query(index.schema, "unknown_field", "fish")

    # invalid regex pattern
    with pytest.raises(
        ValueError, match=r"An invalid argument was passed: 'fish\('"
    ):
        Query.regex_query(index.schema, "body", "fish(")
|
||||||
|
|
Loading…
Reference in New Issue