Support fast text fields (#180)
parent
8e589c9dc0
commit
32197431f7
|
@ -29,6 +29,7 @@ pub(crate) struct SchemaBuilder {
|
||||||
pub(crate) builder: Arc<RwLock<Option<schema::SchemaBuilder>>>,
|
pub(crate) builder: Arc<RwLock<Option<schema::SchemaBuilder>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const NO_TOKENIZER_NAME: &str = "raw";
|
||||||
const TOKENIZER: &str = "default";
|
const TOKENIZER: &str = "default";
|
||||||
const RECORD: &str = "position";
|
const RECORD: &str = "position";
|
||||||
|
|
||||||
|
@ -53,6 +54,14 @@ impl SchemaBuilder {
|
||||||
/// stored (bool, optional): If true sets the field as stored, the
|
/// stored (bool, optional): If true sets the field as stored, the
|
||||||
/// content of the field can be later restored from a Searcher.
|
/// content of the field can be later restored from a Searcher.
|
||||||
/// Defaults to False.
|
/// Defaults to False.
|
||||||
|
/// fast (bool, optional): Set the text options as a fast field. A
|
||||||
|
/// fast field is tantivy's column-oriented storage.
|
||||||
|
/// Text fast fields will have the term ids stored in the fast
|
||||||
|
/// field. The fast field will be a multivalued fast field.
|
||||||
|
/// It is recommended to use the "raw" tokenizer, since it will
|
||||||
|
/// store the original text unchanged. The "default" tokenizer will
|
||||||
|
/// store the terms as lower case and this will be reflected in the
|
||||||
|
/// dictionary.
|
||||||
/// tokenizer_name (str, optional): The name of the tokenizer that
|
/// tokenizer_name (str, optional): The name of the tokenizer that
|
||||||
/// should be used to process the field. Defaults to 'default'
|
/// should be used to process the field. Defaults to 'default'
|
||||||
/// index_option (str, optional): Sets which information should be
|
/// index_option (str, optional): Sets which information should be
|
||||||
|
@ -68,6 +77,7 @@ impl SchemaBuilder {
|
||||||
#[pyo3(signature = (
|
#[pyo3(signature = (
|
||||||
name,
|
name,
|
||||||
stored = false,
|
stored = false,
|
||||||
|
fast = false,
|
||||||
tokenizer_name = TOKENIZER,
|
tokenizer_name = TOKENIZER,
|
||||||
index_option = RECORD
|
index_option = RECORD
|
||||||
))]
|
))]
|
||||||
|
@ -75,12 +85,14 @@ impl SchemaBuilder {
|
||||||
&mut self,
|
&mut self,
|
||||||
name: &str,
|
name: &str,
|
||||||
stored: bool,
|
stored: bool,
|
||||||
|
fast: bool,
|
||||||
tokenizer_name: &str,
|
tokenizer_name: &str,
|
||||||
index_option: &str,
|
index_option: &str,
|
||||||
) -> PyResult<Self> {
|
) -> PyResult<Self> {
|
||||||
let builder = &mut self.builder;
|
let builder = &mut self.builder;
|
||||||
let options = SchemaBuilder::build_text_option(
|
let options = SchemaBuilder::build_text_option(
|
||||||
stored,
|
stored,
|
||||||
|
fast,
|
||||||
tokenizer_name,
|
tokenizer_name,
|
||||||
index_option,
|
index_option,
|
||||||
)?;
|
)?;
|
||||||
|
@ -296,6 +308,14 @@ impl SchemaBuilder {
|
||||||
/// stored (bool, optional): If true sets the field as stored, the
|
/// stored (bool, optional): If true sets the field as stored, the
|
||||||
/// content of the field can be later restored from a Searcher.
|
/// content of the field can be later restored from a Searcher.
|
||||||
/// Defaults to False.
|
/// Defaults to False.
|
||||||
|
/// fast (bool, optional): Set the text options as a fast field. A
|
||||||
|
/// fast field is tantivy's column-oriented storage.
|
||||||
|
/// Text fast fields will have the term ids stored in the fast
|
||||||
|
/// field. The fast field will be a multivalued fast field.
|
||||||
|
/// It is recommended to use the "raw" tokenizer, since it will
|
||||||
|
/// store the original text unchanged. The "default" tokenizer will
|
||||||
|
/// store the terms as lower case and this will be reflected in the
|
||||||
|
/// dictionary.
|
||||||
/// tokenizer_name (str, optional): The name of the tokenizer that
|
/// tokenizer_name (str, optional): The name of the tokenizer that
|
||||||
/// should be used to process the field. Defaults to 'default'
|
/// should be used to process the field. Defaults to 'default'
|
||||||
/// index_option (str, optional): Sets which information should be
|
/// index_option (str, optional): Sets which information should be
|
||||||
|
@ -311,6 +331,7 @@ impl SchemaBuilder {
|
||||||
#[pyo3(signature = (
|
#[pyo3(signature = (
|
||||||
name,
|
name,
|
||||||
stored = false,
|
stored = false,
|
||||||
|
fast = false,
|
||||||
tokenizer_name = TOKENIZER,
|
tokenizer_name = TOKENIZER,
|
||||||
index_option = RECORD
|
index_option = RECORD
|
||||||
))]
|
))]
|
||||||
|
@ -318,12 +339,14 @@ impl SchemaBuilder {
|
||||||
&mut self,
|
&mut self,
|
||||||
name: &str,
|
name: &str,
|
||||||
stored: bool,
|
stored: bool,
|
||||||
|
fast: bool,
|
||||||
tokenizer_name: &str,
|
tokenizer_name: &str,
|
||||||
index_option: &str,
|
index_option: &str,
|
||||||
) -> PyResult<Self> {
|
) -> PyResult<Self> {
|
||||||
let builder = &mut self.builder;
|
let builder = &mut self.builder;
|
||||||
let options = SchemaBuilder::build_text_option(
|
let options = SchemaBuilder::build_text_option(
|
||||||
stored,
|
stored,
|
||||||
|
fast,
|
||||||
tokenizer_name,
|
tokenizer_name,
|
||||||
index_option,
|
index_option,
|
||||||
)?;
|
)?;
|
||||||
|
@ -482,6 +505,7 @@ impl SchemaBuilder {
|
||||||
|
|
||||||
fn build_text_option(
|
fn build_text_option(
|
||||||
stored: bool,
|
stored: bool,
|
||||||
|
fast: bool,
|
||||||
tokenizer_name: &str,
|
tokenizer_name: &str,
|
||||||
index_option: &str,
|
index_option: &str,
|
||||||
) -> PyResult<schema::TextOptions> {
|
) -> PyResult<schema::TextOptions> {
|
||||||
|
@ -506,6 +530,17 @@ impl SchemaBuilder {
|
||||||
options
|
options
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let options = if fast {
|
||||||
|
let text_tokenizer = if tokenizer_name != NO_TOKENIZER_NAME {
|
||||||
|
Some(tokenizer_name)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
options.set_fast(text_tokenizer)
|
||||||
|
} else {
|
||||||
|
options
|
||||||
|
};
|
||||||
|
|
||||||
Ok(options)
|
Ok(options)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue