use ::tantivy as tv; use ::tantivy::schema::{OwnedValue as Value, Term}; use pyo3::{exceptions, prelude::*, wrap_pymodule}; mod document; mod facet; mod index; mod parser_error; mod query; mod schema; mod schemabuilder; mod searcher; mod snippet; use document::Document; use facet::Facet; use index::Index; use query::{Occur, Query}; use schema::Schema; use schemabuilder::SchemaBuilder; use searcher::{DocAddress, Order, SearchResult, Searcher}; use snippet::{Snippet, SnippetGenerator}; use crate::document::extract_value; /// Python bindings for the search engine library Tantivy. /// /// Tantivy is a full text search engine library written in rust. /// /// It is closer to Apache Lucene than to Elasticsearch and Apache Solr in /// the sense it is not an off-the-shelf search engine server, but rather /// a library that can be used to build such a search engine. /// Tantivy is, in fact, strongly inspired by Lucene's design. /// /// Example: /// >>> import json /// >>> import tantivy /// /// >>> builder = tantivy.SchemaBuilder() /// /// >>> title = builder.add_text_field("title", stored=True) /// >>> body = builder.add_text_field("body") /// /// >>> schema = builder.build() /// >>> index = tantivy.Index(schema) /// >>> doc = tantivy.Document() /// >>> doc.add_text(title, "The Old Man and the Sea") /// >>> doc.add_text(body, ("He was an old man who fished alone in a " /// "skiff in the Gulf Stream and he had gone " /// "eighty-four days now without taking a fish.")) /// /// >>> writer.add_document(doc) /// /// >>> doc = schema.parse_document(json.dumps({ /// "title": ["Frankenstein", "The Modern Prometheus"], /// "body": ("You will rejoice to hear that no disaster has " /// "accompanied the commencement of an enterprise which " /// "you have regarded with such evil forebodings. " /// "I arrived here yesterday, and my first task is to " /// "assure my dear sister of my welfare and increasing " /// "confidence in the success of my undertaking.") /// })) /// /// >>> writer.add_document(doc) /// >>> writer.commit() /// /// >>> reader = index.reader() /// >>> searcher = reader.searcher() /// /// >>> query = index.parse_query("sea whale", [title, body]) /// /// >>> result = searcher.search(query, 10) /// /// >>> assert len(result) == 1 /// #[pymodule] fn tantivy(_py: Python, m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_wrapped(wrap_pymodule!(query_parser_error))?; Ok(()) } /// Submodule containing all the possible errors that can be raised during /// query parsing. /// /// Example: /// >>> import tantivy /// >>> from tantivy import query_parser_error /// /// >>> builder = tantivy.SchemaBuilder() /// /// >>> title = builder.add_text_field("title", stored=True) /// >>> body = builder.add_text_field("body") /// >>> id = builder.add_unsigned_field("id") /// >>> rating = builder.add_float_field("rating") /// /// >>> schema = builder.build() /// >>> index = tantivy.Index(schema) /// /// >>> query, errors = index.parse_query_lenient( /// "bod:'world' AND id:<3.5 AND rating:5.0" /// ) /// /// >>> assert len(errors) == 2 /// >>> assert isinstance(errors[0], query_parser_error.FieldDoesNotExistError) /// >>> assert isinstance(errors[1], query_parser_error.ExpectedIntError) #[pymodule] fn query_parser_error(_py: Python, m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; Ok(()) } pub(crate) fn to_pyerr(err: E) -> PyErr { exceptions::PyValueError::new_err(err.to_string()) } pub(crate) fn get_field( schema: &tv::schema::Schema, field_name: &str, ) -> PyResult { let field = schema.get_field(field_name).map_err(|_err| { exceptions::PyValueError::new_err(format!( "Field `{field_name}` is not defined in the schema." )) })?; Ok(field) } pub(crate) fn make_term( schema: &tv::schema::Schema, field_name: &str, field_value: &PyAny, ) -> PyResult { let field = get_field(schema, field_name)?; let value = extract_value(field_value)?; let term = match value { Value::Str(text) => Term::from_field_text(field, &text), Value::U64(num) => Term::from_field_u64(field, num), Value::I64(num) => Term::from_field_i64(field, num), Value::F64(num) => Term::from_field_f64(field, num), Value::Date(d) => Term::from_field_date(field, d), Value::Facet(facet) => Term::from_facet(field, &facet), Value::Bool(b) => Term::from_field_bool(field, b), Value::IpAddr(i) => Term::from_field_ip_addr(field, i), _ => { return Err(exceptions::PyValueError::new_err(format!( "Can't create a term for Field `{field_name}` with value `{field_value}`." ))) } }; Ok(term) }