Co-authored-by: Justin Greene <justin.greene@intouchsol.com> Co-authored-by: Paul Masurel <paul@quickwit.io> Co-authored-by: Pratyush Mittal <pratyushmittal@gmail.com> Co-authored-by: mukeshsahnis <er.mks89@gmail.com>master
parent
72e221033c
commit
f164b0ef4a
|
@ -9,6 +9,6 @@ tantivy.so
|
||||||
tantivy.dylib
|
tantivy.dylib
|
||||||
tantivy/tantivy.cpython*.so
|
tantivy/tantivy.cpython*.so
|
||||||
tantivy.egg-info/
|
tantivy.egg-info/
|
||||||
|
.venv
|
||||||
# Exclude the mkdocs site directory
|
.envrc
|
||||||
site/
|
site/
|
15
README.md
15
README.md
|
@ -23,6 +23,21 @@ only supports python3.
|
||||||
|
|
||||||
# Development
|
# Development
|
||||||
|
|
||||||
|
To compile the Python module:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# create virtual env
|
||||||
|
python -m venv .venv
|
||||||
|
source .venv/bin/activate
|
||||||
|
|
||||||
|
# install maturin, the build tool for PyO3
|
||||||
|
pip install maturin
|
||||||
|
|
||||||
|
# compile and install python module in venv
|
||||||
|
maturin develop
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
Setting up a development environment can be done in a virtual environment using
|
Setting up a development environment can be done in a virtual environment using
|
||||||
[`nox`](https://nox.thea.codes) or using local packages using the provided `Makefile`.
|
[`nox`](https://nox.thea.codes) or using local packages using the provided `Makefile`.
|
||||||
|
|
||||||
|
|
|
@ -797,7 +797,7 @@ impl Document {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn iter_values_for_field<'a>(
|
pub fn iter_values_for_field<'a>(
|
||||||
&'a self,
|
&'a self,
|
||||||
field: &str,
|
field: &str,
|
||||||
) -> impl Iterator<Item = &'a Value> + 'a {
|
) -> impl Iterator<Item = &'a Value> + 'a {
|
||||||
|
|
|
@ -8,13 +8,16 @@ mod query;
|
||||||
mod schema;
|
mod schema;
|
||||||
mod schemabuilder;
|
mod schemabuilder;
|
||||||
mod searcher;
|
mod searcher;
|
||||||
|
mod snippet;
|
||||||
|
|
||||||
use document::Document;
|
use document::Document;
|
||||||
use facet::Facet;
|
use facet::Facet;
|
||||||
use index::Index;
|
use index::Index;
|
||||||
|
use query::Query;
|
||||||
use schema::Schema;
|
use schema::Schema;
|
||||||
use schemabuilder::SchemaBuilder;
|
use schemabuilder::SchemaBuilder;
|
||||||
use searcher::{DocAddress, SearchResult, Searcher};
|
use searcher::{DocAddress, SearchResult, Searcher};
|
||||||
|
use snippet::{Snippet, SnippetGenerator};
|
||||||
|
|
||||||
/// Python bindings for the search engine library Tantivy.
|
/// Python bindings for the search engine library Tantivy.
|
||||||
///
|
///
|
||||||
|
@ -76,6 +79,9 @@ fn tantivy(_py: Python, m: &PyModule) -> PyResult<()> {
|
||||||
m.add_class::<Index>()?;
|
m.add_class::<Index>()?;
|
||||||
m.add_class::<DocAddress>()?;
|
m.add_class::<DocAddress>()?;
|
||||||
m.add_class::<Facet>()?;
|
m.add_class::<Facet>()?;
|
||||||
|
m.add_class::<Query>()?;
|
||||||
|
m.add_class::<Snippet>()?;
|
||||||
|
m.add_class::<SnippetGenerator>()?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,81 @@
|
||||||
|
use crate::to_pyerr;
|
||||||
|
use pyo3::prelude::*;
|
||||||
|
use tantivy as tv;
|
||||||
|
|
||||||
|
/// Tantivy schema.
|
||||||
|
///
|
||||||
|
/// The schema is very strict. To build the schema the `SchemaBuilder` class is
|
||||||
|
/// provided.
|
||||||
|
#[pyclass]
|
||||||
|
pub(crate) struct Snippet {
|
||||||
|
pub(crate) inner: tv::Snippet,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyclass]
|
||||||
|
pub(crate) struct Range {
|
||||||
|
#[pyo3(get)]
|
||||||
|
start: usize,
|
||||||
|
#[pyo3(get)]
|
||||||
|
end: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pymethods]
|
||||||
|
impl Snippet {
|
||||||
|
pub fn to_html(&self) -> PyResult<String> {
|
||||||
|
Ok(self.inner.to_html())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn highlighted(&self) -> Vec<Range> {
|
||||||
|
let highlighted = self.inner.highlighted();
|
||||||
|
let results = highlighted
|
||||||
|
.iter()
|
||||||
|
.map(|r| Range {
|
||||||
|
start: r.start,
|
||||||
|
end: r.end,
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
results
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pyclass]
|
||||||
|
pub(crate) struct SnippetGenerator {
|
||||||
|
pub(crate) field_name: String,
|
||||||
|
pub(crate) inner: tv::SnippetGenerator,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[pymethods]
|
||||||
|
impl SnippetGenerator {
|
||||||
|
#[staticmethod]
|
||||||
|
pub fn create(
|
||||||
|
searcher: &crate::Searcher,
|
||||||
|
query: &crate::Query,
|
||||||
|
schema: &crate::Schema,
|
||||||
|
field_name: &str,
|
||||||
|
) -> PyResult<SnippetGenerator> {
|
||||||
|
let field = schema
|
||||||
|
.inner
|
||||||
|
.get_field(field_name)
|
||||||
|
.or(Err("field not found"))
|
||||||
|
.map_err(to_pyerr)?;
|
||||||
|
let generator =
|
||||||
|
tv::SnippetGenerator::create(&searcher.inner, query.get(), field)
|
||||||
|
.map_err(to_pyerr)?;
|
||||||
|
|
||||||
|
return Ok(SnippetGenerator {
|
||||||
|
field_name: field_name.to_string(),
|
||||||
|
inner: generator,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn snippet_from_doc(&self, doc: &crate::Document) -> crate::Snippet {
|
||||||
|
let text: String = doc
|
||||||
|
.iter_values_for_field(&self.field_name)
|
||||||
|
.flat_map(tv::schema::Value::as_text)
|
||||||
|
.collect::<Vec<&str>>()
|
||||||
|
.join(" ");
|
||||||
|
|
||||||
|
let result = self.inner.snippet(&text);
|
||||||
|
Snippet { inner: result }
|
||||||
|
}
|
||||||
|
}
|
|
@ -5,8 +5,8 @@ import datetime
|
||||||
import tantivy
|
import tantivy
|
||||||
import pickle
|
import pickle
|
||||||
import pytest
|
import pytest
|
||||||
|
import tantivy
|
||||||
from tantivy import Document, Index, SchemaBuilder
|
from tantivy import Document, Index, SchemaBuilder, SnippetGenerator
|
||||||
|
|
||||||
|
|
||||||
def schema():
|
def schema():
|
||||||
|
@ -784,3 +784,27 @@ def test_doc_address_pickle():
|
||||||
pickled = pickle.loads(pickle.dumps(orig))
|
pickled = pickle.loads(pickle.dumps(orig))
|
||||||
|
|
||||||
assert orig == pickled
|
assert orig == pickled
|
||||||
|
|
||||||
|
|
||||||
|
class TestSnippets(object):
    """Snippet generation checked against the `dir_index` fixture."""

    def test_document_snippet(self, dir_index):
        """A "sea whale" query highlights "Sea" in the matching title."""
        index_dir, _ = dir_index
        doc_schema = schema()
        index = Index(doc_schema, str(index_dir))
        searcher = index.searcher()
        query = index.parse_query("sea whale", ["title", "body"])

        result = searcher.search(query)
        assert len(result.hits) == 1

        snippet_generator = SnippetGenerator.create(
            searcher, query, doc_schema, "title"
        )

        # The score is not needed; only the address is used to load the doc.
        for _score, doc_address in result.hits:
            snippet = snippet_generator.snippet_from_doc(searcher.doc(doc_address))

            highlights = snippet.highlighted()
            assert len(highlights) == 1

            first = highlights[0]
            assert first.start == 20
            assert first.end == 23

            html_snippet = snippet.to_html()
            assert html_snippet == 'The Old Man and the <b>Sea</b>'
|
||||||
|
|
Loading…
Reference in New Issue