Co-authored-by: Justin Greene <justin.greene@intouchsol.com> Co-authored-by: Paul Masurel <paul@quickwit.io> Co-authored-by: Pratyush Mittal <pratyushmittal@gmail.com> Co-authored-by: mukeshsahnis <er.mks89@gmail.com>master
parent
72e221033c
commit
f164b0ef4a
|
@ -9,6 +9,6 @@ tantivy.so
|
|||
tantivy.dylib
|
||||
tantivy/tantivy.cpython*.so
|
||||
tantivy.egg-info/
|
||||
|
||||
# Exclude the mkdocs site directory
|
||||
.venv
|
||||
.envrc
|
||||
site/
|
15
README.md
15
README.md
|
@ -23,6 +23,21 @@ only supports python3.
|
|||
|
||||
# Development
|
||||
|
||||
To compile the Python module:
|
||||
|
||||
```bash
|
||||
# create virtual env
|
||||
python -m venv .venv
|
||||
source .venv/bin/activate
|
||||
|
||||
# install maturin, the build tool for PyO3
|
||||
pip install maturin
|
||||
|
||||
# compile and install python module in venv
|
||||
maturin develop
|
||||
```
|
||||
|
||||
|
||||
Setting up a development environment can be done in a virtual environment using
|
||||
[`nox`](https://nox.thea.codes) or with local packages via the provided `Makefile`.
|
||||
|
||||
|
|
|
@ -797,7 +797,7 @@ impl Document {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn iter_values_for_field<'a>(
|
||||
pub fn iter_values_for_field<'a>(
|
||||
&'a self,
|
||||
field: &str,
|
||||
) -> impl Iterator<Item = &'a Value> + 'a {
|
||||
|
|
|
@ -8,13 +8,16 @@ mod query;
|
|||
mod schema;
|
||||
mod schemabuilder;
|
||||
mod searcher;
|
||||
mod snippet;
|
||||
|
||||
use document::Document;
|
||||
use facet::Facet;
|
||||
use index::Index;
|
||||
use query::Query;
|
||||
use schema::Schema;
|
||||
use schemabuilder::SchemaBuilder;
|
||||
use searcher::{DocAddress, SearchResult, Searcher};
|
||||
use snippet::{Snippet, SnippetGenerator};
|
||||
|
||||
/// Python bindings for the search engine library Tantivy.
|
||||
///
|
||||
|
@ -76,6 +79,9 @@ fn tantivy(_py: Python, m: &PyModule) -> PyResult<()> {
|
|||
m.add_class::<Index>()?;
|
||||
m.add_class::<DocAddress>()?;
|
||||
m.add_class::<Facet>()?;
|
||||
m.add_class::<Query>()?;
|
||||
m.add_class::<Snippet>()?;
|
||||
m.add_class::<SnippetGenerator>()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
use crate::to_pyerr;
|
||||
use pyo3::prelude::*;
|
||||
use tantivy as tv;
|
||||
|
||||
/// Tantivy schema.
|
||||
///
|
||||
/// The schema is very strict. To build the schema the `SchemaBuilder` class is
|
||||
/// provided.
|
||||
#[pyclass]
|
||||
pub(crate) struct Snippet {
|
||||
pub(crate) inner: tv::Snippet,
|
||||
}
|
||||
|
||||
#[pyclass]
|
||||
pub(crate) struct Range {
|
||||
#[pyo3(get)]
|
||||
start: usize,
|
||||
#[pyo3(get)]
|
||||
end: usize,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl Snippet {
|
||||
pub fn to_html(&self) -> PyResult<String> {
|
||||
Ok(self.inner.to_html())
|
||||
}
|
||||
|
||||
pub fn highlighted(&self) -> Vec<Range> {
|
||||
let highlighted = self.inner.highlighted();
|
||||
let results = highlighted
|
||||
.iter()
|
||||
.map(|r| Range {
|
||||
start: r.start,
|
||||
end: r.end,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
results
|
||||
}
|
||||
}
|
||||
|
||||
#[pyclass]
|
||||
pub(crate) struct SnippetGenerator {
|
||||
pub(crate) field_name: String,
|
||||
pub(crate) inner: tv::SnippetGenerator,
|
||||
}
|
||||
|
||||
#[pymethods]
|
||||
impl SnippetGenerator {
|
||||
#[staticmethod]
|
||||
pub fn create(
|
||||
searcher: &crate::Searcher,
|
||||
query: &crate::Query,
|
||||
schema: &crate::Schema,
|
||||
field_name: &str,
|
||||
) -> PyResult<SnippetGenerator> {
|
||||
let field = schema
|
||||
.inner
|
||||
.get_field(field_name)
|
||||
.or(Err("field not found"))
|
||||
.map_err(to_pyerr)?;
|
||||
let generator =
|
||||
tv::SnippetGenerator::create(&searcher.inner, query.get(), field)
|
||||
.map_err(to_pyerr)?;
|
||||
|
||||
return Ok(SnippetGenerator {
|
||||
field_name: field_name.to_string(),
|
||||
inner: generator,
|
||||
});
|
||||
}
|
||||
|
||||
pub fn snippet_from_doc(&self, doc: &crate::Document) -> crate::Snippet {
|
||||
let text: String = doc
|
||||
.iter_values_for_field(&self.field_name)
|
||||
.flat_map(tv::schema::Value::as_text)
|
||||
.collect::<Vec<&str>>()
|
||||
.join(" ");
|
||||
|
||||
let result = self.inner.snippet(&text);
|
||||
Snippet { inner: result }
|
||||
}
|
||||
}
|
|
@ -5,8 +5,8 @@ import datetime
|
|||
import tantivy
|
||||
import pickle
|
||||
import pytest
|
||||
|
||||
from tantivy import Document, Index, SchemaBuilder
|
||||
import tantivy
|
||||
from tantivy import Document, Index, SchemaBuilder, SnippetGenerator
|
||||
|
||||
|
||||
def schema():
|
||||
|
@ -784,3 +784,27 @@ def test_doc_address_pickle():
|
|||
pickled = pickle.loads(pickle.dumps(orig))
|
||||
|
||||
assert orig == pickled
|
||||
|
||||
|
||||
class TestSnippets:
    """Checks snippet generation and highlighting through the Python bindings."""

    def test_document_snippet(self, dir_index):
        # Only the first element of the fixture (the index directory) is used here.
        directory, _ = dir_index
        snippet_schema = schema()
        idx = Index(snippet_schema, str(directory))
        parsed_query = idx.parse_query("sea whale", ["title", "body"])
        idx_searcher = idx.searcher()
        search_result = idx_searcher.search(parsed_query)
        assert len(search_result.hits) == 1

        generator = SnippetGenerator.create(
            idx_searcher, parsed_query, snippet_schema, "title"
        )

        # The score of each hit is not needed, only its address.
        for _score, address in search_result.hits:
            document = idx_searcher.doc(address)
            snippet = generator.snippet_from_doc(document)

            ranges = snippet.highlighted()
            assert len(ranges) == 1
            head = ranges[0]
            assert head.start == 20
            assert head.end == 23

            rendered = snippet.to_html()
            assert rendered == 'The Old Man and the <b>Sea</b>'
|
||||
|
|
Loading…
Reference in New Issue