Adding delete_document function to index (#12)
* Adding Term, Field and delete_term API mapping
* Fixing test
* Fixing code doc
* Removing Term and Field and nesting code on delete_term
* Fixing lint
* Delete_documents and documentation
* Fixing style and testing exceptions
* Fixing missing return
* Fixing fmt
master
parent
51f080d001
commit
75f0670dd6
|
@ -109,7 +109,7 @@ where
|
||||||
.push(Value::from(value));
|
.push(Value::from(value));
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extract_value(any: &PyAny) -> PyResult<Value> {
|
pub(crate) fn extract_value(any: &PyAny) -> PyResult<Value> {
|
||||||
if let Ok(s) = any.extract::<String>() {
|
if let Ok(s) = any.extract::<String>() {
|
||||||
return Ok(Value::Str(s));
|
return Ok(Value::Str(s));
|
||||||
}
|
}
|
||||||
|
|
43
src/index.rs
43
src/index.rs
|
@ -2,15 +2,16 @@
|
||||||
|
|
||||||
use pyo3::exceptions;
|
use pyo3::exceptions;
|
||||||
use pyo3::prelude::*;
|
use pyo3::prelude::*;
|
||||||
|
use pyo3::types::PyAny;
|
||||||
|
|
||||||
use crate::document::Document;
|
use crate::document::{extract_value, Document};
|
||||||
use crate::query::Query;
|
use crate::query::Query;
|
||||||
use crate::schema::Schema;
|
use crate::schema::Schema;
|
||||||
use crate::searcher::Searcher;
|
use crate::searcher::Searcher;
|
||||||
use crate::to_pyerr;
|
use crate::to_pyerr;
|
||||||
use tantivy as tv;
|
use tantivy as tv;
|
||||||
use tantivy::directory::MmapDirectory;
|
use tantivy::directory::MmapDirectory;
|
||||||
use tantivy::schema::{Field, NamedFieldDocument};
|
use tantivy::schema::{Field, NamedFieldDocument, Term, Value};
|
||||||
|
|
||||||
const RELOAD_POLICY: &str = "commit";
|
const RELOAD_POLICY: &str = "commit";
|
||||||
|
|
||||||
|
@ -94,6 +95,44 @@ impl IndexWriter {
|
||||||
fn commit_opstamp(&self) -> u64 {
|
fn commit_opstamp(&self) -> u64 {
|
||||||
self.inner_index_writer.commit_opstamp()
|
self.inner_index_writer.commit_opstamp()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Delete all documents containing a given term.
|
||||||
|
///
|
||||||
|
/// Args:
|
||||||
|
/// field_name (str): The field name for which we want to filter deleted docs.
|
||||||
|
/// field_value (PyAny): Python object with the value we want to filter.
|
||||||
|
///
|
||||||
|
/// Raises a ValueError exception if field_name is not defined in the schema.
|
||||||
|
/// Raises an Exception if the field_value is not supported.
|
||||||
|
fn delete_documents(
|
||||||
|
&mut self,
|
||||||
|
field_name: &str,
|
||||||
|
field_value: &PyAny,
|
||||||
|
) -> PyResult<u64> {
|
||||||
|
let field = self.schema.get_field(field_name).ok_or_else(|| {
|
||||||
|
exceptions::ValueError::py_err(format!(
|
||||||
|
"Field `{}` is not defined in the schema.",
|
||||||
|
field_name
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let value = extract_value(field_value)?;
|
||||||
|
let term = match value {
|
||||||
|
Value::Str(text) => Term::from_field_text(field, &text),
|
||||||
|
Value::U64(num) => Term::from_field_u64(field, num),
|
||||||
|
Value::I64(num) => Term::from_field_i64(field, num),
|
||||||
|
Value::F64(num) => Term::from_field_f64(field, num),
|
||||||
|
Value::Date(d) => Term::from_field_date(field, &d),
|
||||||
|
Value::Facet(facet) => Term::from_facet(field, &facet),
|
||||||
|
Value::Bytes(_) => {
|
||||||
|
return Err(exceptions::ValueError::py_err(format!(
|
||||||
|
"Field `{}` is bytes type not deletable.",
|
||||||
|
field_name
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Ok(self.inner_index_writer.delete_term(term.clone()))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create a new index object.
|
/// Create a new index object.
|
||||||
|
|
|
@ -5,10 +5,7 @@ from tantivy import Document, Index, SchemaBuilder, Schema
|
||||||
|
|
||||||
|
|
||||||
def schema():
|
def schema():
|
||||||
return SchemaBuilder() \
|
return SchemaBuilder().add_text_field("title", stored=True).add_text_field("body").build()
|
||||||
.add_text_field("title", stored=True) \
|
|
||||||
.add_text_field("body") \
|
|
||||||
.build()
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="class")
|
@pytest.fixture(scope="class")
|
||||||
|
@ -24,15 +21,22 @@ def ram_index():
|
||||||
# create a document instance
|
# create a document instance
|
||||||
# add field-value pairs
|
# add field-value pairs
|
||||||
doc.add_text("title", "The Old Man and the Sea")
|
doc.add_text("title", "The Old Man and the Sea")
|
||||||
doc.add_text("body", ("He was an old man who fished alone in a skiff in"
|
doc.add_text(
|
||||||
|
"body",
|
||||||
|
(
|
||||||
|
"He was an old man who fished alone in a skiff in"
|
||||||
"the Gulf Stream and he had gone eighty-four days "
|
"the Gulf Stream and he had gone eighty-four days "
|
||||||
"now without taking a fish."))
|
"now without taking a fish."
|
||||||
|
),
|
||||||
|
)
|
||||||
writer.add_document(doc)
|
writer.add_document(doc)
|
||||||
# 2 use the built-in json support
|
# 2 use the built-in json support
|
||||||
# keys need to coincide with field names
|
# keys need to coincide with field names
|
||||||
doc = Document.from_dict({
|
doc = Document.from_dict(
|
||||||
|
{
|
||||||
"title": "Of Mice and Men",
|
"title": "Of Mice and Men",
|
||||||
"body": ("A few miles south of Soledad, the Salinas River drops "
|
"body": (
|
||||||
|
"A few miles south of Soledad, the Salinas River drops "
|
||||||
"in close to the hillside bank and runs deep and "
|
"in close to the hillside bank and runs deep and "
|
||||||
"green. The water is warm too, for it has slipped "
|
"green. The water is warm too, for it has slipped "
|
||||||
"twinkling over the yellow sands in the sunlight "
|
"twinkling over the yellow sands in the sunlight "
|
||||||
|
@ -43,20 +47,23 @@ def ram_index():
|
||||||
"green with every spring, carrying in their lower leaf "
|
"green with every spring, carrying in their lower leaf "
|
||||||
"junctures the debris of the winter’s flooding; and "
|
"junctures the debris of the winter’s flooding; and "
|
||||||
"sycamores with mottled, white, recumbent limbs and "
|
"sycamores with mottled, white, recumbent limbs and "
|
||||||
"branches that arch over the pool")
|
"branches that arch over the pool"
|
||||||
})
|
),
|
||||||
|
}
|
||||||
|
)
|
||||||
writer.add_document(doc)
|
writer.add_document(doc)
|
||||||
writer.add_json("""{
|
writer.add_json(
|
||||||
|
"""{
|
||||||
"title": ["Frankenstein", "The Modern Prometheus"],
|
"title": ["Frankenstein", "The Modern Prometheus"],
|
||||||
"body": "You will rejoice to hear that no disaster has accompanied the commencement of an enterprise which you have regarded with such evil forebodings. I arrived here yesterday, and my first task is to assure my dear sister of my welfare and increasing confidence in the success of my undertaking."
|
"body": "You will rejoice to hear that no disaster has accompanied the commencement of an enterprise which you have regarded with such evil forebodings. I arrived here yesterday, and my first task is to assure my dear sister of my welfare and increasing confidence in the success of my undertaking."
|
||||||
}""")
|
}"""
|
||||||
|
)
|
||||||
writer.commit()
|
writer.commit()
|
||||||
index.reload()
|
index.reload()
|
||||||
return index
|
return index
|
||||||
|
|
||||||
|
|
||||||
class TestClass(object):
|
class TestClass(object):
|
||||||
|
|
||||||
def test_simple_search(self, ram_index):
|
def test_simple_search(self, ram_index):
|
||||||
index = ram_index
|
index = ram_index
|
||||||
query = index.parse_query("sea whale", ["title", "body"])
|
query = index.parse_query("sea whale", ["title", "body"])
|
||||||
|
@ -91,10 +98,12 @@ class TestClass(object):
|
||||||
|
|
||||||
def test_and_query_parser_default_fields_undefined(self, ram_index):
|
def test_and_query_parser_default_fields_undefined(self, ram_index):
|
||||||
query = ram_index.parse_query("winter")
|
query = ram_index.parse_query("winter")
|
||||||
assert repr(query) == "Query(BooleanQuery { subqueries: [" \
|
assert (
|
||||||
"(Should, TermQuery(Term(field=0,bytes=[119, 105, 110, 116, 101, 114]))), " \
|
repr(query) == "Query(BooleanQuery { subqueries: ["
|
||||||
"(Should, TermQuery(Term(field=1,bytes=[119, 105, 110, 116, 101, 114])))] " \
|
"(Should, TermQuery(Term(field=0,bytes=[119, 105, 110, 116, 101, 114]))), "
|
||||||
|
"(Should, TermQuery(Term(field=1,bytes=[119, 105, 110, 116, 101, 114])))] "
|
||||||
"})"
|
"})"
|
||||||
|
)
|
||||||
|
|
||||||
def test_query_errors(self, ram_index):
|
def test_query_errors(self, ram_index):
|
||||||
index = ram_index
|
index = ram_index
|
||||||
|
@ -103,11 +112,33 @@ class TestClass(object):
|
||||||
index.parse_query("bod:men", ["title", "body"])
|
index.parse_query("bod:men", ["title", "body"])
|
||||||
|
|
||||||
|
|
||||||
|
class TestUpdateClass(object):
|
||||||
|
def test_delete_update(self, ram_index):
|
||||||
|
query = ram_index.parse_query("Frankenstein", ["title"])
|
||||||
|
top_docs = tantivy.TopDocs(10)
|
||||||
|
result = ram_index.searcher().search(query, top_docs)
|
||||||
|
assert len(result) == 1
|
||||||
|
|
||||||
|
writer = ram_index.writer()
|
||||||
|
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
writer.delete_documents("fake_field", "frankenstein")
|
||||||
|
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
writer.delete_documents("title", b"frankenstein")
|
||||||
|
|
||||||
|
writer.delete_documents("title", "frankenstein")
|
||||||
|
writer.commit()
|
||||||
|
ram_index.reload()
|
||||||
|
|
||||||
|
result = ram_index.searcher().search(query, top_docs)
|
||||||
|
assert len(result) == 0
|
||||||
|
|
||||||
|
|
||||||
PATH_TO_INDEX = "tests/test_index/"
|
PATH_TO_INDEX = "tests/test_index/"
|
||||||
|
|
||||||
|
|
||||||
class TestFromDiskClass(object):
|
class TestFromDiskClass(object):
|
||||||
|
|
||||||
def test_exists(self):
|
def test_exists(self):
|
||||||
# prefer to keep it separate in case anyone deletes this
|
# prefer to keep it separate in case anyone deletes this
|
||||||
# runs from the root directory
|
# runs from the root directory
|
||||||
|
@ -136,6 +167,7 @@ class TestFromDiskClass(object):
|
||||||
writer.add_document(Document(title="mytitle2", body="mybody2"))
|
writer.add_document(Document(title="mytitle2", body="mybody2"))
|
||||||
writer.commit()
|
writer.commit()
|
||||||
import time
|
import time
|
||||||
|
|
||||||
for i in range(50):
|
for i in range(50):
|
||||||
# The index should be automatically reloaded.
|
# The index should be automatically reloaded.
|
||||||
# Wait for at most 5s for it to happen.
|
# Wait for at most 5s for it to happen.
|
||||||
|
@ -151,7 +183,6 @@ class TestSearcher(object):
|
||||||
|
|
||||||
|
|
||||||
class TestDocument(object):
|
class TestDocument(object):
|
||||||
|
|
||||||
def test_document(self):
|
def test_document(self):
|
||||||
doc = tantivy.Document(name="Bill", reference=[1, 2])
|
doc = tantivy.Document(name="Bill", reference=[1, 2])
|
||||||
assert doc["reference"] == [1, 2]
|
assert doc["reference"] == [1, 2]
|
||||||
|
@ -162,7 +193,8 @@ class TestDocument(object):
|
||||||
|
|
||||||
def test_document_with_date(self):
|
def test_document_with_date(self):
|
||||||
import datetime
|
import datetime
|
||||||
date = datetime.datetime(2019, 8, 12, 13, 0, 0, )
|
|
||||||
|
date = datetime.datetime(2019, 8, 12, 13, 0, 0)
|
||||||
doc = tantivy.Document(name="Bill", date=date)
|
doc = tantivy.Document(name="Bill", date=date)
|
||||||
assert doc["date"][0] == date
|
assert doc["date"][0] == date
|
||||||
|
|
||||||
|
@ -174,15 +206,15 @@ class TestDocument(object):
|
||||||
doc = tantivy.Document()
|
doc = tantivy.Document()
|
||||||
facet = tantivy.Facet.from_string("/europe/france")
|
facet = tantivy.Facet.from_string("/europe/france")
|
||||||
doc.add_facet("facet", facet)
|
doc.add_facet("facet", facet)
|
||||||
assert doc["facet"][0].to_path() == ['europe', 'france']
|
assert doc["facet"][0].to_path() == ["europe", "france"]
|
||||||
doc = tantivy.Document()
|
doc = tantivy.Document()
|
||||||
facet = tantivy.Facet.from_string("/asia\\/oceania/fiji")
|
facet = tantivy.Facet.from_string("/asia\\/oceania/fiji")
|
||||||
doc.add_facet("facet", facet)
|
doc.add_facet("facet", facet)
|
||||||
assert doc["facet"][0].to_path() == ['asia/oceania', 'fiji']
|
assert doc["facet"][0].to_path() == ["asia/oceania", "fiji"]
|
||||||
assert doc["facet"][0].to_path_str() == "/asia\\/oceania/fiji"
|
assert doc["facet"][0].to_path_str() == "/asia\\/oceania/fiji"
|
||||||
assert repr(doc["facet"][0]) == "Facet(/asia\\/oceania/fiji)"
|
assert repr(doc["facet"][0]) == "Facet(/asia\\/oceania/fiji)"
|
||||||
doc = tantivy.Document(facet=facet)
|
doc = tantivy.Document(facet=facet)
|
||||||
assert doc["facet"][0].to_path() == ['asia/oceania', 'fiji']
|
assert doc["facet"][0].to_path() == ["asia/oceania", "fiji"]
|
||||||
|
|
||||||
def test_document_error(self):
|
def test_document_error(self):
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
|
|
Loading…
Reference in New Issue