Fine-tune handling of GIL for search (#124)

master
Adam Reichold 2023-09-11 17:58:17 +02:00 committed by GitHub
parent 91a422b49e
commit 76512f859b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 62 additions and 59 deletions

View File

@ -740,7 +740,7 @@ impl Document {
{ {
self.field_values self.field_values
.entry(field_name) .entry(field_name)
.or_insert_with(Vec::new) .or_default()
.push(Value::from(value)); .push(Value::from(value));
} }

View File

@ -308,19 +308,13 @@ impl Index {
Ok(()) Ok(())
} }
/// Acquires a Searcher from the searcher pool. /// Returns a searcher
/// ///
/// If no searcher is available during the call, note that /// This method should be called every single time a search query is performed.
/// this call will block until one is made available. /// The same searcher must be used for a given query, as it ensures the use of a consistent segment set.
/// fn searcher(&self) -> Searcher {
/// Searchers are automatically released back into the pool when
/// they are dropped. If you observe this function blocking forever,
/// you should probably configure the Index to have a larger
/// searcher pool, or you may be holding references to previous
/// searchers forever.
fn searcher(&self, py: Python) -> Searcher {
Searcher { Searcher {
inner: py.allow_threads(|| self.reader.searcher()), inner: self.reader.searcher(),
} }
} }

View File

@ -133,66 +133,75 @@ impl Searcher {
#[pyo3(signature = (query, limit = 10, count = true, order_by_field = None, offset = 0))] #[pyo3(signature = (query, limit = 10, count = true, order_by_field = None, offset = 0))]
fn search( fn search(
&self, &self,
_py: Python, py: Python,
query: &Query, query: &Query,
limit: usize, limit: usize,
count: bool, count: bool,
order_by_field: Option<&str>, order_by_field: Option<&str>,
offset: usize, offset: usize,
) -> PyResult<SearchResult> { ) -> PyResult<SearchResult> {
let mut multicollector = MultiCollector::new(); py.allow_threads(move || {
let mut multicollector = MultiCollector::new();
let count_handle = if count { let count_handle = if count {
Some(multicollector.add_collector(Count)) Some(multicollector.add_collector(Count))
} else {
None
};
let (mut multifruit, hits) = {
if let Some(order_by) = order_by_field {
let collector = TopDocs::with_limit(limit)
.and_offset(offset)
.order_by_u64_field(order_by);
let top_docs_handle = multicollector.add_collector(collector);
let ret = self.inner.search(query.get(), &multicollector);
match ret {
Ok(mut r) => {
let top_docs = top_docs_handle.extract(&mut r);
let result: Vec<(Fruit, DocAddress)> = top_docs
.iter()
.map(|(f, d)| {
(Fruit::Order(*f), DocAddress::from(d))
})
.collect();
(r, result)
}
Err(e) => return Err(PyValueError::new_err(e.to_string())),
}
} else { } else {
let collector = TopDocs::with_limit(limit).and_offset(offset); None
let top_docs_handle = multicollector.add_collector(collector); };
let ret = self.inner.search(query.get(), &multicollector);
match ret { let (mut multifruit, hits) = {
Ok(mut r) => { if let Some(order_by) = order_by_field {
let top_docs = top_docs_handle.extract(&mut r); let collector = TopDocs::with_limit(limit)
let result: Vec<(Fruit, DocAddress)> = top_docs .and_offset(offset)
.iter() .order_by_u64_field(order_by);
.map(|(f, d)| { let top_docs_handle =
(Fruit::Score(*f), DocAddress::from(d)) multicollector.add_collector(collector);
}) let ret = self.inner.search(query.get(), &multicollector);
.collect();
(r, result) match ret {
Ok(mut r) => {
let top_docs = top_docs_handle.extract(&mut r);
let result: Vec<(Fruit, DocAddress)> = top_docs
.iter()
.map(|(f, d)| {
(Fruit::Order(*f), DocAddress::from(d))
})
.collect();
(r, result)
}
Err(e) => {
return Err(PyValueError::new_err(e.to_string()))
}
}
} else {
let collector =
TopDocs::with_limit(limit).and_offset(offset);
let top_docs_handle =
multicollector.add_collector(collector);
let ret = self.inner.search(query.get(), &multicollector);
match ret {
Ok(mut r) => {
let top_docs = top_docs_handle.extract(&mut r);
let result: Vec<(Fruit, DocAddress)> = top_docs
.iter()
.map(|(f, d)| {
(Fruit::Score(*f), DocAddress::from(d))
})
.collect();
(r, result)
}
Err(e) => {
return Err(PyValueError::new_err(e.to_string()))
}
} }
Err(e) => return Err(PyValueError::new_err(e.to_string())),
} }
} };
};
let count = count_handle.map(|h| h.extract(&mut multifruit)); let count = count_handle.map(|h| h.extract(&mut multifruit));
Ok(SearchResult { hits, count }) Ok(SearchResult { hits, count })
})
} }
/// Returns the overall number of documents in the index. /// Returns the overall number of documents in the index.