Fine-tune handling of GIL for search (#124)

master
Adam Reichold 2023-09-11 17:58:17 +02:00 committed by GitHub
parent 91a422b49e
commit 76512f859b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 62 additions and 59 deletions

View File

@ -740,7 +740,7 @@ impl Document {
{
self.field_values
.entry(field_name)
.or_insert_with(Vec::new)
.or_default()
.push(Value::from(value));
}

View File

@ -308,19 +308,13 @@ impl Index {
Ok(())
}
/// Acquires a Searcher from the searcher pool.
/// Returns a searcher.
///
/// Note that if no searcher is available during the call,
/// this call will block until one is made available.
///
/// Searchers are automatically released back into the pool when
/// they are dropped. If you observe this function blocking forever,
/// you should probably configure the Index to have a larger
/// searcher pool, or you are holding references to previous searchers
/// forever.
fn searcher(&self, py: Python) -> Searcher {
/// This method should be called every single time a search query is performed.
/// The same searcher must be used for a given query, as it ensures the use of a consistent segment set.
fn searcher(&self) -> Searcher {
Searcher {
inner: py.allow_threads(|| self.reader.searcher()),
inner: self.reader.searcher(),
}
}

View File

@ -133,66 +133,75 @@ impl Searcher {
#[pyo3(signature = (query, limit = 10, count = true, order_by_field = None, offset = 0))]
fn search(
&self,
_py: Python,
py: Python,
query: &Query,
limit: usize,
count: bool,
order_by_field: Option<&str>,
offset: usize,
) -> PyResult<SearchResult> {
let mut multicollector = MultiCollector::new();
py.allow_threads(move || {
let mut multicollector = MultiCollector::new();
let count_handle = if count {
Some(multicollector.add_collector(Count))
} else {
None
};
let (mut multifruit, hits) = {
if let Some(order_by) = order_by_field {
let collector = TopDocs::with_limit(limit)
.and_offset(offset)
.order_by_u64_field(order_by);
let top_docs_handle = multicollector.add_collector(collector);
let ret = self.inner.search(query.get(), &multicollector);
match ret {
Ok(mut r) => {
let top_docs = top_docs_handle.extract(&mut r);
let result: Vec<(Fruit, DocAddress)> = top_docs
.iter()
.map(|(f, d)| {
(Fruit::Order(*f), DocAddress::from(d))
})
.collect();
(r, result)
}
Err(e) => return Err(PyValueError::new_err(e.to_string())),
}
let count_handle = if count {
Some(multicollector.add_collector(Count))
} else {
let collector = TopDocs::with_limit(limit).and_offset(offset);
let top_docs_handle = multicollector.add_collector(collector);
let ret = self.inner.search(query.get(), &multicollector);
None
};
match ret {
Ok(mut r) => {
let top_docs = top_docs_handle.extract(&mut r);
let result: Vec<(Fruit, DocAddress)> = top_docs
.iter()
.map(|(f, d)| {
(Fruit::Score(*f), DocAddress::from(d))
})
.collect();
(r, result)
let (mut multifruit, hits) = {
if let Some(order_by) = order_by_field {
let collector = TopDocs::with_limit(limit)
.and_offset(offset)
.order_by_u64_field(order_by);
let top_docs_handle =
multicollector.add_collector(collector);
let ret = self.inner.search(query.get(), &multicollector);
match ret {
Ok(mut r) => {
let top_docs = top_docs_handle.extract(&mut r);
let result: Vec<(Fruit, DocAddress)> = top_docs
.iter()
.map(|(f, d)| {
(Fruit::Order(*f), DocAddress::from(d))
})
.collect();
(r, result)
}
Err(e) => {
return Err(PyValueError::new_err(e.to_string()))
}
}
} else {
let collector =
TopDocs::with_limit(limit).and_offset(offset);
let top_docs_handle =
multicollector.add_collector(collector);
let ret = self.inner.search(query.get(), &multicollector);
match ret {
Ok(mut r) => {
let top_docs = top_docs_handle.extract(&mut r);
let result: Vec<(Fruit, DocAddress)> = top_docs
.iter()
.map(|(f, d)| {
(Fruit::Score(*f), DocAddress::from(d))
})
.collect();
(r, result)
}
Err(e) => {
return Err(PyValueError::new_err(e.to_string()))
}
}
Err(e) => return Err(PyValueError::new_err(e.to_string())),
}
}
};
};
let count = count_handle.map(|h| h.extract(&mut multifruit));
let count = count_handle.map(|h| h.extract(&mut multifruit));
Ok(SearchResult { hits, count })
Ok(SearchResult { hits, count })
})
}
/// Returns the overall number of documents in the index.