Skip to content

Commit 0250667

Browse files
Auto merge of #149149 - yotamofek:wip/rustdoc/search_index/misc-cleanups, r=<try>
[WIP] [rustdoc] misc search index cleanups
2 parents e22dab3 + 27d4f47 commit 0250667

File tree

2 files changed

+91
-87
lines changed

2 files changed

+91
-87
lines changed

src/librustdoc/html/render/search_index.rs

Lines changed: 85 additions & 77 deletions
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,9 @@ mod serde;
33

44
use std::collections::BTreeSet;
55
use std::collections::hash_map::Entry;
6-
use std::io;
76
use std::path::Path;
87
use std::string::FromUtf8Error;
8+
use std::{io, iter};
99

1010
use ::serde::de::{self, Deserializer, Error as _};
1111
use ::serde::ser::{SerializeSeq, Serializer};
@@ -256,10 +256,14 @@ impl SerializedSearchIndex {
256256
/// The returned ID can be used to attach more data to the search result.
257257
fn add_entry(&mut self, name: Symbol, entry_data: EntryData, desc: String) -> usize {
258258
let fqp = if let Some(module_path_index) = entry_data.module_path {
259-
let mut fqp = self.path_data[module_path_index].as_ref().unwrap().module_path.clone();
260-
fqp.push(Symbol::intern(&self.names[module_path_index]));
261-
fqp.push(name);
262-
fqp
259+
self.path_data[module_path_index]
260+
.as_ref()
261+
.unwrap()
262+
.module_path
263+
.iter()
264+
.copied()
265+
.chain([Symbol::intern(&self.names[module_path_index]), name])
266+
.collect()
263267
} else {
264268
vec![name]
265269
};
@@ -306,13 +310,13 @@ impl SerializedSearchIndex {
306310

307311
pub(crate) fn union(mut self, other: &SerializedSearchIndex) -> SerializedSearchIndex {
308312
let other_entryid_offset = self.names.len();
309-
let mut map_other_pathid_to_self_pathid: Vec<usize> = Vec::new();
313+
let mut map_other_pathid_to_self_pathid = Vec::with_capacity(other.path_data.len());
310314
let mut skips = FxHashSet::default();
311315
for (other_pathid, other_path_data) in other.path_data.iter().enumerate() {
312316
if let Some(other_path_data) = other_path_data {
313-
let mut fqp = other_path_data.module_path.clone();
314317
let name = Symbol::intern(&other.names[other_pathid]);
315-
fqp.push(name);
318+
let fqp =
319+
other_path_data.module_path.iter().copied().chain(iter::once(name)).collect();
316320
let self_pathid = other_entryid_offset + other_pathid;
317321
let self_pathid = match self.crate_paths_index.entry((other_path_data.ty, fqp)) {
318322
Entry::Vacant(slot) => {
@@ -458,7 +462,7 @@ impl SerializedSearchIndex {
458462
other.descs[other_entryid].clone(),
459463
other.function_data[other_entryid].clone().map(|mut func| {
460464
fn map_fn_sig_item(
461-
map_other_pathid_to_self_pathid: &mut Vec<usize>,
465+
map_other_pathid_to_self_pathid: &Vec<usize>,
462466
ty: &mut RenderType,
463467
) {
464468
match ty.id {
@@ -501,14 +505,14 @@ impl SerializedSearchIndex {
501505
}
502506
}
503507
for input in &mut func.inputs {
504-
map_fn_sig_item(&mut map_other_pathid_to_self_pathid, input);
508+
map_fn_sig_item(&map_other_pathid_to_self_pathid, input);
505509
}
506510
for output in &mut func.output {
507-
map_fn_sig_item(&mut map_other_pathid_to_self_pathid, output);
511+
map_fn_sig_item(&map_other_pathid_to_self_pathid, output);
508512
}
509513
for clause in &mut func.where_clause {
510514
for entry in clause {
511-
map_fn_sig_item(&mut map_other_pathid_to_self_pathid, entry);
515+
map_fn_sig_item(&map_other_pathid_to_self_pathid, entry);
512516
}
513517
}
514518
func
@@ -555,19 +559,19 @@ impl SerializedSearchIndex {
555559
);
556560
}
557561
}
558-
for (i, other_generic_inverted_index) in other.generic_inverted_index.iter().enumerate() {
559-
for (size, other_list) in other_generic_inverted_index.iter().enumerate() {
560-
let self_generic_inverted_index = match self.generic_inverted_index.get_mut(i) {
561-
Some(self_generic_inverted_index) => self_generic_inverted_index,
562-
None => {
563-
self.generic_inverted_index.push(Vec::new());
564-
self.generic_inverted_index.last_mut().unwrap()
565-
}
566-
};
567-
while self_generic_inverted_index.len() <= size {
568-
self_generic_inverted_index.push(Vec::new());
569-
}
570-
self_generic_inverted_index[size].extend(
562+
if other.generic_inverted_index.len() > self.generic_inverted_index.len() {
563+
self.generic_inverted_index.resize(other.generic_inverted_index.len(), Vec::new());
564+
}
565+
for (other_generic_inverted_index, self_generic_inverted_index) in
566+
iter::zip(&other.generic_inverted_index, &mut self.generic_inverted_index)
567+
{
568+
if other_generic_inverted_index.len() > self_generic_inverted_index.len() {
569+
self_generic_inverted_index.resize(other_generic_inverted_index.len(), Vec::new());
570+
}
571+
for (other_list, self_list) in
572+
iter::zip(other_generic_inverted_index, self_generic_inverted_index)
573+
{
574+
self_list.extend(
571575
other_list
572576
.iter()
573577
.copied()
@@ -1819,20 +1823,23 @@ pub(crate) fn build_index(
18191823
tcx,
18201824
);
18211825
}
1822-
let mut used_in_constraints = Vec::new();
1823-
for constraint in &mut search_type.where_clause {
1824-
let mut used_in_constraint = BTreeSet::new();
1825-
for trait_ in &mut constraint[..] {
1826-
convert_render_type(
1827-
trait_,
1828-
cache,
1829-
&mut serialized_index,
1830-
&mut used_in_constraint,
1831-
tcx,
1832-
);
1833-
}
1834-
used_in_constraints.push(used_in_constraint);
1835-
}
1826+
let used_in_constraints = search_type
1827+
.where_clause
1828+
.iter_mut()
1829+
.map(|constraint| {
1830+
let mut used_in_constraint = BTreeSet::new();
1831+
for trait_ in constraint {
1832+
convert_render_type(
1833+
trait_,
1834+
cache,
1835+
&mut serialized_index,
1836+
&mut used_in_constraint,
1837+
tcx,
1838+
);
1839+
}
1840+
used_in_constraint
1841+
})
1842+
.collect::<Vec<_>>();
18361843
loop {
18371844
let mut inserted_any = false;
18381845
for (i, used_in_constraint) in used_in_constraints.iter().enumerate() {
@@ -1864,48 +1871,49 @@ pub(crate) fn build_index(
18641871
// unoccupied size.
18651872
if item.ty.is_fn_like() { 0 } else { 16 };
18661873
serialized_index.function_data[new_entry_id] = Some(search_type.clone());
1867-
for index in used_in_function_inputs {
1868-
let postings = if index >= 0 {
1869-
assert!(serialized_index.path_data[index as usize].is_some());
1870-
&mut serialized_index.type_data[index as usize]
1871-
.as_mut()
1872-
.unwrap()
1873-
.inverted_function_inputs_index
1874-
} else {
1875-
let generic_id = usize::try_from(-index).unwrap() - 1;
1876-
for _ in serialized_index.generic_inverted_index.len()..=generic_id {
1877-
serialized_index.generic_inverted_index.push(Vec::new());
1874+
fn process_used_in_function(
1875+
serialized_index: &mut SerializedSearchIndex,
1876+
search_type_size: usize,
1877+
new_entry_id: usize,
1878+
get_index: impl Fn(&mut TypeData) -> &mut Vec<Vec<u32>>,
1879+
used_in_function: BTreeSet<isize>,
1880+
) {
1881+
for index in used_in_function {
1882+
let postings = if index >= 0 {
1883+
assert!(serialized_index.path_data[index as usize].is_some());
1884+
get_index(serialized_index.type_data[index as usize].as_mut().unwrap())
1885+
} else {
1886+
let generic_id = usize::try_from(-index).unwrap() - 1;
1887+
if generic_id >= serialized_index.generic_inverted_index.len() {
1888+
serialized_index
1889+
.generic_inverted_index
1890+
.resize(generic_id + 1, Vec::new());
1891+
}
1892+
&mut serialized_index.generic_inverted_index[generic_id]
1893+
};
1894+
if search_type_size >= postings.len() {
1895+
postings.resize(search_type_size + 1, Vec::new());
18781896
}
1879-
&mut serialized_index.generic_inverted_index[generic_id]
1880-
};
1881-
while postings.len() <= search_type_size {
1882-
postings.push(Vec::new());
1883-
}
1884-
if postings[search_type_size].last() != Some(&(new_entry_id as u32)) {
1885-
postings[search_type_size].push(new_entry_id as u32);
1886-
}
1887-
}
1888-
for index in used_in_function_output {
1889-
let postings = if index >= 0 {
1890-
assert!(serialized_index.path_data[index as usize].is_some());
1891-
&mut serialized_index.type_data[index as usize]
1892-
.as_mut()
1893-
.unwrap()
1894-
.inverted_function_output_index
1895-
} else {
1896-
let generic_id = usize::try_from(-index).unwrap() - 1;
1897-
for _ in serialized_index.generic_inverted_index.len()..=generic_id {
1898-
serialized_index.generic_inverted_index.push(Vec::new());
1897+
let posting = &mut postings[search_type_size];
1898+
if posting.last() != Some(&(new_entry_id as u32)) {
1899+
posting.push(new_entry_id as u32);
18991900
}
1900-
&mut serialized_index.generic_inverted_index[generic_id]
1901-
};
1902-
while postings.len() <= search_type_size {
1903-
postings.push(Vec::new());
1904-
}
1905-
if postings[search_type_size].last() != Some(&(new_entry_id as u32)) {
1906-
postings[search_type_size].push(new_entry_id as u32);
19071901
}
19081902
}
1903+
process_used_in_function(
1904+
&mut serialized_index,
1905+
search_type_size,
1906+
new_entry_id,
1907+
|type_data| &mut type_data.inverted_function_inputs_index,
1908+
used_in_function_inputs,
1909+
);
1910+
process_used_in_function(
1911+
&mut serialized_index,
1912+
search_type_size,
1913+
new_entry_id,
1914+
|type_data| &mut type_data.inverted_function_output_index,
1915+
used_in_function_output,
1916+
);
19091917
}
19101918
}
19111919

src/librustdoc/html/render/search_index/encode.rs

Lines changed: 6 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,9 @@ pub fn read_signed_vlqhex_from_string(string: &[u8]) -> Option<(i32, usize)> {
5252
}
5353

5454
pub fn write_postings_to_string(postings: &[Vec<u32>], buf: &mut Vec<u8>) {
55+
// At least one byte is pushed for every posting, so reserve that much up front.
56+
buf.reserve(postings.len());
57+
5558
for list in postings {
5659
if list.is_empty() {
5760
buf.push(0);
@@ -63,29 +66,22 @@ pub fn write_postings_to_string(postings: &[Vec<u32>], buf: &mut Vec<u8>) {
6366
if len_after - len_before > 1 + (4 * list.len()) && list.len() < 0x3a {
6467
buf.truncate(len_before);
6568
buf.push(list.len() as u8);
66-
for &item in list {
67-
buf.push(item as u8);
68-
buf.push((item >> 8) as u8);
69-
buf.push((item >> 16) as u8);
70-
buf.push((item >> 24) as u8);
71-
}
69+
buf.extend(list.iter().copied().map(u32::to_le_bytes).flatten());
7270
}
7371
}
7472
}
7573

7674
pub fn read_postings_from_string(postings: &mut Vec<Vec<u32>>, mut buf: &[u8]) {
7775
use stringdex::internals::decode::RoaringBitmap;
78-
while let Some(&c) = buf.get(0) {
76+
while let Some(&c) = buf.first() {
7977
if c < 0x3a {
8078
buf = &buf[1..];
8179
let buf = buf.split_off(..usize::from(c) * size_of::<u32>()).unwrap();
8280
let (chunks, _) = buf.as_chunks();
8381
let slot = chunks.iter().copied().map(u32::from_le_bytes).collect();
8482
postings.push(slot);
8583
} else {
86-
let (bitmap, consumed_bytes_len) =
87-
RoaringBitmap::from_bytes(buf).unwrap_or_else(|| (RoaringBitmap::default(), 0));
88-
assert_ne!(consumed_bytes_len, 0);
84+
let (bitmap, consumed_bytes_len) = RoaringBitmap::from_bytes(buf).unwrap();
8985
postings.push(bitmap.to_vec());
9086
buf = &buf[consumed_bytes_len..];
9187
}

0 commit comments

Comments
 (0)