Skip to content

Commit 683739d

Browse files
committed
Allow looking up "crate paths" in map without allocating a vector
1 parent c90bcb9 commit 683739d

File tree

4 files changed

+123
-73
lines changed

4 files changed

+123
-73
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4793,11 +4793,13 @@ dependencies = [
47934793
"askama",
47944794
"base64",
47954795
"expect-test",
4796+
"hashbrown",
47964797
"indexmap",
47974798
"itertools",
47984799
"minifier",
47994800
"pulldown-cmark-escape",
48004801
"regex",
4802+
"rustc-hash 2.1.1",
48014803
"rustdoc-json-types",
48024804
"serde",
48034805
"serde_json",

src/librustdoc/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@ path = "lib.rs"
1212
arrayvec = { version = "0.7", default-features = false }
1313
askama = { version = "0.14", default-features = false, features = ["alloc", "config", "derive"] }
1414
base64 = "0.21.7"
15+
hashbrown = "0.15"
1516
indexmap = { version = "2", features = ["serde"] }
1617
itertools = "0.12"
1718
minifier = { version = "0.3.5", default-features = false }
1819
pulldown-cmark-escape = { version = "0.11.0", features = ["simd"] }
1920
regex = "1"
21+
rustc-hash = "2.1.1"
2022
rustdoc-json-types = { path = "../rustdoc-json-types" }
2123
serde = { version = "1.0", features = ["derive"] }
2224
serde_json = "1.0"

src/librustdoc/html/render/search_index.rs

Lines changed: 118 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,18 @@ pub(crate) mod encode;
22
mod serde;
33

44
use std::collections::BTreeSet;
5-
use std::collections::hash_map::Entry;
5+
use std::hash::Hash;
66
use std::path::Path;
77

88
use ::serde::de::{self, Deserializer, Error as _};
99
use ::serde::ser::{SerializeSeq, Serializer};
1010
use ::serde::{Deserialize, Serialize};
11+
use hashbrown::hash_map::EntryRef;
12+
use hashbrown::{Equivalent, HashMap};
1113
use rustc_ast::join_path_syms;
1214
use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexMap};
1315
use rustc_data_structures::thin_vec::ThinVec;
16+
use rustc_hash::FxBuildHasher;
1417
use rustc_hir::attrs::AttributeKind;
1518
use rustc_hir::find_attr;
1619
use rustc_middle::ty::TyCtxt;
@@ -53,7 +56,46 @@ pub(crate) struct SerializedSearchIndex {
5356
generic_inverted_index: Vec<Vec<Vec<u32>>>,
5457
// generated in-memory backref cache
5558
#[serde(skip)]
56-
crate_paths_index: FxHashMap<(ItemType, Vec<Symbol>), usize>,
59+
crate_paths_index: HashMap<CratePath, usize, FxBuildHasher>,
60+
}
61+
62+
/// Owned key type of `crate_paths_index`: an `ItemType` together with the
/// path's `Symbol`s stored as one flat vector.
///
/// `Hash` is implemented by hand rather than derived.
#[derive(Clone, Debug, PartialEq, Eq)]
63+
struct CratePath(ItemType, Vec<Symbol>);
64+
65+
// Hashes the item type, then the number of symbols as a length prefix, then
// every symbol in order.
//
// `Hash for CratePathRef` writes the same sequence.
// NOTE(review): the two impls presumably have to stay in sync so that a
// borrowed `CratePathRef` lookup lands on the matching owned key — confirm
// against the hashbrown `Equivalent` contract before changing either one.
impl Hash for CratePath {
66+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
67+
self.0.hash(state);
68+
// Length goes in before the elements, via the hasher's dedicated
// length-prefix method rather than a plain integer write.
state.write_length_prefix(self.1.len());
69+
for sym in &self.1 {
70+
sym.hash(state);
71+
}
72+
}
73+
}
74+
75+
/// Borrowed counterpart of `CratePath`: an `ItemType` plus a path supplied as
/// a slice of `Symbol` slices.
///
/// The trait impls for this type read the inner slices back-to-back as one
/// flat sequence of symbols, so a key can be described by pointing at
/// existing pieces (for example a module path and a one-element name slice)
/// instead of first collecting them into a `Vec`.
struct CratePathRef<'sym>(ItemType, &'sym [&'sym [Symbol]]);
76+
77+
// Converts a borrowed key into an owned one: keeps the item type and copies
// the symbols of every segment, in order, into a single `Vec`.
// NOTE(review): presumably this is what `entry_ref(..)` uses to materialize
// the owned key when a vacant slot is filled — confirm against hashbrown.
impl<'a, 'sym> From<&'a CratePathRef<'sym>> for CratePath {
78+
fn from(value: &'a CratePathRef<'sym>) -> Self {
79+
// First `copied()` turns `&&[Symbol]` into `&[Symbol]`; after `flatten()`
// the second `copied()` turns `&Symbol` into `Symbol`.
Self(value.0, value.1.iter().copied().flatten().copied().collect())
80+
}
81+
}
82+
83+
// Key comparison between a borrowed `CratePathRef` and an owned `CratePath`.
// They are equivalent when the item types are equal and the segments, read
// back-to-back, yield exactly the symbols of the owned vector in the same
// order (same length included, since `Iterator::eq` compares to the end).
impl<'sym> Equivalent<CratePath> for CratePathRef<'sym> {
84+
fn equivalent(&self, key: &CratePath) -> bool {
85+
self.0 == key.0 && self.1.iter().copied().flatten().eq(&key.1)
86+
}
87+
}
88+
89+
// Hashes the item type, then the total number of symbols across all segments
// as a length prefix, then every symbol in order — the same sequence that
// `Hash for CratePath` writes for its flat vector.
impl<'sym> Hash for CratePathRef<'sym> {
90+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
91+
self.0.hash(state);
92+
// View the segments as one continuous run of `&Symbol`s.
let iter = self.1.iter().copied().flatten();
93+
// The iterator is cloned so the symbols can be counted for the length
// prefix and then walked again for hashing; the segments are therefore
// traversed twice.
let len = iter.clone().count();
94+
state.write_length_prefix(len);
95+
for sym in iter {
96+
sym.hash(state);
97+
}
98+
}
5799
}
58100

59101
impl SerializedSearchIndex {
@@ -176,7 +218,7 @@ impl SerializedSearchIndex {
176218
// generic_inverted_index is not the same length as other columns,
177219
// because it's actually a completely different set of objects
178220

179-
let mut crate_paths_index: FxHashMap<(ItemType, Vec<Symbol>), usize> = FxHashMap::default();
221+
let mut crate_paths_index = HashMap::default();
180222
for (i, (name, path_data)) in names.iter().zip(path_data.iter()).enumerate() {
181223
if let Some(path_data) = path_data {
182224
let full_path = if path_data.module_path.is_empty() {
@@ -186,7 +228,7 @@ impl SerializedSearchIndex {
186228
full_path.push(Symbol::intern(name));
187229
full_path
188230
};
189-
crate_paths_index.insert((path_data.ty, full_path), i);
231+
crate_paths_index.insert(CratePath(path_data.ty, full_path), i);
190232
}
191233
}
192234

@@ -216,15 +258,10 @@ impl SerializedSearchIndex {
216258
assert_eq!(self.names.len(), self.path_data.len());
217259
if let Some(path_data) = &path_data
218260
&& let name = Symbol::intern(&name)
219-
&& let fqp = if path_data.module_path.is_empty() {
220-
vec![name]
221-
} else {
222-
let mut v = path_data.module_path.clone();
223-
v.push(name);
224-
v
225-
}
226-
&& let Some(&other_path) = self.crate_paths_index.get(&(path_data.ty, fqp))
227-
&& self.path_data.get(other_path).map_or(false, Option::is_some)
261+
&& let Some(&other_path) = self
262+
.crate_paths_index
263+
.get(&CratePathRef(path_data.ty, &[&path_data.module_path, &[name]]))
264+
&& let Some(Some(_)) = self.path_data.get(other_path)
228265
{
229266
self.path_data.push(None);
230267
} else {
@@ -246,20 +283,22 @@ impl SerializedSearchIndex {
246283
///
247284
/// The returned ID can be used to attach more data to the search result.
248285
fn add_entry(&mut self, name: Symbol, entry_data: EntryData, desc: String) -> usize {
249-
let fqp = if let Some(module_path_index) = entry_data.module_path {
250-
let mut fqp = self.path_data[module_path_index].as_ref().unwrap().module_path.clone();
251-
fqp.push(Symbol::intern(&self.names[module_path_index]));
252-
fqp.push(name);
253-
fqp
286+
let interned;
287+
let extra: [Symbol; 2];
288+
let blah: &[Symbol] = &[name];
289+
let fqp: &[&[Symbol]] = if let Some(module_path_index) = entry_data.module_path {
290+
interned = Symbol::intern(&self.names[module_path_index]);
291+
extra = [interned, name];
292+
&[&self.path_data[module_path_index].as_ref().unwrap().module_path[..], &extra]
254293
} else {
255-
vec![name]
294+
&[blah]
256295
};
257296
// If a path with the same name already exists, but no entry does,
258297
// we can fill in the entry without having to allocate a new row ID.
259298
//
260299
// Because paths and entries both share the same index, using the same
261300
// ID saves space by making the tree smaller.
262-
if let Some(&other_path) = self.crate_paths_index.get(&(entry_data.ty, fqp))
301+
if let Some(&other_path) = self.crate_paths_index.get(&CratePathRef(entry_data.ty, fqp))
263302
&& self.entry_data[other_path].is_none()
264303
&& self.descs[other_path].is_empty()
265304
{
@@ -282,9 +321,9 @@ impl SerializedSearchIndex {
282321

283322
fn get_id_by_module_path(&mut self, path: &[Symbol]) -> usize {
284323
let ty = if path.len() == 1 { ItemType::ExternCrate } else { ItemType::Module };
285-
match self.crate_paths_index.entry((ty, path.to_vec())) {
286-
Entry::Occupied(index) => *index.get(),
287-
Entry::Vacant(slot) => {
324+
match self.crate_paths_index.entry_ref(&CratePathRef(ty, &[path])) {
325+
EntryRef::Occupied(index) => *index.get(),
326+
EntryRef::Vacant(slot) => {
288327
slot.insert(self.path_data.len());
289328
let (name, module_path) = path.split_last().unwrap();
290329
self.push_path(
@@ -301,16 +340,18 @@ impl SerializedSearchIndex {
301340
let mut skips = FxHashSet::default();
302341
for (other_pathid, other_path_data) in other.path_data.iter().enumerate() {
303342
if let Some(other_path_data) = other_path_data {
304-
let mut fqp = other_path_data.module_path.clone();
305343
let name = Symbol::intern(&other.names[other_pathid]);
306-
fqp.push(name);
344+
let fqp = [&other_path_data.module_path[..], &[name]];
307345
let self_pathid = other_entryid_offset + other_pathid;
308-
let self_pathid = match self.crate_paths_index.entry((other_path_data.ty, fqp)) {
309-
Entry::Vacant(slot) => {
346+
let self_pathid = match self
347+
.crate_paths_index
348+
.entry_ref(&CratePathRef(other_path_data.ty, &fqp))
349+
{
350+
EntryRef::Vacant(slot) => {
310351
slot.insert(self_pathid);
311352
self_pathid
312353
}
313-
Entry::Occupied(existing_entryid) => {
354+
EntryRef::Occupied(existing_entryid) => {
314355
skips.insert(other_pathid);
315356
let self_pathid = *existing_entryid.get();
316357
let new_type_data = match (
@@ -1292,9 +1333,9 @@ pub(crate) fn build_index(
12921333
let crate_doc =
12931334
short_markdown_summary(&krate.module.doc_value(), &krate.module.link_names(cache));
12941335
let crate_idx = {
1295-
let crate_path = (ItemType::ExternCrate, vec![crate_name]);
1296-
match serialized_index.crate_paths_index.entry(crate_path) {
1297-
Entry::Occupied(index) => {
1336+
let crate_path = CratePathRef(ItemType::ExternCrate, &[&[crate_name]]);
1337+
match serialized_index.crate_paths_index.entry_ref(&crate_path) {
1338+
EntryRef::Occupied(index) => {
12981339
let index = *index.get();
12991340
serialized_index.descs[index] = crate_doc;
13001341
for type_data in serialized_index.type_data.iter_mut() {
@@ -1343,7 +1384,7 @@ pub(crate) fn build_index(
13431384
}
13441385
index
13451386
}
1346-
Entry::Vacant(slot) => {
1387+
EntryRef::Vacant(slot) => {
13471388
let krate = serialized_index.names.len();
13481389
slot.insert(krate);
13491390
serialized_index.push(
@@ -1384,9 +1425,12 @@ pub(crate) fn build_index(
13841425
.or_else(|| check_external.then(|| cache.external_paths.get(&defid)).flatten())
13851426
.map(|&(ref fqp, ty)| {
13861427
let pathid = serialized_index.names.len();
1387-
match serialized_index.crate_paths_index.entry((ty, fqp.clone())) {
1388-
Entry::Occupied(entry) => *entry.get(),
1389-
Entry::Vacant(entry) => {
1428+
match serialized_index
1429+
.crate_paths_index
1430+
.entry_ref(&CratePathRef(ty, &[&&fqp[..]]))
1431+
{
1432+
EntryRef::Occupied(entry) => *entry.get(),
1433+
EntryRef::Vacant(entry) => {
13901434
entry.insert(pathid);
13911435
let (name, path) = fqp.split_last().unwrap();
13921436
serialized_index.push_path(
@@ -1533,46 +1577,47 @@ pub(crate) fn build_index(
15331577
used_in_function_signature: &mut BTreeSet<isize>,
15341578
) -> RenderTypeId {
15351579
let pathid = serialized_index.names.len();
1536-
let pathid = match serialized_index.crate_paths_index.entry((ty, path.to_vec())) {
1537-
Entry::Occupied(entry) => {
1538-
let id = *entry.get();
1539-
if serialized_index.type_data[id].as_mut().is_none() {
1540-
serialized_index.type_data[id] = Some(TypeData {
1541-
search_unbox,
1542-
inverted_function_inputs_index: Vec::new(),
1543-
inverted_function_output_index: Vec::new(),
1544-
});
1545-
} else if search_unbox {
1546-
serialized_index.type_data[id].as_mut().unwrap().search_unbox = true;
1580+
let pathid =
1581+
match serialized_index.crate_paths_index.entry_ref(&CratePathRef(ty, &[path])) {
1582+
EntryRef::Occupied(entry) => {
1583+
let id = *entry.get();
1584+
if serialized_index.type_data[id].as_mut().is_none() {
1585+
serialized_index.type_data[id] = Some(TypeData {
1586+
search_unbox,
1587+
inverted_function_inputs_index: Vec::new(),
1588+
inverted_function_output_index: Vec::new(),
1589+
});
1590+
} else if search_unbox {
1591+
serialized_index.type_data[id].as_mut().unwrap().search_unbox = true;
1592+
}
1593+
id
15471594
}
1548-
id
1549-
}
1550-
Entry::Vacant(entry) => {
1551-
entry.insert(pathid);
1552-
let (name, path) = path.split_last().unwrap();
1553-
serialized_index.push_type(
1554-
name.to_string(),
1555-
PathData {
1556-
ty,
1557-
module_path: path.to_vec(),
1558-
exact_module_path: if let Some(exact_path) = exact_path
1559-
&& let Some((name2, exact_path)) = exact_path.split_last()
1560-
&& name == name2
1561-
{
1562-
Some(exact_path.to_vec())
1563-
} else {
1564-
None
1595+
EntryRef::Vacant(entry) => {
1596+
entry.insert(pathid);
1597+
let (name, path) = path.split_last().unwrap();
1598+
serialized_index.push_type(
1599+
name.to_string(),
1600+
PathData {
1601+
ty,
1602+
module_path: path.to_vec(),
1603+
exact_module_path: if let Some(exact_path) = exact_path
1604+
&& let Some((name2, exact_path)) = exact_path.split_last()
1605+
&& name == name2
1606+
{
1607+
Some(exact_path.to_vec())
1608+
} else {
1609+
None
1610+
},
15651611
},
1566-
},
1567-
TypeData {
1568-
inverted_function_inputs_index: Vec::new(),
1569-
inverted_function_output_index: Vec::new(),
1570-
search_unbox,
1571-
},
1572-
);
1573-
pathid
1574-
}
1575-
};
1612+
TypeData {
1613+
inverted_function_inputs_index: Vec::new(),
1614+
inverted_function_output_index: Vec::new(),
1615+
search_unbox,
1616+
},
1617+
);
1618+
pathid
1619+
}
1620+
};
15761621
used_in_function_signature.insert(isize::try_from(pathid).unwrap());
15771622
RenderTypeId::Index(isize::try_from(pathid).unwrap())
15781623
}

src/librustdoc/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#![feature(box_patterns)]
1212
#![feature(file_buffered)]
1313
#![feature(formatting_options)]
14+
#![feature(hasher_prefixfree_extras)]
1415
#![feature(if_let_guard)]
1516
#![feature(iter_advance_by)]
1617
#![feature(iter_intersperse)]

0 commit comments

Comments
 (0)