From ba8f3fcb90e00f21f79dbfa8bdc916fd0017cc20 Mon Sep 17 00:00:00 2001 From: Carlo Federico Vescovo Date: Fri, 23 Jan 2026 14:58:54 -0600 Subject: [PATCH] Upgrade ego-tree to 0.11.0 and html5ever to 0.37.1 - Bump ego-tree from 0.10.0 to 0.11.0 - Bump html5ever from 0.36.0 to 0.37.1 - Bump tendril from 0.4.3 to 0.5.0 (required by html5ever 0.37.1) - Implement clone_subtree() method for TreeSink trait --- Cargo.lock | 40 ++++++++++------------------------- scraper/Cargo.toml | 6 +++--- scraper/src/html/tree_sink.rs | 9 ++++++++ scraper/src/main.rs | 4 ++-- 4 files changed, 25 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f9efe932..58ba7a41 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -75,9 +75,9 @@ dependencies = [ [[package]] name = "ego-tree" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8" +checksum = "b04dc5a38e4f151a79d9f2451ae6037fb6eaf5cba34771f44781f80e508498e3" [[package]] name = "equivalent" @@ -91,16 +91,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = "futf" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" -dependencies = [ - "mac", - "new_debug_unreachable", -] - [[package]] name = "getopts" version = "0.2.24" @@ -118,9 +108,9 @@ checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" [[package]] name = "html5ever" -version = "0.36.1" +version = "0.37.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6452c4751a24e1b99c3260d505eaeee76a050573e61f30ac2c924ddc7236f01e" +checksum = "5935f02fdc02823ff15fec27c2b3d7ca19d629e996f7a0ae4d7d500e62e54c76" dependencies = [ "log", "markup5ever", @@ -163,17 +153,11 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" -[[package]] -name = "mac" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" - [[package]] name = "markup5ever" -version = "0.36.1" +version = "0.37.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c3294c4d74d0742910f8c7b466f44dda9eb2d5742c1e430138df290a1e8451c" +checksum = "7cfb33ea12d5d83b1ba9a55ae7d05faec4f2189d47b79c04d4cea6bbe9f5b083" dependencies = [ "log", "tendril", @@ -422,7 +406,6 @@ dependencies = [ "parking_lot", "phf_shared", "precomputed-hash", - "serde", ] [[package]] @@ -450,12 +433,11 @@ dependencies = [ [[package]] name = "tendril" -version = "0.4.3" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +checksum = "c4790fc369d5a530f4b544b094e31388b9b3a37c0f4652ade4505945f5660d24" dependencies = [ - "futf", - "mac", + "new_debug_unreachable", "utf-8", ] @@ -479,9 +461,9 @@ checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" [[package]] name = "web_atoms" -version = "0.2.0" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acd0c322f146d0f8aad130ce6c187953889359584497dac6561204c8e17bb43d" +checksum = "c7fa72497c57079de16225d9a886d6c9a80c34f8e5a9cd5c64b71a449cbba195" dependencies = [ "phf", "phf_codegen", diff --git a/scraper/Cargo.toml b/scraper/Cargo.toml index f1534d10..4a2780ad 100644 --- a/scraper/Cargo.toml +++ b/scraper/Cargo.toml @@ -14,13 +14,13 @@ readme = "README.md" [dependencies] cssparser = "0.36.0" -ego-tree = "0.10.0" -html5ever = "0.36.0" +ego-tree = "0.11.0" +html5ever = "0.37.1" indexmap = { version = "2.13.0", optional = true } precomputed-hash = "0.1.1" selectors = "0.35.0" serde = { version = "1.0.228", optional = true } -tendril = "0.4.3" +tendril = "0.5.0" [dependencies.getopts] version = "0.2.24" diff --git a/scraper/src/html/tree_sink.rs b/scraper/src/html/tree_sink.rs index 8af46156..7339300c 100644 --- a/scraper/src/html/tree_sink.rs +++ b/scraper/src/html/tree_sink.rs @@ -294,4 +294,13 @@ impl TreeSink for HtmlTreeSink { self.append(prev_element, child) } } + + fn clone_subtree(&self, target: &Self::Handle) -> Self::Handle { + let mut html = self.0.borrow_mut(); + + let mut source_node = html.tree.get_mut(*target).unwrap(); + let cloned_subtree = source_node.clone_subtree(); + + cloned_subtree.id() + } } diff --git a/scraper/src/main.rs b/scraper/src/main.rs index 78016eac..72328670 100644 --- a/scraper/src/main.rs +++ b/scraper/src/main.rs @@ -75,7 +75,7 @@ fn main() { let matches = match opts.parse(&args[1..]) { Ok(m) => m, Err(f) => { - eprintln!("{}", f); + eprintln!("{f}"); process::exit(USAGE); } }; @@ -131,7 +131,7 @@ fn main() { let files = &matches.free[1..]; let selector = Selector::parse(selector).unwrap_or_else(|e| { - eprintln!("failed to parse selector: {}", e); + eprintln!("failed to parse selector: {e}"); process::exit(USAGE); });