Skip to content
Prev Previous commit
Next Next commit
feat: use scraper to parse html fragment
  • Loading branch information
wendajiang committed Jul 8, 2022
commit 210d772ed779c69095c15a588f73df41c405b041
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ clap = { version = "3.2.8", features = ["cargo"] }
colored = "2.0.0"
dirs = "4.0.0"
env_logger = "0.9.0"
escaper = "0.1.1"
keyring = "1.1.2"
log = "0.4.17"
openssl = "0.10.40"
Expand All @@ -32,6 +31,7 @@ serde = { version = "1.0.138", features = ["derive"] }
serde_json = "1.0.82"
toml = "0.5.9"
regex = "1.5.6"
scraper = "0.13.0"

[dependencies.diesel]
version = "1.4.8"
Expand Down
15 changes: 8 additions & 7 deletions src/cmds/pick.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,14 +133,15 @@ impl Command for PickCommand {
});

let r = cache.get_question(fid).await;
if r.is_err() {
let e = r.err().ok_or(Error::NoneError)?;
eprintln!("{:?}", &e);
if let Error::FeatureError(_) | Error::NetworkError(_) = e {
Self::handler(m).await?;

match r {
Ok(r) => println!("{}", r),
Err(e) => {
eprintln!("{:?}", e);
if let Error::FeatureError(_) | Error::NetworkError(_) = e {
Self::handler(m).await?;
}
}
} else {
println!("{}", r?);
}

Ok(())
Expand Down
167 changes: 13 additions & 154 deletions src/helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,168 +92,27 @@ mod filter {

/// Render html to command-line
mod html {
// use crate::Error;
use colored::{Color, Colorize};
use escaper::decode_html;
use regex::Regex;
/// A piece of text cut out of an HTML string, tagged with how it should be
/// styled when printed to the terminal.
pub enum Token {
/// Text with no recognised styling tag in front of it.
Plain(String),
/// Text that followed a `<b>` or `<strong>` tag.
Bold(String),
/// Text that followed a `<sup>` tag.
Sup(String),
/// Text that followed a `<sub>` tag.
Sub(String),
/// Text that followed a `<font ...>` tag, paired with the color parsed
/// from that tag's `color` attribute.
Font((String, Color)),
/// Whatever text remains at the end of the input.
Eof(String),
}

use scraper::Html;
/// Html render plugin
///
/// Implemented for values holding HTML text that should be shown on the
/// command line.
pub trait HTML {
/// Splits the HTML text into a list of [`Token`]s.
fn ser(&self) -> Vec<Token>;
/// Produces the `String` that is printed in place of the raw HTML.
fn render(&self) -> String;
}

/// Writes `n` with Unicode superscript digits, e.g. `12` becomes `¹²`.
///
/// Each decimal digit of `n` is mapped to its superscript counterpart, most
/// significant digit first.
pub fn superscript(n: u8) -> String {
    // Indexed by digit value: GLYPHS[3] is the superscript form of '3'.
    const GLYPHS: [char; 10] = ['⁰', '¹', '²', '³', '⁴', '⁵', '⁶', '⁷', '⁸', '⁹'];

    n.to_string()
        .bytes()
        .map(|digit| GLYPHS[usize::from(digit - b'0')])
        .collect()
}
Comment on lines -114 to -129
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please check whether scraper can support this.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

However, I do not think this is necessary; this replacement uses ^ and _ instead. If you think it must use the UTF-8 characters, I can do some manual work to restore this. :)

Copy link
Owner

@clearloop clearloop Jul 12, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These optimizations are from #12 and #39; we had better keep them out of respect for the previous contributors. By the way, I think there is an escaper in our crate as well, lol (#11).


/// Writes `n` with Unicode subscript digits, e.g. `12` becomes `₁₂`.
///
/// Each decimal digit of `n` is mapped to its subscript counterpart, most
/// significant digit first.
pub fn subscript(n: u8) -> String {
    // Indexed by digit value: GLYPHS[3] is the subscript form of '3'.
    const GLYPHS: [char; 10] = ['₀', '₁', '₂', '₃', '₄', '₅', '₆', '₇', '₈', '₉'];

    n.to_string()
        .bytes()
        .map(|digit| GLYPHS[usize::from(digit - b'0')])
        .collect()
}
impl HTML for String {
fn ser(&self) -> Vec<Token> {
// empty tags
let tks = {
let mut s = self.clone();
// some problems (e.g. 1653) have ZWSPs.
s.retain(|x| x != '\u{200B}');
s
};
let res: Vec<Token>;
// styled
{
let mut ptr = 0;
let mut output = vec![];
let mut bold = false;
let mut sup = false;
let mut sub = false;
let mut color: Option<Color> = None;

// TODO: check how to make this `unwrap` more flexible..
//
// or looks better.
//
// or do some handwrite matching.
let re_color = Regex::new(r#"color=['"]([^'"]+)"#).unwrap();
for (i, e) in tks.chars().enumerate() {
match e {
'<' => {
if bold {
output.push(Token::Bold(tks[ptr..i].to_string()));
bold = false;
} else if sup {
output.push(Token::Sup(tks[ptr..i].to_string()));
sup = false;
} else if sub {
output.push(Token::Sub(tks[ptr..i].to_string()));
sub = false;
} else if color.is_some() {
output.push(Token::Font((tks[ptr..i].to_string(), color.unwrap())));
color = None;
} else {
output.push(Token::Plain(tks[ptr..i].to_string()));
}
ptr = i;
}
'>' => {
match &tks[i - 1..i] {
"-" => continue,
_ => match &tks[(ptr + 1)..i] {
"b" | "strong" => bold = true,
"sup" => sup = true,
"sub" => sub = true,
s if s.starts_with("font") => {
color = re_color
.captures(s)
.and_then(|caps| caps.get(1))
.and_then(|cap| cap.as_str().parse().ok());
}
_ => {}
},
}
ptr = i + 1;
}
_ => {}
}
}
output.push(Token::Eof(tks[ptr..tks.len()].to_string()));
res = output;
}

res
}

fn render(&self) -> String {
let ts = self.ser();
let mut tks: Vec<String> = vec![];

for i in ts {
match i {
Token::Plain(s) => tks.push(s.normal().to_string()),
Token::Bold(s) => {
if s.contains("Example") {
let mut br = "-".repeat(50).dimmed().to_string();
br.push_str("\n\n");
tks.push(br);
} else if s.contains("Note") {
let mut br = "* ".repeat(25).dimmed().to_string();
br.push_str("\n\n");
tks.push(br);
}
let rep = self
.replace(r#"</sup>"#, "")
.replace(r#"<sup>"#, "^")
.replace(r#"</sub>"#, "")
.replace(r#"<sub>"#, "_");
let frag = Html::parse_fragment(rep.as_str());

let res = frag
.root_element()
.text()
.fold(String::new(), |acc, e| acc + e);

tks.push(s.bold().to_string());
}
Token::Sup(s) => tks.push(match s.parse::<u8>() {
Ok(n) => superscript(n),
_ => s,
}),
Token::Sub(s) => tks.push(match s.parse::<u8>() {
Ok(n) => subscript(n),
_ => s,
}),
Token::Font((s, color)) => tks.push(s.color(color).to_string()),
Token::Eof(s) => tks.push(s.normal().to_string()),
}
}

// post replace
let tks = tks.join("");

decode_html(&tks).unwrap_or(tks)
res
}
}
}
Expand Down