home ~ projects ~ socials

Get the Title of a Web Page from its URL in Rust

---
[dependencies]
anyhow = "1.0.98"
reqwest = { version = "0.12.20", features = ["blocking"] }
scraper = "0.23.1"
---

use anyhow::{Result, anyhow};
use reqwest::blocking::get;
use scraper::{Html, Selector};

/// Downloads a fixed example page and prints its title to stdout.
///
/// Prints `No title found` when the page has no `<title>` element.
///
/// # Errors
///
/// Propagates any error returned by `fetch_url`.
fn main() -> Result<()> {
  let url = "https://www.example.com/";
  let html = fetch_url(url)?;
  match get_title(&html) {
    Some(text) => println!("Page Title: {}", text),
    None => println!("No title found"),
  }
  Ok(())
}

/// Fetches `url` with a blocking GET request and returns the response body as text.
///
/// # Errors
///
/// Returns an error if the request cannot be performed, if the server
/// responds with a non-success (non-2xx) status, or if the body cannot be
/// read as text.
fn fetch_url(url: &str) -> Result<String> {
  let response = get(url)?;
  let status = response.status();
  // Any 2xx status is a successful fetch, not only 200; on failure, report
  // which URL failed and with what status so the error is actionable.
  if !status.is_success() {
    return Err(anyhow!("Could not fetch page {}: HTTP {}", url, status));
  }
  Ok(response.text()?)
}

/// Extracts the text of the first `<title>` element found in `html`.
///
/// The text of the element is concatenated and surrounding whitespace is
/// trimmed, so titles that span several lines in the markup come back
/// without leading/trailing newlines or indentation.
///
/// Returns `None` when the document contains no `<title>` element.
fn get_title(html: &str) -> Option<String> {
  let document = Html::parse_document(html);
  // The selector is a constant, so a parse failure here would be a programming error.
  let selector = Selector::parse("title").expect("`title` is a valid CSS selector");
  document
    .select(&selector)
    .next()
    .map(|title_tag| title_tag.text().collect::<String>().trim().to_owned())
}
Output:
Page Title: Example Domain
-- end of line --