Parsing Word Parts With nom In Rust
This is prep work for Neopoligen. It parses words that contain characters which are also used to mark the start and end of spans.
---
[dependencies]
nom = "7.1.3"
---
#![allow(unused_imports)]
use nom::IResult;
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::bytes::complete::take;
use nom::bytes::complete::is_not;
use nom::character::complete::space1;
use nom::multi::many1;
/// One unit of parsed inline text.
///
/// `Clone` and `Eq` are derived in addition to `Debug` and `PartialEq` so
/// tokens can be duplicated and compared without restriction; every payload
/// type (`String`, `Vec<Token>`) already supports both.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// A run of ordinary word text containing no delimiter characters.
    WordSeg(String),
    /// The single character that followed a backslash in the input.
    EscapedChar(String),
    /// A run of whitespace, collapsed into one token.
    Space,
    /// The tokens found between a pair of `^^` delimiters.
    Footnote(Vec<Token>),
}
/// Runs a handful of smoke checks against `parse` and reports when they pass.
///
/// Each check parses a small input and asserts that the whole input was
/// consumed (empty remainder) and that the expected token list came back.
fn main() {
    // A single plain word becomes one word segment.
    let plain_word = vec![Token::WordSeg(String::from("alfa"))];
    assert_eq!(parse("alfa"), Ok(("", plain_word)));

    // Two words separated by a space yield a `Space` token between them.
    let two_words = vec![
        Token::WordSeg(String::from("bravo")),
        Token::Space,
        Token::WordSeg(String::from("charlie")),
    ];
    assert_eq!(parse("bravo charlie"), Ok(("", two_words)));

    // Text wrapped in `^^ ... ^^` is collected into a footnote.
    let footnote_only = vec![Token::Footnote(vec![Token::WordSeg(String::from(
        "example",
    ))])];
    assert_eq!(parse("^^example^^"), Ok(("", footnote_only)));

    // A backslash-escaped caret splits the word around an `EscapedChar`.
    let escaped_caret = vec![
        Token::WordSeg(String::from("de")),
        Token::EscapedChar(String::from("^")),
        Token::WordSeg(String::from("lta")),
    ];
    assert_eq!(parse(r#"de\^lta"#), Ok(("", escaped_caret)));

    println!("Finished tests");
}
/// Parses `source` into a sequence of tokens.
///
/// Repeatedly tries, in order, an escaped character, a footnote, a run of
/// whitespace, and a word segment. At least one token must match or an error
/// is returned. Parsing stops at the first position where none of the
/// alternatives match (for example a single unescaped `^`), and the text from
/// there on is handed back as the remaining input.
pub fn parse(source: &str) -> IResult<&str, Vec<Token>> {
    // Order matters: `escaped` and `footnote` must get a chance before
    // `word_seg`, which stops at the characters they start with.
    many1(alt((escaped, footnote, space, word_seg)))(source)
}
/// Parses a footnote: `^^`, one or more tokens, then a closing `^^`.
///
/// The body is parsed recursively with `parse`, so it may hold any mix of
/// tokens. Fails if either delimiter is missing or the body is empty.
pub fn footnote(source: &str) -> IResult<&str, Token> {
    let (body, _) = tag("^^")(source)?;
    let (after_body, tokens) = parse(body)?;
    // The closing delimiter carries no data; only the remaining input is kept.
    tag("^^")(after_body).map(|(rest, _)| (rest, Token::Footnote(tokens)))
}
/// Parses a backslash followed by exactly one character.
///
/// The backslash is discarded and the character after it is returned as an
/// `EscapedChar`. Fails if the input does not start with a backslash or if
/// nothing follows it.
pub fn escaped(source: &str) -> IResult<&str, Token> {
    let (after_backslash, _) = tag(r#"\"#)(source)?;
    take(1usize)(after_backslash)
        .map(|(rest, ch)| (rest, Token::EscapedChar(ch.to_string())))
}
/// Parses one or more whitespace characters into a single `Space` token.
///
/// The matched whitespace itself is dropped; only its presence is recorded.
pub fn space(source: &str) -> IResult<&str, Token> {
    space1(source).map(|(rest, _)| (rest, Token::Space))
}
/// Parses one word segment: the longest non-empty run of characters that
/// contains no space, tab, backslash, or caret.
///
/// Fails if the input is empty or starts with one of those characters, which
/// lets the other parsers (`space`, `escaped`, `footnote`) handle them.
pub fn word_seg(source: &str) -> IResult<&str, Token> {
    // The tab is excluded along with the space because `space` is built on
    // `space1`, which matches both. Without it, a tab in the middle of text
    // would be absorbed into the word segment instead of producing `Space`.
    let (source, result) = is_not(" \t\\^")(source)?;
    Ok((source, Token::WordSeg(result.to_string())))
}