Neopolitan nom Parsing Playground Scratchpad
This is a holding spot for code while I get it working.
This version does basic text splitting on multi-line breaks, but it does so by looking at the input before it gets parsed by the many1 parsers.
The versions near the top are the later ones.
#![allow(unused_imports)]
#![allow(unused_variables)]
#![allow(dead_code)]
use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::bytes::complete::take_until;
use nom::character::complete::line_ending;
use nom::character::complete::multispace0;
use nom::character::complete::newline;
use nom::character::complete::not_line_ending;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::rest;
use nom::multi::many0;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::delimited;
use nom::sequence::preceded;
use nom::sequence::terminated;
use nom::sequence::tuple;
use nom::IResult;
use nom::Parser;
/// Binary entry point; it only prints a reminder that the parsers are
/// exercised through the test suite.
fn main() {
    let notice = "Run tests instead of running main";
    println!("{}", notice);
}
/// Parses zero or more attributes, each introduced by `--` or `|` and
/// followed by something `attr_id` accepts.
///
/// Returns `None` (consuming nothing) when no attribute is present.
fn attributes(source: &str) -> IResult<&str, Option<Vec<Attribute>>> {
    let marker = alt((tag("--"), tag("|")));
    opt(many1(preceded(marker, attr_id)))(source)
}
fn attr_id(source: &str) -> IResult<&str, Attribute> {
let (source, _) = space0(source)?;
let (source, attr) = preceded(tag("id: "), is_not("|>\n"))(source)?;
Ok((
source,
Attribute::Id {
value: attr.to_string(),
},
))
}
fn aside(source: &str) -> IResult<&str, Section> {
let (source, _) = tag_no_case("-- aside")(source)?;
let (source, _) = space0(source)?;
let (source, _) = line_ending(source)?;
let (source, attributes) = attributes(source)?;
let (source, content) = paragraphs(source)?;
Ok((
source,
Section::Aside {
attributes,
content,
},
))
}
fn list(source: &str) -> IResult<&str, Section> {
let (source, _) = tag_no_case("-- list")(source)?;
let (source, _) = space0(source)?;
let (source, _) = line_ending(source)?;
let (source, attributes) = attributes(source)?;
let preface = None;
let (source, items) = opt(many1(preceded(multispace0, list_item)))(source)?;
Ok((
source,
Section::List {
attributes,
items,
preface,
},
))
}
/// Parses one list item: one or more `- `-prefixed paragraphs collected into
/// a single `Container::ListItem`.
fn list_item(source: &str) -> IResult<&str, Container> {
    let bullet_paragraph = preceded(tag("- "), paragraph);
    let (remaining, content) = many1(bullet_paragraph)(source)?;
    Ok((remaining, Container::ListItem { content }))
}
/// Top-level parser: one or more sections (`aside`, `list`, `p`, `title`),
/// each optionally preceded by whitespace.
fn sections(source: &str) -> IResult<&str, Vec<Section>> {
    let any_section = alt((aside, list, p, title));
    many1(preceded(multispace0, any_section))(source)
}
/// Parses an inline strong snippet of the form `<<strong|text>>` or
/// `<<strong|text|id: value>>`.
///
/// The opening tag is matched case-insensitively. The text runs up to the
/// first `|` or `>`; any attributes that follow are parsed with
/// `attributes`, and the snippet must be closed with `>>`.
fn snippet_strong(source: &str) -> IResult<&str, Snippet> {
    let (source, _) = tag_no_case("<<strong|")(source)?;
    let (source, content) = is_not("|>")(source)?;
    // Optional `|id: ...` style attributes sit between the text and `>>`.
    let (source, attributes) = attributes(source)?;
    let (source, _) = tag(">>")(source)?;
    Ok((
        source,
        Snippet::Strong {
            text: content.to_string(),
            attributes,
        },
    ))
}
/// Parses a run of plain text: everything up to (not including) the next
/// newline or `<`. Fails when that run would be empty.
fn snippet_text(source: &str) -> IResult<&str, Snippet> {
    is_not("\n<")(source).map(|(remaining, plain)| {
        let text = plain.to_string();
        (remaining, Snippet::Text { text })
    })
}
fn p(source: &str) -> IResult<&str, Section> {
let (source, _) = tag_no_case("-- p")(source)?;
let (source, _) = space0(source)?;
let (source, _) = line_ending(source)?;
let (source, attributes) = attributes(source)?;
let (source, content) = paragraphs(source)?;
Ok((
source,
Section::P {
attributes,
content,
},
))
}
/// Parses one or more paragraphs, each optionally preceded by whitespace.
///
/// Returns `None` (consuming nothing) when no paragraph can be parsed.
fn paragraphs(source: &str) -> IResult<&str, Option<Vec<Block>>> {
    let spaced_paragraph = preceded(multispace0, paragraph);
    opt(many1(spaced_paragraph))(source)
}
/// Parses a single paragraph as a sequence of text / strong snippets.
///
/// The paragraph must not start with `--` (checked once, at its first
/// character). Each snippet may be preceded by one line ending, so the
/// paragraph stops where no snippet follows.
fn paragraph(source: &str) -> IResult<&str, Block> {
    let (remaining, _) = not(tag("--"))(source)?;
    let snippet = alt((snippet_text, snippet_strong));
    let (remaining, snippets) = many1(preceded(opt(line_ending), snippet))(remaining)?;
    Ok((remaining, Block::Paragraph { snippets }))
}
fn title(source: &str) -> IResult<&str, Section> {
let (source, _) = tag_no_case("-- title")(source)?;
let (source, _) = space0(source)?;
let (source, _) = line_ending(source)?;
let (source, attributes) = attributes(source)?;
let (source, headline) = opt(paragraph)(source)?;
let (source, content) = paragraphs(source)?;
Ok((
source,
Section::Title {
attributes,
headline,
content,
},
))
}
/// Metadata attached to a section or to an inline snippet.
#[derive(Debug, PartialEq)]
enum Attribute {
/// An `id: <value>` attribute; `attr_id` stores the text that follows `id: `.
Id { value: String },
/// Placeholder variant; none of the parsers in this version construct it.
None,
}
/// A top-level document section, as produced by `sections`.
#[derive(Debug, PartialEq)]
enum Section {
/// Built by `aside` from an `-- aside` header.
Aside {
attributes: Option<Vec<Attribute>>,
content: Option<Vec<Block>>,
},
/// Built by `list` from a `-- list` header; `list` always sets `preface` to `None`.
List {
attributes: Option<Vec<Attribute>>,
preface: Option<Vec<Block>>,
items: Option<Vec<Container>>,
},
/// Built by `p` from a `-- p` header.
P {
attributes: Option<Vec<Attribute>>,
content: Option<Vec<Block>>,
},
/// Built by `title` from a `-- title` header; `headline` is the first paragraph,
/// `content` holds any paragraphs after it.
Title {
attributes: Option<Vec<Attribute>>,
headline: Option<Block>,
content: Option<Vec<Block>>,
},
}
/// Block-level content inside a section.
#[derive(Debug, PartialEq)]
enum Block {
/// A paragraph made of inline snippets; built by `paragraph`.
Paragraph { snippets: Vec<Snippet> },
/// Placeholder variant; none of the parsers in this version construct it.
None,
}
/// A grouping of blocks inside a section.
#[derive(Debug, PartialEq)]
enum Container {
/// One list entry holding its paragraphs; built by `list_item`.
ListItem { content: Vec<Block> },
/// Placeholder variant; none of the parsers in this version construct it.
None,
}
/// Inline content inside a paragraph.
#[derive(Debug, PartialEq)]
enum Snippet {
/// Plain text; built by `snippet_text`.
Text {
text: String,
},
/// `<<strong|text>>` with optional attributes; built by `snippet_strong`.
Strong {
text: String,
attributes: Option<Vec<Attribute>>,
},
/// Placeholder variant; none of the parsers in this version construct it.
None,
}
// Tests for the section and snippet parsers. Each test feeds a source string
// to `sections` (or `snippet_strong`) and compares the parsed value against a
// hand-built expected tree.
// NOTE(review): several raw-string fixtures below expect separate paragraphs,
// but `paragraph` only stops at a blank line; the fixtures appear to have lost
// their blank lines -- confirm against the original file.
#[cfg(test)]
mod test {
use super::*;
// Title with a headline followed by two content paragraphs, one containing a strong snippet.
#[test]
fn integration_alfa() {
let source = r#"
-- title
alfa bravo
charlie <<strong|delta>>
echo foxtrot
"#;
let expected = vec![Section::Title {
attributes: None,
headline: Some(Block::Paragraph {
snippets: vec![Snippet::Text {
text: "alfa bravo".to_string(),
}],
}),
content: Some(vec![
Block::Paragraph {
snippets: vec![
Snippet::Text {
text: "charlie ".to_string(),
},
Snippet::Strong {
attributes: None,
text: "delta".to_string(),
},
],
},
Block::Paragraph {
snippets: vec![Snippet::Text {
text: "echo foxtrot".to_string(),
}],
},
]),
}];
let results = sections(source);
assert_eq!(expected, results.unwrap().1);
}
// Aside with a single plain-text paragraph and no attributes.
#[test]
fn aside_basic() {
let source = "-- aside\n\nalfa bravo";
let expected = vec![Section::Aside {
attributes: None,
content: Some(vec![Block::Paragraph {
snippets: vec![Snippet::Text {
text: "alfa bravo".to_string(),
}],
}]),
}];
let results = sections(source);
assert_eq!(expected, results.unwrap().1);
}
// Aside whose header is followed by a `-- id:` attribute line.
#[test]
fn aside_with_attributes() {
let source = "-- aside\n-- id: charlie\n\nalfa bravo";
let expected = vec![Section::Aside {
attributes: Some(vec![Attribute::Id {
value: "charlie".to_string(),
}]),
content: Some(vec![Block::Paragraph {
snippets: vec![Snippet::Text {
text: "alfa bravo".to_string(),
}],
}]),
}];
let results = sections(source);
assert_eq!(expected, results.unwrap().1);
}
// Strong snippet in the middle of a paragraph splits the text into three snippets.
#[test]
fn aside_with_strong() {
let source = "-- aside\n\nalfa <<strong|bravo>> charlie";
let expected = vec![Section::Aside {
attributes: None,
content: Some(vec![Block::Paragraph {
snippets: vec![
Snippet::Text {
text: "alfa ".to_string(),
},
Snippet::Strong {
text: "bravo".to_string(),
attributes: None,
},
Snippet::Text {
text: " charlie".to_string(),
},
],
}]),
}];
let results = sections(source);
assert_eq!(expected, results.unwrap().1);
}
// Blank lines (`\n\n`) separate the aside's content into three paragraphs.
#[test]
fn aside_multiple_paragraphs() {
let source = "-- aside\n\nalfa bravo\n\n<<strong|charlie>> delta\n\necho foxtrot";
let expected = vec![Section::Aside {
attributes: None,
content: Some(vec![
Block::Paragraph {
snippets: vec![Snippet::Text {
text: "alfa bravo".to_string(),
}],
},
Block::Paragraph {
snippets: vec![
Snippet::Strong {
text: "charlie".to_string(),
attributes: None,
},
Snippet::Text {
text: " delta".to_string(),
},
],
},
Block::Paragraph {
snippets: vec![Snippet::Text {
text: "echo foxtrot".to_string(),
}],
},
]),
}];
let results = sections(source);
assert_eq!(expected, results.unwrap().1);
}
// Two consecutive aside sections, the first with an id attribute.
#[test]
fn multiple_sections() {
let source = r#"
-- aside
-- id: widget
sierra tango
-- aside
alfa bravo
<<strong|charlie>> delta
echo foxtrot
"#;
let expected = vec![
Section::Aside {
attributes: Some(vec![Attribute::Id {
value: "widget".to_string(),
}]),
content: Some(vec![Block::Paragraph {
snippets: vec![Snippet::Text {
text: "sierra tango".to_string(),
}],
}]),
},
Section::Aside {
attributes: None,
content: Some(vec![
Block::Paragraph {
snippets: vec![Snippet::Text {
text: "alfa bravo".to_string(),
}],
},
Block::Paragraph {
snippets: vec![
Snippet::Strong {
text: "charlie".to_string(),
attributes: None,
},
Snippet::Text {
text: " delta".to_string(),
},
],
},
Block::Paragraph {
snippets: vec![Snippet::Text {
text: "echo foxtrot".to_string(),
}],
},
]),
},
];
let results = sections(source);
assert_eq!(expected, results.unwrap().1);
}
// An aside followed by a list section with a single item.
#[test]
fn aside_list() {
let source = r#"
-- aside
alfa bravo
-- list
- charlie
"#;
let expected = vec![
Section::Aside {
attributes: None,
content: Some(vec![Block::Paragraph {
snippets: vec![Snippet::Text {
text: "alfa bravo".to_string(),
}],
}]),
},
Section::List {
attributes: None,
items: Some(vec![Container::ListItem {
content: vec![Block::Paragraph {
snippets: vec![Snippet::Text {
text: "charlie".to_string(),
}],
}],
}]),
preface: None,
},
];
let results = sections(source);
assert_eq!(expected, results.unwrap().1);
}
// Title with only a headline: `content` is expected to be `None`.
#[test]
fn title_basic() {
let source = r#"
-- title
alfa bravo
"#;
let expected = vec![Section::Title {
attributes: None,
headline: Some(Block::Paragraph {
snippets: vec![Snippet::Text {
text: "alfa bravo".to_string(),
}],
}),
content: None,
}];
let results = sections(source);
assert_eq!(expected, results.unwrap().1);
}
// Title with a headline plus two further plain-text paragraphs.
#[test]
fn title_with_content() {
let source = r#"
-- title
alfa bravo
charlie delta
echo foxtrot
"#;
let expected = vec![Section::Title {
attributes: None,
headline: Some(Block::Paragraph {
snippets: vec![Snippet::Text {
text: "alfa bravo".to_string(),
}],
}),
content: Some(vec![
Block::Paragraph {
snippets: vec![Snippet::Text {
text: "charlie delta".to_string(),
}],
},
Block::Paragraph {
snippets: vec![Snippet::Text {
text: "echo foxtrot".to_string(),
}],
},
]),
}];
let results = sections(source);
assert_eq!(expected, results.unwrap().1);
}
// Calls `snippet_strong` directly on a strong snippet carrying an `|id:` attribute.
#[test]
fn strong_with_attributes() {
let source = r#"<<strong|bravo|id: tango>>"#;
let expected = Snippet::Strong {
attributes: Some(vec![Attribute::Id {
value: "tango".to_string(),
}]),
text: "bravo".to_string(),
};
let results = snippet_strong(source);
assert_eq!(expected, results.unwrap().1);
}
// Strong snippet with an attribute, surrounded by text, inside a `-- p` section.
#[test]
fn strong_text_with_attributes_in_p() {
let source = "-- p\n\nalfa <<strong|bravo|id: tango>> charlie";
let expected = vec![Section::P {
attributes: None,
content: Some(vec![Block::Paragraph {
snippets: vec![
Snippet::Text {
text: "alfa ".to_string(),
},
Snippet::Strong {
attributes: Some(vec![Attribute::Id {
value: "tango".to_string(),
}]),
text: "bravo".to_string(),
},
Snippet::Text {
text: " charlie".to_string(),
},
],
}]),
}];
let results = sections(source);
assert_eq!(expected, results.unwrap().1);
}
// Strong snippet with an attribute as the first snippet of the paragraph.
#[test]
fn inline_attribute_first_item_of_paragraph() {
let source = "-- p\n\n<<strong|foxtrot echo|id: delta>> bravo";
let expected = vec![Section::P {
attributes: None,
content: Some(vec![Block::Paragraph {
snippets: vec![
Snippet::Strong {
attributes: Some(vec![Attribute::Id {
value: "delta".to_string(),
}]),
text: "foxtrot echo".to_string(),
},
Snippet::Text {
text: " bravo".to_string(),
},
],
}]),
}];
let results = sections(source);
assert_eq!(expected, results.unwrap().1);
}
// Strong snippet with an attribute as the last snippet of the paragraph.
// NOTE(review): the name says `list_item`, but the fixture has no list;
// possibly meant `last_item` -- confirm.
#[test]
fn inline_attribute_list_item_of_paragraph() {
let source = "-- p\n\ntango <<strong|sierra alfa|id: delta>>";
let expected = vec![Section::P {
attributes: None,
content: Some(vec![Block::Paragraph {
snippets: vec![
Snippet::Text {
text: "tango ".to_string(),
},
Snippet::Strong {
attributes: Some(vec![Attribute::Id {
value: "delta".to_string(),
}]),
text: "sierra alfa".to_string(),
},
],
}]),
}];
let results = sections(source);
assert_eq!(expected, results.unwrap().1);
}
//
}
-- code
-- rust
#![allow(unused_imports)]
#![allow(unused_variables)]
#![allow(dead_code)]
use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::bytes::complete::take_until;
use nom::character::complete::line_ending;
use nom::character::complete::multispace0;
use nom::character::complete::newline;
use nom::character::complete::not_line_ending;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::rest;
use nom::multi::many0;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::delimited;
use nom::sequence::preceded;
use nom::sequence::terminated;
use nom::sequence::tuple;
use nom::IResult;
use nom::Parser;
/// Entry point; intentionally does nothing in this version. The commented-out
/// call below is an earlier manual smoke test of `get_sections`.
fn main() {
// dbg!(get_sections(
// "-- ALFA\n\nasdf asdf\n\nwer\n\nwer\n\n-- BRAVO\n\nyiyiyi yiyi\n\nwkwkwkwk\n\n"
// )
// .unwrap());
}
/// Parses one or more `-- id: ...` attribute lines.
///
/// Returns `None` (consuming nothing) when the input has no attribute line.
fn attributes(source: &str) -> IResult<&str, Option<Vec<Attribute>>> {
    opt(many1(id_attr))(source)
}
fn id_attr(source: &str) -> IResult<&str, Attribute> {
let (source, attr) = delimited(tag("-- id: "), not_line_ending, line_ending)(source)?;
Ok((source, Attribute::Id(attr.to_string())))
}
/// Top-level parser: one or more sections (`aside_section` or
/// `section_bravo`), each optionally preceded by whitespace.
fn get_sections(source: &str) -> IResult<&str, Vec<Section>> {
    let section = alt((aside_section, section_bravo));
    many1(preceded(multispace0, section))(source)
}
fn aside_section(source: &str) -> IResult<&str, Section> {
let (source, _) = tuple((tag_no_case("-- aside"), space0, line_ending))(source)?;
let (source, attrs) = attributes(source)?;
let (source, paragraphs) = paragraphs(source)?;
Ok((
source,
Section::Aside {
attrs,
content: paragraphs,
},
))
}
/// Parses a `-- bravo` section: the (case-insensitive) header line followed
/// by optional paragraphs.
///
/// The parsed paragraphs are stored in `content`, mirroring `aside_section`
/// (previously they were parsed and then dropped). This section type has no
/// attribute parsing, so `attrs` is always `None`.
fn section_bravo(source: &str) -> IResult<&str, Section> {
    let (source, _) = tuple((tag_no_case("-- bravo"), space0, line_ending))(source)?;
    let (source, content) = paragraphs(source)?;
    Ok((
        source,
        Section::Bravo {
            attrs: None,
            content,
        },
    ))
}
/// Parses one or more paragraphs, each optionally preceded by whitespace.
///
/// Returns `None` (consuming nothing) when no paragraph can be parsed.
fn paragraphs(source: &str) -> IResult<&str, Option<Vec<Block>>> {
    let spaced_paragraph = preceded(multispace0, paragraph);
    opt(many1(spaced_paragraph))(source)
}
/// Parses a single paragraph as one or more text lines.
///
/// The paragraph must not start with `--`. Each line may be preceded by one
/// line ending, so the paragraph stops where no text line follows.
fn paragraph(source: &str) -> IResult<&str, Block> {
    let (source, _) = not(tag("--"))(source)?;
    let (source, snippets) = many1(preceded(opt(line_ending), text_content))(source)?;
    Ok((source, Block::Paragraph(snippets)))
}
/// Parses one non-empty run of text up to (not including) the next `\n`
/// and wraps it in `Snippet::Text`.
fn text_content(source: &str) -> IResult<&str, Snippet> {
    is_not("\n")(source).map(|(remaining, line)| (remaining, Snippet::Text(line.to_string())))
}
/// A top-level document section, as produced by `get_sections`.
#[derive(Debug, PartialEq)]
enum Section {
/// Built by `aside_section` from an `-- aside` header.
Aside {
attrs: Option<Vec<Attribute>>,
content: Option<Vec<Block>>,
},
/// Built by `section_bravo` from a `-- bravo` header.
Bravo {
attrs: Option<Vec<Attribute>>,
content: Option<Vec<Block>>,
},
}
/// Metadata attached to a section.
#[derive(Debug, PartialEq)]
enum Attribute {
/// The value of a `-- id: <value>` line; built by `id_attr`.
Id(String),
/// Placeholder variant; none of the parsers in this version construct it.
None,
}
/// Block-level content inside a section.
#[derive(Debug, PartialEq)]
enum Block {
/// A paragraph made of snippets; built by `paragraph`.
Paragraph(Vec<Snippet>),
/// Placeholder variant; none of the parsers in this version construct it.
None,
}
/// Inline content inside a paragraph.
#[derive(Debug, PartialEq)]
enum Snippet {
/// One line of plain text; built by `text_content`.
Text(String),
/// Placeholder variant; none of the parsers in this version construct it.
None,
}
// Tests for `get_sections`: each feeds a source string to the parser and
// compares the parsed sections against a hand-built expected value.
#[cfg(test)]
mod test {
use super::*;
// Aside with a single plain-text paragraph and no attributes.
#[test]
fn aside_basic() {
let source = "-- aside\n\nalfa bravo";
// asdf asdf\n\nwer\n\nwer\n\n-- BRAVO\n\nyiyiyi yiyi\n\nwkwkwkwk\n\n";
let expected = vec![Section::Aside {
attrs: None,
content: Some(vec![Block::Paragraph(vec![Snippet::Text(
"alfa bravo".to_string(),
)])]),
}];
let results = get_sections(source);
assert_eq!(expected, results.unwrap().1);
}
// Aside whose header is followed by a `-- id:` attribute line.
#[test]
fn aside_with_attributes() {
let source = "-- aside\n-- id: charlie\n\nalfa bravo";
// asdf asdf\n\nwer\n\nwer\n\n-- BRAVO\n\nyiyiyi yiyi\n\nwkwkwkwk\n\n";
let expected = vec![Section::Aside {
attrs: Some(vec![Attribute::Id("charlie".to_string())]),
content: Some(vec![Block::Paragraph(vec![Snippet::Text(
"alfa bravo".to_string(),
)])]),
}];
let results = get_sections(source);
assert_eq!(expected, results.unwrap().1);
}
}
-- code
-- rust
#![allow(unused_imports)]
#![allow(unused_variables)]
use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::bytes::complete::take_until;
use nom::character::complete::line_ending;
use nom::character::complete::multispace0;
use nom::character::complete::newline;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::rest;
use nom::multi::many0;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::preceded;
use nom::sequence::terminated;
use nom::sequence::tuple;
use nom::IResult;
use nom::Parser;
/// Manual smoke test: parses a hard-coded two-section document with
/// `get_sections` and prints the result via `dbg!`. Panics (via `unwrap`) if
/// the parser returns an error.
fn main() {
dbg!(get_sections(
"-- ALFA\n\nasdf asdf\n\nwer\n\nwer\n\n-- BRAVO\n\nyiyiyi yiyi\n\nwkwkwkwk\n\n"
)
.unwrap());
}
/// Top-level parser: zero or more sections (`section_alfa` or
/// `section_bravo`), each optionally preceded by whitespace.
fn get_sections(source: &str) -> IResult<&str, Vec<Vec<&str>>> {
    let section = alt((section_alfa, section_bravo));
    many0(preceded(multispace0, section))(source)
}
/// Recognizes a `-- ALFA` section (header line plus at least one paragraph).
///
/// The paragraphs are consumed but not returned: the result is always an
/// empty vector, and a `dbg!` trace marks that the section matched.
fn section_alfa(source: &str) -> IResult<&str, Vec<&str>> {
    let (remaining, _) = tuple((tag("-- ALFA"), space0, line_ending))(source)?;
    let (remaining, _) = paragraphs(remaining)?;
    dbg!("ALFA");
    Ok((remaining, Vec::new()))
}
/// Recognizes a `-- BRAVO` section (header line plus at least one paragraph).
///
/// The paragraphs are consumed but not returned: the result is always an
/// empty vector, and a `dbg!` trace marks that the section matched.
fn section_bravo(source: &str) -> IResult<&str, Vec<&str>> {
    let (remaining, _) = tuple((tag("-- BRAVO"), space0, line_ending))(source)?;
    let (remaining, _) = paragraphs(remaining)?;
    dbg!("BRAVO");
    Ok((remaining, Vec::new()))
}
/// Parses one or more paragraphs, each optionally preceded by whitespace.
/// Fails when no paragraph can be parsed.
fn paragraphs(source: &str) -> IResult<&str, Vec<Vec<&str>>> {
    let spaced_paragraph = preceded(multispace0, paragraph);
    many1(spaced_paragraph)(source)
}
/// Parses a single paragraph as one or more text lines.
///
/// The paragraph must not start with `--`. Each line may be preceded by one
/// line ending, so the paragraph stops where no text line follows.
fn paragraph(source: &str) -> IResult<&str, Vec<&str>> {
    let (source, _) = not(tag("--"))(source)?;
    many1(preceded(opt(line_ending), text_content))(source)
}
/// Parses one non-empty run of text up to (not including) the next `\n`.
fn text_content(source: &str) -> IResult<&str, &str> {
    is_not("\n")(source)
}
-- code
-- rust
#![allow(unused_imports)]
use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::bytes::complete::take_until;
use nom::character::complete::line_ending;
use nom::character::complete::multispace0;
use nom::character::complete::newline;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::opt;
use nom::combinator::rest;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::preceded;
use nom::sequence::terminated;
use nom::sequence::tuple;
use nom::IResult;
use nom::Parser;
/// Manual smoke test: parses a hard-coded three-paragraph string with
/// `get_paragraphs` and prints the result via `dbg!`. Panics (via `unwrap`)
/// if the parser returns an error.
fn main() {
dbg!(get_paragraphs("asdf asdf\n\nwer\n\nwer").unwrap());
}
/// Parses one or more paragraphs, each optionally preceded by whitespace.
/// Fails when no paragraph can be parsed.
fn get_paragraphs(source: &str) -> IResult<&str, Vec<Vec<&str>>> {
    let spaced_paragraph = preceded(multispace0, paragraph);
    many1(spaced_paragraph)(source)
}
/// Parses a single paragraph as one or more text lines.
///
/// Each line may be preceded by one line ending, so the paragraph stops
/// where no text line follows.
fn paragraph(source: &str) -> IResult<&str, Vec<&str>> {
    many1(preceded(opt(line_ending), text_content))(source)
}
/// Parses one non-empty run of text up to (not including) the next `\n`.
fn text_content(source: &str) -> IResult<&str, &str> {
    is_not("\n")(source)
}
-- end of line --