The words "Under construction" in black text on a yellow background with diagonal black stripes surrounding it
I'm in the process of moving my site. It's still a work in progress. Please excuse the mess and broken links.

Neopolitan nom Parsing Playground Scratchpad

TODO: Pull subtitle into page object code_start_default_section code_end_default_section

Debugging Stuff

I'm moving stuff around right now. Everything below is helping me figure out where to put stuff.

        -- title

Neopolitan nom Parsing Playground Scratchpad

This is a holding spot for code while I get it working.

This version does basic text splitting on multi-line
breaks, but it does so by looking at the text before
it gets parsed by the many1 parsers.

The ones up top are the later ones. 

-- code/
-- rust

#![allow(unused_imports)]
#![allow(unused_variables)]
#![allow(dead_code)]
use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::bytes::complete::take_until;
use nom::character::complete::line_ending;
use nom::character::complete::multispace0;
use nom::character::complete::newline;
use nom::character::complete::not_line_ending;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::rest;
use nom::multi::many0;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::delimited;
use nom::sequence::preceded;
use nom::sequence::terminated;
use nom::sequence::tuple;
use nom::IResult;
use nom::Parser;

/// Placeholder entry point: the parsers in this snippet are exercised by the
/// unit tests, so running the binary only prints a reminder.
fn main() {
    let reminder = "Run tests instead of running main";
    println!("{}", reminder);
}

/// Parses one or more attributes, each introduced by `--` or `|` and then
/// handed to `attr_id`.
///
/// Yields `None` (without failing) when no attribute is found.
fn attributes(source: &str) -> IResult<&str, Option<Vec<Attribute>>> {
    opt(many1(preceded(alt((tag("--"), tag("|"))), attr_id)))(source)
}

fn attr_id(source: &str) -> IResult<&str, Attribute> {
    let (source, _) = space0(source)?;
    let (source, attr) = preceded(tag("id: "), is_not("|>\n"))(source)?;
    Ok((
        source,
        Attribute::Id {
            value: attr.to_string(),
        },
    ))
}

fn aside(source: &str) -> IResult<&str, Section> {
    let (source, _) = tag_no_case("-- aside")(source)?;
    let (source, _) = space0(source)?;
    let (source, _) = line_ending(source)?;
    let (source, attributes) = attributes(source)?;
    let (source, content) = paragraphs(source)?;
    Ok((
        source,
        Section::Aside {
            attributes,
            content,
        },
    ))
}

fn list(source: &str) -> IResult<&str, Section> {
    let (source, _) = tag_no_case("-- list")(source)?;
    let (source, _) = space0(source)?;
    let (source, _) = line_ending(source)?;
    let (source, attributes) = attributes(source)?;
    let preface = None;
    let (source, items) = opt(many1(preceded(multispace0, list_item)))(source)?;
    Ok((
        source,
        Section::List {
            attributes,
            items,
            preface,
        },
    ))
}

/// Parses a single list item: one or more paragraphs, each prefixed by `- `.
fn list_item(source: &str) -> IResult<&str, Container> {
    many1(preceded(tag("- "), paragraph))(source)
        .map(|(remaining, content)| (remaining, Container::ListItem { content }))
}

/// Top-level parser: one or more sections (aside, list, p or title), each
/// optionally preceded by whitespace.
fn sections(source: &str) -> IResult<&str, Vec<Section>> {
    let mut all_sections = many1(preceded(multispace0, alt((aside, list, p, title))));
    all_sections(source)
}

/// Parses an inline strong snippet of the form `<<strong|text>>` or
/// `<<strong|text|id: value>>`.
///
/// The opening tag is matched case-insensitively. The text runs up to the
/// next `|` or `>`; optional attributes follow, and the snippet must be
/// closed by `>>`.
fn snippet_strong(source: &str) -> IResult<&str, Snippet> {
    let (source, _) = tag_no_case("<<strong|")(source)?;
    let (source, content) = is_not("|>")(source)?;
    // No debug printing here: this parser is tried repeatedly (and
    // backtracked) inside `paragraph`, so tracing would fire on every attempt.
    let (source, attributes) = attributes(source)?;
    let (source, _) = tag(">>")(source)?;
    Ok((
        source,
        Snippet::Strong {
            text: content.to_string(),
            attributes,
        },
    ))
}

/// Parses a run of plain text: everything up to the next newline or `<`.
/// Fails when the input starts with either of those characters.
fn snippet_text(source: &str) -> IResult<&str, Snippet> {
    is_not("\n<")(source).map(|(remaining, content): (&str, &str)| {
        let snippet = Snippet::Text {
            text: content.to_string(),
        };
        (remaining, snippet)
    })
}

fn p(source: &str) -> IResult<&str, Section> {
    let (source, _) = tag_no_case("-- p")(source)?;
    let (source, _) = space0(source)?;
    let (source, _) = line_ending(source)?;
    let (source, attributes) = attributes(source)?;
    let (source, content) = paragraphs(source)?;
    Ok((
        source,
        Section::P {
            attributes,
            content,
        },
    ))
}

/// Parses one or more paragraphs, each optionally preceded by whitespace.
/// Yields `None` (without failing) when no paragraph is found.
fn paragraphs(source: &str) -> IResult<&str, Option<Vec<Block>>> {
    let mut maybe_paragraphs = opt(many1(preceded(multispace0, paragraph)));
    maybe_paragraphs(source)
}

/// Parses a single paragraph: one or more text / strong snippets, each
/// optionally preceded by one line ending.
///
/// Input starting with `--` is rejected so a following section header is not
/// consumed as paragraph text.
fn paragraph(source: &str) -> IResult<&str, Block> {
    let snippet_run = many1(preceded(
        opt(line_ending),
        alt((snippet_text, snippet_strong)),
    ));
    preceded(not(tag("--")), snippet_run)(source)
        .map(|(remaining, snippets)| (remaining, Block::Paragraph { snippets }))
}

fn title(source: &str) -> IResult<&str, Section> {
    let (source, _) = tag_no_case("-- title")(source)?;
    let (source, _) = space0(source)?;
    let (source, _) = line_ending(source)?;
    let (source, attributes) = attributes(source)?;
    let (source, headline) = opt(paragraph)(source)?;
    let (source, content) = paragraphs(source)?;
    Ok((
        source,
        Section::Title {
            attributes,
            headline,
            content,
        },
    ))
}

/// Attribute attached to a section or to an inline snippet.
#[derive(Debug, PartialEq)]
enum Attribute {
    /// `id: <value>` attribute, as produced by `attr_id`.
    Id { value: String },
    /// Placeholder variant; no parser in this snippet constructs it.
    None,
}

/// Top-level section of a document, one variant per `-- <name>` header
/// recognised by `sections`.
#[derive(Debug, PartialEq)]
enum Section {
    /// `-- aside` section.
    Aside {
        attributes: Option<Vec<Attribute>>,
        content: Option<Vec<Block>>,
    },
    /// `-- list` section. `preface` is always `None` in the `list` parser.
    List {
        attributes: Option<Vec<Attribute>>,
        preface: Option<Vec<Block>>,
        items: Option<Vec<Container>>,
    },
    /// `-- p` section.
    P {
        attributes: Option<Vec<Attribute>>,
        content: Option<Vec<Block>>,
    },
    /// `-- title` section: the first paragraph is the headline, the rest is content.
    Title {
        attributes: Option<Vec<Attribute>>,
        headline: Option<Block>,
        content: Option<Vec<Block>>,
    },
}

/// Block-level content inside a section.
#[derive(Debug, PartialEq)]
enum Block {
    /// A paragraph made of one or more inline snippets.
    Paragraph { snippets: Vec<Snippet> },
    /// Placeholder variant; no parser in this snippet constructs it.
    None,
}

/// Container that groups blocks inside a section.
#[derive(Debug, PartialEq)]
enum Container {
    /// One list item holding the paragraphs parsed by `list_item`.
    ListItem { content: Vec<Block> },
    /// Placeholder variant; no parser in this snippet constructs it.
    None,
}

/// Inline content inside a paragraph.
#[derive(Debug, PartialEq)]
enum Snippet {
    /// Plain text run, as produced by `snippet_text`.
    Text {
        text: String,
    },
    /// `<<strong|...>>` snippet with optional inline attributes.
    Strong {
        text: String,
        attributes: Option<Vec<Attribute>>,
    },
    /// Placeholder variant; no parser in this snippet constructs it.
    None,
}

// Unit tests for the section parsers. Most tests feed a small document to
// `sections` and compare the parsed tree with a hand-built expected value.
#[cfg(test)]

mod test {

    use super::*;

    // Title section: the first paragraph becomes the headline, the remaining
    // paragraphs (one with an inline strong snippet) become the content.
    #[test]
    fn integration_alfa() {
        let source = r#"

-- title

alfa bravo

charlie <<strong|delta>>

echo foxtrot
"#;
        let expected = vec![Section::Title {
            attributes: None,
            headline: Some(Block::Paragraph {
                snippets: vec![Snippet::Text {
                    text: "alfa bravo".to_string(),
                }],
            }),
            content: Some(vec![
                Block::Paragraph {
                    snippets: vec![
                        Snippet::Text {
                            text: "charlie ".to_string(),
                        },
                        Snippet::Strong {
                            attributes: None,
                            text: "delta".to_string(),
                        },
                    ],
                },
                Block::Paragraph {
                    snippets: vec![Snippet::Text {
                        text: "echo foxtrot".to_string(),
                    }],
                },
            ]),
        }];
        let results = sections(source);
        assert_eq!(expected, results.unwrap().1);
    }

    // Aside with a single plain-text paragraph and no attributes.
    #[test]
    fn aside_basic() {
        let source = "-- aside\n\nalfa bravo";
        let expected = vec![Section::Aside {
            attributes: None,
            content: Some(vec![Block::Paragraph {
                snippets: vec![Snippet::Text {
                    text: "alfa bravo".to_string(),
                }],
            }]),
        }];
        let results = sections(source);
        assert_eq!(expected, results.unwrap().1);
    }

    // Aside whose header is followed by a block-level `-- id:` attribute line.
    #[test]
    fn aside_with_attributes() {
        let source = "-- aside\n-- id: charlie\n\nalfa bravo";
        let expected = vec![Section::Aside {
            attributes: Some(vec![Attribute::Id {
                value: "charlie".to_string(),
            }]),
            content: Some(vec![Block::Paragraph {
                snippets: vec![Snippet::Text {
                    text: "alfa bravo".to_string(),
                }],
            }]),
        }];
        let results = sections(source);
        assert_eq!(expected, results.unwrap().1);
    }

    // Strong snippet in the middle of a paragraph splits it into three snippets.
    #[test]
    fn aside_with_strong() {
        let source = "-- aside\n\nalfa <<strong|bravo>> charlie";
        let expected = vec![Section::Aside {
            attributes: None,
            content: Some(vec![Block::Paragraph {
                snippets: vec![
                    Snippet::Text {
                        text: "alfa ".to_string(),
                    },
                    Snippet::Strong {
                        text: "bravo".to_string(),
                        attributes: None,
                    },
                    Snippet::Text {
                        text: " charlie".to_string(),
                    },
                ],
            }]),
        }];
        let results = sections(source);
        assert_eq!(expected, results.unwrap().1);
    }

    // Blank lines separate paragraphs inside one aside.
    #[test]
    fn aside_multiple_paragraphs() {
        let source = "-- aside\n\nalfa bravo\n\n<<strong|charlie>> delta\n\necho foxtrot";
        let expected = vec![Section::Aside {
            attributes: None,
            content: Some(vec![
                Block::Paragraph {
                    snippets: vec![Snippet::Text {
                        text: "alfa bravo".to_string(),
                    }],
                },
                Block::Paragraph {
                    snippets: vec![
                        Snippet::Strong {
                            text: "charlie".to_string(),
                            attributes: None,
                        },
                        Snippet::Text {
                            text: " delta".to_string(),
                        },
                    ],
                },
                Block::Paragraph {
                    snippets: vec![Snippet::Text {
                        text: "echo foxtrot".to_string(),
                    }],
                },
            ]),
        }];
        let results = sections(source);
        assert_eq!(expected, results.unwrap().1);
    }

    // Two consecutive asides; a `--` header line ends the previous section.
    #[test]
    fn multiple_sections() {
        let source = r#"


-- aside
-- id: widget

sierra tango

-- aside

alfa bravo

<<strong|charlie>> delta

echo foxtrot
"#;
        let expected = vec![
            Section::Aside {
                attributes: Some(vec![Attribute::Id {
                    value: "widget".to_string(),
                }]),
                content: Some(vec![Block::Paragraph {
                    snippets: vec![Snippet::Text {
                        text: "sierra tango".to_string(),
                    }],
                }]),
            },
            Section::Aside {
                attributes: None,
                content: Some(vec![
                    Block::Paragraph {
                        snippets: vec![Snippet::Text {
                            text: "alfa bravo".to_string(),
                        }],
                    },
                    Block::Paragraph {
                        snippets: vec![
                            Snippet::Strong {
                                text: "charlie".to_string(),
                                attributes: None,
                            },
                            Snippet::Text {
                                text: " delta".to_string(),
                            },
                        ],
                    },
                    Block::Paragraph {
                        snippets: vec![Snippet::Text {
                            text: "echo foxtrot".to_string(),
                        }],
                    },
                ]),
            },
        ];
        let results = sections(source);
        assert_eq!(expected, results.unwrap().1);
    }

    // An aside followed by a list section with a single `- ` item.
    #[test]
    fn aside_list() {
        let source = r#"

-- aside

alfa bravo

-- list

- charlie
"#;
        let expected = vec![
            Section::Aside {
                attributes: None,
                content: Some(vec![Block::Paragraph {
                    snippets: vec![Snippet::Text {
                        text: "alfa bravo".to_string(),
                    }],
                }]),
            },
            Section::List {
                attributes: None,
                items: Some(vec![Container::ListItem {
                    content: vec![Block::Paragraph {
                        snippets: vec![Snippet::Text {
                            text: "charlie".to_string(),
                        }],
                    }],
                }]),
                preface: None,
            },
        ];
        let results = sections(source);
        assert_eq!(expected, results.unwrap().1);
    }

    // Title with only a headline paragraph: content is `None`.
    #[test]
    fn title_basic() {
        let source = r#"

-- title

alfa bravo
"#;
        let expected = vec![Section::Title {
            attributes: None,
            headline: Some(Block::Paragraph {
                snippets: vec![Snippet::Text {
                    text: "alfa bravo".to_string(),
                }],
            }),
            content: None,
        }];
        let results = sections(source);
        assert_eq!(expected, results.unwrap().1);
    }

    // Title with a headline plus two content paragraphs.
    #[test]
    fn title_with_content() {
        let source = r#"

-- title

alfa bravo

charlie delta

echo foxtrot
"#;
        let expected = vec![Section::Title {
            attributes: None,
            headline: Some(Block::Paragraph {
                snippets: vec![Snippet::Text {
                    text: "alfa bravo".to_string(),
                }],
            }),
            content: Some(vec![
                Block::Paragraph {
                    snippets: vec![Snippet::Text {
                        text: "charlie delta".to_string(),
                    }],
                },
                Block::Paragraph {
                    snippets: vec![Snippet::Text {
                        text: "echo foxtrot".to_string(),
                    }],
                },
            ]),
        }];
        let results = sections(source);
        assert_eq!(expected, results.unwrap().1);
    }

    // Calls `snippet_strong` directly: an inline `|id: ...` attribute is parsed.
    #[test]
    fn strong_with_attributes() {
        let source = r#"<<strong|bravo|id: tango>>"#;
        let expected = Snippet::Strong {
            attributes: Some(vec![Attribute::Id {
                value: "tango".to_string(),
            }]),
            text: "bravo".to_string(),
        };
        let results = snippet_strong(source);
        assert_eq!(expected, results.unwrap().1);
    }

    // Strong snippet with an inline attribute in the middle of a `-- p` paragraph.
    #[test]
    fn strong_text_with_attributes_in_p() {
        let source = "-- p\n\nalfa <<strong|bravo|id: tango>> charlie";
        let expected = vec![Section::P {
            attributes: None,
            content: Some(vec![Block::Paragraph {
                snippets: vec![
                    Snippet::Text {
                        text: "alfa ".to_string(),
                    },
                    Snippet::Strong {
                        attributes: Some(vec![Attribute::Id {
                            value: "tango".to_string(),
                        }]),
                        text: "bravo".to_string(),
                    },
                    Snippet::Text {
                        text: " charlie".to_string(),
                    },
                ],
            }]),
        }];
        let results = sections(source);
        assert_eq!(expected, results.unwrap().1);
    }

    // Strong snippet with an inline attribute as the first snippet of a paragraph.
    #[test]
    fn inline_attribute_first_item_of_paragraph() {
        let source = "-- p\n\n<<strong|foxtrot echo|id: delta>> bravo";
        let expected = vec![Section::P {
            attributes: None,
            content: Some(vec![Block::Paragraph {
                snippets: vec![
                    Snippet::Strong {
                        attributes: Some(vec![Attribute::Id {
                            value: "delta".to_string(),
                        }]),
                        text: "foxtrot echo".to_string(),
                    },
                    Snippet::Text {
                        text: " bravo".to_string(),
                    },
                ],
            }]),
        }];
        let results = sections(source);
        assert_eq!(expected, results.unwrap().1);
    }

    // Strong snippet with an inline attribute as the last snippet of a paragraph.
    // NOTE(review): the name says "list_item" but the input has no list;
    // presumably "last_item" was intended - confirm before renaming.
    #[test]
    fn inline_attribute_list_item_of_paragraph() {
        let source = "-- p\n\ntango <<strong|sierra alfa|id: delta>>";
        let expected = vec![Section::P {
            attributes: None,
            content: Some(vec![Block::Paragraph {
                snippets: vec![
                    Snippet::Text {
                        text: "tango ".to_string(),
                    },
                    Snippet::Strong {
                        attributes: Some(vec![Attribute::Id {
                            value: "delta".to_string(),
                        }]),
                        text: "sierra alfa".to_string(),
                    },
                ],
            }]),
        }];
        let results = sections(source);
        assert_eq!(expected, results.unwrap().1);
    }

    //
}

-- code
-- rust


#![allow(unused_imports)]
#![allow(unused_variables)]
#![allow(dead_code)]
use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::bytes::complete::take_until;
use nom::character::complete::line_ending;
use nom::character::complete::multispace0;
use nom::character::complete::newline;
use nom::character::complete::not_line_ending;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::rest;
use nom::multi::many0;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::delimited;
use nom::sequence::preceded;
use nom::sequence::terminated;
use nom::sequence::tuple;
use nom::IResult;
use nom::Parser;

/// Entry point; currently does nothing. The commented-out call below is a
/// manual smoke test of `get_sections` that has been disabled.
fn main() {
    // dbg!(get_sections(
    //     "-- ALFA\n\nasdf asdf\n\nwer\n\nwer\n\n-- BRAVO\n\nyiyiyi yiyi\n\nwkwkwkwk\n\n"
    // )
    // .unwrap());
}

/// Parses one or more consecutive `-- id: ...` attribute lines.
/// Yields `None` (without failing) when there are none.
fn attributes(source: &str) -> IResult<&str, Option<Vec<Attribute>>> {
    let mut maybe_attrs = opt(many1(id_attr));
    maybe_attrs(source)
}

fn id_attr(source: &str) -> IResult<&str, Attribute> {
    let (source, attr) = delimited(tag("-- id: "), not_line_ending, line_ending)(source)?;
    Ok((source, Attribute::Id(attr.to_string())))
}

/// Top-level parser: one or more sections (aside or bravo), each optionally
/// preceded by whitespace.
fn get_sections(source: &str) -> IResult<&str, Vec<Section>> {
    let mut all_sections = many1(preceded(multispace0, alt((aside_section, section_bravo))));
    all_sections(source)
}

fn aside_section(source: &str) -> IResult<&str, Section> {
    let (source, _) = tuple((tag_no_case("-- aside"), space0, line_ending))(source)?;
    let (source, attrs) = attributes(source)?;
    let (source, paragraphs) = paragraphs(source)?;
    Ok((
        source,
        Section::Aside {
            attrs,
            content: paragraphs,
        },
    ))
}

/// Parses a bravo section: a `-- bravo` header line (matched
/// case-insensitively, trailing spaces allowed) followed by optional
/// paragraphs.
///
/// The parsed paragraphs are returned as the section content rather than
/// being discarded, matching `aside_section`. Attributes are not parsed for
/// this section type, so `attrs` is always `None`.
fn section_bravo(source: &str) -> IResult<&str, Section> {
    let (source, _) = tuple((tag_no_case("-- bravo"), space0, line_ending))(source)?;
    let (source, content) = paragraphs(source)?;
    Ok((
        source,
        Section::Bravo {
            attrs: None,
            content,
        },
    ))
}

/// Parses one or more paragraphs, each optionally preceded by whitespace.
/// Yields `None` (without failing) when no paragraph is found.
fn paragraphs(source: &str) -> IResult<&str, Option<Vec<Block>>> {
    let mut maybe_paragraphs = opt(many1(preceded(multispace0, paragraph)));
    maybe_paragraphs(source)
}

/// Parses a single paragraph: one or more lines of text, each optionally
/// preceded by one line ending.
///
/// Input starting with `--` is rejected so a following section header is not
/// consumed as paragraph text.
fn paragraph(source: &str) -> IResult<&str, Block> {
    let (source, _) = not(tag("--"))(source)?;
    // Leftover debug tracing removed: it printed on every parsed paragraph.
    let (source, snippets) = many1(preceded(opt(line_ending), text_content))(source)?;
    Ok((source, Block::Paragraph(snippets)))
}

/// Parses one line of text: everything up to the next newline. Fails when
/// the input starts with a newline.
fn text_content(source: &str) -> IResult<&str, Snippet> {
    is_not("\n")(source)
        .map(|(remaining, line): (&str, &str)| (remaining, Snippet::Text(line.to_string())))
}

/// Top-level section of a document, one variant per header recognised by
/// `get_sections`.
#[derive(Debug, PartialEq)]
enum Section {
    /// `-- aside` section.
    Aside {
        attrs: Option<Vec<Attribute>>,
        content: Option<Vec<Block>>,
    },
    /// `-- bravo` section.
    Bravo {
        attrs: Option<Vec<Attribute>>,
        content: Option<Vec<Block>>,
    },
}

/// Attribute attached to a section.
#[derive(Debug, PartialEq)]
enum Attribute {
    /// Value of a `-- id: <value>` line, as produced by `id_attr`.
    Id(String),
    /// Placeholder variant; no parser in this snippet constructs it.
    None,
}

/// Block-level content inside a section.
#[derive(Debug, PartialEq)]
enum Block {
    /// A paragraph made of one or more snippets (one per line of text).
    Paragraph(Vec<Snippet>),
    /// Placeholder variant; no parser in this snippet constructs it.
    None,
}

/// Inline content inside a paragraph.
#[derive(Debug, PartialEq)]
enum Snippet {
    /// One line of plain text, as produced by `text_content`.
    Text(String),
    /// Placeholder variant; no parser in this snippet constructs it.
    None,
}

// Unit tests: each feeds a small document to `get_sections` and compares the
// parsed tree with a hand-built expected value.
#[cfg(test)]

mod test {

    use super::*;

    // Aside with a single plain-text paragraph and no attributes.
    #[test]
    fn aside_basic() {
        let source = "-- aside\n\nalfa bravo";
        // asdf asdf\n\nwer\n\nwer\n\n-- BRAVO\n\nyiyiyi yiyi\n\nwkwkwkwk\n\n";
        let expected = vec![Section::Aside {
            attrs: None,
            content: Some(vec![Block::Paragraph(vec![Snippet::Text(
                "alfa bravo".to_string(),
            )])]),
        }];
        let results = get_sections(source);
        assert_eq!(expected, results.unwrap().1);
    }

    // Aside whose header is followed by a `-- id:` attribute line.
    #[test]
    fn aside_with_attributes() {
        let source = "-- aside\n-- id: charlie\n\nalfa bravo";
        // asdf asdf\n\nwer\n\nwer\n\n-- BRAVO\n\nyiyiyi yiyi\n\nwkwkwkwk\n\n";
        let expected = vec![Section::Aside {
            attrs: Some(vec![Attribute::Id("charlie".to_string())]),
            content: Some(vec![Block::Paragraph(vec![Snippet::Text(
                "alfa bravo".to_string(),
            )])]),
        }];
        let results = get_sections(source);
        assert_eq!(expected, results.unwrap().1);
    }
}

-- code
-- rust

#![allow(unused_imports)]
#![allow(unused_variables)]
use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::bytes::complete::take_until;
use nom::character::complete::line_ending;
use nom::character::complete::multispace0;
use nom::character::complete::newline;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::rest;
use nom::multi::many0;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::preceded;
use nom::sequence::terminated;
use nom::sequence::tuple;
use nom::IResult;
use nom::Parser;

/// Manual smoke test: parses a hard-coded two-section document with
/// `get_sections` and prints the result via `dbg!`. Panics if parsing fails.
fn main() {
    dbg!(get_sections(
        "-- ALFA\n\nasdf asdf\n\nwer\n\nwer\n\n-- BRAVO\n\nyiyiyi yiyi\n\nwkwkwkwk\n\n"
    )
    .unwrap());
}

/// Top-level parser: zero or more sections (ALFA or BRAVO), each optionally
/// preceded by whitespace. Each section yields its own `Vec<&str>`.
fn get_sections(source: &str) -> IResult<&str, Vec<Vec<&str>>> {
    let mut all_sections = many0(preceded(multispace0, alt((section_alfa, section_bravo))));
    all_sections(source)
}

/// Parses an ALFA section: a `-- ALFA` header line (trailing spaces allowed)
/// followed by one or more paragraphs.
///
/// Returns the lines of all paragraphs in the section, flattened in document
/// order, instead of discarding the parsed content.
fn section_alfa(source: &str) -> IResult<&str, Vec<&str>> {
    let (source, _) = tuple((tag("-- ALFA"), space0, line_ending))(source)?;
    let (source, section_paragraphs) = paragraphs(source)?;
    // Each paragraph is a Vec of lines; flatten to match the return type.
    let lines = section_paragraphs.into_iter().flatten().collect();
    Ok((source, lines))
}

/// Parses a BRAVO section: a `-- BRAVO` header line (trailing spaces allowed)
/// followed by one or more paragraphs.
///
/// Returns the lines of all paragraphs in the section, flattened in document
/// order, instead of discarding the parsed content.
fn section_bravo(source: &str) -> IResult<&str, Vec<&str>> {
    let (source, _) = tuple((tag("-- BRAVO"), space0, line_ending))(source)?;
    let (source, section_paragraphs) = paragraphs(source)?;
    // Each paragraph is a Vec of lines; flatten to match the return type.
    let lines = section_paragraphs.into_iter().flatten().collect();
    Ok((source, lines))
}

/// Parses one or more paragraphs, each optionally preceded by whitespace.
/// Fails when no paragraph is found.
fn paragraphs(source: &str) -> IResult<&str, Vec<Vec<&str>>> {
    let mut one_or_more = many1(preceded(multispace0, paragraph));
    one_or_more(source)
}

/// Parses a single paragraph: one or more lines of text, each optionally
/// preceded by one line ending. Returns the lines as string slices.
///
/// Input starting with `--` is rejected so a following section header is not
/// consumed as paragraph text.
fn paragraph(source: &str) -> IResult<&str, Vec<&str>> {
    let (source, _) = not(tag("--"))(source)?;
    // Leftover debug tracing removed: it printed on every parsed paragraph.
    let (source, lines) = many1(preceded(opt(line_ending), text_content))(source)?;
    Ok((source, lines))
}

/// Parses one line of text: everything up to the next newline. Fails when
/// the input starts with a newline.
fn text_content(source: &str) -> IResult<&str, &str> {
    let mut until_newline = is_not("\n");
    until_newline(source)
}



-- code
-- rust


#![allow(unused_imports)]
use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::bytes::complete::take_until;
use nom::character::complete::line_ending;
use nom::character::complete::multispace0;
use nom::character::complete::newline;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::opt;
use nom::combinator::rest;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::preceded;
use nom::sequence::terminated;
use nom::sequence::tuple;
use nom::IResult;
use nom::Parser;

/// Manual smoke test: parses a hard-coded three-paragraph string with
/// `get_paragraphs` and prints the result via `dbg!`. Panics if parsing fails.
fn main() {
    dbg!(get_paragraphs("asdf asdf\n\nwer\n\nwer").unwrap());
}

/// Parses one or more paragraphs, each optionally preceded by whitespace.
/// Fails when no paragraph is found.
fn get_paragraphs(source: &str) -> IResult<&str, Vec<Vec<&str>>> {
    let mut one_or_more = many1(preceded(multispace0, paragraph));
    one_or_more(source)
}

/// Parses a single paragraph: one or more lines of text, each optionally
/// preceded by one line ending. Returns the lines as string slices.
///
/// A blank line ends the paragraph, because `text_content` cannot match at a
/// second consecutive newline.
fn paragraph(source: &str) -> IResult<&str, Vec<&str>> {
    // Debug tracing and the abandoned `take_until`-based attempt were removed.
    let (source, lines) = many1(preceded(opt(line_ending), text_content))(source)?;
    Ok((source, lines))
}

/// Parses one line of text: everything up to the next newline. Fails when
/// the input starts with a newline.
fn text_content(source: &str) -> IResult<&str, &str> {
    let mut until_newline = is_not("\n");
    until_newline(source)
}


-- /code

-- categories
-- Rust
-- nom

-- metadata
-- date: 2023-07-20 01:34:55
-- id: 2sp6hfb0
-- site: aws
-- type: post
-- status: scratch