Combine Vecs Of Parsed Tokens For An AST With Chumsky

September 2023

This is the base setup for how I'm parsing Neopolitan files in the LSP for syntax highlighting. Each child parser produces a `Vec` of tokens paired with their spans; the top-level parser then collects those vecs and flattens them into a single `Vec`.

#!/usr/bin/env cargo +nightly -Zscript

//! ```cargo
//! [package]
//! edition = "2021"
//! [dependencies]
//! chumsky = { version = "0.9.2" }
//! ```

use chumsky::prelude::*;
use chumsky::Parser;
use std::ops::Range;

/// A token emitted by the parsers in this file.
///
/// Each variant identifies which letter was matched; the location of the
/// match is carried separately as a span alongside the token.
///
/// `Clone`, `PartialEq` and `Eq` are derived so that parser output can be
/// copied and compared (e.g. with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// A single `a` character.
    LetterA,
    /// A single `b` character.
    LetterB,
}

/// Runs the top-level parser over a small hard-coded sample and dumps the
/// outcome to stderr with `dbg!`.
fn main() {
    // Sample input: a run of `a`s, a run of `b`s, then another run of `a`s.
    let source = "aabbaa";
    // Per the chumsky 0.9 docs, `parse_recovery_verbose` returns both the
    // (optional) output and any errors, and prints verbose debugging
    // information while parsing.
    let result = parser().parse_recovery_verbose(source);
    dbg!(&result);
}

/// Top-level parser.
///
/// Accepts one or more runs, where each run is whatever `letter1()` or
/// `letter2()` matches. Every run yields its own vec of `(Token, span)`
/// pairs; those per-run vecs are concatenated, in order, into one flat vec.
fn parser() -> impl Parser<char, Vec<(Token, Range<usize>)>, Error = Simple<char>> {
    // Try the `a` run first, falling back to the `b` run.
    let run = letter1().or(letter2());

    run.repeated()
        .at_least(1)
        // Collapse the vec-of-vecs produced by `repeated()` into a single vec.
        .map(|runs: Vec<Vec<(Token, Range<usize>)>>| {
            runs.into_iter().flatten().collect::<Vec<_>>()
        })
}

/// Matches a run of one or more consecutive `a` characters.
///
/// Each matched character becomes its own `(Token::LetterA, span)` entry,
/// where the span is the range reported for that single match.
fn letter1() -> impl Parser<char, Vec<(Token, Range<usize>)>, Error = Simple<char>> {
    // One `a`, tagged with the span it was found at; the matched text itself
    // is not needed.
    let single = just("a").map_with_span(|_matched, range| (Token::LetterA, range));

    // TODO: test changing this to .exactly(1)
    single.repeated().at_least(1)
}

/// Matches a run of one or more consecutive `b` characters.
///
/// Each matched character becomes its own `(Token::LetterB, span)` entry,
/// where the span is the range reported for that single match.
fn letter2() -> impl Parser<char, Vec<(Token, Range<usize>)>, Error = Simple<char>> {
    // One `b`, tagged with the span it was found at; the matched text itself
    // is not needed.
    let single = just("b").map_with_span(|_matched, range| (Token::LetterB, range));

    // TODO: test changing this to .exactly(1)
    single.repeated().at_least(1)
}

end of line