diff --git a/src/ephapax-lexer/Cargo.toml b/src/ephapax-lexer/Cargo.toml new file mode 100644 index 00000000..a55a8c53 --- /dev/null +++ b/src/ephapax-lexer/Cargo.toml @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: EUPL-1.2 +# SPDX-FileCopyrightText: 2025 Jonathan D.A. Jewell + +[package] +name = "ephapax-lexer" +description = "Lexer for the Ephapax programming language" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true +authors.workspace = true +repository.workspace = true +keywords.workspace = true +categories.workspace = true + +[dependencies] +logos = { workspace = true } +smol_str = { workspace = true } +thiserror = { workspace = true } + +[dev-dependencies] diff --git a/src/ephapax-lexer/src/lib.rs b/src/ephapax-lexer/src/lib.rs new file mode 100644 index 00000000..f2a200b4 --- /dev/null +++ b/src/ephapax-lexer/src/lib.rs @@ -0,0 +1,736 @@ +// SPDX-License-Identifier: EUPL-1.2 +// SPDX-FileCopyrightText: 2025 Jonathan D.A. Jewell + +//! Ephapax Lexer +//! +//! Tokenizes Ephapax source code using the logos crate for high performance. +//! +//! # Example +//! +//! ``` +//! use ephapax_lexer::{Lexer, Token}; +//! +//! let source = "let x = 42"; +//! let tokens: Vec<_> = Lexer::new(source).collect(); +//! 
/// Source span for error reporting.
///
/// A half-open range `start..end` of offsets into the source text.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    /// Offset at which the span begins (inclusive).
    pub start: usize,
    /// Offset at which the span ends (exclusive).
    pub end: usize,
}

impl Span {
    /// Create a span covering `start..end`.
    pub fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }

    /// Placeholder span (`0..0`) for values that have no real source location.
    pub fn dummy() -> Self {
        Self::new(0, 0)
    }

    /// Smallest span that covers both `self` and `other`.
    ///
    /// The two spans do not need to overlap or be given in any order.
    pub fn merge(self, other: Self) -> Self {
        Self {
            start: self.start.min(other.start),
            end: self.end.max(other.end),
        }
    }
}

/// Conversion from the range type that logos reports for each token.
///
/// `logos::Span` is a type alias for `Range<usize>`, so the spans produced by
/// the generated lexer convert with a plain `.into()`.
impl From<std::ops::Range<usize>> for Span {
    fn from(span: std::ops::Range<usize>) -> Self {
        Self::new(span.start, span.end)
    }
}
===== + #[token("+")] + Plus, + + #[token("-")] + Minus, + + #[token("*")] + Star, + + #[token("/")] + Slash, + + #[token("%")] + Percent, + + #[token("==")] + EqEq, + + #[token("!=")] + NotEq, + + #[token("<")] + Lt, + + #[token(">")] + Gt, + + #[token("<=")] + LtEq, + + #[token(">=")] + GtEq, + + #[token("&&")] + AndAnd, + + #[token("||")] + OrOr, + + #[token("!")] + Not, + + #[token("&")] + Ampersand, + + #[token("@")] + At, + + #[token("->")] + Arrow, + + #[token("=")] + Eq, + + #[token(":")] + Colon, + + #[token(",")] + Comma, + + #[token(".")] + Dot, + + #[token(";")] + Semi, + + // ===== Delimiters ===== + #[token("(")] + LParen, + + #[token(")")] + RParen, + + #[token("{")] + LBrace, + + #[token("}")] + RBrace, + + #[token("[")] + LBracket, + + #[token("]")] + RBracket, + + // ===== Unit ===== + #[token("()")] + Unit, + + // ===== Literals ===== + #[regex(r"[0-9]+", |lex| lex.slice().parse::().ok())] + Integer(i64), + + #[regex(r"[0-9]+\.[0-9]+", |lex| lex.slice().parse::().ok())] + Float(f64), + + #[regex(r#""([^"\\]|\\.)*""#, |lex| { + let s = lex.slice(); + // Remove quotes and unescape + Some(unescape_string(&s[1..s.len()-1])) + })] + String(String), + + // ===== Identifiers ===== + #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| SmolStr::new(lex.slice()))] + Ident(SmolStr), + + // ===== Comments ===== + #[regex(r"--[^\n]*", logos::skip)] + LineComment, + + #[token("{-", block_comment)] + BlockComment, + + // ===== Special ===== + /// End of file marker + Eof, +} + +/// Parse block comments (handles nesting) +fn block_comment(lex: &mut logos::Lexer) -> logos::FilterResult<(), ()> { + let mut depth = 1; + let remainder = lex.remainder(); + + let mut chars = remainder.char_indices(); + while let Some((i, c)) = chars.next() { + match c { + '{' => { + if let Some((_, '-')) = chars.next() { + depth += 1; + } + } + '-' => { + if let Some((_, '}')) = chars.next() { + depth -= 1; + if depth == 0 { + lex.bump(i + 2); + return logos::FilterResult::Skip; + } + } + } + _ 
=> {} + } + } + + // Unterminated comment - skip to end but mark as error + lex.bump(remainder.len()); + logos::FilterResult::Error(()) +} + +/// Unescape string literals +fn unescape_string(s: &str) -> String { + let mut result = String::with_capacity(s.len()); + let mut chars = s.chars(); + + while let Some(c) = chars.next() { + if c == '\\' { + match chars.next() { + Some('n') => result.push('\n'), + Some('r') => result.push('\r'), + Some('t') => result.push('\t'), + Some('\\') => result.push('\\'), + Some('"') => result.push('"'), + Some('0') => result.push('\0'), + Some(c) => { + result.push('\\'); + result.push(c); + } + None => result.push('\\'), + } + } else { + result.push(c); + } + } + + result +} + +impl fmt::Display for TokenKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + TokenKind::Let => write!(f, "let"), + TokenKind::LetBang => write!(f, "let!"), + TokenKind::Fn => write!(f, "fn"), + TokenKind::If => write!(f, "if"), + TokenKind::Then => write!(f, "then"), + TokenKind::Else => write!(f, "else"), + TokenKind::Region => write!(f, "region"), + TokenKind::Drop => write!(f, "drop"), + TokenKind::Copy => write!(f, "copy"), + TokenKind::True => write!(f, "true"), + TokenKind::False => write!(f, "false"), + TokenKind::Inl => write!(f, "inl"), + TokenKind::Inr => write!(f, "inr"), + TokenKind::Case => write!(f, "case"), + TokenKind::Of => write!(f, "of"), + TokenKind::End => write!(f, "end"), + TokenKind::In => write!(f, "in"), + TokenKind::Type => write!(f, "type"), + TokenKind::TyBool => write!(f, "Bool"), + TokenKind::TyI32 => write!(f, "I32"), + TokenKind::TyI64 => write!(f, "I64"), + TokenKind::TyF32 => write!(f, "F32"), + TokenKind::TyF64 => write!(f, "F64"), + TokenKind::TyString => write!(f, "String"), + TokenKind::Plus => write!(f, "+"), + TokenKind::Minus => write!(f, "-"), + TokenKind::Star => write!(f, "*"), + TokenKind::Slash => write!(f, "/"), + TokenKind::Percent => write!(f, "%"), + TokenKind::EqEq => 
write!(f, "=="), + TokenKind::NotEq => write!(f, "!="), + TokenKind::Lt => write!(f, "<"), + TokenKind::Gt => write!(f, ">"), + TokenKind::LtEq => write!(f, "<="), + TokenKind::GtEq => write!(f, ">="), + TokenKind::AndAnd => write!(f, "&&"), + TokenKind::OrOr => write!(f, "||"), + TokenKind::Not => write!(f, "!"), + TokenKind::Ampersand => write!(f, "&"), + TokenKind::At => write!(f, "@"), + TokenKind::Arrow => write!(f, "->"), + TokenKind::Eq => write!(f, "="), + TokenKind::Colon => write!(f, ":"), + TokenKind::Comma => write!(f, ","), + TokenKind::Dot => write!(f, "."), + TokenKind::Semi => write!(f, ";"), + TokenKind::LParen => write!(f, "("), + TokenKind::RParen => write!(f, ")"), + TokenKind::LBrace => write!(f, "{{"), + TokenKind::RBrace => write!(f, "}}"), + TokenKind::LBracket => write!(f, "["), + TokenKind::RBracket => write!(f, "]"), + TokenKind::Unit => write!(f, "()"), + TokenKind::Integer(n) => write!(f, "{}", n), + TokenKind::Float(n) => write!(f, "{}", n), + TokenKind::String(s) => write!(f, "\"{}\"", s), + TokenKind::Ident(s) => write!(f, "{}", s), + TokenKind::LineComment => write!(f, ""), + TokenKind::BlockComment => write!(f, ""), + TokenKind::Eof => write!(f, ""), + } + } +} + +/// A token with its span +#[derive(Debug, Clone, PartialEq)] +pub struct Token { + pub kind: TokenKind, + pub span: Span, +} + +impl Token { + pub fn new(kind: TokenKind, span: Span) -> Self { + Self { kind, span } + } + + pub fn eof(pos: usize) -> Self { + Self { + kind: TokenKind::Eof, + span: Span::new(pos, pos), + } + } +} + +/// Lexer wrapper that provides a clean iterator interface +pub struct Lexer<'src> { + inner: SpannedIter<'src, TokenKind>, + source: &'src str, + finished: bool, +} + +impl<'src> Lexer<'src> { + pub fn new(source: &'src str) -> Self { + Self { + inner: TokenKind::lexer(source).spanned(), + source, + finished: false, + } + } + + pub fn source(&self) -> &'src str { + self.source + } + + /// Tokenize the entire source, returning tokens and any 
errors + pub fn tokenize(source: &str) -> (Vec, Vec) { + let mut tokens = Vec::new(); + let mut errors = Vec::new(); + + for result in Lexer::new(source) { + match result { + Ok(token) => tokens.push(token), + Err(e) => errors.push(e), + } + } + + (tokens, errors) + } +} + +impl<'src> Iterator for Lexer<'src> { + type Item = Result; + + fn next(&mut self) -> Option { + if self.finished { + return None; + } + + match self.inner.next() { + Some((Ok(kind), span)) => Some(Ok(Token::new(kind, span.into()))), + Some((Err(()), span)) => { + Some(Err(LexerError::UnexpectedCharacter(span.start))) + } + None => { + self.finished = true; + Some(Ok(Token::eof(self.source.len()))) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn lex(source: &str) -> Vec { + Lexer::new(source) + .filter_map(|r| r.ok()) + .map(|t| t.kind) + .filter(|k| !matches!(k, TokenKind::Eof)) + .collect() + } + + #[test] + fn test_keywords() { + assert_eq!(lex("let"), vec![TokenKind::Let]); + assert_eq!(lex("let!"), vec![TokenKind::LetBang]); + assert_eq!(lex("fn"), vec![TokenKind::Fn]); + assert_eq!(lex("if"), vec![TokenKind::If]); + assert_eq!(lex("then"), vec![TokenKind::Then]); + assert_eq!(lex("else"), vec![TokenKind::Else]); + assert_eq!(lex("region"), vec![TokenKind::Region]); + assert_eq!(lex("drop"), vec![TokenKind::Drop]); + assert_eq!(lex("copy"), vec![TokenKind::Copy]); + assert_eq!(lex("true"), vec![TokenKind::True]); + assert_eq!(lex("false"), vec![TokenKind::False]); + assert_eq!(lex("inl"), vec![TokenKind::Inl]); + assert_eq!(lex("inr"), vec![TokenKind::Inr]); + assert_eq!(lex("case"), vec![TokenKind::Case]); + assert_eq!(lex("of"), vec![TokenKind::Of]); + assert_eq!(lex("end"), vec![TokenKind::End]); + assert_eq!(lex("in"), vec![TokenKind::In]); + } + + #[test] + fn test_type_keywords() { + assert_eq!(lex("Bool"), vec![TokenKind::TyBool]); + assert_eq!(lex("I32"), vec![TokenKind::TyI32]); + assert_eq!(lex("I64"), vec![TokenKind::TyI64]); + assert_eq!(lex("F32"), 
vec![TokenKind::TyF32]); + assert_eq!(lex("F64"), vec![TokenKind::TyF64]); + assert_eq!(lex("String"), vec![TokenKind::TyString]); + } + + #[test] + fn test_operators() { + assert_eq!(lex("+ - * / %"), vec![ + TokenKind::Plus, + TokenKind::Minus, + TokenKind::Star, + TokenKind::Slash, + TokenKind::Percent, + ]); + + assert_eq!(lex("== != < > <= >="), vec![ + TokenKind::EqEq, + TokenKind::NotEq, + TokenKind::Lt, + TokenKind::Gt, + TokenKind::LtEq, + TokenKind::GtEq, + ]); + + assert_eq!(lex("&& || !"), vec![ + TokenKind::AndAnd, + TokenKind::OrOr, + TokenKind::Not, + ]); + + assert_eq!(lex("& @ -> = : , . ;"), vec![ + TokenKind::Ampersand, + TokenKind::At, + TokenKind::Arrow, + TokenKind::Eq, + TokenKind::Colon, + TokenKind::Comma, + TokenKind::Dot, + TokenKind::Semi, + ]); + } + + #[test] + fn test_delimiters() { + assert_eq!(lex("( ) { } [ ]"), vec![ + TokenKind::LParen, + TokenKind::RParen, + TokenKind::LBrace, + TokenKind::RBrace, + TokenKind::LBracket, + TokenKind::RBracket, + ]); + } + + #[test] + fn test_unit() { + assert_eq!(lex("()"), vec![TokenKind::Unit]); + } + + #[test] + fn test_integers() { + assert_eq!(lex("42"), vec![TokenKind::Integer(42)]); + assert_eq!(lex("0"), vec![TokenKind::Integer(0)]); + assert_eq!(lex("12345"), vec![TokenKind::Integer(12345)]); + } + + #[test] + fn test_floats() { + assert_eq!(lex("3.14"), vec![TokenKind::Float(3.14)]); + assert_eq!(lex("0.0"), vec![TokenKind::Float(0.0)]); + assert_eq!(lex("123.456"), vec![TokenKind::Float(123.456)]); + } + + #[test] + fn test_strings() { + assert_eq!(lex(r#""hello""#), vec![TokenKind::String("hello".to_string())]); + assert_eq!(lex(r#""hello\nworld""#), vec![TokenKind::String("hello\nworld".to_string())]); + assert_eq!(lex(r#""""#), vec![TokenKind::String("".to_string())]); + } + + #[test] + fn test_identifiers() { + assert_eq!(lex("x"), vec![TokenKind::Ident("x".into())]); + assert_eq!(lex("foo_bar"), vec![TokenKind::Ident("foo_bar".into())]); + assert_eq!(lex("_private"), 
vec![TokenKind::Ident("_private".into())]); + assert_eq!(lex("x1"), vec![TokenKind::Ident("x1".into())]); + } + + #[test] + fn test_comments() { + assert_eq!(lex("-- this is a comment\n42"), vec![TokenKind::Integer(42)]); + assert_eq!(lex("{- block comment -}42"), vec![TokenKind::Integer(42)]); + assert_eq!(lex("{- nested {- comment -} -}42"), vec![TokenKind::Integer(42)]); + } + + #[test] + fn test_complex_expression() { + let tokens = lex("let x = 42 in x + 1"); + assert_eq!(tokens, vec![ + TokenKind::Let, + TokenKind::Ident("x".into()), + TokenKind::Eq, + TokenKind::Integer(42), + TokenKind::In, + TokenKind::Ident("x".into()), + TokenKind::Plus, + TokenKind::Integer(1), + ]); + } + + #[test] + fn test_region_expression() { + let tokens = lex(r#"region r { String.new@r("hello") }"#); + assert_eq!(tokens, vec![ + TokenKind::Region, + TokenKind::Ident("r".into()), + TokenKind::LBrace, + TokenKind::TyString, + TokenKind::Dot, + TokenKind::Ident("new".into()), + TokenKind::At, + TokenKind::Ident("r".into()), + TokenKind::LParen, + TokenKind::String("hello".to_string()), + TokenKind::RParen, + TokenKind::RBrace, + ]); + } + + #[test] + fn test_function_definition() { + let tokens = lex("fn add(x: I32, y: I32): I32 = x + y"); + assert_eq!(tokens, vec![ + TokenKind::Fn, + TokenKind::Ident("add".into()), + TokenKind::LParen, + TokenKind::Ident("x".into()), + TokenKind::Colon, + TokenKind::TyI32, + TokenKind::Comma, + TokenKind::Ident("y".into()), + TokenKind::Colon, + TokenKind::TyI32, + TokenKind::RParen, + TokenKind::Colon, + TokenKind::TyI32, + TokenKind::Eq, + TokenKind::Ident("x".into()), + TokenKind::Plus, + TokenKind::Ident("y".into()), + ]); + } + + #[test] + fn test_lambda() { + let tokens = lex("fn(x: I32) -> x + 1"); + assert_eq!(tokens, vec![ + TokenKind::Fn, + TokenKind::LParen, + TokenKind::Ident("x".into()), + TokenKind::Colon, + TokenKind::TyI32, + TokenKind::RParen, + TokenKind::Arrow, + TokenKind::Ident("x".into()), + TokenKind::Plus, + 
TokenKind::Integer(1), + ]); + } + + #[test] + fn test_case_expression() { + let tokens = lex("case x of inl(n) -> n inr(b) -> 0 end"); + assert_eq!(tokens, vec![ + TokenKind::Case, + TokenKind::Ident("x".into()), + TokenKind::Of, + TokenKind::Inl, + TokenKind::LParen, + TokenKind::Ident("n".into()), + TokenKind::RParen, + TokenKind::Arrow, + TokenKind::Ident("n".into()), + TokenKind::Inr, + TokenKind::LParen, + TokenKind::Ident("b".into()), + TokenKind::RParen, + TokenKind::Arrow, + TokenKind::Integer(0), + TokenKind::End, + ]); + } + + #[test] + fn test_borrow() { + let tokens = lex("String.len(&s)"); + assert_eq!(tokens, vec![ + TokenKind::TyString, + TokenKind::Dot, + TokenKind::Ident("len".into()), + TokenKind::LParen, + TokenKind::Ampersand, + TokenKind::Ident("s".into()), + TokenKind::RParen, + ]); + } + + #[test] + fn test_spans() { + let source = "let x = 42"; + let tokens: Vec = Lexer::new(source) + .filter_map(|r| r.ok()) + .collect(); + + assert_eq!(tokens[0].span, Span::new(0, 3)); // "let" + assert_eq!(tokens[1].span, Span::new(4, 5)); // "x" + assert_eq!(tokens[2].span, Span::new(6, 7)); // "=" + assert_eq!(tokens[3].span, Span::new(8, 10)); // "42" + } + + #[test] + fn test_tokenize_with_errors() { + let (tokens, errors) = Lexer::tokenize("let x = 42 $ y"); + assert!(!errors.is_empty()); + assert!(tokens.iter().any(|t| matches!(t.kind, TokenKind::Let))); + } +} diff --git a/src/ephapax-parser/Cargo.toml b/src/ephapax-parser/Cargo.toml new file mode 100644 index 00000000..42c263f0 --- /dev/null +++ b/src/ephapax-parser/Cargo.toml @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: EUPL-1.2 +# SPDX-FileCopyrightText: 2025 Jonathan D.A. 
Jewell + +[package] +name = "ephapax-parser" +description = "Parser for the Ephapax programming language" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true +authors.workspace = true +repository.workspace = true +keywords.workspace = true +categories.workspace = true + +[dependencies] +ephapax-syntax = { workspace = true } +ephapax-lexer = { workspace = true } +chumsky = { workspace = true } +ariadne = { workspace = true } +smol_str = { workspace = true } +thiserror = { workspace = true } + +[dev-dependencies] diff --git a/src/ephapax-parser/src/error.rs b/src/ephapax-parser/src/error.rs new file mode 100644 index 00000000..f0595de7 --- /dev/null +++ b/src/ephapax-parser/src/error.rs @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: EUPL-1.2 +// SPDX-FileCopyrightText: 2025 Jonathan D.A. Jewell + +//! Error reporting for the Ephapax parser +//! +//! Uses ariadne for rich, colorful error messages. + +use ariadne::{Cache, Color, Label, Report as AriadneReport, ReportKind, Source}; +use ephapax_syntax::Span; +use std::fmt; +use thiserror::Error; + +/// Parser error types +#[derive(Error, Debug, Clone)] +pub enum ParseError { + #[error("Lexer error: {0}")] + Lexer(String), + + #[error("Syntax error: {message}")] + Syntax { message: String, span: Span }, + + #[error("Unexpected end of file")] + UnexpectedEof { span: Span }, + + #[error("Expected {expected}, found {found}")] + Expected { + expected: String, + found: String, + span: Span, + }, +} + +impl ParseError { + pub fn span(&self) -> Span { + match self { + ParseError::Lexer(_) => Span::dummy(), + ParseError::Syntax { span, .. } => *span, + ParseError::UnexpectedEof { span } => *span, + ParseError::Expected { span, .. 
} => *span, + } + } +} + +/// A rendered error report +pub struct Report { + source_name: String, + source: String, + errors: Vec, +} + +impl Report { + pub fn new(source_name: impl Into, source: impl Into, errors: Vec) -> Self { + Self { + source_name: source_name.into(), + source: source.into(), + errors, + } + } + + /// Print the report to stderr + pub fn eprint(&self) { + let mut cache = SingleFileCache::new(&self.source_name, &self.source); + + for error in &self.errors { + let report = self.build_report(error); + report.eprint(&mut cache).ok(); + } + } + + /// Render the report to a string + pub fn to_string_colored(&self) -> String { + let mut output = Vec::new(); + let mut cache = SingleFileCache::new(&self.source_name, &self.source); + + for error in &self.errors { + let report = self.build_report(error); + report.write(&mut cache, &mut output).ok(); + } + + String::from_utf8(output).unwrap_or_default() + } + + fn build_report(&self, error: &ParseError) -> AriadneReport<'_, (String, std::ops::Range)> { + let span = error.span(); + let range = span.start..span.end; + let location = (self.source_name.clone(), range.clone()); + + match error { + ParseError::Lexer(msg) => AriadneReport::build(ReportKind::Error, location.clone()) + .with_message("Lexer error") + .with_label( + Label::new(location) + .with_message(msg) + .with_color(Color::Red), + ) + .finish(), + + ParseError::Syntax { message, .. } => { + AriadneReport::build(ReportKind::Error, location.clone()) + .with_message("Syntax error") + .with_label( + Label::new(location) + .with_message(message) + .with_color(Color::Red), + ) + .finish() + } + + ParseError::UnexpectedEof { .. } => { + AriadneReport::build(ReportKind::Error, location.clone()) + .with_message("Unexpected end of file") + .with_label( + Label::new(location) + .with_message("expected more input here") + .with_color(Color::Red), + ) + .finish() + } + + ParseError::Expected { expected, found, .. 
} => { + AriadneReport::build(ReportKind::Error, location.clone()) + .with_message(format!("Expected {}", expected)) + .with_label( + Label::new(location) + .with_message(format!("found {} instead", found)) + .with_color(Color::Red), + ) + .finish() + } + } + } + + pub fn has_errors(&self) -> bool { + !self.errors.is_empty() + } + + pub fn error_count(&self) -> usize { + self.errors.len() + } +} + +impl fmt::Display for Report { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.to_string_colored()) + } +} + +/// Simple cache for a single file +struct SingleFileCache { + name: String, + source: Source, +} + +impl SingleFileCache { + fn new(name: &str, source: &str) -> Self { + Self { + name: name.to_string(), + source: Source::from(source.to_string()), + } + } +} + +impl Cache for SingleFileCache { + type Storage = String; + + fn fetch( + &mut self, + id: &String, + ) -> Result<&Source, Box> { + if id == &self.name { + Ok(&self.source) + } else { + Err(Box::new(format!("Unknown file: {}", id))) + } + } + + fn display<'a>(&self, id: &'a String) -> Option> { + Some(Box::new(id.clone())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_report_creation() { + let errors = vec![ParseError::Syntax { + message: "test error".to_string(), + span: Span::new(0, 5), + }]; + + let report = Report::new("test.epx", "hello", errors); + assert!(report.has_errors()); + assert_eq!(report.error_count(), 1); + } +} diff --git a/src/ephapax-parser/src/lib.rs b/src/ephapax-parser/src/lib.rs new file mode 100644 index 00000000..8fe1f0b5 --- /dev/null +++ b/src/ephapax-parser/src/lib.rs @@ -0,0 +1,981 @@ +// SPDX-License-Identifier: EUPL-1.2 +// SPDX-FileCopyrightText: 2025 Jonathan D.A. Jewell + +//! Ephapax Parser +//! +//! Parses Ephapax source code into an AST using the chumsky parser combinator library. +//! +//! # Example +//! +//! ``` +//! use ephapax_parser::parse; +//! +//! let source = "let x = 42 in x"; +//! 
let result = parse(source); +//! ``` + +use chumsky::prelude::*; +use ephapax_lexer::{Lexer, Span as LexerSpan, TokenKind}; +use ephapax_syntax::{BaseTy, Decl, Expr, ExprKind, Literal, Module, Span as SyntaxSpan, Ty}; +use smol_str::SmolStr; + +pub mod error; + +pub use error::{ParseError, Report}; + +/// Convert lexer span to syntax span +fn to_syntax_span(span: LexerSpan) -> SyntaxSpan { + SyntaxSpan::new(span.start, span.end) +} + +/// Merge two syntax spans +fn merge_spans(a: SyntaxSpan, b: SyntaxSpan) -> SyntaxSpan { + SyntaxSpan::new(a.start.min(b.start), a.end.max(b.end)) +} + +/// Token type that carries span information +#[derive(Debug, Clone, PartialEq)] +struct SpannedToken { + kind: TokenKind, + span: LexerSpan, +} + +impl std::fmt::Display for SpannedToken { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.kind) + } +} + +/// Parse source code into an expression +pub fn parse(source: &str) -> Result> { + let (tokens, lex_errors) = Lexer::tokenize(source); + + if !lex_errors.is_empty() { + return Err(lex_errors + .into_iter() + .map(|e| ParseError::Lexer(e.to_string())) + .collect()); + } + + // Convert tokens to spanned tokens - filter out EOF for expression parsing + let spanned_tokens: Vec = tokens + .into_iter() + .filter(|t| t.kind != TokenKind::Eof) + .map(|t| SpannedToken { kind: t.kind, span: t.span }) + .collect(); + + let len = source.len(); + let end_span = LexerSpan::new(len, len); + + // Parse using slice-based input + let result = expr_parser() + .then_ignore(end()) + .parse(&spanned_tokens[..]) + .into_result(); + + result.map_err(|errs| { + errs.into_iter() + .map(|e| { + // Get span from the error's location in the token stream + let span = if let Some(&ref tok) = e.found() { + to_syntax_span(tok.span) + } else { + to_syntax_span(end_span) + }; + ParseError::Syntax { + message: format!("{}", e.reason()), + span, + } + }) + .collect() + }) +} + +/// Parse source code into a module (multiple 
declarations) +pub fn parse_module(source: &str, name: &str) -> Result> { + let (tokens, lex_errors) = Lexer::tokenize(source); + + if !lex_errors.is_empty() { + return Err(lex_errors + .into_iter() + .map(|e| ParseError::Lexer(e.to_string())) + .collect()); + } + + let spanned_tokens: Vec = tokens + .into_iter() + .map(|t| SpannedToken { kind: t.kind, span: t.span }) + .collect(); + + let len = source.len(); + let end_span = LexerSpan::new(len, len); + + let result = module_parser() + .parse(&spanned_tokens[..]) + .into_result(); + + result + .map(|decls| Module { + name: SmolStr::new(name), + decls, + }) + .map_err(|errs| { + errs.into_iter() + .map(|e| { + let span = if let Some(&ref tok) = e.found() { + to_syntax_span(tok.span) + } else { + to_syntax_span(end_span) + }; + ParseError::Syntax { + message: format!("{}", e.reason()), + span, + } + }) + .collect() + }) +} + +// ============================================================================ +// Helper Parsers +// ============================================================================ + +/// Match a specific token kind and return its span +fn token<'a>( + kind: TokenKind, +) -> impl Parser<'a, &'a [SpannedToken], LexerSpan, extra::Err>> + Clone { + any() + .filter(move |t: &SpannedToken| t.kind == kind) + .map(|t: SpannedToken| t.span) +} + +/// Match a specific token kind and ignore the result +fn tok<'a>( + kind: TokenKind, +) -> impl Parser<'a, &'a [SpannedToken], (), extra::Err>> + Clone { + token(kind).ignored() +} + +/// Parse an identifier and return its name and span +fn ident<'a>() -> impl Parser<'a, &'a [SpannedToken], (SmolStr, LexerSpan), extra::Err>> + Clone { + any() + .filter(|t: &SpannedToken| matches!(t.kind, TokenKind::Ident(_))) + .map(|t: SpannedToken| { + if let TokenKind::Ident(s) = t.kind { + (s, t.span) + } else { + unreachable!() + } + }) +} + +/// Parse an identifier name only +fn ident_name<'a>() -> impl Parser<'a, &'a [SpannedToken], SmolStr, extra::Err>> + Clone { + 
ident().map(|(name, _)| name) +} + +// ============================================================================ +// Type Parsers +// ============================================================================ + +fn base_type_parser<'a>() -> impl Parser<'a, &'a [SpannedToken], Ty, extra::Err>> + Clone { + any() + .filter(|t: &SpannedToken| { + matches!( + t.kind, + TokenKind::Unit + | TokenKind::TyBool + | TokenKind::TyI32 + | TokenKind::TyI64 + | TokenKind::TyF32 + | TokenKind::TyF64 + ) + }) + .map(|t: SpannedToken| match t.kind { + TokenKind::Unit => Ty::Base(BaseTy::Unit), + TokenKind::TyBool => Ty::Base(BaseTy::Bool), + TokenKind::TyI32 => Ty::Base(BaseTy::I32), + TokenKind::TyI64 => Ty::Base(BaseTy::I64), + TokenKind::TyF32 => Ty::Base(BaseTy::F32), + TokenKind::TyF64 => Ty::Base(BaseTy::F64), + _ => unreachable!(), + }) +} + +fn type_parser<'a>() -> impl Parser<'a, &'a [SpannedToken], Ty, extra::Err>> + Clone { + recursive(|ty| { + let base = base_type_parser(); + + // String@region + let string_ty = tok(TokenKind::TyString) + .ignore_then(tok(TokenKind::At)) + .ignore_then(ident_name()) + .map(Ty::String); + + // &T (borrow) + let borrow_ty = tok(TokenKind::Ampersand) + .ignore_then(ty.clone()) + .map(|t| Ty::Borrow(Box::new(t))); + + // (T1, T2) product or parenthesized type + let paren_ty = ty + .clone() + .separated_by(tok(TokenKind::Comma)) + .at_least(1) + .collect::>() + .delimited_by(tok(TokenKind::LParen), tok(TokenKind::RParen)) + .map(|types| { + if types.len() == 1 { + types.into_iter().next().unwrap() + } else if types.len() == 2 { + let mut iter = types.into_iter(); + Ty::Prod { + left: Box::new(iter.next().unwrap()), + right: Box::new(iter.next().unwrap()), + } + } else { + types + .into_iter() + .reduce(|acc, t| Ty::Prod { + left: Box::new(acc), + right: Box::new(t), + }) + .unwrap() + } + }); + + // Type variable + let type_var = ident_name().map(Ty::Var); + + let atom = choice((base, string_ty, borrow_ty, paren_ty, type_var)); + + // T1 
+ T2 (sum) - left associative + let sum = atom.clone().foldl( + tok(TokenKind::Plus).ignore_then(atom.clone()).repeated(), + |left, right| Ty::Sum { + left: Box::new(left), + right: Box::new(right), + }, + ); + + // T1 -> T2 (function) - right associative + sum.clone() + .separated_by(tok(TokenKind::Arrow)) + .at_least(1) + .collect::>() + .map(|types| { + types + .into_iter() + .rev() + .reduce(|ret, param| Ty::Fun { + param: Box::new(param), + ret: Box::new(ret), + }) + .unwrap() + }) + }) +} + +// ============================================================================ +// Expression Parsers +// ============================================================================ + +fn literal_parser<'a>() -> impl Parser<'a, &'a [SpannedToken], (Literal, LexerSpan), extra::Err>> + Clone { + any() + .filter(|t: &SpannedToken| { + matches!( + t.kind, + TokenKind::Unit + | TokenKind::True + | TokenKind::False + | TokenKind::Integer(_) + | TokenKind::Float(_) + | TokenKind::String(_) + ) + }) + .map(|t: SpannedToken| { + let lit = match t.kind { + TokenKind::Unit => Literal::Unit, + TokenKind::True => Literal::Bool(true), + TokenKind::False => Literal::Bool(false), + TokenKind::Integer(n) => Literal::I32(n as i32), + TokenKind::Float(n) => Literal::F64(n), + TokenKind::String(s) => Literal::String(s), + _ => unreachable!(), + }; + (lit, t.span) + }) +} + +fn expr_parser<'a>() -> impl Parser<'a, &'a [SpannedToken], Expr, extra::Err>> + Clone { + recursive(|expr: Recursive>>>| { + // Literals + let literal = literal_parser() + .map(|(lit, span)| Expr::new(ExprKind::Lit(lit), to_syntax_span(span))); + + // Variables + let var = ident() + .map(|(name, span)| Expr::new(ExprKind::Var(name), to_syntax_span(span))); + + // String.new@r("...") or String.concat(...) etc. 
+ let string_method = token(TokenKind::TyString) + .then_ignore(tok(TokenKind::Dot)) + .then(ident_name()) + .then(tok(TokenKind::At).ignore_then(ident_name()).or_not()) + .then( + expr.clone() + .separated_by(tok(TokenKind::Comma)) + .collect::>() + .delimited_by(tok(TokenKind::LParen), tok(TokenKind::RParen)), + ) + .map(|(((start_span, method), region), args)| { + let span = to_syntax_span(start_span); + match method.as_str() { + "new" => { + let region = region.unwrap_or_else(|| SmolStr::new("_")); + if let Some(arg) = args.into_iter().next() { + if let ExprKind::Lit(Literal::String(s)) = arg.kind { + return Expr::new(ExprKind::StringNew { region, value: s }, span); + } + } + Expr::new(ExprKind::Lit(Literal::Unit), span) + } + "concat" => { + let mut iter = args.into_iter(); + let left = iter.next().map(Box::new); + let right = iter.next().map(Box::new); + if let (Some(left), Some(right)) = (left, right) { + Expr::new(ExprKind::StringConcat { left, right }, span) + } else { + Expr::new(ExprKind::Lit(Literal::Unit), span) + } + } + "len" => { + if let Some(arg) = args.into_iter().next() { + Expr::new(ExprKind::StringLen(Box::new(arg)), span) + } else { + Expr::new(ExprKind::Lit(Literal::Unit), span) + } + } + _ => Expr::new(ExprKind::Lit(Literal::Unit), span), + } + }); + + // Parenthesized expression or pair + let paren_expr = token(TokenKind::LParen) + .then( + expr.clone() + .separated_by(tok(TokenKind::Comma)) + .at_least(1) + .collect::>(), + ) + .then_ignore(tok(TokenKind::RParen)) + .map(|(start_span, exprs)| { + let span = to_syntax_span(start_span); + if exprs.len() == 1 { + exprs.into_iter().next().unwrap() + } else if exprs.len() == 2 { + let mut iter = exprs.into_iter(); + Expr::new( + ExprKind::Pair { + left: Box::new(iter.next().unwrap()), + right: Box::new(iter.next().unwrap()), + }, + span, + ) + } else { + let result = exprs.into_iter().reduce(|acc, elem| { + Expr::new( + ExprKind::Pair { + left: Box::new(acc), + right: Box::new(elem), + }, + 
span, + ) + }); + result.unwrap() + } + }); + + // let x = e1 in e2 + let let_expr = token(TokenKind::Let) + .then(ident_name()) + .then(tok(TokenKind::Colon).ignore_then(type_parser()).or_not()) + .then_ignore(tok(TokenKind::Eq)) + .then(expr.clone()) + .then_ignore(tok(TokenKind::In)) + .then(expr.clone()) + .map(|((((start_span, name), ty), value), body)| { + Expr::new( + ExprKind::Let { + name, + ty, + value: Box::new(value), + body: Box::new(body), + }, + to_syntax_span(start_span), + ) + }); + + // let! x = e1 in e2 + let let_lin_expr = token(TokenKind::LetBang) + .then(ident_name()) + .then(tok(TokenKind::Colon).ignore_then(type_parser()).or_not()) + .then_ignore(tok(TokenKind::Eq)) + .then(expr.clone()) + .then_ignore(tok(TokenKind::In)) + .then(expr.clone()) + .map(|((((start_span, name), ty), value), body)| { + Expr::new( + ExprKind::LetLin { + name, + ty, + value: Box::new(value), + body: Box::new(body), + }, + to_syntax_span(start_span), + ) + }); + + // fn(x: T) -> e + let lambda = token(TokenKind::Fn) + .then_ignore(tok(TokenKind::LParen)) + .then(ident_name()) + .then_ignore(tok(TokenKind::Colon)) + .then(type_parser()) + .then_ignore(tok(TokenKind::RParen)) + .then_ignore(tok(TokenKind::Arrow)) + .then(expr.clone()) + .map(|(((start_span, param), param_ty), body)| { + Expr::new( + ExprKind::Lambda { + param, + param_ty, + body: Box::new(body), + }, + to_syntax_span(start_span), + ) + }); + + // if e1 then e2 else e3 + let if_expr = token(TokenKind::If) + .then(expr.clone()) + .then_ignore(tok(TokenKind::Then)) + .then(expr.clone()) + .then_ignore(tok(TokenKind::Else)) + .then(expr.clone()) + .map(|(((start_span, cond), then_branch), else_branch)| { + Expr::new( + ExprKind::If { + cond: Box::new(cond), + then_branch: Box::new(then_branch), + else_branch: Box::new(else_branch), + }, + to_syntax_span(start_span), + ) + }); + + // region r { e } + let region_expr = token(TokenKind::Region) + .then(ident_name()) + .then_ignore(tok(TokenKind::LBrace)) + 
.then(expr.clone()) + .then_ignore(tok(TokenKind::RBrace)) + .map(|((start_span, name), body)| { + Expr::new( + ExprKind::Region { + name, + body: Box::new(body), + }, + to_syntax_span(start_span), + ) + }); + + // inl[T](e) + let inl_expr = token(TokenKind::Inl) + .then_ignore(tok(TokenKind::LBracket)) + .then(type_parser()) + .then_ignore(tok(TokenKind::RBracket)) + .then_ignore(tok(TokenKind::LParen)) + .then(expr.clone()) + .then_ignore(tok(TokenKind::RParen)) + .map(|((start_span, ty), value)| { + Expr::new( + ExprKind::Inl { + ty, + value: Box::new(value), + }, + to_syntax_span(start_span), + ) + }); + + // inr[T](e) + let inr_expr = token(TokenKind::Inr) + .then_ignore(tok(TokenKind::LBracket)) + .then(type_parser()) + .then_ignore(tok(TokenKind::RBracket)) + .then_ignore(tok(TokenKind::LParen)) + .then(expr.clone()) + .then_ignore(tok(TokenKind::RParen)) + .map(|((start_span, ty), value)| { + Expr::new( + ExprKind::Inr { + ty, + value: Box::new(value), + }, + to_syntax_span(start_span), + ) + }); + + // case e of inl(x) -> e1 inr(y) -> e2 end + let case_expr = token(TokenKind::Case) + .then(expr.clone()) + .then_ignore(tok(TokenKind::Of)) + .then_ignore(tok(TokenKind::Inl)) + .then_ignore(tok(TokenKind::LParen)) + .then(ident_name()) + .then_ignore(tok(TokenKind::RParen)) + .then_ignore(tok(TokenKind::Arrow)) + .then(expr.clone()) + .then_ignore(tok(TokenKind::Inr)) + .then_ignore(tok(TokenKind::LParen)) + .then(ident_name()) + .then_ignore(tok(TokenKind::RParen)) + .then_ignore(tok(TokenKind::Arrow)) + .then(expr.clone()) + .then_ignore(tok(TokenKind::End)) + .map( + |(((((start_span, scrutinee), left_var), left_body), right_var), right_body)| { + Expr::new( + ExprKind::Case { + scrutinee: Box::new(scrutinee), + left_var, + left_body: Box::new(left_body), + right_var, + right_body: Box::new(right_body), + }, + to_syntax_span(start_span), + ) + }, + ); + + // &e (borrow) + let borrow_expr = token(TokenKind::Ampersand) + .then(expr.clone()) + 
.map(|(start_span, inner)| { + Expr::new(ExprKind::Borrow(Box::new(inner)), to_syntax_span(start_span)) + }); + + // drop(e) + let drop_expr = token(TokenKind::Drop) + .then_ignore(tok(TokenKind::LParen)) + .then(expr.clone()) + .then_ignore(tok(TokenKind::RParen)) + .map(|(start_span, inner)| { + Expr::new(ExprKind::Drop(Box::new(inner)), to_syntax_span(start_span)) + }); + + // copy(e) + let copy_expr = token(TokenKind::Copy) + .then_ignore(tok(TokenKind::LParen)) + .then(expr.clone()) + .then_ignore(tok(TokenKind::RParen)) + .map(|(start_span, inner)| { + Expr::new(ExprKind::Copy(Box::new(inner)), to_syntax_span(start_span)) + }); + + // Atom expressions + let atom = choice(( + string_method, + let_expr, + let_lin_expr, + lambda, + if_expr, + region_expr, + inl_expr, + inr_expr, + case_expr, + borrow_expr, + drop_expr, + copy_expr, + literal, + paren_expr, + var, + )); + + // Function application and member access + let call_or_member = atom.clone().foldl( + choice(( + // Function application: f(arg) + expr.clone() + .delimited_by(tok(TokenKind::LParen), tok(TokenKind::RParen)) + .map(|arg| ("call".to_string(), Some(arg))), + // Member access: e.0 or e.1 or e.field + tok(TokenKind::Dot) + .ignore_then( + any() + .filter(|t: &SpannedToken| { + matches!(t.kind, TokenKind::Integer(_) | TokenKind::Ident(_)) + }) + .map(|t: SpannedToken| match t.kind { + TokenKind::Integer(0) => "0".to_string(), + TokenKind::Integer(1) => "1".to_string(), + TokenKind::Ident(s) => s.to_string(), + _ => "".to_string(), + }), + ) + .map(|member| (member, None)), + )) + .repeated(), + |func, (op, arg): (String, Option)| { + let span = func.span; + match op.as_str() { + "call" => { + if let Some(arg) = arg { + Expr::new( + ExprKind::App { + func: Box::new(func), + arg: Box::new(arg), + }, + span, + ) + } else { + func + } + } + "0" => Expr::new(ExprKind::Fst(Box::new(func)), span), + "1" => Expr::new(ExprKind::Snd(Box::new(func)), span), + _ => func, + } + }, + ); + + // Binary operators 
with precedence (simplified - returns placeholder for now) + let unary = call_or_member; + + let product = unary.clone().foldl( + choice(( + tok(TokenKind::Star), + tok(TokenKind::Slash), + tok(TokenKind::Percent), + )) + .then(unary.clone()) + .repeated(), + |left, (_, right)| { + let span = merge_spans(left.span, right.span); + Expr::new(ExprKind::Lit(Literal::I32(0)), span) + }, + ); + + let sum = product.clone().foldl( + choice((tok(TokenKind::Plus), tok(TokenKind::Minus))) + .then(product.clone()) + .repeated(), + |left, (_, right)| { + let span = merge_spans(left.span, right.span); + Expr::new(ExprKind::Lit(Literal::I32(0)), span) + }, + ); + + let comparison = sum.clone().foldl( + choice(( + tok(TokenKind::Lt), + tok(TokenKind::Gt), + tok(TokenKind::LtEq), + tok(TokenKind::GtEq), + )) + .then(sum.clone()) + .repeated(), + |left, (_, right)| { + let span = merge_spans(left.span, right.span); + Expr::new(ExprKind::Lit(Literal::Bool(false)), span) + }, + ); + + let equality = comparison.clone().foldl( + choice((tok(TokenKind::EqEq), tok(TokenKind::NotEq))) + .then(comparison.clone()) + .repeated(), + |left, (_, right)| { + let span = merge_spans(left.span, right.span); + Expr::new(ExprKind::Lit(Literal::Bool(false)), span) + }, + ); + + let and = equality.clone().foldl( + tok(TokenKind::AndAnd) + .then(equality.clone()) + .repeated(), + |left, (_, right)| { + let span = merge_spans(left.span, right.span); + Expr::new(ExprKind::Lit(Literal::Bool(false)), span) + }, + ); + + and.clone().foldl( + tok(TokenKind::OrOr) + .then(and.clone()) + .repeated(), + |left, (_, right)| { + let span = merge_spans(left.span, right.span); + Expr::new(ExprKind::Lit(Literal::Bool(false)), span) + }, + ) + }) +} + +// ============================================================================ +// Declaration Parsers +// ============================================================================ + +fn decl_parser<'a>() -> impl Parser<'a, &'a [SpannedToken], Decl, extra::Err>> + 
Clone { + // fn name(params): RetTy = body + let fn_decl = tok(TokenKind::Fn) + .ignore_then(ident_name()) + .then( + ident_name() + .then_ignore(tok(TokenKind::Colon)) + .then(type_parser()) + .separated_by(tok(TokenKind::Comma)) + .collect::>() + .delimited_by(tok(TokenKind::LParen), tok(TokenKind::RParen)), + ) + .then_ignore(tok(TokenKind::Colon)) + .then(type_parser()) + .then_ignore(tok(TokenKind::Eq)) + .then(expr_parser()) + .map(|(((name, params), ret_ty), body)| Decl::Fn { + name, + params, + ret_ty, + body, + }); + + // type Name = Type + let type_decl = tok(TokenKind::Type) + .ignore_then(ident_name()) + .then_ignore(tok(TokenKind::Eq)) + .then(type_parser()) + .map(|(name, ty)| Decl::Type { name, ty }); + + choice((fn_decl, type_decl)) +} + +fn module_parser<'a>() -> impl Parser<'a, &'a [SpannedToken], Vec, extra::Err>> + Clone { + decl_parser() + .repeated() + .collect() + .then_ignore(tok(TokenKind::Eof)) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_ok(source: &str) -> Expr { + parse(source).expect("parse should succeed") + } + + #[test] + fn test_parse_literal() { + let expr = parse_ok("42"); + assert!(matches!(expr.kind, ExprKind::Lit(Literal::I32(42)))); + } + + #[test] + fn test_parse_bool() { + let expr = parse_ok("true"); + assert!(matches!(expr.kind, ExprKind::Lit(Literal::Bool(true)))); + } + + #[test] + fn test_parse_unit() { + let expr = parse_ok("()"); + assert!(matches!(expr.kind, ExprKind::Lit(Literal::Unit))); + } + + #[test] + fn test_parse_variable() { + let expr = parse_ok("x"); + if let ExprKind::Var(name) = expr.kind { + assert_eq!(name, "x"); + } else { + panic!("Expected variable"); + } + } + + #[test] + fn test_parse_let() { + let expr = parse_ok("let x = 42 in x"); + if let ExprKind::Let { + name, value, body, .. 
+ } = expr.kind + { + assert_eq!(name, "x"); + assert!(matches!(value.kind, ExprKind::Lit(Literal::I32(42)))); + assert!(matches!(body.kind, ExprKind::Var(_))); + } else { + panic!("Expected let expression"); + } + } + + #[test] + fn test_parse_lambda() { + let expr = parse_ok("fn(x: I32) -> x"); + if let ExprKind::Lambda { + param, + param_ty, + body, + } = expr.kind + { + assert_eq!(param, "x"); + assert_eq!(param_ty, Ty::Base(BaseTy::I32)); + assert!(matches!(body.kind, ExprKind::Var(_))); + } else { + panic!("Expected lambda"); + } + } + + #[test] + fn test_parse_if() { + let expr = parse_ok("if true then 1 else 0"); + if let ExprKind::If { + cond, + then_branch, + else_branch, + } = expr.kind + { + assert!(matches!(cond.kind, ExprKind::Lit(Literal::Bool(true)))); + assert!(matches!(then_branch.kind, ExprKind::Lit(Literal::I32(1)))); + assert!(matches!(else_branch.kind, ExprKind::Lit(Literal::I32(0)))); + } else { + panic!("Expected if expression"); + } + } + + #[test] + fn test_parse_region() { + let expr = parse_ok("region r { 42 }"); + if let ExprKind::Region { name, body } = expr.kind { + assert_eq!(name, "r"); + assert!(matches!(body.kind, ExprKind::Lit(Literal::I32(42)))); + } else { + panic!("Expected region expression"); + } + } + + #[test] + fn test_parse_pair() { + let expr = parse_ok("(1, 2)"); + if let ExprKind::Pair { left, right } = expr.kind { + assert!(matches!(left.kind, ExprKind::Lit(Literal::I32(1)))); + assert!(matches!(right.kind, ExprKind::Lit(Literal::I32(2)))); + } else { + panic!("Expected pair"); + } + } + + #[test] + fn test_parse_borrow() { + let expr = parse_ok("&x"); + if let ExprKind::Borrow(inner) = expr.kind { + assert!(matches!(inner.kind, ExprKind::Var(_))); + } else { + panic!("Expected borrow"); + } + } + + #[test] + fn test_parse_drop() { + let expr = parse_ok("drop(x)"); + if let ExprKind::Drop(inner) = expr.kind { + assert!(matches!(inner.kind, ExprKind::Var(_))); + } else { + panic!("Expected drop"); + } + } + + #[test] 
+ fn test_parse_copy() { + let expr = parse_ok("copy(x)"); + if let ExprKind::Copy(inner) = expr.kind { + assert!(matches!(inner.kind, ExprKind::Var(_))); + } else { + panic!("Expected copy"); + } + } + + #[test] + fn test_parse_inl() { + let expr = parse_ok("inl[Bool](42)"); + if let ExprKind::Inl { ty, value } = expr.kind { + assert_eq!(ty, Ty::Base(BaseTy::Bool)); + assert!(matches!(value.kind, ExprKind::Lit(Literal::I32(42)))); + } else { + panic!("Expected inl"); + } + } + + #[test] + fn test_parse_inr() { + let expr = parse_ok("inr[I32](true)"); + if let ExprKind::Inr { ty, value } = expr.kind { + assert_eq!(ty, Ty::Base(BaseTy::I32)); + assert!(matches!(value.kind, ExprKind::Lit(Literal::Bool(true)))); + } else { + panic!("Expected inr"); + } + } + + #[test] + fn test_parse_case() { + let expr = parse_ok("case x of inl(n) -> n inr(b) -> 0 end"); + if let ExprKind::Case { + scrutinee, + left_var, + left_body, + right_var, + right_body, + } = expr.kind + { + assert!(matches!(scrutinee.kind, ExprKind::Var(_))); + assert_eq!(left_var, "n"); + assert_eq!(right_var, "b"); + assert!(matches!(left_body.kind, ExprKind::Var(_))); + assert!(matches!(right_body.kind, ExprKind::Lit(Literal::I32(0)))); + } else { + panic!("Expected case expression"); + } + } + + #[test] + fn test_parse_application() { + let expr = parse_ok("f(x)"); + if let ExprKind::App { func, arg } = expr.kind { + assert!(matches!(func.kind, ExprKind::Var(_))); + assert!(matches!(arg.kind, ExprKind::Var(_))); + } else { + panic!("Expected application"); + } + } + + #[test] + fn test_parse_projection() { + let expr = parse_ok("p.0"); + if let ExprKind::Fst(inner) = expr.kind { + assert!(matches!(inner.kind, ExprKind::Var(_))); + } else { + panic!("Expected fst projection"); + } + } + + #[test] + fn test_parse_module() { + let source = r#" + fn add(x: I32, y: I32): I32 = x + fn id(x: I32): I32 = x + "#; + let module = parse_module(source, "test").expect("should parse"); + assert_eq!(module.decls.len(), 
2); + } +}