diff --git a/Cargo.toml b/Cargo.toml index f8bea9f..4e94ba7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,8 +9,9 @@ edition = "2018" [dependencies] lalrpop = "0.18" lalrpop-util = "0.18" -logos = "0.10.0" +logos = "0.11.0-rc2" regex = "1" +logos-derive = "0.11.0-rc3" codespan-reporting = "0.9" structopt = "0.3.12" diff --git a/src/codespan.rs b/src/codespan.rs index fb8aacc..d296aab 100644 --- a/src/codespan.rs +++ b/src/codespan.rs @@ -51,7 +51,7 @@ pub fn codespan<'a>( .with_message("Extra token"), User { error } => Diagnostic::error() .with_message("Invalid token") - .with_labels(vec![Label::primary(file_id, error.0.clone())]) + .with_labels(vec![Label::primary(file_id, *error..*error)]) .with_message("Invalid token"), }; diff --git a/src/error.rs b/src/error.rs index c527819..1520722 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,6 +1,6 @@ -use crate::token_wrap::*; +use crate::lex::Token; -pub type Error<'a> = lalrpop_util::ParseError, LexicalError>; +pub type Error<'a> = lalrpop_util::ParseError, usize>; #[derive(Debug)] pub enum MainError { diff --git a/src/lex.rs b/src/lex.rs index 8e16ea3..b4ad155 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -1,12 +1,9 @@ +pub use logos::Lexer; use logos::Logos; -// Notably absent from the above, present in the below are -// Whitespace, EOF, LexError -#[derive(Logos, Debug)] -pub enum Token { - #[end] - EOF, - +#[derive(Logos, Debug, Clone, PartialEq)] +#[logos(trivia = r"(\p{Whitespace}+|#.*\n)")] +pub enum Token<'a> { #[token = "."] Dot, @@ -87,10 +84,10 @@ pub enum Token { // \x{1d62}-\x{1d6a} // // FancyNameAscii ↔ FancyNameUnicode - #[regex = r"[\\][a-zA-Z][_a-zA-Z0-9]*"] - FancyNameAscii, - #[regex = r"[a-zA-Z\p{Greek}\x{1d49c}-\x{1d59f}\x{2100}-\x{214f}][_a-zA-Z0-9\x{207f}-\x{2089}\x{2090}-\x{209c}\x{1d62}-\x{1d6a}]*"] - Name, + #[regex(r"[\\][a-zA-Z][_a-zA-Z0-9]*", |lex| lex.slice())] + FancyNameAscii(&'a str), + #[regex(r"[a-zA-Z\p{Greek}\x{1d49c}-\x{1d59f}\x{2100}-\x{214f}][_a-zA-Z0-9\x{207f}-\x{2089}\x{2090}-\x{209c}\x{1d62}-\x{1d6a}]*", |lex| lex.slice())] + Name(&'a str), #[token = ":"] Colon, @@ -98,12 +95,6 @@ pub enum Token { #[token = ";"] Semi, - #[regex = r"#.*\n"] - Comment, - - #[regex = r"\p{Whitespace}+"] - Whitespace, - #[error] LexError, } diff --git a/src/main.rs b/src/main.rs index 801e093..67c2072 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,6 @@ mod ast; mod codespan; mod error; mod lex; -mod token_wrap; #[cfg(test)] mod test; @@ -12,9 +11,10 @@ mod test_util; use codespan_reporting::term::termcolor::StandardStream; use codespan_reporting::term::{self, ColorArg}; use error::*; +use logos::Logos; use std::io::Read; use structopt::StructOpt; -use token_wrap::*; + #[derive(Debug, StructOpt)] #[structopt(name = "prop")] pub struct Opts { @@ -59,8 +59,15 @@ fn main() -> Result<(), MainError> { // Not really how i'd like this to be. buf.read_to_string(&mut s)?; - let lexer = Tokens::from_string(&s); - let parse_result = parser::propParser::new().parse(lexer); + + let lex = lex::Token::lexer(&s).spanned(); + let parse_result = parser::propParser::new().parse(lex.map(|(t, r)| { + if t == lex::Token::LexError { + Err(r.start) + } else { + Ok((r.start, t, r.end)) + } + })); match parse_result { Err(e) => { diff --git a/src/prop.lalrpop b/src/prop.lalrpop index 8886d89..f9bea18 100644 --- a/src/prop.lalrpop +++ b/src/prop.lalrpop @@ -1,12 +1,11 @@ -use crate::token_wrap; +use crate::lex::Token; use crate::ast::{Prop, Expr, Binding, Typ}; use std::rc::Rc; -use token_wrap::*; grammar<'a>; extern { type Location = usize; - type Error = LexicalError; + type Error = usize; enum Token<'a> { "⊥" => Token::Bot, @@ -23,10 +22,16 @@ extern { ")" => Token::RParen, ":" => Token::Colon, ";" => Token::Semi, - name => Token::Name(<&'a str>), + fancy_name_unicode => Token::Name(<&'a str>), + fancy_name_ascii => Token::FancyNameAscii(<&'a str>), } } +name: &'a str = { + fancy_name_unicode, + fancy_name_ascii, +} + pub prop = Semi; Binding: Rc = { diff --git a/src/test.rs b/src/test.rs index 406aff9..6b74620 100644 --- a/src/test.rs +++ b/src/test.rs @@ -1,6 +1,7 @@ use crate::error::*; -use crate::token_wrap::*; +use crate::lex; use crate::{parser, test_util}; +use logos::Logos; use unindent::unindent; @@ -108,8 +109,16 @@ fn bad_ascii() -> Result<(), &'static str> { let mut num_fail = 0; for s in invalid_source.iter() { - let lexer = Tokens::from_string(&s); - match parser::propParser::new().parse(lexer) { + let lex = lex::Token::lexer(&s).spanned(); + let parse_result = parser::propParser::new().parse(lex.map(|(t, r)| { + if t == lex::Token::LexError { + Err(r.start) + } else { + Ok((r.start, t, r.end)) + } + })); + + match parse_result { Ok(_) => { // bad println!("parsed but shouldn't: {}", s); @@ -117,7 +126,7 @@ fn bad_ascii() -> Result<(), &'static str> { } Err(e) => { // good - println!("expected error: {}", e); + println!("expected error: {:?}", e); () } } diff --git a/src/test_util.rs b/src/test_util.rs index 9594d75..04d44be 100644 --- a/src/test_util.rs +++ b/src/test_util.rs @@ -1,19 +1,24 @@ use crate::codespan; use crate::error::*; +use crate::lex; use crate::parser; -use crate::token_wrap::*; use codespan_reporting::term; -use codespan_reporting::term::termcolor::{ColorChoice, StandardStream}; +use logos::Logos; pub fn do_test<'a>(sources: &[&'a str]) -> Result<(), Vec<(&'a str, Error<'a>)>> { let (_pass, fail): (Vec<_>, Vec<_>) = sources .iter() .enumerate() .map(|(index, s)| { - ( - index, - parser::propParser::new().parse(Tokens::from_string(s)), - ) + (index, { + parser::propParser::new().parse(lex::Token::lexer(&s).spanned().map(|(t, r)| { + if t == lex::Token::LexError { + Err(r.start) + } else { + Ok((r.start, t, r.end)) + } + })) + }) }) .partition(|(_, r)| r.is_ok()); if fail.is_empty() { @@ -27,21 +32,17 @@ pub fn do_test<'a>(sources: &[&'a str]) -> Result<(), Vec<(&'a str, Error<'a>)>> } } -// FIXME these 2 and print_errors can involve less duplication of slightly different code -// The difference: stdout vs stderr and ColorChoice::Never vs structopt which -// causes problems with cargo test implicit arguments. pub fn expect_success<'a>(result: Result<(), Vec<(&'a str, Error<'a>)>>) -> Result<(), MainError> { match result { Ok(()) => Ok(()), Err(e) => { for (source, error) in e.iter() { - let writer = StandardStream::stdout(ColorChoice::Never); + let mut writer = codespan_reporting::term::termcolor::Buffer::no_color(); let config = codespan_reporting::term::Config::default(); let (files, diagnostic) = codespan::codespan("foo", source, error); - eprintln!("capture stderr?"); - println!("capture stdout?"); - term::emit(&mut writer.lock(), &config, &files, &diagnostic)?; + term::emit(&mut writer, &config, &files, &diagnostic)?; + eprintln!("{}", std::str::from_utf8(writer.as_slice()).unwrap()) } Err(MainError::SomethingWentAwryAndStuffWasPrinted) } @@ -57,11 +58,12 @@ pub fn expect_fail<'a>(result: Result<(), Vec<(&'a str, Error<'a>)>>) -> Result< Err(e) => { for (source, error) in e.iter() { - let writer = StandardStream::stdout(ColorChoice::Never); + let mut writer = codespan_reporting::term::termcolor::Buffer::no_color(); let config = codespan_reporting::term::Config::default(); let (files, diagnostic) = codespan::codespan("foo", source, error); - term::emit(&mut writer.lock(), &config, &files, &diagnostic)?; + term::emit(&mut writer, &config, &files, &diagnostic)?; + eprintln!("{}", std::str::from_utf8(writer.as_slice()).unwrap()) } Ok(()) } diff --git a/src/token_wrap.rs b/src/token_wrap.rs deleted file mode 100644 index 87d2f6a..0000000 --- a/src/token_wrap.rs +++ /dev/null @@ -1,99 +0,0 @@ -use crate::lex; -use logos::Logos; -use std::ops::Range; - -#[derive(Debug, Clone)] -pub enum Token<'a> { - Dot, - Semi, - Colon, - LParen, - RParen, - Bot, - Top, - Disj, - Conj, - Abs, - Neg, - Iff, - Arrow, - Def, - Name(&'a str), -} - -impl<'a> std::fmt::Display for Token<'a> { - #[rustfmt::skip] - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - match self { - Token::Dot => write!(f, "."), - Token::Abs => write!(f, "ⲗ"), - Token::Bot => write!(f, "⊥"), - Token::Def => write!(f, "≔"), - Token::Iff => write!(f, "↔"), - Token::Neg => write!(f, "¬"), - Token::Top => write!(f, "⊤"), - Token::Conj => write!(f, "∧"), - Token::Disj => write!(f, "∨"), - Token::Semi => write!(f, ";"), - Token::Arrow => write!(f, "→"), - Token::Colon => write!(f, ":"), - Token::LParen => write!(f, "("), - Token::RParen => write!(f, ")"), - Token::Name(s) => write!(f, "{}", s), - } - } -} - -#[derive(Debug)] -pub struct LexicalError(pub Range); - -impl std::fmt::Display for LexicalError { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "lexical error at {:?}", self.0) - } -} - -pub struct Tokens<'a>(logos::Lexer); -pub type Spanned = Result<(Loc, Tok, Loc), Error>; - -impl<'a> Tokens<'a> { - pub fn from_string(source: &'a str) -> Tokens<'a> { - Tokens(lex::Token::lexer(source)) - } -} - -impl<'a> Iterator for Tokens<'a> { - type Item = Spanned, usize, LexicalError>; - - fn next(&mut self) -> Option { - let lex = &mut self.0; - let range = lex.range(); - let ok = |tok: Token<'a>| Ok((range.start, tok, range.end)); - let token = loop { - match &lex.token { - lex::Token::Whitespace | lex::Token::Comment => lex.advance(), - lex::Token::EOF => return None, - lex::Token::LexError => break Err(LexicalError(range)), - lex::Token::Name => break ok(Token::Name(lex.slice())), - lex::Token::FancyNameAscii => break ok(Token::Name(lex.slice())), - // And the rest are all unary members - lex::Token::Dot => break ok(Token::Dot), - lex::Token::Abs => break ok(Token::Abs), - lex::Token::Bot => break ok(Token::Bot), - lex::Token::Top => break ok(Token::Top), - lex::Token::Neg => break ok(Token::Neg), - lex::Token::Iff => break ok(Token::Iff), - lex::Token::Def => break ok(Token::Def), - lex::Token::Disj => break ok(Token::Disj), - lex::Token::Conj => break ok(Token::Conj), - lex::Token::Semi => break ok(Token::Semi), - lex::Token::Arrow => break ok(Token::Arrow), - lex::Token::Colon => break ok(Token::Colon), - lex::Token::LParen => break ok(Token::LParen), - lex::Token::RParen => break ok(Token::RParen), - } - }; - lex.advance(); - Some(token) - } -}