diff --git a/parser/src/diagnostics.rs b/parser/src/diagnostics.rs
index 3f5cdcd..ad62224 100644
--- a/parser/src/diagnostics.rs
+++ b/parser/src/diagnostics.rs
@@ -75,6 +75,12 @@ pub enum ParsingError {
     SpecialVariableIndirectCall(Span, String),
     #[error("Can't chain non-associative operators.")]
     NonAssociativeOperator(Span),
+    #[error("Using reserved identifier `{1}` as a namespace is not allowed.")]
+    ReservedNamespace(Span, String), // `{1}` above interpolates the rejected namespace name
+    #[error(
+        "Using reserved identifier `{1}` as second component of a qualified name is not allowed."
+    )]
+    ReservedQualifiedLiteral(Span, String), // `{1}` above interpolates the rejected second component
 }
 
 impl ParsingError {
@@ -121,6 +127,8 @@ impl ParsingError {
             Self::SpecialVariableCall(span, _) => Some(span.clone()),
             Self::SpecialVariableIndirectCall(span, _) => Some(span.clone()),
             Self::NonAssociativeOperator(span) => Some(span.clone()),
+            Self::ReservedNamespace(span, _) => Some(span.clone()),
+            Self::ReservedQualifiedLiteral(span, _) => Some(span.clone()),
         }
     }
     fn hint(&self) -> Option<&'static str> {
diff --git a/parser/src/keywords.rs b/parser/src/keywords.rs
new file mode 100644
index 0000000..ffd21ed
--- /dev/null
+++ b/parser/src/keywords.rs
@@ -0,0 +1,35 @@
+// This file is part of the uutils awk package.
+//
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+
+/// Returns `true` when `name` is exactly (case-sensitively) one of the keywords listed below.
+/// Such names may not appear in a qualified identifier (`ns::name`), matching gawk.
+pub fn is_reserved_keyword(name: &str) -> bool {
+    matches!(
+        name,
+        "BEGIN" // special patterns
+            | "END"
+            | "if" // control-flow constructs
+            | "else"
+            | "switch"
+            | "case"
+            | "default"
+            | "do"
+            | "while"
+            | "for"
+            | "in"
+            | "print" // input/output statements
+            | "printf"
+            | "getline"
+            | "next" // control-transfer statements
+            | "nextfile"
+            | "exit"
+            | "break"
+            | "continue"
+            | "return"
+            | "delete" // `delete` statement
+            | "function" // function-definition keywords
+            | "func"
+    ) // NOTE(review): gawk also bars standard built-in names (`sin`, `gsub`, ...) here - confirm scope
+}
diff --git a/parser/src/lib.rs b/parser/src/lib.rs
index 5075939..6d1c141 100644
--- a/parser/src/lib.rs
+++ b/parser/src/lib.rs
@@ -8,6 +8,7 @@
 mod ast;
 mod diagnostics;
 mod idempotency;
+mod keywords;
 mod lex;
 mod pratt;
 mod sexpr;
@@ -134,7 +135,14 @@ impl<'a> Parser<'a> {
                 }
                 Token::NamespaceDirective => {
                     let namespace = lex.expect_string()?;
-                    self.namespace = lex.lex_ident(namespace.as_ref(), self.arena)?;
+                    let namespace = lex.lex_ident(namespace.as_ref(), self.arena)?;
+                    if keywords::is_reserved_keyword(namespace) { // rejects e.g. `@namespace "if"`
+                        return Err(ParsingError::ReservedNamespace(
+                            lex.span(),
+                            namespace.to_string(),
+                        ));
+                    }
+                    self.namespace = namespace; // stored only after passing the keyword check
                     lex.expect_with(Token::is_stmnt_end, "expected statement end.".into())?;
                 }
                 Token::ConcurrentDirective => {
@@ -529,8 +537,13 @@ impl<'a> Parser<'a> {
 
     fn parse_delete(&mut self, lex: &mut Lexer<'a>) -> Result> {
         let next = lex.expect_next()?;
-        let Ok(var) = self.get_place(lex, next) else {
-            return Err(ParsingError::OperatorExpectsVariable(lex.span()));
+        let var = match self.get_place(lex, next) {
+            Ok(var) => var,
+            Err(
+                err @ (ParsingError::ReservedNamespace(..)
+                | ParsingError::ReservedQualifiedLiteral(..)),
+            ) => return Err(err), // keep the specific reserved-identifier diagnostic
+            Err(_) => return Err(ParsingError::OperatorExpectsVariable(lex.span())), // any other failure
         };
         let index = if lex.consume(&Token::OpenBracket) {
             let mut pratt = Pratt::new(self, false);
@@ -546,7 +559,9 @@ impl<'a> Parser<'a> {
 
     #[tracing::instrument]
     fn parse_function(&mut self, lex: &mut Lexer<'a>) -> Result<()> {
-        let name = lex.expect_identifier()?.qualify(self.namespace);
+        let name = lex
+            .expect_identifier()?
+            .try_qualify(self.namespace, &lex.span())?; // fails if a qualified name uses a reserved keyword
         let args = self.parse_signature(lex, &name)?;
         lex.consume(&Token::Newline);
         let body = self.parse_body(lex)?;
@@ -571,7 +586,9 @@ impl<'a> Parser<'a> {
         }
 
         loop {
-            let name = lex.expect_identifier()?.qualify(self.namespace);
+            let name = lex
+                .expect_identifier()?
+                .try_qualify(self.namespace, &lex.span())?; // same reserved-keyword check as above
             // Linear search is fine for the numbers we are working with.
             if let Some(arg) = args.iter().find(|&a| a == &name) {
                 return Err(ParsingError::DuplicatedArgument(
@@ -643,6 +660,10 @@ impl<'a> Parser<'a> {
             Token::TypedRegex(_) => Err(ParsingError::UnexpectedTypedRegex(lex.span())),
             token => match self.get_place(lex, token) {
                 Ok(var) => Ok(Atom::Variable(var)),
+                Err(
+                    err @ (ParsingError::ReservedNamespace(..)
+                    | ParsingError::ReservedQualifiedLiteral(..)),
+                ) => Err(err), // propagate reserved-identifier errors unchanged
                 Err(_) => Err(ParsingError::UnexpectedToken(
                     lex.span(),
                     "is not valid data.".into(),
@@ -652,10 +673,10 @@ impl<'a> Parser<'a> {
     }
 
     #[tracing::instrument]
-    fn get_place(&self, lex: &mut Lexer<'a>, token: Token<'a>) -> Result, Token<'a>> {
+    fn get_place(&self, lex: &mut Lexer<'a>, token: Token<'a>) -> Result> {
         match token {
             Token::Identifier(a) if !(lex.peek_is(&Token::OpenParent) && lex.is_yuxtaposed()) => {
-                Ok(a.qualify(self.namespace).into())
+                Ok(a.try_qualify(self.namespace, &lex.span())?.into())
             }
             Token::NrVariable => Ok(Variable::Nr),
             Token::NfVariable => Ok(Variable::Nf),
@@ -672,7 +693,10 @@ impl<'a> Parser<'a> {
             Token::RstartVariable => Ok(Variable::Rstart),
             Token::RlengthVariable => Ok(Variable::Rlength),
             Token::EnvironVariable => Ok(Variable::Environ),
-            tok => Err(tok),
+            tok => Err(ParsingError::UnexpectedToken(
+                lex.span(),
+                format!("{tok:?}"),
+            )), // catch-all arm: the token's Debug form becomes the error message payload
         }
     }
 }
@@ -702,22 +726,35 @@ impl Preprocessor {
 }
 
 trait IdentifierExt<'a> {
-    fn qualify(self, namespace: &'a str) -> Identifier<'a>
+    fn try_qualify(self, namespace: &'a str, span: &Span) -> Result>
     where
         Self: 'a;
 }
 
 impl<'a> IdentifierExt<'a> for lexer::Identifier<'_> {
-    fn qualify(self, namespace: &'a str) -> Identifier<'a>
+    fn try_qualify(self, namespace: &'a str, span: &Span) -> Result>
     where
         Self: 'a,
     {
         let literal = self.literal;
-        if let Some(namespace) = self.namespace {
-            Identifier { namespace, literal }
+        let namespace = if let Some(ns) = self.namespace { // explicitly qualified: `ns::literal`
+            if keywords::is_reserved_keyword(ns) {
+                return Err(ParsingError::ReservedNamespace(
+                    span.clone(),
+                    ns.to_string(),
+                ));
+            }
+            if keywords::is_reserved_keyword(literal) { // only checked when a namespace was written
+                return Err(ParsingError::ReservedQualifiedLiteral(
+                    span.clone(),
+                    literal.to_string(),
+                ));
+            }
+            ns // the explicit namespace takes precedence over the `namespace` argument
         } else {
-            Identifier { namespace, literal }
-        }
+            namespace // unqualified: use the namespace supplied by the caller
+        };
+        Ok(Identifier { namespace, literal })
     }
 }
 
diff --git a/parser/src/pratt.rs b/parser/src/pratt.rs
index 2ca4d95..f8ae251 100644
--- a/parser/src/pratt.rs
+++ b/parser/src/pratt.rs
@@ -269,10 +269,12 @@ impl<'a, 'b> Pratt<'a, 'b> {
                     name.literal.to_string(),
                 ));
             }
+            let span = lex.span(); // reused by `try_qualify` and `parse_function_call` below
+            let qualified = name.try_qualify(self.parser.namespace, &span)?;
             self.parser.parse_function_call(
                 lex,
-                |args| ExprNode::FunctionCall(name.qualify(self.parser.namespace), args),
-                lex.span(),
+                move |args| ExprNode::FunctionCall(qualified, args), // closure owns the already-qualified name
+                span,
             )
         } else if let Token::IndirectCall(name) = next {
             // Possible gawk bug: it accepts special variables if qualified,
@@ -283,16 +285,17 @@ impl<'a, 'b> Pratt<'a, 'b> {
                     name.literal.to_string(),
                 ));
             }
-            let name = Variable::User(name.qualify(self.parser.namespace));
+            let span = lex.span(); // reused by `try_qualify` and `parse_function_call` below
+            let name = Variable::User(name.try_qualify(self.parser.namespace, &span)?);
             self.parser.parse_function_call(
                 lex,
-                |args| ExprNode::IndirectCall(name, args),
-                lex.span(),
+                move |args| ExprNode::IndirectCall(name, args),
+                span,
             )
         } else if next.is_place() && lex.peek_is(&Token::OpenParent) && lex.is_yuxtaposed() {
             let name = match self.parser.get_place(lex, next) {
                 Ok(var) => var.to_string(),
-                Err(tok) => format!("{tok:?}"),
+                Err(err) => err.to_string(), // NOTE(review): now the error's Display text, previously the token's Debug form - confirm intended
             };
             Err(ParsingError::SpecialVariableCall(lex.span(), name))
         } else {
diff --git a/parser/src/tests.rs b/parser/src/tests.rs
index f3abdde..1805678 100644
--- a/parser/src/tests.rs
+++ b/parser/src/tests.rs
@@ -182,6 +182,18 @@ fn test_parser_invalid_patterns() {
     test_parser!(is_err!("BEGIN", "END", "BEGINFILE", "ENDFILE", "print 1;"));
 }
 
+#[test]
+fn test_parser_reserved_qualified_identifiers() {
+    test_parser!(is_err!(
+        "{ if::while }", // both components are reserved keywords
+        "{ foo::while }", // reserved keyword as second component
+        "{ while::foo }", // reserved keyword as namespace
+        "@namespace \"if\"; BEGIN {}", // reserved keyword in the `@namespace` directive
+        "function foo::while() {}", // function name with a reserved second component
+        "function while::foo() {}" // function name with a reserved namespace
+    ));
+}
+
 #[test]
 fn test_parser_non_assoc() {
     test_parser!(is_err!(