1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
//! A crate for low-level parsing of the WebAssembly text formats: WAT and WAST.
//!
//! This crate is intended to be a low-level detail of the `wat` crate,
//! providing a low-level parsing API for parsing WebAssembly text format
//! structures. The API provided by this crate is very similar to
//! [`syn`](https://docs.rs/syn) and provides the ability to write customized
//! parsers which may be an extension to the core WebAssembly text format. For
//! more documentation see the [`parser`] module.
//!
//! # High-level Overview
//!
//! This crate provides a few major pieces of functionality:
//!
//! * [`lexer`] - this is a raw lexer for the wasm text format. This is not
//! customizable, but if you'd like to iterate over raw tokens this is the
//! module for you. You likely won't use this much.
//!
//! * [`parser`] - this is the workhorse of this crate. The [`parser`] module
//! primarily provides the [`Parse`][] trait, along with utilities for
//! working with a [`Parser`](`parser::Parser`) to parse streams of
//! tokens.
//!
//! * [`Module`] - this contains an Abstract Syntax Tree (AST) of the
//! WebAssembly Text format (WAT) as well as the unofficial WAST format. This
//! also has a [`Module::encode`] method to emit a module in its binary form.
//!
//! # Stability and WebAssembly Features
//!
//! This crate provides support for many in-progress WebAssembly features such
//! as reference types, multi-value, etc. Be sure to check out the documentation
//! of the [`wast` crate](https://docs.rs/wast) for policy information on crate
//! stability vs WebAssembly Features. The tl;dr; version is that this crate
//! will issue semver-non-breaking releases which will break the parsing of the
//! text format. This crate, unlike `wast`, is expected to have numerous Rust
//! public API changes, all of which will be accompanied with a semver-breaking
//! release.
//!
//! # Compile-time Cargo features
//!
//! This crate has a `wasm-module` feature which is turned on by default which
//! includes all necessary support to parse full WebAssembly modules. If you
//! don't need this (for example you're parsing your own s-expression format)
//! then this feature can be disabled.
//!
//! [`Parse`]: parser::Parse
//! [`LexError`]: lexer::LexError
#![deny(missing_docs, rustdoc::broken_intra_doc_links)]
use std::fmt;
use std::path::{Path, PathBuf};
use unicode_width::UnicodeWidthStr;
#[cfg(feature = "wasm-module")]
mod binary;
#[cfg(feature = "wasm-module")]
mod resolve;
mod ast;
pub use self::ast::*;
pub mod lexer;
pub mod parser;
/// A convenience error type to tie together all the detailed errors produced by
/// this crate.
///
/// This type can be created from a [`lexer::LexError`] or [`parser::Error`].
/// This also contains storage for file/text information so a nice error can be
/// rendered along the same lines as rustc's own error messages (minus the
/// color).
///
/// This type is typically suitable for use in public APIs for consumers of this
/// crate.
#[derive(Debug)]
pub struct Error {
// All error state lives behind a single `Box`, so the `Error` value itself
// only holds one pointer.
inner: Box<ErrorInner>,
}
/// Boxed payload of [`Error`]: where the error is, what it is, and optional
/// context used when rendering it.
#[derive(Debug)]
struct ErrorInner {
/// Rendering information for the offending source line. Starts as `None`
/// and is filled in by `Error::set_text` (which `Error::lex` and
/// `Error::parse` call right away).
text: Option<Text>,
/// Path printed in the rendered message. Starts as `None` and is filled in
/// by `Error::set_path`; while unset, `Display` falls back to `<anon>`.
file: Option<PathBuf>,
/// Location in the source text that this error points at.
span: Span,
/// The cause of the error: a lexer error or a free-form message.
kind: ErrorKind,
}
/// Information computed by `Text::new` that `Display` needs in order to show
/// the source line an error points at.
#[derive(Debug)]
struct Text {
/// Line index as reported by `Span::linecol_in`; `Display` prints
/// `line + 1`.
line: usize,
/// Number of columns placed before the `^` marker; `Display` prints
/// `col + 1` as the column number.
col: usize,
/// Text of the offending line after `Text::new` has replaced tabs and
/// removed the codepoints it filters out.
snippet: String,
}
/// The underlying cause carried by an [`Error`].
#[derive(Debug)]
enum ErrorKind {
/// An error reported by the lexer; produced by `Error::lex`.
Lex(lexer::LexError),
/// A free-form message; produced by `Error::parse` and `Error::new`.
Custom(String),
}
impl Error {
fn lex(span: Span, content: &str, kind: lexer::LexError) -> Error {
let mut ret = Error {
inner: Box::new(ErrorInner {
text: None,
file: None,
span,
kind: ErrorKind::Lex(kind),
}),
};
ret.set_text(content);
return ret;
}
fn parse(span: Span, content: &str, message: String) -> Error {
let mut ret = Error {
inner: Box::new(ErrorInner {
text: None,
file: None,
span,
kind: ErrorKind::Custom(message),
}),
};
ret.set_text(content);
return ret;
}
/// Creates a new error with the given `message` which is targeted at the
/// given `span`
///
/// Note that you'll want to ensure that `set_text` or `set_path` is called
/// on the resulting error to improve the rendering of the error message.
pub fn new(span: Span, message: String) -> Error {
Error {
inner: Box::new(ErrorInner {
text: None,
file: None,
span,
kind: ErrorKind::Custom(message),
}),
}
}
/// Return the `Span` for this error.
pub fn span(&self) -> Span {
self.inner.span
}
/// To provide a more useful error this function can be used to extract
/// relevant textual information about this error into the error itself.
///
/// The `contents` here should be the full text of the original file being
/// parsed, and this will extract a sub-slice as necessary to render in the
/// `Display` implementation later on.
pub fn set_text(&mut self, contents: &str) {
if self.inner.text.is_some() {
return;
}
self.inner.text = Some(Text::new(contents, self.inner.span));
}
/// To provide a more useful error this function can be used to set
/// the file name that this error is associated with.
///
/// The `path` here will be stored in this error and later rendered in the
/// `Display` implementation.
pub fn set_path(&mut self, path: &Path) {
if self.inner.file.is_some() {
return;
}
self.inner.file = Some(path.to_path_buf());
}
/// Returns the underlying `LexError`, if any, that describes this error.
pub fn lex_error(&self) -> Option<&lexer::LexError> {
match &self.inner.kind {
ErrorKind::Lex(e) => Some(e),
_ => None,
}
}
/// Returns the underlying message, if any, that describes this error.
pub fn message(&self) -> String {
match &self.inner.kind {
ErrorKind::Lex(e) => e.to_string(),
ErrorKind::Custom(e) => e.clone(),
}
}
}
impl fmt::Display for Error {
    /// Renders this error.
    ///
    /// When no source text has been attached, the output is the message
    /// followed by the byte offset of the span. Otherwise the output is the
    /// message, a `file:line:col` header (using `<anon>` when no path is set
    /// or the path is not valid UTF-8), the source snippet, and a `^` marker
    /// line.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let inner = &*self.inner;

        let err = match &inner.kind {
            ErrorKind::Custom(msg) => msg as &dyn fmt::Display,
            ErrorKind::Lex(lex) => lex as &dyn fmt::Display,
        };

        // Without source text only the raw offset can be reported.
        let text = if let Some(text) = &inner.text {
            text
        } else {
            return write!(f, "{} at byte offset {}", err, inner.span.offset);
        };

        let file = match &inner.file {
            Some(path) => path.to_str().unwrap_or("<anon>"),
            None => "<anon>",
        };

        // The lone positional argument is consumed by `{marker:>0$}` as the
        // field width, right-aligning the `^` under the reported column.
        let column = text.col + 1;
        write!(
            f,
            "\
{err}
--> {file}:{line}:{col}
|
{line:4} | {text}
| {marker:>0$}",
            column,
            err = err,
            file = file,
            line = text.line + 1,
            col = column,
            text = text.snippet,
            marker = "^",
        )
    }
}
// Empty impl: nothing is overridden, so the trait's default method
// implementations apply.
impl std::error::Error for Error {}
impl Text {
    /// Extracts the line of `content` that `span` points into and prepares it
    /// for display.
    ///
    /// The line/column pair comes from `span.linecol_in(content)`. If that
    /// line does not exist in `content`, an empty snippet is used.
    fn new(content: &str, span: Span) -> Text {
        // Codepoints that change how text is rendered; they are left out of
        // the snippet so the error message stays readable.
        const DROPPED: [char; 9] = [
            '\u{202a}', '\u{202b}', '\u{202d}', '\u{202e}', '\u{2066}', '\u{2067}', '\u{2068}',
            '\u{206c}', '\u{2069}',
        ];

        let (line, col) = span.linecol_in(content);
        let raw_line = content.lines().nth(line).unwrap_or("");

        let mut snippet = String::new();
        for ch in raw_line.chars() {
            if ch == '\t' {
                // Tabs are replaced with spaces so the line renders
                // consistently.
                snippet.push_str(" ");
            } else if !DROPPED.contains(&ch) {
                snippet.push(ch);
            }
        }

        // The `unicode-width` crate reports how wide the part of the snippet
        // before our "column" is, which is how far the `^` marker must be
        // pushed to the right. If `col` is not a valid slice end for the
        // snippet, the raw value is used unchanged.
        //
        // NOTE(review): `col` is applied to the rewritten snippet here; if
        // `linecol_in` reports an offset into the original line, characters
        // dropped above could make the two disagree -- confirm.
        let col = match snippet.get(..col) {
            Some(prefix) => prefix.width(),
            None => col,
        };

        Text { line, col, snippet }
    }
}