- Notifications
You must be signed in to change notification settings - Fork 14
Parse to AST and use printers to go from AST to specific targets #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
7ae887b 1b2b221 a838160 43f59dc 553f861 0702332 7733985 c906de3 File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Make parser produce an AST by default. - Switch to lexical binding and fix some broken var references. - Reorganizes reduce functions' signatures a bit.
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| ;;; clj-parse.el --- Clojure/EDN parser | ||
| ;;; clj-parse.el --- Clojure/EDN parser -*- lexical-binding: t; -*- | ||
| | ||
| ;; Copyright (C) 2017 Arne Brasseur | ||
| | ||
| | @@ -25,6 +25,8 @@ | |
| ;;; Code: | ||
| | ||
| ;; Before emacs 25.1 it's an ELPA package | ||
| | ||
| (require 'a) | ||
| (require 'let-alist) | ||
| (require 'cl-lib) | ||
| (require 'clj-lex) | ||
| | @@ -40,6 +42,24 @@ | |
| :character) | ||
| "Tokens that represent leaf nodes in the AST.") | ||
| | ||
| (defvar clj-parse--closer-tokens '(:rparen | ||
| :rbracket | ||
| :rbrace) | ||
| "Tokens that represent closing of an AST branch.") | ||
| | ||
| (defun clj-parse--is-leaf? (el) | ||
| (member (clj-lex-token-type el) clj-parse--leaf-tokens)) | ||
| | ||
| (defun clj-parse--is-node? (el) | ||
| (a-has-key el 'subnodes)) | ||
| | ||
| (defun clj-parse--is-open-prefix? (el) | ||
| (and (member (clj-lex-token-type el) '(:discard :tag)) | ||
| (not (clj-parse--is-node? el)))) | ||
| | ||
| (defun clj-parse--make-node (type subnodes &rest kvs) | ||
| (apply 'a-list 'type type 'subnodes subnodes kvs)) | ||
| | ||
| ;; The EDN spec is not clear about wether \u0123 and \o012 are supported in | ||
| ;; strings. They are described as character literals, but not as string escape | ||
| ;; codes. In practice all implementations support them (mostly with broken | ||
| | @@ -69,97 +89,84 @@ | |
| (substring s 1 -1))))) | ||
| | ||
| (defun clj-parse-character (c) | ||
| (let* ((form (cdr (assq 'form token))) | ||
| (first-char (elt form 1))) | ||
| (let ((first-char (elt c 1))) | ||
| (cond | ||
| ((equal form "\\newline") ?\n) | ||
| ((equal form "\\return") ?\r) | ||
| ((equal form "\\space") ?\ ) | ||
| ((equal form "\\tab") ?\t) | ||
| ((eq first-char ?u) (string-to-number (substring form 2) 16)) | ||
| ((eq first-char ?o) (string-to-number (substring form 2) 8)) | ||
| ((equal c "\\newline") ?\n) | ||
| ((equal c "\\return") ?\r) | ||
| ((equal c "\\space") ?\ ) | ||
| ((equal c "\\tab") ?\t) | ||
| ((eq first-char ?u) (string-to-number (substring c 2) 16)) | ||
| ((eq first-char ?o) (string-to-number (substring c 2) 8)) | ||
| (t first-char)))) | ||
| | ||
| (defun clj-parse-edn-reduce1 (stack token) | ||
| (cl-case (cdr (assq 'type token)) | ||
| (:whitespace stack) | ||
| (:number (cons (string-to-number (cdr (assq 'form token))) stack)) | ||
| (:nil (cons nil stack)) | ||
| (:true (cons t stack)) | ||
| (:false (cons nil stack)) | ||
| (:symbol (cons (intern (cdr (assq 'form token))) stack)) | ||
| (:keyword (cons (intern (cdr (assq 'form token))) stack)) | ||
| (:string (cons (clj-parse-string (cdr (assq 'form token))) stack)) | ||
| (:character (cons (clj-parse-character (cdr (assq 'form token))) stack)))) | ||
| | ||
| (defun clj-parse-edn-reduceN (stack type coll) | ||
| (if (eq :discard type) | ||
| stack | ||
| (cons | ||
| (cl-case type | ||
| (:whitespace :ws) | ||
| (:number coll) | ||
| (:list (-butlast (cdr coll))) | ||
| (:set (-butlast (cdr coll))) | ||
| (:vector (apply #'vector (-butlast (cdr coll)))) | ||
| (:map (mapcar (lambda (pair) | ||
| (cons (car pair) (cadr pair))) | ||
| (-partition 2 (-butlast (cdr coll)))))) | ||
| stack))) | ||
| | ||
| (defun clj-parse--reduce-coll (stack open-token coll-type reducN) | ||
| (let ((coll nil)) | ||
| (defun clj-parse--next () | ||
| (setq next (clj-lex-next)) | ||
| (while (eq (clj-lex-token-type next) :whitespace) | ||
| (setq next (clj-parse--next))) | ||
| next) | ||
| | ||
| (defun clj-parse--ast-reduce1 (stack leaf) | ||
| (push leaf stack)) | ||
| | ||
| (defun clj-parse--ast-reduceN (stack node subnodes) | ||
| (push | ||
| (cl-case (clj-lex-token-type node) | ||
| (:lparen (clj-parse--make-node :list subnodes)) | ||
| (:lbracket (clj-parse--make-node :vector subnodes)) | ||
| (:set (clj-parse--make-node :set subnodes)) | ||
| (:lbrace (clj-parse--make-node :map subnodes)) | ||
| ||
| (:discard (clj-parse--make-node :discard subnodes))) | ||
| ||
| stack)) | ||
| | ||
| (defun clj-parse--find-opener (stack closer-token) | ||
| (cl-case (clj-lex-token-type closer-token) | ||
| (:rparen :lparen) | ||
| (:rbracket :lbracket) | ||
| (:rbrace (clj-lex-token-type | ||
| (-find (lambda (token) (member (clj-lex-token-type token) '(:lbrace :set))) stack))))) | ||
| | ||
| (defun clj-parse--reduce-coll (stack closer-token reduceN) | ||
| "Reduce collection based on the top of the stack" | ||
| (let ((opener-type (clj-parse--find-opener stack closer-token)) | ||
| (coll nil)) | ||
| (while (and stack | ||
| (not (eq (clj-lex-token-type (car stack)) open-token))) | ||
| (not (eq (clj-lex-token-type (car stack)) opener-type))) | ||
| (push (pop stack) coll)) | ||
| (if (eq (clj-lex-token-type (car stack)) open-token) | ||
| (progn | ||
| (push (pop stack) coll) | ||
| (funcall reduceN stack coll-type coll)) | ||
| ;; Unwound the stack without finding a matching paren: return the original stack | ||
| (reverse list)))) | ||
| | ||
| (if (eq (clj-lex-token-type (car stack)) opener-type) | ||
| (let ((node (pop stack))) | ||
| (funcall reduceN stack node coll)) | ||
| ;; Syntax error | ||
| (error "Syntax Error")))) | ||
| Collaborator There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The parser should not throw syntax errors. Instead it should continue parsing and return a partial result. You can inspect the result afterwards to find syntax errors, or raise them in the reducer. Member Author There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When you say "the reducer", do you mean the Collaborator There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, you can think of parsing as three layers working together: the lexer, "the parser", "the reducer". The last part is pluggable to support different outputs.
| ||
| | ||
| (defun clj-parse-reduce (reduce1 reduceN) | ||
| (let ((stack nil) | ||
| (token (clj-lex-next))) | ||
| (let ((stack nil)) | ||
| | ||
| (while (not (eq (clj-lex-token-type token) :eof)) | ||
| (while (not (eq (clj-lex-token-type (setq token (clj-parse--next))) :eof)) | ||
| ||
| (message "STACK: %S" stack) | ||
| (message "TOKEN: %S\n" token) | ||
| | ||
| (setf stack | ||
| (if (member (clj-lex-token-type token) | ||
| clj-parse--leaf-tokens) | ||
| (funcall reduce1 stack token) | ||
| (cons token stack))) | ||
| | ||
| ;; Reduce based on the top item on the stack (collections) | ||
| (cl-case (clj-lex-token-type (car stack)) | ||
| (:rparen (setf stack (clj-parse--reduce-coll stack :lparen :list reduceN))) | ||
| (:rbracket (setf stack (clj-parse--reduce-coll stack :lbracket :vector reduceN))) | ||
| (:rbrace | ||
| (let ((open-token (-find (lambda (token) | ||
| (member (clj-lex-token-type token) '(:lbrace :set))) | ||
| stack))) | ||
| | ||
| (cl-case (clj-lex-token-type open-token) | ||
| (:lbrace | ||
| (setf stack (clj-parse--reduce-coll stack :lbrace :map reduceN))) | ||
| (:set | ||
| (setf stack (clj-parse--reduce-coll stack :set :set reduceN))))))) | ||
| | ||
| ;; Reduce based on top two items on the stack | ||
| (if (not (clj-lex-token? (car stack))) ;; top is fully reduced | ||
| (cl-case (clj-lex-token-type (cadr stack)) | ||
| (:discard (setf stack (funcall reduceN (cddr stack) :discard (-take 2 stack)))))) | ||
| | ||
| (setq token (clj-lex-next))) | ||
| | ||
| (message "RESULT: %S" stack) | ||
| stack)) | ||
| (let ((token-type (clj-lex-token-type token))) | ||
| (cond | ||
| ((member token-type clj-parse--leaf-tokens) (setf stack (funcall reduce1 stack token))) | ||
| ((member token-type clj-parse--closer-tokens) (setf stack (clj-parse--reduce-coll stack token reduceN))) | ||
| (t (push token stack)))) | ||
| | ||
| ;; Reduce based on top two items on the stack (special prefixed elements) | ||
| (seq-let [top lookup] stack | ||
| (when (and (clj-parse--is-open-prefix? lookup) | ||
| (or (clj-parse--is-node? top) | ||
| (clj-parse--is-leaf? top))) ;; top is fully reduced | ||
| (setf stack (funcall reduceN (cddr stack) lookup (list top)))))) | ||
| | ||
| ;; reduce root | ||
| (let ((root (clj-parse--make-node :root stack))) | ||
| (message "RESULT: %S" root) | ||
| root))) | ||
| | ||
| (defun clj-parse () | ||
| (clj-parse-reduce 'clj-parse-edn-reduce1 'clj-parse-edn-reduceN)) | ||
| (clj-parse-reduce #'clj-parse--ast-reduce1 #'clj-parse--ast-reduceN)) | ||
| | ||
| (provide 'clj-parse) | ||
| | ||
| | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I made this to simplify
clj-parse--ast-reduce1, and it's ok as long as we only use this parser to produce AST, but if we want to use theclj-parse--reducealgorithm to produce CST (not entirely sure we would want to do that) then we can get rid of it and handle whitespaces when reducing leafs.