#!/usr/bin/env bb (ns minify (:require [babashka.fs :as fs] [clojure.pprint :refer [pprint]] [clojure.string :as str]) (:import 'java.io.StringReader)) ;; TOKENIZE (defn is-whitespace [c] (contains? #{\ \tab \newline \return} c)) (defn tokenize [source] (let [chars (vec source) total-chars (count chars)] (loop [cursor 0 tokens []] ; (when (< cursor 100) (pprint {:tokens tokens})) (if (>= cursor total-chars) tokens (let [c (chars cursor) nxt (get chars (inc cursor))] (cond ;; comment — inner loop scans to end of line (= c \;) (let [end (loop [j cursor] (if (or (>= j total-chars) (= (chars j) \newline)) j (recur (inc j))))] (recur end (conj tokens {:type :comment :value (apply str (subvec chars cursor end))}))) ;; whitespace — consume the whole run as one :space token (is-whitespace c) (let [end (loop [j cursor] (if (or (>= j total-chars) (not (is-whitespace (chars j)))) j (recur (inc j))))] (recur end (conj tokens {:type :space :value nil}))) (contains? #{\( \[ \{} c) (recur (inc cursor) (conj tokens {:type :open-paren :value (str c)})) (contains? #{\) \] \}} c) (recur (inc cursor) (conj tokens {:type :close-paren :value (str c)})) ;; strings (= c \") (let [end (loop [j (inc cursor)] (if (or (>= j total-chars) (= (chars j) \")) (inc j) (recur (inc j))))] (recur end (conj tokens {:type :string :value (apply str (subvec chars cursor end))}))) (and nxt (= c \#) (= nxt \()) (recur (+ cursor 2) (conj tokens {:type :open-annon-fn :value "#("})) :else ; (let [delimiters #{\( \) \[ \] \{ \} \; \" \, \@ \` \'} (let [delimiters #{\( \) \[ \] \{ \} \; \" \, \@ \` \'} cursor2 (loop [j cursor] (if (or (>= j total-chars) (is-whitespace (chars j)) (contains? delimiters (chars j))) j (recur (inc j))))] (if (= cursor2 cursor) (recur (inc cursor) (conj tokens {:type :unknown :value (str c)})) (recur cursor2 (conj tokens {:type :symbol :value (apply str (subvec chars cursor cursor2))})))))))))) ; COLLECT BINDING (def built-ins #{"fn" "local" "λ" "lambda" "require" "let" "+" "-" "/" "*" "<" ">" "<=" ">=" "not=" ".." "var" "if" "when" "and" "or" "not" "do" "set" "each" "for" "while" "true" "$1" "math.floor" "floor" "false" "nil" "size" "textSize" "background" "text" "random" "key" ":setup" ":draw" ":key-pressed" ":L5" "map"}) (defn collect-bindings [tokens] (reduce (fn [acc token] (let [val (:value token)] (cond (nil? val) acc ; handle table access create bindings for each element tools.wrench -> tools wrench (re-matches #"^[a-z]+\..*" val) ; table access (apply conj acc (str/split val #"\.")) (and (> (count val) 1) (not (re-matches #"^:.*" val)) ; exclude keywords ;; (not (re-matches #"^[a-z]+\..*" val)) ; table access (not (re-matches #"\d+" val)) (not (get built-ins val)) (= (:type token) :symbol)) (conj acc val) :else acc))) #{} tokens)) ;; BUILD RENAME MAP (defn short-name-seq "Lazy infinite seq of short names. like a,b,c..aa,bb,cc..zzzzz" [] (let [letters (map str "abcdefghijklmnopqrstuvwxyz")] (mapcat (fn [length] (map (fn [letter] (apply str (repeat length letter))) letters)) (range 1 Long/MAX_VALUE)))) (defn build-rename-map [names] (zipmap names (short-name-seq))) ;; EMIT (def simple-replace-map {"lambda" "λ" "local" "var"}) (def drop-tokens #{:space :comment}) (defn needs-space? "does this token need space before it?" [prev current nxt] (when (= (current :type) :string) (pprint {:current current})) (or (= (current :type) :open-annon-fn) (and prev (= (prev :type) :string) (contains? #{:symbol :open-paren} (current :type))) (= (current :type) :string) ; space needed: prev is :symbol or :close-paren, AND current is :symbol or :open-paren (and prev (or (= (prev :type) :symbol) (= (prev :type) :close-paren)) (or (= (current :type) :symbol) (= (current :type) :open-paren))))) (defn add-space [tokens] (loop [cursor 0 result []] (if (>= cursor (count tokens)) result (let [prev (when (pos? cursor) (get tokens (dec (count result)))) curr (tokens cursor) nxt (when (< (inc cursor) (count tokens)) (tokens (inc cursor)))] (recur (inc cursor) (if (needs-space? prev curr nxt) (conj result (assoc curr :value (str " " (curr :value)))) (conj result curr))))))) (defn transform-tokens "get tokens ready to emit" [tokens re-map] (->> tokens ; drop whitespace and comments (remove (fn [token] (drop-tokens (token :type)))) ; make simple replaces (map (fn [token] (if (simple-replace-map (token :value)) (assoc token :value (simple-replace-map (token :value))) token))) ;; apply rename map (map (fn [token] (let [val (token :value)] (cond (nil? val) token ; keywords (and (re-matches #"^:.*" val) (not (built-ins val))) (assoc token :value (str ":" (re-map (subs val 1)))) ; table access (re-matches #"^[a-z]+\..*" val) (assoc token :value (->> (str/split val #"\.") (map #(re-map %)) (str/join "."))) ; keywords :else (if-let [new-value (re-map (token :value))] (assoc token :value new-value) token))))) (vec) ; add needed space before ( { [ ; space needed: prev is :symbol or :close-paren, AND current is :symbol or :open-paren (add-space))) (defn emit [tokens re-map] (->> (transform-tokens tokens re-map) (reduce (fn [acc token] (conj acc (:value token))) []) (str/join ""))) (defn write-file "Write a file to the file system" [file-name content] (fs/write-lines file-name [content])) (defn gen-output-file-name [file-name] (str/replace file-name #"\.fnl$" ".min.fnl")) (when (first *command-line-args*) (let [file-name (first *command-line-args*) source (slurp file-name) tokens (tokenize source) re-map (build-rename-map (collect-bindings tokens)) output (emit tokens re-map) output-file-name (gen-output-file-name file-name)] (pprint {:output output :token-sample (take 15 tokens) :bindings (build-rename-map (collect-bindings tokens))}) (write-file "tokens.clj" (with-out-str (pprint (transform-tokens tokens re-map)))) (write-file output-file-name output) (println (str "Wrote file " output-file-name))))