Я пытаюсь выучить немного недоразумений, портируя игрушку NFA regexp matcher . Очевидно, моя главная проблема - это представление и манипулирование графиками. Я нашел рабочее решение, но моя реализация (использующая gensym
для эмуляции указателей, в основном) оставляет мне неприятный вкус во рту.
Хотите предложить улучшения, как для представления графиков, так и для общей читабельности и идиом? (оригинальное императивное решение гораздо более читаемо, чем мое текущее).
Ура!
; This is a port of Matt Might's toy regexp library described in
; http://matt.might.net/articles/implementation-of-nfas-and-regular-expressions-in-java/
; char_transitions is a map that associates a character with a set
; of target state names
; empty_transitions is a set of target state names
(defrecord NfaState [final char_transitions empty_transitions])
; 'entry' and 'exit' are state names
; 'states' is a map that associates state names with states
(defrecord Nfa [entry exit states])
(defn- add-empty-edge [nfa curr_state_name next_state_name]
(let [curr_state (curr_state_name (:states nfa))]
(Nfa. (:entry nfa) (:exit nfa) (assoc (:states nfa) curr_state_name (NfaState. (:final curr_state) (:char_transitions curr_state) (conj (:empty_transitions curr_state) next_state_name))))))
(defn- nfa-matches?
([nfa nfa_state_name string] (nfa-matches? nfa nfa_state_name string #{}))
([nfa nfa_state_name string visited]
(let [nfa_state (nfa_state_name (:states nfa))
new_visited (conj visited nfa_state_name)]
(cond
(contains? visited nfa_state_name) false
(empty? string) (or (:final nfa_state)
(some #(nfa-matches? nfa % string new_visited)
(:empty_transitions nfa_state)))
(not (empty? string)) (or
(some #(nfa-matches? nfa % (.substring string 1)) (get (:char_transitions nfa_state) (.charAt string 0) #{}))
(some #(nfa-matches? nfa % string new_visited) (:empty_transitions nfa_state)))
:else false))))
(defn matches? [nfa string]
"Matches a string against an NFA"
(nfa-matches? nfa (:entry nfa) string))
(defn c [ch]
"Creates an NFA which matches the character 'c'"
(let [in (gensym)
out (gensym)]
(Nfa. in out {in (NfaState. false {ch #{out}} #{}) out (NfaState. true {} #{})})))
(defn e []
"Creates an NFA which matches the empty string"
(let [in (gensym)
out (gensym)]
(Nfa. in out {in (NfaState. false {} #{out}) out (NfaState. true {} #{})})))
(defn rep [nfa]
"Creates an NFA which matches zero or more repetitions of the given NFA"
(add-empty-edge (add-empty-edge nfa (:entry nfa) (:exit nfa)) (:exit nfa) (:entry nfa)))
(defn- update-final [nfastate new_final]
(NfaState. new_final (:char_transitions nfastate) (:empty_transitions nfastate)))
(defn s [first second]
"Creates an NFA which matches a sequence of two provided NFAs"
(add-empty-edge (Nfa. (:entry first) (:exit second) (merge (update-in (:states first) [(:exit first)] update-final false) (update-in (:states second) [(:exit second)] update-final true))) (:exit first) (:entry second)))
(defn ou [first second]
"Creates an NFA which matches either provided NFA"
(let [in (gensym)
out (gensym)]
(add-empty-edge (add-empty-edge (Nfa. in out (assoc (merge (update-in (:states first) [(:exit first)] update-final false) (update-in (:states second) [(:exit second)] update-final false)) in (NfaState. false {} #{(:entry first) (:entry second)}) out (NfaState. true {} #{}))) (:exit first) out) (:exit second) out)))
; sugar
(defn st [string]
"Creates an NFA which matches the provided string"
(if (empty? string)
(e)
(s (c (.charAt string 0)) (st (.substring string 1)))))