Theory LocalLexing
theory LocalLexing
imports CFG
begin
typedecl character
type_synonym lexer = "character list ⇒ nat ⇒ nat set"
type_synonym token = "symbol × character list"
type_synonym tokens = "token list"
definition terminal_of_token :: "token ⇒ symbol"
where
"terminal_of_token t = fst t"
definition terminals :: "tokens ⇒ sentence"
where
"terminals ts = map terminal_of_token ts"
definition chars_of_token :: "token ⇒ character list"
where
"chars_of_token t = snd t"
fun chars :: "tokens ⇒ character list"
where
"chars [] = []"
| "chars (t#ts) = (chars_of_token t) @ (chars ts)"
fun charslength :: "tokens ⇒ nat"
where
"charslength cs = length (chars cs)"
definition is_lexer :: "lexer ⇒ bool"
where
"is_lexer lexer =
(∀ D p l. (p ≤ length D ∧ l ∈ lexer D p ⟶ p + l ≤ length D) ∧
(p > length D ⟶ lexer D p = {}))"
type_synonym selector = "token set ⇒ token set ⇒ token set"
definition is_selector :: "selector ⇒ bool"
where
"is_selector sel = (∀ A B. A ⊆ B ⟶ (A ⊆ sel A B ∧ sel A B ⊆ B))"
fun by_length :: "nat ⇒ tokens set ⇒ tokens set"
where
"by_length l tss = { ts . ts ∈ tss ∧ length (chars ts) = l }"
fun funpower :: "('a ⇒ 'a) ⇒ nat ⇒ ('a ⇒ 'a)"
where
"funpower f 0 x = x"
| "funpower f (Suc n) x = f (funpower f n x)"
definition natUnion :: "(nat ⇒ 'a set) ⇒ 'a set"
where
"natUnion f = ⋃ { f n | n. True }"
definition limit :: "('a set ⇒ 'a set) ⇒ 'a set ⇒ 'a set"
where
"limit f x = natUnion (λ n. funpower f n x)"
locale LocalLexing = CFG +
fixes Lex :: "symbol ⇒ lexer"
fixes Sel :: "selector"
assumes Lex_is_lexer: "∀ t ∈ 𝔗. is_lexer (Lex t)"
assumes Sel_is_selector: "is_selector Sel"
fixes Doc :: "character list"
begin
definition admissible :: "tokens ⇒ bool"
where
"admissible ts = (terminals ts ∈ ℒ⇩P)"
definition Append :: "token set ⇒ nat ⇒ tokens set ⇒ tokens set"
where
"Append Z k P = P ∪
{ p @ [t] | p t. p ∈ by_length k P ∧ t ∈ Z ∧ admissible (p @ [t])}"
definition 𝒳 :: "nat ⇒ token set"
where
"𝒳 k = {(t, ω) | t l ω. t ∈ 𝔗 ∧ l ∈ Lex t Doc k ∧ ω = take l (drop k Doc)}"
definition 𝒲 :: "tokens set ⇒ nat ⇒ token set"
where
"𝒲 P k = { u. u ∈ 𝒳 k ∧ (∃ p ∈ by_length k P. admissible (p@[u])) }"
definition 𝒴 :: "token set ⇒ tokens set ⇒ nat ⇒ token set"
where
"𝒴 T P k = Sel T (𝒲 P k)"
fun 𝒫 :: "nat ⇒ nat ⇒ tokens set"
and 𝒬 :: "nat ⇒ tokens set"
and 𝒵 :: "nat ⇒ nat ⇒ token set"
where
"𝒫 0 0 = {[]}"
| "𝒫 k (Suc u) = limit (Append (𝒵 k (Suc u)) k) (𝒫 k u)"
| "𝒫 (Suc k) 0 = 𝒬 k"
| "𝒵 k 0 = {}"
| "𝒵 k (Suc u) = 𝒴 (𝒵 k u) (𝒫 k u) k"
| "𝒬 k = natUnion (𝒫 k)"
definition 𝔓 :: "tokens set"
where
"𝔓 = 𝒬 (length Doc)"
definition ll :: "tokens set"
where
"ll = { p . p ∈ 𝔓 ∧ charslength p = length Doc ∧ terminals p ∈ ℒ }"
end
end