Theory LocalLexing

theory LocalLexing
imports CFG
begin

typedecl character

type_synonym lexer = "character list  nat  nat set"  

type_synonym token = "symbol × character list" 

type_synonym tokens = "token list"

definition terminal_of_token :: "token  symbol"
where
  "terminal_of_token t = fst t"

definition terminals :: "tokens  sentence"
where
  "terminals ts = map terminal_of_token ts"

definition chars_of_token :: "token  character list"
where
  "chars_of_token t = snd t"

fun chars :: "tokens  character list"
where
  "chars [] = []"
| "chars (t#ts) = (chars_of_token t) @ (chars ts)"

fun charslength :: "tokens  nat"
where
  "charslength cs = length (chars cs)"

definition is_lexer :: "lexer  bool"
where
  "is_lexer lexer = 
    ( D p l. (p  length D  l  lexer D p  p + l  length D) 
              (p > length D  lexer D p = {}))"

type_synonym selector = "token set  token set  token set"

definition is_selector :: "selector  bool"
where
  "is_selector sel = ( A B. A  B  (A  sel A B  sel A B  B))" 

fun by_length :: "nat  tokens set  tokens set"
where
  "by_length l tss = { ts . ts  tss  length (chars ts) = l }" 

fun funpower :: "('a  'a)  nat  ('a  'a)"
where
  "funpower f 0 x = x"
| "funpower f (Suc n) x = f (funpower f n x)"

definition natUnion :: "(nat  'a set)  'a set"
where
  "natUnion f =  { f n | n. True }"

definition limit  :: "('a set  'a set)  'a set  'a set"
where
  "limit f x = natUnion (λ n. funpower f n x)"

locale LocalLexing = CFG +
  fixes Lex :: "symbol  lexer"
  fixes Sel :: "selector"
  assumes Lex_is_lexer: " t  𝔗. is_lexer (Lex t)"
  assumes Sel_is_selector: "is_selector Sel"
  fixes Doc :: "character list"
begin

definition admissible :: "tokens  bool"
where
  "admissible ts = (terminals ts  P)" 

definition Append :: "token set  nat  tokens set  tokens set"
where
  "Append Z k P = P  
    { p @ [t] | p t. p  by_length k P  t  Z  admissible (p @ [t])}"

definition 𝒳 :: "nat  token set"
where
  "𝒳 k = {(t, ω) | t l ω. t  𝔗  l  Lex t Doc k  ω = take l (drop k Doc)}"

definition 𝒲 :: "tokens set  nat  token set"
where
  "𝒲 P k =  { u. u  𝒳 k  ( p  by_length k P. admissible (p@[u])) }"

definition 𝒴 :: "token set  tokens set  nat  token set"
where
  "𝒴 T P k = Sel T (𝒲 P k)" 

fun 𝒫 :: "nat  nat  tokens set"
and 𝒬 :: "nat  tokens set"
and 𝒵 :: "nat  nat  token set"
where
  "𝒫 0 0 = {[]}"
| "𝒫 k (Suc u) = limit (Append (𝒵 k (Suc u)) k) (𝒫 k u)"
| "𝒫 (Suc k) 0 = 𝒬 k"
| "𝒵 k 0 = {}"
| "𝒵 k (Suc u) = 𝒴 (𝒵 k u) (𝒫 k u) k"
| "𝒬 k = natUnion (𝒫 k)"

definition 𝔓 :: "tokens set"
where
  "𝔓 = 𝒬 (length Doc)"

definition ll :: "tokens set"
where 
  "ll = { p . p  𝔓  charslength p = length Doc  terminals p   }"

end

end