Text.Format.Generic

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
------------------------------------------------------------------------
-- The Agda standard library
--
-- Format strings for Printf and Scanf
------------------------------------------------------------------------
 
{-# OPTIONS --cubical-compatible --safe #-}
 
module Text.Format.Generic where
 
open import Level using (0ℓ)
open import Effect.Applicative
open import Data.Char.Base using (Char)
open import Data.List.Base as List
open import Data.Maybe.Base as Maybe
open import Data.Nat.Base
open import Data.Product.Base using (_,_)
open import Data.Product.Nary.NonDependent
open import Data.Sum.Base
open import Data.String.Base
import Data.Sum.Effectful.Left as Sumₗ
open import Function.Base
open import Function.Nary.NonDependent using (0ℓs; Sets)
open import Function.Strict
 
------------------------------------------------------------------------
-- Format specifications.
-- Defines the supported %-codes.
 
record FormatSpec : Set₁ where
  field
    ArgChunk : Set
    ArgType  : ArgChunk → Set
    lexArg   : Char → Maybe ArgChunk
 
module _ where
  open FormatSpec
 
  -- Left-biased union of format specs.
  unionSpec : FormatSpec → FormatSpec → FormatSpec
  unionSpec spec₁ spec₂ .ArgChunk  = spec₁ .ArgChunk ⊎ spec₂ .ArgChunk
  unionSpec spec₁ spec₂ .ArgType (inj₁ a) = spec₁ .ArgType a
  unionSpec spec₁ spec₂ .ArgType (inj₂ a) = spec₂ .ArgType a
  unionSpec spec₁ spec₂ .lexArg  c =
    Maybe.map inj₁ (spec₁ .lexArg c) <∣>
    Maybe.map inj₂ (spec₂ .lexArg c)
 
module Format (spec : FormatSpec) where
 
  open FormatSpec spec
 
  ----------------------------------------------------------------------
  -- Basic types
 
  data Chunk : Set where
    Arg : ArgChunk → Chunk
    Raw : String → Chunk
 
  Format : Set
  Format = List Chunk
 
  ----------------------------------------------------------------------
  -- Semantics
 
  size : Format → ℕ
  size = List.sum ∘′ List.map λ { (Raw _) → 0; _ → 1 }
 
  -- Meaning of a format as a list of value types
 
  ⟦_⟧ : (fmt : Format) → Sets (size fmt) 0ℓs
  ⟦ []          ⟧ = _
  ⟦ Arg a  ∷ cs ⟧ = ArgType a , ⟦ cs ⟧
  ⟦ Raw _  ∷ cs ⟧ =             ⟦ cs ⟧
 
  ----------------------------------------------------------------------
  -- Lexer: from Strings to Formats
 
  -- Lexing may fail. To have a useful error message, we defined the
  -- following enumerated type
 
  data Error : Set where
    UnexpectedEndOfString : String → Error
    -- ^ expected a type declaration; found an empty string
    InvalidType : String → Char → String → Error
    -- ^ invalid type declaration
    --   return a focus: prefix processed, character causing failure, rest
 
  lexer : String → Error ⊎ List Chunk
  lexer input = loop [] [] (toList input) where
 
    open RawApplicative (Sumₗ.applicative Error 0ℓ)
 
    -- Type synonyms used locally to document the code
    RevWord = List Char -- Mere characters accumulated so far
    Prefix  = RevWord   -- Prefix of the input String already read
 
    toRevString : RevWord → String
    toRevString = fromList ∘′ reverse
 
    -- Push a Raw token if we have accumulated some mere characters
    push : RevWord → List Chunk → List Chunk
    push [] ks = ks
    push cs ks = Raw (toRevString cs) ∷ ks
 
    -- Main loop
    loop : RevWord → Prefix → List Char → Error ⊎ List Chunk
    type :           Prefix → List Char → Error ⊎ List Chunk
 
    loop acc bef []               = pure (push acc [])
    -- escaped '%' character: treat like a mere character
    loop acc bef ('%' ∷ '%' ∷ cs) = loop ('%' ∷ acc) ('%' ∷ '%' ∷ bef) cs
    -- non escaped '%': type declaration following
    loop acc bef ('%' ∷ cs)       = push acc <$> type ('%' ∷ bef) cs
    -- mere character: push onto the accumulator
    loop acc bef (c ∷ cs)         = loop (c ∷ acc) (c ∷ bef) cs
 
    type bef []       = inj₁ (UnexpectedEndOfString input)
    type bef (c ∷ cs) = _∷_ <$> chunk c ⊛ loop [] (c ∷ bef) cs where
 
      chunk : Char → Error ⊎ Chunk
      chunk c =
        case lexArg c of λ where
          (just ch) → pure (Arg ch)
          nothing   →
            force′ (toRevString bef) λ prefix →
            force′ (fromList cs)     λ suffix →
            inj₁ (InvalidType prefix c suffix)