{-|
Module: IHP.ServerSideComponent.HtmlParser
Copyright: (c) digitally induced GmbH, 2021
Description: Used by serverside DOM diff
-}
module IHP.ServerSideComponent.HtmlParser
( parseHtml
, Node (..)
, Attribute (..)
) where

import CorePrelude
import Text.Megaparsec
import Text.Megaparsec.Char
import Data.Void
import qualified Data.Char as Char
import Data.String.Conversions

data Attribute = Attribute
    { Attribute -> Text
attributeName :: !Text
    , Attribute -> Text
attributeValue :: !Text
    } deriving (Attribute -> Attribute -> Bool
(Attribute -> Attribute -> Bool)
-> (Attribute -> Attribute -> Bool) -> Eq Attribute
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Attribute -> Attribute -> Bool
$c/= :: Attribute -> Attribute -> Bool
== :: Attribute -> Attribute -> Bool
$c== :: Attribute -> Attribute -> Bool
Eq, Int -> Attribute -> ShowS
[Attribute] -> ShowS
Attribute -> String
(Int -> Attribute -> ShowS)
-> (Attribute -> String)
-> ([Attribute] -> ShowS)
-> Show Attribute
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Attribute] -> ShowS
$cshowList :: [Attribute] -> ShowS
show :: Attribute -> String
$cshow :: Attribute -> String
showsPrec :: Int -> Attribute -> ShowS
$cshowsPrec :: Int -> Attribute -> ShowS
Show)

data Node = Node { Node -> Text
tagName :: !Text, Node -> [Attribute]
attributes :: ![Attribute], Node -> [Node]
children :: ![Node], Node -> Int
startOffset :: Int, Node -> Int
endOffset :: Int }
    | TextNode { Node -> Text
textContent :: !Text } -- ^ Note: doesn't unescape chars like <
    | PreEscapedTextNode { textContent :: !Text } -- ^ Used in @script@ or @style@ bodies
    | Children { children :: ![Node] }
    | CommentNode { Node -> Text
comment :: !Text }
    deriving (Node -> Node -> Bool
(Node -> Node -> Bool) -> (Node -> Node -> Bool) -> Eq Node
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Node -> Node -> Bool
$c/= :: Node -> Node -> Bool
== :: Node -> Node -> Bool
$c== :: Node -> Node -> Bool
Eq, Int -> Node -> ShowS
[Node] -> ShowS
Node -> String
(Int -> Node -> ShowS)
-> (Node -> String) -> ([Node] -> ShowS) -> Show Node
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Node] -> ShowS
$cshowList :: [Node] -> ShowS
show :: Node -> String
$cshow :: Node -> String
showsPrec :: Int -> Node -> ShowS
$cshowsPrec :: Int -> Node -> ShowS
Show)

parseHtml :: Text -> Either (ParseErrorBundle Text Void) Node
parseHtml :: Text -> Either (ParseErrorBundle Text Void) Node
parseHtml Text
code = Parsec Void Text Node
-> String -> Text -> Either (ParseErrorBundle Text Void) Node
forall e s a.
Parsec e s a -> String -> s -> Either (ParseErrorBundle s e) a
runParser (Parsec Void Text Node
parser) String
"" Text
code

type Parser = Parsec Void Text

parser :: Parser Node
parser :: Parsec Void Text Node
parser = do
    ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
    Node
node <- Parsec Void Text Node
parseChildren Parsec Void Text Node
-> Parsec Void Text Node -> Parsec Void Text Node
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parsec Void Text Node
parseElement
    ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
    ParsecT Void Text Identity ()
forall e s (m :: * -> *). MonadParsec e s m => m ()
eof
    Node -> Parsec Void Text Node
forall (f :: * -> *) a. Applicative f => a -> f a
pure Node
node

parseElement :: Parsec Void Text Node
parseElement = Parsec Void Text Node -> Parsec Void Text Node
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try Parsec Void Text Node
parseComment Parsec Void Text Node
-> Parsec Void Text Node -> Parsec Void Text Node
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parsec Void Text Node -> Parsec Void Text Node
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try Parsec Void Text Node
parseNormalElement Parsec Void Text Node
-> Parsec Void Text Node -> Parsec Void Text Node
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parsec Void Text Node -> Parsec Void Text Node
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try Parsec Void Text Node
parseSelfClosingElement

parseChildren :: Parsec Void Text Node
parseChildren = [Node] -> Node
Children ([Node] -> Node)
-> ParsecT Void Text Identity [Node] -> Parsec Void Text Node
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parsec Void Text Node -> ParsecT Void Text Identity [Node]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many Parsec Void Text Node
parseChild

parseSelfClosingElement :: Parsec Void Text Node
parseSelfClosingElement = do
    Int
startOffset <- ParsecT Void Text Identity Int
forall e s (m :: * -> *). MonadParsec e s m => m Int
getOffset
    Char
_ <- Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'<'
    Text
name <- Parser Text
parseElementName
    [Attribute]
attributes <- Parser Text -> Parser [Attribute]
forall a. Parser a -> Parser [Attribute]
parseNodeAttributes (Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
">" Parser Text -> Parser Text -> Parser Text
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"/>")
    Int
endOffset <- ParsecT Void Text Identity Int
forall e s (m :: * -> *). MonadParsec e s m => m Int
getOffset
    ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
    Node -> Parsec Void Text Node
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text -> [Attribute] -> [Node] -> Int -> Int -> Node
Node Text
name [Attribute]
attributes [] Int
startOffset Int
endOffset)

parseNormalElement :: Parsec Void Text Node
parseNormalElement = do
    Int
startOffset <- ParsecT Void Text Identity Int
forall e s (m :: * -> *). MonadParsec e s m => m Int
getOffset
    (Text
name, [Attribute]
attributes) <- ParsecT Void Text Identity (Text, [Attribute])
parseOpeningElement
    let parsePreEscapedTextChildren :: ParsecT Void Text Identity [Node]
parsePreEscapedTextChildren = do
                    let closingElement :: Text
closingElement = Text
"</" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
name Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
">"
                    Text
text <- String -> Text
forall a b. ConvertibleStrings a b => a -> b
cs (String -> Text)
-> ParsecT Void Text Identity String -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Void Text Identity Char
-> Parser Text -> ParsecT Void Text Identity String
forall (m :: * -> *) a end. MonadPlus m => m a -> m end -> m [a]
manyTill ParsecT Void Text Identity Char
forall e s (m :: * -> *). MonadParsec e s m => m (Token s)
anySingle (Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Text
Tokens Text
closingElement)
                    [Node] -> ParsecT Void Text Identity [Node]
forall (f :: * -> *) a. Applicative f => a -> f a
pure [Text -> Node
PreEscapedTextNode Text
text]
    let parseNormalChildren :: ParsecT Void Text Identity [Node]
parseNormalChildren = (ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space ParsecT Void Text Identity ()
-> ParsecT Void Text Identity [Node]
-> ParsecT Void Text Identity [Node]
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Parsec Void Text Node
-> ParsecT Void Text Identity ()
-> ParsecT Void Text Identity [Node]
forall (m :: * -> *) a end. MonadPlus m => m a -> m end -> m [a]
manyTill (Parsec Void Text Node -> Parsec Void Text Node
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try Parsec Void Text Node
parseChild) (ParsecT Void Text Identity () -> ParsecT Void Text Identity ()
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try (ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space ParsecT Void Text Identity ()
-> ParsecT Void Text Identity () -> ParsecT Void Text Identity ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Tokens Text -> ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, IsString (Tokens s), Semigroup (Tokens s),
 Token s ~ Char) =>
Tokens s -> m ()
parseClosingElement Text
Tokens Text
name))))

    [Node]
children <- case Text
name of
            Text
"script" -> ParsecT Void Text Identity [Node]
parsePreEscapedTextChildren
            Text
"style" -> ParsecT Void Text Identity [Node]
parsePreEscapedTextChildren
            Text
otherwise -> ParsecT Void Text Identity [Node]
parseNormalChildren
    Int
endOffset <- ParsecT Void Text Identity Int
forall e s (m :: * -> *). MonadParsec e s m => m Int
getOffset
    Node -> Parsec Void Text Node
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text -> [Attribute] -> [Node] -> Int -> Int -> Node
Node Text
name [Attribute]
attributes [Node]
children Int
startOffset Int
endOffset)

parseOpeningElement :: ParsecT Void Text Identity (Text, [Attribute])
parseOpeningElement = do
    Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'<'
    Text
name <- Parser Text
parseElementName
    ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
    [Attribute]
attributes <- ParsecT Void Text Identity Char -> Parser [Attribute]
forall a. Parser a -> Parser [Attribute]
parseNodeAttributes (Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'>')
    (Text, [Attribute])
-> ParsecT Void Text Identity (Text, [Attribute])
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text
name, [Attribute]
attributes)

parseComment :: Parser Node
parseComment :: Parsec Void Text Node
parseComment = do
    Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"<!--"
    String
body :: String <- ParsecT Void Text Identity Char
-> Parser Text -> ParsecT Void Text Identity String
forall (m :: * -> *) a end. MonadPlus m => m a -> m end -> m [a]
manyTill ((Token Text -> Bool) -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy (Bool -> Char -> Bool
forall a b. a -> b -> a
const Bool
True)) (Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"-->")
    ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
    Node -> Parsec Void Text Node
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text -> Node
CommentNode (String -> Text
forall a b. ConvertibleStrings a b => a -> b
cs String
body))


parseNodeAttributes :: Parser a -> Parser [Attribute]
parseNodeAttributes :: Parser a -> Parser [Attribute]
parseNodeAttributes Parser a
end = ParsecT Void Text Identity Attribute
-> Parser a -> Parser [Attribute]
forall (m :: * -> *) a end. MonadPlus m => m a -> m end -> m [a]
manyTill ParsecT Void Text Identity Attribute
parseNodeAttribute Parser a
end 

parseNodeAttribute :: ParsecT Void Text Identity Attribute
parseNodeAttribute = do
    Text
attributeName <- Parser Text
parseAttributeName
    ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space

    let parseAttributeValue :: Parser Text
parseAttributeValue = do
            Char
_ <- Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'='
            ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
            Text
attributeValue <- Parser Text
parseQuotedValue
            ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
            Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
attributeValue

    Text
attributeValue <- Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
"" (Maybe Text -> Text)
-> ParsecT Void Text Identity (Maybe Text) -> Parser Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text -> ParsecT Void Text Identity (Maybe Text)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Text
parseAttributeValue
    Attribute -> ParsecT Void Text Identity Attribute
forall (f :: * -> *) a. Applicative f => a -> f a
pure Attribute :: Text -> Text -> Attribute
Attribute { Text
attributeName :: Text
$sel:attributeName:Attribute :: Text
attributeName, Text
attributeValue :: Text
$sel:attributeValue:Attribute :: Text
attributeValue }

parseAttributeName :: Parser Text
parseAttributeName :: Parser Text
parseAttributeName = Maybe String
-> (Token Text -> Bool) -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P Maybe String
forall a. Maybe a
Nothing (\Token Text
c -> Char -> Bool
Char.isAlphaNum Char
Token Text
c Bool -> Bool -> Bool
|| Char
Token Text
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-')

parseQuotedValue :: Parser Text
parseQuotedValue :: Parser Text
parseQuotedValue = ParsecT Void Text Identity Char
-> ParsecT Void Text Identity Char -> Parser Text -> Parser Text
forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
between (Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'"') (Token Text -> ParsecT Void Text Identity (Token Text)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
Token Text
'"') (Maybe String
-> (Token Text -> Bool) -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhileP Maybe String
forall a. Maybe a
Nothing (\Token Text
c -> Char
Token Text
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\"'))

parseClosingElement :: Tokens s -> m ()
parseClosingElement Tokens s
name = do
    Tokens s
_ <- Tokens s -> m (Tokens s)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string (Tokens s
"</" Tokens s -> Tokens s -> Tokens s
forall a. Semigroup a => a -> a -> a
<> Tokens s
name)
    m ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
    Token s -> m (Token s)
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char (Char
Token s
'>')
    () -> m ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()

parseChild :: Parsec Void Text Node
parseChild = Parsec Void Text Node
parseElement Parsec Void Text Node
-> Parsec Void Text Node -> Parsec Void Text Node
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parsec Void Text Node -> Parsec Void Text Node
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try (Parsec Void Text Node
parseElement) Parsec Void Text Node
-> Parsec Void Text Node -> Parsec Void Text Node
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parsec Void Text Node
parseText

parseText :: Parser Node
parseText :: Parsec Void Text Node
parseText = Text -> Node
TextNode (Text -> Node) -> Parser Text -> Parsec Void Text Node
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe String
-> (Token Text -> Bool) -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P (String -> Maybe String
forall a. a -> Maybe a
Just String
"text") (\Token Text
c -> Char
Token Text
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'<' Bool -> Bool -> Bool
&& Char
Token Text
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'>')

parseElementName :: Parser Text
parseElementName :: Parser Text
parseElementName = do
    Text
name <- Maybe String
-> (Token Text -> Bool) -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P (String -> Maybe String
forall a. a -> Maybe a
Just String
"identifier") (\Token Text
c -> Char -> Bool
Char.isAlphaNum Char
Token Text
c Bool -> Bool -> Bool
|| Char
Token Text
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_' Bool -> Bool -> Bool
|| Char
Token Text
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'-')
    ParsecT Void Text Identity ()
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
    Text -> Parser Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
name