{-|
Module: IHP.ServerSideComponent.HtmlParser
Copyright: (c) digitally induced GmbH, 2021
Description: Used by serverside DOM diff
-}
module IHP.ServerSideComponent.HtmlParser
( parseHtml
, Node (..)
, Attribute (..)
) where

import CorePrelude
import Text.Megaparsec
import Text.Megaparsec.Char
import Data.Void
import qualified Data.Char as Char
import Data.String.Conversions

data Attribute = Attribute
    { Attribute -> Text
attributeName :: !Text
    , Attribute -> Text
attributeValue :: !Text
    } deriving (Attribute -> Attribute -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Attribute -> Attribute -> Bool
$c/= :: Attribute -> Attribute -> Bool
== :: Attribute -> Attribute -> Bool
$c== :: Attribute -> Attribute -> Bool
Eq, Int -> Attribute -> ShowS
[Attribute] -> ShowS
Attribute -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Attribute] -> ShowS
$cshowList :: [Attribute] -> ShowS
show :: Attribute -> String
$cshow :: Attribute -> String
showsPrec :: Int -> Attribute -> ShowS
$cshowsPrec :: Int -> Attribute -> ShowS
Show)

data Node = Node { Node -> Text
tagName :: !Text, Node -> [Attribute]
attributes :: ![Attribute], Node -> [Node]
children :: ![Node], Node -> Int
startOffset :: Int, Node -> Int
endOffset :: Int }
    | TextNode { Node -> Text
textContent :: !Text } -- ^ Note: doesn't unescape chars like <
    | PreEscapedTextNode { textContent :: !Text } -- ^ Used in @script@ or @style@ bodies
    | Children { children :: ![Node] }
    | CommentNode { Node -> Text
comment :: !Text }
    deriving (Node -> Node -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Node -> Node -> Bool
$c/= :: Node -> Node -> Bool
== :: Node -> Node -> Bool
$c== :: Node -> Node -> Bool
Eq, Int -> Node -> ShowS
[Node] -> ShowS
Node -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Node] -> ShowS
$cshowList :: [Node] -> ShowS
show :: Node -> String
$cshow :: Node -> String
showsPrec :: Int -> Node -> ShowS
$cshowsPrec :: Int -> Node -> ShowS
Show)

parseHtml :: Text -> Either (ParseErrorBundle Text Void) Node
parseHtml :: Text -> Either (ParseErrorBundle Text Void) Node
parseHtml Text
code = forall e s a.
Parsec e s a -> String -> s -> Either (ParseErrorBundle s e) a
runParser (Parser Node
parser) String
"" Text
code

type Parser = Parsec Void Text

parser :: Parser Node
parser :: Parser Node
parser = do
    forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
    Node
node <- Parser Node
parseChildren forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Node
parseElement
    forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
    forall e s (m :: * -> *). MonadParsec e s m => m ()
eof
    forall (f :: * -> *) a. Applicative f => a -> f a
pure Node
node

parseElement :: Parser Node
parseElement = forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try Parser Node
parseComment forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try Parser Node
parseNormalElement forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try Parser Node
parseSelfClosingElement

parseChildren :: Parser Node
parseChildren = [Node] -> Node
Children forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many Parser Node
parseChild

parseSelfClosingElement :: Parser Node
parseSelfClosingElement = do
    Int
startOffset <- forall e s (m :: * -> *). MonadParsec e s m => m Int
getOffset
    Token Text
_ <- forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'<'
    Text
name <- Parser Text
parseElementName
    [Attribute]
attributes <- forall a. Parser a -> Parser [Attribute]
parseNodeAttributes (forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
">" forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"/>")
    Int
endOffset <- forall e s (m :: * -> *). MonadParsec e s m => m Int
getOffset
    forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
    forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text -> [Attribute] -> [Node] -> Int -> Int -> Node
Node Text
name [Attribute]
attributes [] Int
startOffset Int
endOffset)

parseNormalElement :: Parser Node
parseNormalElement = do
    Int
startOffset <- forall e s (m :: * -> *). MonadParsec e s m => m Int
getOffset
    (Text
name, [Attribute]
attributes) <- ParsecT Void Text Identity (Text, [Attribute])
parseOpeningElement
    let parsePreEscapedTextChildren :: ParsecT Void Text Identity [Node]
parsePreEscapedTextChildren = do
                    let closingElement :: Text
closingElement = Text
"</" forall a. Semigroup a => a -> a -> a
<> Text
name forall a. Semigroup a => a -> a -> a
<> Text
">"
                    Text
text <- forall a b. ConvertibleStrings a b => a -> b
cs forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a end. MonadPlus m => m a -> m end -> m [a]
manyTill forall e s (m :: * -> *). MonadParsec e s m => m (Token s)
anySingle (forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Text
closingElement)
                    forall (f :: * -> *) a. Applicative f => a -> f a
pure [Text -> Node
PreEscapedTextNode Text
text]
    let parseNormalChildren :: ParsecT Void Text Identity [Node]
parseNormalChildren = (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (forall (m :: * -> *) a end. MonadPlus m => m a -> m end -> m [a]
manyTill (forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try Parser Node
parseChild) (forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall {s} {m :: * -> *} {e}.
(Token s ~ Char, MonadParsec e s m, Semigroup (Tokens s),
 IsString (Tokens s)) =>
Tokens s -> m ()
parseClosingElement Text
name))))

    [Node]
children <- case Text
name of
            Text
"script" -> ParsecT Void Text Identity [Node]
parsePreEscapedTextChildren
            Text
"style" -> ParsecT Void Text Identity [Node]
parsePreEscapedTextChildren
            Text
otherwise -> ParsecT Void Text Identity [Node]
parseNormalChildren
    Int
endOffset <- forall e s (m :: * -> *). MonadParsec e s m => m Int
getOffset
    forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text -> [Attribute] -> [Node] -> Int -> Int -> Node
Node Text
name [Attribute]
attributes [Node]
children Int
startOffset Int
endOffset)

parseOpeningElement :: ParsecT Void Text Identity (Text, [Attribute])
parseOpeningElement = do
    forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'<'
    Text
name <- Parser Text
parseElementName
    forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
    [Attribute]
attributes <- forall a. Parser a -> Parser [Attribute]
parseNodeAttributes (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'>')
    forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text
name, [Attribute]
attributes)

parseComment :: Parser Node
parseComment :: Parser Node
parseComment = do
    forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"<!--"
    String
body :: String <- forall (m :: * -> *) a end. MonadPlus m => m a -> m end -> m [a]
manyTill (forall e s (m :: * -> *).
MonadParsec e s m =>
(Token s -> Bool) -> m (Token s)
satisfy (forall a b. a -> b -> a
const Bool
True)) (forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string Tokens Text
"-->")
    forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
    forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text -> Node
CommentNode (forall a b. ConvertibleStrings a b => a -> b
cs String
body))


parseNodeAttributes :: Parser a -> Parser [Attribute]
parseNodeAttributes :: forall a. Parser a -> Parser [Attribute]
parseNodeAttributes Parser a
end = forall (m :: * -> *) a end. MonadPlus m => m a -> m end -> m [a]
manyTill ParsecT Void Text Identity Attribute
parseNodeAttribute Parser a
end 

parseNodeAttribute :: ParsecT Void Text Identity Attribute
parseNodeAttribute = do
    Text
attributeName <- Parser Text
parseAttributeName
    forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space

    let parseAttributeValue :: Parser Text
parseAttributeValue = do
            Token Text
_ <- forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'='
            forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
            Text
attributeValue <- Parser Text
parseQuotedValue
            forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
            forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
attributeValue

    Text
attributeValue <- forall a. a -> Maybe a -> a
fromMaybe Text
"" forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional Parser Text
parseAttributeValue
    forall (f :: * -> *) a. Applicative f => a -> f a
pure Attribute { Text
attributeName :: Text
$sel:attributeName:Attribute :: Text
attributeName, Text
attributeValue :: Text
$sel:attributeValue:Attribute :: Text
attributeValue }

parseAttributeName :: Parser Text
parseAttributeName :: Parser Text
parseAttributeName = forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P forall a. Maybe a
Nothing (\Token Text
c -> Char -> Bool
Char.isAlphaNum Token Text
c Bool -> Bool -> Bool
|| Token Text
c forall a. Eq a => a -> a -> Bool
== Char
'-')

parseQuotedValue :: Parser Text
parseQuotedValue :: Parser Text
parseQuotedValue = forall (m :: * -> *) open close a.
Applicative m =>
m open -> m close -> m a -> m a
between (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'"') (forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char Char
'"') (forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhileP forall a. Maybe a
Nothing (\Token Text
c -> Token Text
c forall a. Eq a => a -> a -> Bool
/= Char
'\"'))

parseClosingElement :: Tokens s -> m ()
parseClosingElement Tokens s
name = do
    Tokens s
_ <- forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
string (Tokens s
"</" forall a. Semigroup a => a -> a -> a
<> Tokens s
name)
    forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
    forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
Token s -> m (Token s)
char (Char
'>')
    forall (f :: * -> *) a. Applicative f => a -> f a
pure ()

parseChild :: Parser Node
parseChild = Parser Node
parseElement forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
try (Parser Node
parseElement) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Node
parseText

parseText :: Parser Node
parseText :: Parser Node
parseText = Text -> Node
TextNode forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P (forall a. a -> Maybe a
Just String
"text") (\Token Text
c -> Token Text
c forall a. Eq a => a -> a -> Bool
/= Char
'<' Bool -> Bool -> Bool
&& Token Text
c forall a. Eq a => a -> a -> Bool
/= Char
'>')

parseElementName :: Parser Text
parseElementName :: Parser Text
parseElementName = do
    Text
name <- forall e s (m :: * -> *).
MonadParsec e s m =>
Maybe String -> (Token s -> Bool) -> m (Tokens s)
takeWhile1P (forall a. a -> Maybe a
Just String
"identifier") (\Token Text
c -> Char -> Bool
Char.isAlphaNum Token Text
c Bool -> Bool -> Bool
|| Token Text
c forall a. Eq a => a -> a -> Bool
== Char
'_' Bool -> Bool -> Bool
|| Token Text
c forall a. Eq a => a -> a -> Bool
== Char
'-')
    forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m ()
space
    forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
name