chandlr-server/app/Parsing/BodyParserTest.hs

88 lines
2.5 KiB
Haskell

{-# LANGUAGE OverloadedStrings #-}
module Parsing.BodyParser where
import Data.Text (Text)
import qualified Data.Text.IO as Text
import Text.HTML.Parser (parseTokens, canonicalizeTokens, Token (..), Attr(..))
import Text.HTML.Tree (tokensToForest)
import Data.Tree (Forest, Tree (..))
getAttr :: Text -> [ Attr ] -> Maybe Text
getAttr _ [] = Nothing
getAttr attrName (Attr x y:xs)
| x == attrName = Just y
| otherwise = getAttr attrName xs
parsePostBody :: Text -> IO ()
parsePostBody html =
case tokensToForest $ canonicalizeTokens $ parseTokens html of
Left err -> do
print err
Right forest -> forestToPostParts forest
forestToPostParts :: Forest Token -> IO ()
forestToPostParts = mapM_ treeToPostParts
treeToPostParts :: Tree Token -> IO ()
treeToPostParts Node { rootLabel = tok@(TagOpen "a" attrs) } =
do
putStrLn "Anchor Tag Open"
print attrs
print $ getAttr "href" attrs
treeToPostParts Node { rootLabel = tok@(TagSelfClose tagname attrs) } =
do
putStrLn "Tag Self-Close"
putStrLn $ "tagname: " ++ show tagname
putStrLn $ "attrs: " ++ show attrs
-- treeToPostParts Node { rootLabel = (ContentText txt) } = Text.putStrLn txt
treeToPostParts node@Node { rootLabel } =
do
putStrLn $ "Something Else: " ++ show rootLabel
-- Forest == [ Tree Token ]
--
-- data Tree a = Node {
-- rootLabel :: a, -- ^ label value
-- subForest :: [Tree a] -- ^ zero or more child trees
-- }
--
-- Tree a == Tree Token
--
-- data Tree Token = Node {
-- rootLabel :: Token, -- ^ label value
-- subForest :: [Tree Token] -- ^ zero or more child trees
-- }
--
-- and what is a Token? We might need to make a toy program with this
--
-- data Token
-- -- | An opening tag. Attribute ordering is arbitrary. Void elements have a 'TagOpen' but no corresponding 'TagClose'. See 'Text.HTML.Tree.nonClosing'.
-- = TagOpen !TagName [Attr]
-- -- | A self-closing tag.
-- | TagSelfClose !TagName [Attr]
-- -- | A closing tag.
-- | TagClose !TagName
-- -- | The content between tags.
-- | ContentText !Text
-- -- | A single character of content
-- | ContentChar !Char
-- -- | Contents of a comment.
-- | Comment !Builder
-- -- | Doctype
-- | Doctype !Text
-- deriving (Show, Ord, Eq, Generic)
-- Add your main method here
main :: IO ()
main = do
-- Read the contents of the file
fileContents <- Text.readFile "/home/phil/Documents/haskell/chandlr-miso/html/body.html"
parsePostBody fileContents