Parse post body using DOM
- add module for parsing post body - creates and element and sets the innerhtml of it to a string - then iterates over the nodes that were created to get them into a data structure - re-use data-structure from bunkerchan-upload script - parsing logic is similar but uses ghcjs-dom API instead of HXT
This commit is contained in:
parent
388c9a5b28
commit
3b1d130f41
|
@ -77,6 +77,7 @@ executable chandlr
|
|||
Component.Thread.Intro
|
||||
Routes
|
||||
Common.AttachmentType
|
||||
BodyParser
|
||||
|
||||
|
||||
-- LANGUAGE extensions used by modules in this package.
|
||||
|
|
|
@ -0,0 +1,151 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE PackageImports #-}
|
||||
{-# LANGUAGE ScopedTypeVariables #-}
|
||||
{-# LANGUAGE MultiWayIf #-}
|
||||
|
||||
module BodyParser
|
||||
( PostPart (..)
|
||||
, parsePostBody
|
||||
) where
|
||||
|
||||
import Data.Maybe (catMaybes)
|
||||
import GHCJS.DOM (currentDocument)
|
||||
import GHCJS.DOM.Types
|
||||
( Element (..)
|
||||
, JSString
|
||||
, NodeList
|
||||
, uncheckedCastTo
|
||||
)
|
||||
import GHCJS.DOM.JSFFI.Generated.Document
|
||||
import GHCJS.DOM.JSFFI.Generated.Element
|
||||
import GHCJS.DOM.JSFFI.Generated.Node hiding (contains)
|
||||
import qualified GHCJS.DOM.JSFFI.Generated.NodeList as NodeList
|
||||
import GHCJS.DOM.JSFFI.Generated.DOMTokenList (contains)
|
||||
import Data.Text (Text)
|
||||
|
||||
|
||||
data PostPart
|
||||
= SimpleText JSString
|
||||
| PostedUrl JSString
|
||||
| Skip
|
||||
| Quote JSString
|
||||
-- Quotes don't seem to be able to be spoilered
|
||||
-- board links (which appear as quotes but start with >>>) break the tag
|
||||
| GreenText [ PostPart ]
|
||||
| OrangeText [ PostPart ]
|
||||
| RedText [ PostPart ]
|
||||
| Spoiler [ PostPart ]
|
||||
-- you can't seem to spoiler greentext
|
||||
| Bold [ PostPart ]
|
||||
| Underlined [ PostPart ]
|
||||
| Italics [ PostPart ]
|
||||
| Strikethrough [ PostPart ]
|
||||
deriving Show
|
||||
|
||||
|
||||
nodeListToList :: NodeList -> IO [ Node ]
|
||||
nodeListToList l = do
|
||||
len <- NodeList.getLength l
|
||||
nodes <- mapM (NodeList.item l) [0..len-1]
|
||||
return $ catMaybes nodes
|
||||
|
||||
|
||||
-- | Parse the HTML string and add event handlers to certain elements
|
||||
parsePostBody :: Text -> IO [ PostPart ]
|
||||
parsePostBody htmlString = do
|
||||
Just doc <- currentDocument
|
||||
container <- createElement doc ("div" :: Text)
|
||||
|
||||
-- Set the innerHTML of the container to the HTML string
|
||||
setInnerHTML container htmlString
|
||||
|
||||
-- Iterate over the newly created elements in the container
|
||||
children <- getChildNodes container
|
||||
|
||||
parseNodeList children
|
||||
|
||||
|
||||
toPostPart :: Node -> IO PostPart
|
||||
toPostPart node = do
|
||||
node_type <- getNodeType node
|
||||
|
||||
toPostPart_ node_type node
|
||||
|
||||
|
||||
toPostPart_ :: Word -> Node -> IO PostPart
|
||||
toPostPart_ node_type node
|
||||
| node_type == TEXT_NODE =
|
||||
getTextContentUnchecked node >>= return . SimpleText
|
||||
| node_type == ELEMENT_NODE = do
|
||||
tagName :: JSString <- getTagName element
|
||||
|
||||
case tagName of
|
||||
"a" -> parseAnchor element
|
||||
"span" -> parseSpan element
|
||||
"em" -> parseEm element
|
||||
"strong" -> parseStrong element
|
||||
"u" -> parseU element
|
||||
"s" -> parseS element
|
||||
_ -> return $ SimpleText "Unsupported element"
|
||||
| otherwise = return Skip
|
||||
|
||||
where
|
||||
element = uncheckedCastTo Element node
|
||||
|
||||
|
||||
|
||||
parseAnchor :: Element -> IO PostPart
|
||||
parseAnchor element = do
|
||||
m_href :: Maybe JSString <- getAttribute element ("href" :: JSString)
|
||||
|
||||
case m_href of
|
||||
Nothing -> return $ SimpleText "Anchor without href"
|
||||
Just href -> do
|
||||
target <- getAttribute element ("target" :: JSString)
|
||||
|
||||
case target of
|
||||
Just ("_blank" :: JSString) -> return $ PostedUrl href
|
||||
_ -> return $ Quote href
|
||||
|
||||
|
||||
parseSpan :: Element -> IO PostPart
|
||||
parseSpan element = do
|
||||
classList <- getClassList element
|
||||
|
||||
quote <- contains classList ("quote" :: JSString)
|
||||
orangeQuote <- contains classList ("orangeQuote" :: JSString)
|
||||
heading <- contains classList ("heading" :: JSString)
|
||||
spoiler <- contains classList ("spoiler" :: JSString)
|
||||
|
||||
if | quote -> parseChildNodes element >>= return . GreenText
|
||||
| orangeQuote -> parseChildNodes element >>= return . OrangeText
|
||||
| heading -> parseChildNodes element >>= return . RedText
|
||||
| spoiler -> parseChildNodes element >>= return . Spoiler
|
||||
| otherwise -> return $ SimpleText "Unsupported span class"
|
||||
|
||||
|
||||
parseNodeList :: NodeList -> IO [PostPart]
|
||||
parseNodeList nodes = nodeListToList nodes >>= mapM toPostPart
|
||||
|
||||
parseChildNodes :: Element -> IO [PostPart]
|
||||
parseChildNodes element = getChildNodes element >>= parseNodeList
|
||||
|
||||
parseEm :: Element -> IO PostPart
|
||||
parseEm element
|
||||
= parseChildNodes element
|
||||
>>= return . Italics
|
||||
|
||||
parseStrong :: Element -> IO PostPart
|
||||
parseStrong element
|
||||
= parseChildNodes element
|
||||
>>= return . Bold
|
||||
|
||||
parseU :: Element -> IO PostPart
|
||||
parseU element
|
||||
= parseChildNodes element
|
||||
>>= return . Underlined
|
||||
|
||||
parseS :: Element -> IO PostPart
|
||||
parseS element
|
||||
= parseChildNodes element
|
||||
>>= return . Strikethrough
|
Loading…
Reference in New Issue