Attempt to use tagsoup but it doesn't let you traverse the doc
- It's a primitive lexical parser that gives a list of tag subcomponents, we'd need a parser on top of it to parse the body. But there's probably a lib that let's us work with html already.
This commit is contained in:
parent
644011dd23
commit
b0bcc8f835
12
app/Main.hs
12
app/Main.hs
|
@ -49,6 +49,7 @@ import qualified Common.Component.CatalogGrid as Grid
|
||||||
import qualified Common.Component.TimeControl as TC
|
import qualified Common.Component.TimeControl as TC
|
||||||
import Common.FrontEnd.Action (GetThreadArgs (..))
|
import Common.FrontEnd.Action (GetThreadArgs (..))
|
||||||
import qualified Common.Component.Thread.Model as Thread
|
import qualified Common.Component.Thread.Model as Thread
|
||||||
|
import qualified Common.Component.ThreadView as Thread
|
||||||
import Common.Network.SiteType (Site)
|
import Common.Network.SiteType (Site)
|
||||||
|
|
||||||
data HtmlPage a = forall b. (ToJSON b) => HtmlPage (JSONSettings, b, a)
|
data HtmlPage a = forall b. (ToJSON b) => HtmlPage (JSONSettings, b, a)
|
||||||
|
@ -174,11 +175,14 @@ threadView settings website board_pathpart board_thread_id = do
|
||||||
|
|
||||||
case thread_results of
|
case thread_results of
|
||||||
Left err -> throwError $ err500 { errBody = fromString $ show err }
|
Left err -> throwError $ err500 { errBody = fromString $ show err }
|
||||||
Right site -> pure $ render now $ head site
|
Right site -> do
|
||||||
|
let s = head site
|
||||||
|
posts_and_bodies <- liftIO $ Thread.getPostWithBodies s
|
||||||
|
pure $ render posts_and_bodies now s
|
||||||
|
|
||||||
where
|
where
|
||||||
render :: UTCTime -> Site -> HtmlPage (View FE.Action)
|
render :: [ Thread.PostWithBody ] -> UTCTime -> Site -> HtmlPage (View FE.Action)
|
||||||
render t site =
|
render posts_and_bodies t site =
|
||||||
HtmlPage
|
HtmlPage
|
||||||
( settings
|
( settings
|
||||||
, site
|
, site
|
||||||
|
@ -200,7 +204,7 @@ threadView settings website board_pathpart board_thread_id = do
|
||||||
thread_model = Thread.Model
|
thread_model = Thread.Model
|
||||||
{ Thread.site = site
|
{ Thread.site = site
|
||||||
, Thread.media_root = pack $ media_root settings
|
, Thread.media_root = pack $ media_root settings
|
||||||
, Thread.post_bodies = []
|
, Thread.post_bodies = posts_and_bodies
|
||||||
, Thread.current_time = t
|
, Thread.current_time = t
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
module Parsing.BodyParser
|
module Parsing.BodyParser
|
||||||
( PostPart (..)
|
( PostPart (..)
|
||||||
, parsePostBody
|
, parsePostBody
|
||||||
|
@ -5,10 +7,34 @@ module Parsing.BodyParser
|
||||||
, Backlinks
|
, Backlinks
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Data.Text (Text)
|
import Data.Text (Text, unpack)
|
||||||
|
|
||||||
import Common.Parsing.PostPartType
|
import Common.Parsing.PostPartType
|
||||||
import Common.Parsing.PostBodyUtils
|
import Common.Parsing.PostBodyUtils
|
||||||
|
import Common.Parsing.QuoteLinkParser
|
||||||
|
|
||||||
|
tagsToPostParts :: [ Tag Text ] -> [ PostPart ]
|
||||||
|
tagsToPostParts = concatMap tagToPostPart
|
||||||
|
|
||||||
|
|
||||||
|
tagToPostPart :: Tag Text -> [ PostPart ]
|
||||||
|
tagToPostPart (TagText t) = SimpleText t : []
|
||||||
|
tagToPostPart (TagOpen "a" attrs) = parseAnchor attrs : []
|
||||||
|
tagToPostPart (TagOpen "span" attrs) = parseSpan attrs : []
|
||||||
|
|
||||||
|
|
||||||
|
parseAnchor :: [ Attribute Text ] -> PostPart
|
||||||
|
parseAnchor attrs =
|
||||||
|
case lookup "href" attrs of
|
||||||
|
Nothing -> SimpleText "Anchor without href"
|
||||||
|
Just href ->
|
||||||
|
case lookup "target" attrs of
|
||||||
|
Just "_blank" -> PostedUrl href
|
||||||
|
_ -> Quote $ parseURL $ unpack href
|
||||||
|
|
||||||
|
|
||||||
|
parseSpan :: [ Attribute Text ] -> PostPart
|
||||||
|
|
||||||
parsePostBody :: Text -> IO [ PostPart ]
|
parsePostBody :: Text -> IO [ PostPart ]
|
||||||
parsePostBody = undefined
|
parsePostBody html =
|
||||||
|
return $ tagsToPostParts $ parseTags html
|
||||||
|
|
|
@ -122,7 +122,8 @@ executable chandlr-server
|
||||||
http-types,
|
http-types,
|
||||||
cmdargs,
|
cmdargs,
|
||||||
mtl,
|
mtl,
|
||||||
utf8-string
|
utf8-string,
|
||||||
|
tagsoup
|
||||||
|
|
||||||
-- Directories containing source files.
|
-- Directories containing source files.
|
||||||
hs-source-dirs: app
|
hs-source-dirs: app
|
||||||
|
|
Loading…
Reference in New Issue