Attempt to use tagsoup but it doesn't let you traverse the doc

- It's a primitive lexical parser that gives a list of tag
  subcomponents, we'd need a parser on top of it to parse the body.
  But there's probably a lib that let's us work with html already.
This commit is contained in:
towards-a-new-leftypol 2024-03-27 22:56:23 -04:00
parent 644011dd23
commit b0bcc8f835
3 changed files with 38 additions and 7 deletions

View File

@ -49,6 +49,7 @@ import qualified Common.Component.CatalogGrid as Grid
import qualified Common.Component.TimeControl as TC
import Common.FrontEnd.Action (GetThreadArgs (..))
import qualified Common.Component.Thread.Model as Thread
import qualified Common.Component.ThreadView as Thread
import Common.Network.SiteType (Site)
data HtmlPage a = forall b. (ToJSON b) => HtmlPage (JSONSettings, b, a)
@ -174,11 +175,14 @@ threadView settings website board_pathpart board_thread_id = do
case thread_results of
Left err -> throwError $ err500 { errBody = fromString $ show err }
Right site -> pure $ render now $ head site
Right site -> do
let s = head site
posts_and_bodies <- liftIO $ Thread.getPostWithBodies s
pure $ render posts_and_bodies now s
where
render :: UTCTime -> Site -> HtmlPage (View FE.Action)
render t site =
render :: [ Thread.PostWithBody ] -> UTCTime -> Site -> HtmlPage (View FE.Action)
render posts_and_bodies t site =
HtmlPage
( settings
, site
@ -200,7 +204,7 @@ threadView settings website board_pathpart board_thread_id = do
thread_model = Thread.Model
{ Thread.site = site
, Thread.media_root = pack $ media_root settings
, Thread.post_bodies = []
, Thread.post_bodies = posts_and_bodies
, Thread.current_time = t
}

View File

@ -1,3 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
module Parsing.BodyParser
( PostPart (..)
, parsePostBody
@ -5,10 +7,34 @@ module Parsing.BodyParser
, Backlinks
) where
import Data.Text (Text)
import Data.Text (Text, unpack)
import Common.Parsing.PostPartType
import Common.Parsing.PostBodyUtils
import Common.Parsing.QuoteLinkParser
tagsToPostParts :: [ Tag Text ] -> [ PostPart ]
tagsToPostParts = concatMap tagToPostPart
tagToPostPart :: Tag Text -> [ PostPart ]
tagToPostPart (TagText t) = SimpleText t : []
tagToPostPart (TagOpen "a" attrs) = parseAnchor attrs : []
tagToPostPart (TagOpen "span" attrs) = parseSpan attrs : []
parseAnchor :: [ Attribute Text ] -> PostPart
parseAnchor attrs =
case lookup "href" attrs of
Nothing -> SimpleText "Anchor without href"
Just href ->
case lookup "target" attrs of
Just "_blank" -> PostedUrl href
_ -> Quote $ parseURL $ unpack href
parseSpan :: [ Attribute Text ] -> PostPart
parsePostBody :: Text -> IO [ PostPart ]
parsePostBody = undefined
parsePostBody html =
return $ tagsToPostParts $ parseTags html

View File

@ -122,7 +122,8 @@ executable chandlr-server
http-types,
cmdargs,
mtl,
utf8-string
utf8-string,
tagsoup
-- Directories containing source files.
hs-source-dirs: app