commit pre changing the get latest posts per board query

This commit is contained in:
towards-a-new-leftypol 2025-02-14 09:55:18 -05:00
parent d882251ce6
commit f4652eecbb
5 changed files with 87 additions and 32 deletions

View File

@ -286,9 +286,9 @@ SELECT DISTINCT ON (b.board_id)
t.thread_id, t.thread_id,
t.board_thread_id t.board_thread_id
FROM boards b FROM boards b
JOIN threads t ON t.board_id = b.board_id LEFT JOIN threads t ON t.board_id = b.board_id
JOIN posts p ON p.thread_id = t.thread_id LEFT JOIN posts p ON p.thread_id = t.thread_id AND p.is_missing_attachments = false
WHERE p.is_missing_attachments = false; ORDER BY b.board_id, p.creation_time DESC NULLS LAST;
CREATE OR REPLACE FUNCTION get_latest_posts_per_board() CREATE OR REPLACE FUNCTION get_latest_posts_per_board()

View File

@ -553,6 +553,21 @@ processFiles settings fgs tuples = do -- perfect just means that our posts have
in Map.insert pid (x : l) accMap in Map.insert pid (x : l) accMap
localIndexFoldf
:: ([Posts.Post], Map.Map Int64 Int)
-> (Threads.Thread, JSONPost.Post, Client.PostId)
-> ([Posts.Post], Map.Map Int64 Int)
localIndexFoldf (posts, idx_map) (t, p, c) =
case Map.lookup thread_id idx_map of
Nothing -> (post 1 : posts, Map.insert thread_id 1 idx_map)
Just i -> (post (i + 1) : posts, Map.insert thread_id (i + 1) idx_map)
where
post :: Int -> Posts.Post
post i = apiPostToArchivePost i t p
thread_id = Client.thread_id c
createNewPosts createNewPosts
:: J.JSONSettings :: J.JSONSettings
-> [ (Threads.Thread, JSONPost.Post, Client.PostId) ] -> [ (Threads.Thread, JSONPost.Post, Client.PostId) ]
@ -573,7 +588,7 @@ createNewPosts settings tuples = do
-- Map of thread_id to the largest local_idx value (which would be the number of the last post in the thread) -- Map of thread_id to the largest local_idx value (which would be the number of the last post in the thread)
let local_idx :: Map.Map Int64 Int = Map.fromList thread_max_local_idxs let local_idx :: Map.Map Int64 Int = Map.fromList thread_max_local_idxs
let insert_posts :: [ Posts.Post ] = fst $ foldl' foldFn ([], local_idx) to_insert_list let insert_posts :: [ Posts.Post ] = fst $ foldl' localIndexFoldf ([], local_idx) to_insert_list
-- posts to insert are the posts that are not in existing_posts -- posts to insert are the posts that are not in existing_posts
-- so we create a Set (thread_id, board_post_id) ✓ -- so we create a Set (thread_id, board_post_id) ✓
@ -598,21 +613,6 @@ createNewPosts settings tuples = do
newPosts :: [(Threads.Thread, JSONPost.Post, Client.PostId)] -> Set (Int64, Int64) -> [(Threads.Thread, JSONPost.Post, Client.PostId)] newPosts :: [(Threads.Thread, JSONPost.Post, Client.PostId)] -> Set (Int64, Int64) -> [(Threads.Thread, JSONPost.Post, Client.PostId)]
newPosts ts existing_set = filter (\(_, _, c) -> Set.notMember (Client.thread_id c, Client.board_post_id c) existing_set) ts newPosts ts existing_set = filter (\(_, _, c) -> Set.notMember (Client.thread_id c, Client.board_post_id c) existing_set) ts
foldFn
:: ([Posts.Post], Map.Map Int64 Int)
-> (Threads.Thread, JSONPost.Post, Client.PostId)
-> ([Posts.Post], Map.Map Int64 Int)
foldFn (posts, idx_map) (t, p, c) =
case Map.lookup thread_id idx_map of
Nothing -> (post 1 : posts, Map.insert thread_id 1 idx_map)
Just i -> (post (i + 1) : posts, Map.insert thread_id (i + 1) idx_map)
where
post :: Int -> Posts.Post
post i = apiPostToArchivePost i t p
thread_id = Client.thread_id c
data FileGetters = FileGetters data FileGetters = FileGetters
{ getJSONCatalog :: Sites.Site -> String -> IO (Either String [ Catalog ]) { getJSONCatalog :: Sites.Site -> String -> IO (Either String [ Catalog ])

View File

@ -8,7 +8,11 @@ module Lib2
import Control.Monad.Trans.Except (ExceptT (..)) import Control.Monad.Trans.Except (ExceptT (..))
import System.FilePath ((</>)) import System.FilePath ((</>))
import qualified Data.Set as Set import qualified Data.Set as Set
import qualified Data.Map as Map
import Data.Aeson (FromJSON) import Data.Aeson (FromJSON)
import Data.Int (Int64)
import Data.List (sortBy)
import Data.Ord (comparing)
import qualified Network.DataClient as Client import qualified Network.DataClient as Client
import qualified SitesType as Sites import qualified SitesType as Sites
@ -16,7 +20,8 @@ import qualified BoardsType as Boards
import Common.Network.HttpClient (HttpError) import Common.Network.HttpClient (HttpError)
import qualified JSONParsing as JSON import qualified JSONParsing as JSON
import qualified JSONPost import qualified JSONPost
import qualified ThreadType as Threads import qualified ThreadType as Thread
import qualified Common.PostsType as Posts
import Common.Server.JSONSettings (JSONSettings) import Common.Server.JSONSettings (JSONSettings)
import qualified Lib import qualified Lib
@ -25,7 +30,10 @@ data ProgramException = HttpException HttpError
deriving Show deriving Show
liftHttpIO :: IO (Either HttpError a) -> ExceptT ProgramException IO a type IOe a = ExceptT ProgramException IO a
liftHttpIO :: IO (Either HttpError a) -> IOe a
liftHttpIO = ExceptT . fmap (either (Left . HttpException) Right) liftHttpIO = ExceptT . fmap (either (Left . HttpException) Right)
@ -35,7 +43,7 @@ httpSiteGetRequest site path = Client.getJSON $ Sites.url site </> path
httpGetCatalogJSON httpGetCatalogJSON
:: Sites.Site :: Sites.Site
-> Boards.Board -> Boards.Board
-> ExceptT ProgramException IO [ JSON.Catalog ] -> IOe [ JSON.Catalog ]
httpGetCatalogJSON site board = liftHttpIO $ httpSiteGetRequest site path httpGetCatalogJSON site board = liftHttpIO $ httpSiteGetRequest site path
where where
path = Boards.pathpart board </> "catalog.json" path = Boards.pathpart board </> "catalog.json"
@ -44,8 +52,8 @@ httpGetCatalogJSON site board = liftHttpIO $ httpSiteGetRequest site path
httpGetPostsJSON httpGetPostsJSON
:: Sites.Site :: Sites.Site
-> Boards.Board -> Boards.Board
-> Threads.Thread -> Thread.Thread
-> ExceptT ProgramException IO (Threads.Thread, [ JSONPost.Post ]) -> IOe (Thread.Thread, [ JSONPost.Post ])
httpGetPostsJSON site board thread = httpGetPostsJSON site board thread =
liftHttpIO $ liftHttpIO $
fmap ((thread,) . JSONPost.posts) <$> httpSiteGetRequest site path fmap ((thread,) . JSONPost.posts) <$> httpSiteGetRequest site path
@ -53,14 +61,14 @@ httpGetPostsJSON site board thread =
where where
path = Boards.pathpart board path = Boards.pathpart board
</> "res" </> "res"
</> (show (Threads.board_thread_id thread) ++ ".json") </> (show (Thread.board_thread_id thread) ++ ".json")
saveNewThreads saveNewThreads
:: JSONSettings :: JSONSettings
-> Boards.Board -> Boards.Board
-> [ JSON.Thread ] -> [ JSON.Thread ]
-> ExceptT ProgramException IO [ Threads.Thread ] -> IOe [ Thread.Thread ]
saveNewThreads settings board web_threads = do saveNewThreads settings board web_threads = do
existing_threads <- liftHttpIO $ existing_threads <- liftHttpIO $
Client.getThreads Client.getThreads
@ -71,7 +79,7 @@ saveNewThreads settings board web_threads = do
let let
archived_board_thread_ids :: Set.Set Int archived_board_thread_ids :: Set.Set Int
archived_board_thread_ids = archived_board_thread_ids =
Set.fromList $ map Threads.board_thread_id existing_threads Set.fromList $ map Thread.board_thread_id existing_threads
threads_to_create :: [ JSON.Thread ] threads_to_create :: [ JSON.Thread ]
threads_to_create = threads_to_create =
@ -87,3 +95,41 @@ saveNewThreads settings board web_threads = do
(map (Lib.apiThreadToArchiveThread board_id) threads_to_create) (map (Lib.apiThreadToArchiveThread board_id) threads_to_create)
return $ existing_threads ++ new_threads return $ existing_threads ++ new_threads
saveNewPosts
:: JSONSettings
-> [ (Thread.Thread, [ JSONPost.Post ]) ]
-> IOe [ Posts.Post ]
saveNewPosts settings thread_posts = do
existing_posts <- liftHttpIO $ Client.getPosts settings post_ids
thread_max_local_idx <- liftHttpIO $ Client.getThreadMaxLocalIdx settings thread_ids
let existing_set :: Set.Set (Int64, Int64) =
Set.fromList
(map (\x -> (Posts.thread_id x, Posts.board_post_id x))
existing_posts)
let tuples_to_insert :: [ (Thread.Thread, JSONPost.Post, Client.PostId) ] =
sortBy (comparing $ \(_, _, p) -> Client.board_post_id p) $
newPosts post_tuples existing_set
let local_idx :: Map.Map Int64 Int = Map.fromList thread_max_local_idx
return undefined
where
flat_posts = concatMap (\(i, j) -> map (i,) j) thread_posts
post_tuples = map
(\(i, j) -> (i, j, Client.PostId (Thread.thread_id i) (JSONPost.no j)))
flat_posts
post_ids = map (\(_, _, x) -> x) post_tuples
thread_ids :: [ Int64 ]
thread_ids = map (Thread.thread_id . fst) thread_posts
newPosts :: [(Thread.Thread, JSONPost.Post, Client.PostId)] -> Set.Set (Int64, Int64) -> [(Thread.Thread, JSONPost.Post, Client.PostId)]
newPosts xs existing_set = filter (\(_, _, c) -> Set.notMember (Client.thread_id c, Client.board_post_id c) existing_set) xs

View File

@ -3,7 +3,7 @@
module Network.GetLatestPostsPerBoardResponse module Network.GetLatestPostsPerBoardResponse
where where
import Data.Int (Int64) -- import Data.Int (Int64)
import Data.Time.Clock (UTCTime) import Data.Time.Clock (UTCTime)
import Data.Aeson (FromJSON) import Data.Aeson (FromJSON)
import GHC.Generics import GHC.Generics
@ -12,9 +12,15 @@ data GetLatestPostsPerBoardResponse = GetLatestPostsPerBoardResponse
{ board_id :: Int { board_id :: Int
, site_id :: Int , site_id :: Int
, pathpart :: String , pathpart :: String
, post_id :: Maybe Int64 -- , post_id :: Maybe Int64
, board_post_id :: Int64 -- , board_post_id :: Int64
, creation_time :: UTCTime , creation_time :: UTCTime
, thread_id :: Int64 -- , thread_id :: Int64
, board_thread_id :: Integer -- , board_thread_id :: Integer
} deriving (Show, Generic, FromJSON) } deriving (Show, Generic, FromJSON)
-- actually used:
-- site_id
-- pathpart
-- board_id
-- creation_time

View File

@ -54,6 +54,9 @@ threadMain csmr_settings board_elem = do
putStrLn $ Board.pathpart $ QE.board board_elem putStrLn $ Board.pathpart $ QE.board board_elem
-- this is essentially the same as Lib.processBoard -- this is essentially the same as Lib.processBoard
-- but Lib2 uses ExceptT instead of IO, which saves us from writing all
-- of the error handling every time we make an http call. That can be done
-- once at the end.
thread_results <- runExceptT $ do thread_results <- runExceptT $ do
let site = QE.site board_elem let site = QE.site board_elem
let board = QE.board board_elem let board = QE.board board_elem