From f4652eecbbf9f8474ded87af683d24476d74d051 Mon Sep 17 00:00:00 2001 From: towards-a-new-leftypol Date: Fri, 14 Feb 2025 09:55:18 -0500 Subject: [PATCH] commit pre changing the get latest posts per board query --- sql/archive_tests2.sql | 6 +- src/Lib.hs | 32 +++++----- src/Lib2.hs | 62 ++++++++++++++++--- src/Network/GetLatestPostsPerBoardResponse.hs | 16 +++-- src/Sync.hs | 3 + 5 files changed, 87 insertions(+), 32 deletions(-) diff --git a/sql/archive_tests2.sql b/sql/archive_tests2.sql index 3bebdfe..8dbf66a 100644 --- a/sql/archive_tests2.sql +++ b/sql/archive_tests2.sql @@ -286,9 +286,9 @@ SELECT DISTINCT ON (b.board_id) t.thread_id, t.board_thread_id FROM boards b - JOIN threads t ON t.board_id = b.board_id - JOIN posts p ON p.thread_id = t.thread_id - WHERE p.is_missing_attachments = false; + LEFT JOIN threads t ON t.board_id = b.board_id + LEFT JOIN posts p ON p.thread_id = t.thread_id AND p.is_missing_attachments = false + ORDER BY b.board_id, p.creation_time DESC NULLS LAST; CREATE OR REPLACE FUNCTION get_latest_posts_per_board() diff --git a/src/Lib.hs b/src/Lib.hs index f10e36b..9bc58e9 100644 --- a/src/Lib.hs +++ b/src/Lib.hs @@ -553,6 +553,21 @@ processFiles settings fgs tuples = do -- perfect just means that our posts have in Map.insert pid (x : l) accMap +localIndexFoldf + :: ([Posts.Post], Map.Map Int64 Int) + -> (Threads.Thread, JSONPost.Post, Client.PostId) + -> ([Posts.Post], Map.Map Int64 Int) +localIndexFoldf (posts, idx_map) (t, p, c) = + case Map.lookup thread_id idx_map of + Nothing -> (post 1 : posts, Map.insert thread_id 1 idx_map) + Just i -> (post (i + 1) : posts, Map.insert thread_id (i + 1) idx_map) + + where + post :: Int -> Posts.Post + post i = apiPostToArchivePost i t p + + thread_id = Client.thread_id c + createNewPosts :: J.JSONSettings -> [ (Threads.Thread, JSONPost.Post, Client.PostId) ] @@ -573,7 +588,7 @@ createNewPosts settings tuples = do -- Map of thread_id to the largest local_idx value (which would be the number of the last post in the thread) let local_idx :: Map.Map Int64 Int = Map.fromList thread_max_local_idxs - let insert_posts :: [ Posts.Post ] = fst $ foldl' foldFn ([], local_idx) to_insert_list + let insert_posts :: [ Posts.Post ] = fst $ foldl' localIndexFoldf ([], local_idx) to_insert_list -- posts to insert are the posts that are not in existing_posts -- so we create a Set (thread_id, board_post_id) ✓ @@ -598,21 +613,6 @@ createNewPosts settings tuples = do newPosts :: [(Threads.Thread, JSONPost.Post, Client.PostId)] -> Set (Int64, Int64) -> [(Threads.Thread, JSONPost.Post, Client.PostId)] newPosts ts existing_set = filter (\(_, _, c) -> Set.notMember (Client.thread_id c, Client.board_post_id c) existing_set) ts - foldFn - :: ([Posts.Post], Map.Map Int64 Int) - -> (Threads.Thread, JSONPost.Post, Client.PostId) - -> ([Posts.Post], Map.Map Int64 Int) - foldFn (posts, idx_map) (t, p, c) = - case Map.lookup thread_id idx_map of - Nothing -> (post 1 : posts, Map.insert thread_id 1 idx_map) - Just i -> (post (i + 1) : posts, Map.insert thread_id (i + 1) idx_map) - - where - post :: Int -> Posts.Post - post i = apiPostToArchivePost i t p - - thread_id = Client.thread_id c - data FileGetters = FileGetters { getJSONCatalog :: Sites.Site -> String -> IO (Either String [ Catalog ]) diff --git a/src/Lib2.hs b/src/Lib2.hs index 274132e..654a036 100644 --- a/src/Lib2.hs +++ b/src/Lib2.hs @@ -8,7 +8,11 @@ module Lib2 import Control.Monad.Trans.Except (ExceptT (..)) import System.FilePath (()) import qualified Data.Set as Set +import qualified Data.Map as Map import Data.Aeson (FromJSON) +import Data.Int (Int64) +import Data.List (sortBy) +import Data.Ord (comparing) import qualified Network.DataClient as Client import qualified SitesType as Sites @@ -16,7 +20,8 @@ import qualified BoardsType as Boards import Common.Network.HttpClient (HttpError) import qualified JSONParsing as JSON import qualified JSONPost -import qualified ThreadType as Threads +import qualified ThreadType as Thread +import qualified Common.PostsType as Posts import Common.Server.JSONSettings (JSONSettings) import qualified Lib @@ -25,7 +30,10 @@ data ProgramException = HttpException HttpError deriving Show -liftHttpIO :: IO (Either HttpError a) -> ExceptT ProgramException IO a +type IOe a = ExceptT ProgramException IO a + + +liftHttpIO :: IO (Either HttpError a) -> IOe a liftHttpIO = ExceptT . fmap (either (Left . HttpException) Right) @@ -35,7 +43,7 @@ httpSiteGetRequest site path = Client.getJSON $ Sites.url site path httpGetCatalogJSON :: Sites.Site -> Boards.Board - -> ExceptT ProgramException IO [ JSON.Catalog ] + -> IOe [ JSON.Catalog ] httpGetCatalogJSON site board = liftHttpIO $ httpSiteGetRequest site path where path = Boards.pathpart board "catalog.json" @@ -44,8 +52,8 @@ httpGetCatalogJSON site board = liftHttpIO $ httpSiteGetRequest site path httpGetPostsJSON :: Sites.Site -> Boards.Board - -> Threads.Thread - -> ExceptT ProgramException IO (Threads.Thread, [ JSONPost.Post ]) + -> Thread.Thread + -> IOe (Thread.Thread, [ JSONPost.Post ]) httpGetPostsJSON site board thread = liftHttpIO $ fmap ((thread,) . JSONPost.posts) <$> httpSiteGetRequest site path @@ -53,14 +61,14 @@ httpGetPostsJSON site board thread = where path = Boards.pathpart board "res" - (show (Threads.board_thread_id thread) ++ ".json") + (show (Thread.board_thread_id thread) ++ ".json") saveNewThreads :: JSONSettings -> Boards.Board -> [ JSON.Thread ] - -> ExceptT ProgramException IO [ Threads.Thread ] + -> IOe [ Thread.Thread ] saveNewThreads settings board web_threads = do existing_threads <- liftHttpIO $ Client.getThreads @@ -71,7 +79,7 @@ saveNewThreads settings board web_threads = do let archived_board_thread_ids :: Set.Set Int archived_board_thread_ids = - Set.fromList $ map Threads.board_thread_id existing_threads + Set.fromList $ map Thread.board_thread_id existing_threads threads_to_create :: [ JSON.Thread ] threads_to_create = @@ -87,3 +95,41 @@ saveNewThreads settings board web_threads = do (map (Lib.apiThreadToArchiveThread board_id) threads_to_create) return $ existing_threads ++ new_threads + + +saveNewPosts + :: JSONSettings + -> [ (Thread.Thread, [ JSONPost.Post ]) ] + -> IOe [ Posts.Post ] +saveNewPosts settings thread_posts = do + existing_posts <- liftHttpIO $ Client.getPosts settings post_ids + + thread_max_local_idx <- liftHttpIO $ Client.getThreadMaxLocalIdx settings thread_ids + + let existing_set :: Set.Set (Int64, Int64) = + Set.fromList + (map (\x -> (Posts.thread_id x, Posts.board_post_id x)) + existing_posts) + + let tuples_to_insert :: [ (Thread.Thread, JSONPost.Post, Client.PostId) ] = + sortBy (comparing $ \(_, _, p) -> Client.board_post_id p) $ + newPosts post_tuples existing_set + + let local_idx :: Map.Map Int64 Int = Map.fromList thread_max_local_idx + + return undefined + + where + flat_posts = concatMap (\(i, j) -> map (i,) j) thread_posts + + post_tuples = map + (\(i, j) -> (i, j, Client.PostId (Thread.thread_id i) (JSONPost.no j))) + flat_posts + + post_ids = map (\(_, _, x) -> x) post_tuples + + thread_ids :: [ Int64 ] + thread_ids = map (Thread.thread_id . fst) thread_posts + + newPosts :: [(Thread.Thread, JSONPost.Post, Client.PostId)] -> Set.Set (Int64, Int64) -> [(Thread.Thread, JSONPost.Post, Client.PostId)] + newPosts xs existing_set = filter (\(_, _, c) -> Set.notMember (Client.thread_id c, Client.board_post_id c) existing_set) xs diff --git a/src/Network/GetLatestPostsPerBoardResponse.hs b/src/Network/GetLatestPostsPerBoardResponse.hs index aecbfd9..29f03e3 100644 --- a/src/Network/GetLatestPostsPerBoardResponse.hs +++ b/src/Network/GetLatestPostsPerBoardResponse.hs @@ -3,7 +3,7 @@ module Network.GetLatestPostsPerBoardResponse where -import Data.Int (Int64) +-- import Data.Int (Int64) import Data.Time.Clock (UTCTime) import Data.Aeson (FromJSON) import GHC.Generics @@ -12,9 +12,15 @@ data GetLatestPostsPerBoardResponse = GetLatestPostsPerBoardResponse { board_id :: Int , site_id :: Int , pathpart :: String - , post_id :: Maybe Int64 - , board_post_id :: Int64 + -- , post_id :: Maybe Int64 + -- , board_post_id :: Int64 , creation_time :: UTCTime - , thread_id :: Int64 - , board_thread_id :: Integer + -- , thread_id :: Int64 + -- , board_thread_id :: Integer } deriving (Show, Generic, FromJSON) + +-- actually used: +-- site_id +-- pathpart +-- board_id +-- creation_time diff --git a/src/Sync.hs b/src/Sync.hs index f5ac271..de5171d 100644 --- a/src/Sync.hs +++ b/src/Sync.hs @@ -54,6 +54,9 @@ threadMain csmr_settings board_elem = do putStrLn $ Board.pathpart $ QE.board board_elem -- this is essentially the same as Lib.processBoard + -- but Lib2 uses ExceptT instead of IO, which saves us from writing all + -- of the error handling every time we make an http call. That can be done + -- once at the end. thread_results <- runExceptT $ do let site = QE.site board_elem let board = QE.board board_elem