From 03d22a4f2a2062b0c7a4d30ea817f1404b883f4f Mon Sep 17 00:00:00 2001 From: towards-a-new-leftypol Date: Thu, 13 Feb 2025 04:58:59 -0500 Subject: [PATCH] Add is_missing_attachments flag to posts table - When we're performing a Sync, not all threads on the Board are considered. Also there is a brief period of time where a post is inserted, but its attachments are not yet. We need to be careful because if the program quits here it will not try to load the thread again if the last post is greater than the board's last modified time. This flag adds consistency so we can start with writing a new post that has attachments with the flag set to true, and then update it to false when we've actually successfully saved the attachments. This flag is for now only explicitly used by the sql get_latest_posts_per_board function. (Which got a slight speedup because it didn't need the ORDER BY clause) --- sql/archive_tests2.sql | 7 +++++-- sql/initialize.sql | 3 ++- src/Lib.hs | 1 + src/Lib2.hs | 14 ++++++++------ src/Sync.hs | 12 ++++++++---- 5 files changed, 24 insertions(+), 13 deletions(-) diff --git a/sql/archive_tests2.sql b/sql/archive_tests2.sql index fb178d5..3bebdfe 100644 --- a/sql/archive_tests2.sql +++ b/sql/archive_tests2.sql @@ -288,7 +288,7 @@ SELECT DISTINCT ON (b.board_id) FROM boards b JOIN threads t ON t.board_id = b.board_id JOIN posts p ON p.thread_id = t.thread_id - ORDER BY b.board_id, p.creation_time DESC; + WHERE p.is_missing_attachments = false; CREATE OR REPLACE FUNCTION get_latest_posts_per_board() @@ -314,7 +314,10 @@ RETURNS TABLE ( FROM boards b JOIN threads t ON t.board_id = b.board_id JOIN posts p ON p.thread_id = t.thread_id - ORDER BY b.board_id, p.creation_time DESC; + WHERE p.is_missing_attachments = false; $$ LANGUAGE sql STABLE; SELECT * FROM get_latest_posts_per_board(); +SELECT * FROM boards JOIN sites ON boards.site_id = sites.site_id WHERE sites.name = 'leftychan'; + +ALTER TABLE posts ADD COLUMN is_missing_attachments boolean NOT NULL 
DEFAULT false; diff --git a/sql/initialize.sql b/sql/initialize.sql index 370b5c0..88f9c99 100644 --- a/sql/initialize.sql +++ b/sql/initialize.sql @@ -77,6 +77,7 @@ CREATE TABLE IF NOT EXISTS posts , thread_id bigint NOT NULL , embed text , local_idx int NOT NULL + , is_missing_attachments boolean NOT NULL DEFAULT false , CONSTRAINT unique_thread_board_id_constraint UNIQUE (thread_id, board_post_id) , CONSTRAINT thread_fk FOREIGN KEY (thread_id) REFERENCES threads (thread_id) ON DELETE CASCADE , CONSTRAINT unique_thread_local_idx UNIQUE (thread_id, local_idx) @@ -424,7 +425,7 @@ RETURNS TABLE ( FROM boards b JOIN threads t ON t.board_id = b.board_id JOIN posts p ON p.thread_id = t.thread_id - ORDER BY b.board_id, p.creation_time DESC; + WHERE p.is_missing_attachments = false; $$ LANGUAGE sql STABLE; diff --git a/src/Lib.hs b/src/Lib.hs index 39c06c8..c60d47c 100644 --- a/src/Lib.hs +++ b/src/Lib.hs @@ -160,6 +160,7 @@ apiThreadToArchiveThread board_id_ json_thread = , Threads.board_id = board_id_ } + epochToUTCTime :: Int -> UTCTime epochToUTCTime = posixSecondsToUTCTime . realToFrac diff --git a/src/Lib2.hs b/src/Lib2.hs index 8431cb0..834e14a 100644 --- a/src/Lib2.hs +++ b/src/Lib2.hs @@ -45,13 +45,15 @@ httpGetPostsJSON :: Sites.Site -> Boards.Board -> Threads.Thread - -> ExceptT ProgramException IO [ JSONPosts.Post ] + -> ExceptT ProgramException IO (Threads.Thread, [ JSONPosts.Post ]) httpGetPostsJSON site board thread = liftHttpIO $ - fmap JSONPosts.posts <$> httpSiteGetRequest site path + fmap ((thread,) . 
JSONPosts.posts) <$> httpSiteGetRequest site path where - path = Boards.pathpart board "res" (show (Threads.board_thread_id thread) ++ ".json") + path = Boards.pathpart board + "res" + (show (Threads.board_thread_id thread) ++ ".json") saveNewThreads @@ -60,7 +62,7 @@ saveNewThreads -> [ JSON.Thread ] -> ExceptT ProgramException IO [ Threads.Thread ] saveNewThreads settings board web_threads = do - db_threads <- liftHttpIO $ + existing_threads <- liftHttpIO $ Client.getThreads settings (Boards.board_id board) @@ -69,7 +71,7 @@ saveNewThreads settings board web_threads = do let archived_board_thread_ids :: Set.Set Int archived_board_thread_ids = - Set.fromList $ map Threads.board_thread_id db_threads + Set.fromList $ map Threads.board_thread_id existing_threads threads_to_create :: [ JSON.Thread ] threads_to_create = @@ -84,4 +86,4 @@ saveNewThreads settings board web_threads = do settings (map (Lib.apiThreadToArchiveThread board_id) threads_to_create) - return $ db_threads ++ new_threads + return $ existing_threads ++ new_threads diff --git a/src/Sync.hs b/src/Sync.hs index 19e7cf1..41d6af0 100644 --- a/src/Sync.hs +++ b/src/Sync.hs @@ -51,6 +51,7 @@ threadMain :: S.ConsumerJSONSettings -> QE.BoardQueueElem -> IO QE.BoardQueueEle threadMain csmr_settings board_elem = do putStrLn $ Board.pathpart $ QE.board board_elem + -- this is essentially the same as Lib.processBoard thread_results <- runExceptT $ do catalog_results <- Lib2.httpGetCatalogJSON (QE.site board_elem) (QE.board board_elem) @@ -66,7 +67,10 @@ threadMain csmr_settings board_elem = do liftIO $ print changed_threads - Lib2.saveNewThreads settings (QE.board board_elem) changed_threads + threads <- Lib2.saveNewThreads settings (QE.board board_elem) changed_threads + + mapM_ (Lib2.httpGetPostsJSON (QE.site board_elem) (QE.board board_elem)) threads + print thread_results return board_elem @@ -237,12 +241,12 @@ syncWebsites csmr_settings = do -- - ensure that sites in the settings exist in the database! 
✓ -- - ensure that boards per site in the settings exist in the database! ✓ -- - finish using ExceptT and use sites, latest_posts_per_board to populate - -- our PriorityQueue + -- our PriorityQueue ✓ -- - write event loop that -- - get pq from stm shared value ✓ -- - uses the pq (there was something about the timestamps in the pq having to be reversed btw) ✓ - -- - ensures threads + -- - ensures threads ✓ -- - has a value that should be added to the pq - -- - uses stm to update pq shared value + -- - uses stm to update pq shared value ✓ -- --