Add is_missing_attachments flag to posts table

- When we're performing a Sync, not all threads on the Board are
  considered. Also, there is a brief period of time where a post has been
  inserted but its attachments have not been yet. We need to be careful,
  because if the program quits at that point it will not try to load the
  thread again if the last post is newer than the board's last modified
  time.

  This flag adds consistency: we start by writing a new post that has
  attachments with the flag set to true, and then update it to false once
  we've actually successfully saved the attachments.

  For now, this flag is only used explicitly by the SQL function
  get_latest_posts_per_board (which got a slight speedup because it no
  longer needs the ORDER BY clause).
This commit is contained in:
towards-a-new-leftypol 2025-02-13 04:58:59 -05:00
parent dc17404eab
commit 03d22a4f2a
5 changed files with 24 additions and 13 deletions

View File

@ -288,7 +288,7 @@ SELECT DISTINCT ON (b.board_id)
FROM boards b
JOIN threads t ON t.board_id = b.board_id
JOIN posts p ON p.thread_id = t.thread_id
ORDER BY b.board_id, p.creation_time DESC;
WHERE p.is_missing_attachments = false;
CREATE OR REPLACE FUNCTION get_latest_posts_per_board()
@ -314,7 +314,10 @@ RETURNS TABLE (
FROM boards b
JOIN threads t ON t.board_id = b.board_id
JOIN posts p ON p.thread_id = t.thread_id
ORDER BY b.board_id, p.creation_time DESC;
WHERE p.is_missing_attachments = false;
$$ LANGUAGE sql STABLE;
SELECT * FROM get_latest_posts_per_board();
SELECT * FROM boards JOIN sites ON boards.site_id = sites.site_id WHERE sites.name = 'leftychan';
ALTER TABLE posts ADD COLUMN is_missing_attachments boolean NOT NULL DEFAULT false;

View File

@ -77,6 +77,7 @@ CREATE TABLE IF NOT EXISTS posts
, thread_id bigint NOT NULL
, embed text
, local_idx int NOT NULL
, is_missing_attachments boolean NOT NULL DEFAULT false
, CONSTRAINT unique_thread_board_id_constraint UNIQUE (thread_id, board_post_id)
, CONSTRAINT thread_fk FOREIGN KEY (thread_id) REFERENCES threads (thread_id) ON DELETE CASCADE
, CONSTRAINT unique_thread_local_idx UNIQUE (thread_id, local_idx)
@ -424,7 +425,7 @@ RETURNS TABLE (
FROM boards b
JOIN threads t ON t.board_id = b.board_id
JOIN posts p ON p.thread_id = t.thread_id
ORDER BY b.board_id, p.creation_time DESC;
WHERE p.is_missing_attachments = false;
$$ LANGUAGE sql STABLE;

View File

@ -160,6 +160,7 @@ apiThreadToArchiveThread board_id_ json_thread =
, Threads.board_id = board_id_
}
epochToUTCTime :: Int -> UTCTime
epochToUTCTime = posixSecondsToUTCTime . realToFrac

View File

@ -45,13 +45,15 @@ httpGetPostsJSON
:: Sites.Site
-> Boards.Board
-> Threads.Thread
-> ExceptT ProgramException IO [ JSONPosts.Post ]
-> ExceptT ProgramException IO (Threads.Thread, [ JSONPosts.Post ])
httpGetPostsJSON site board thread =
liftHttpIO $
fmap JSONPosts.posts <$> httpSiteGetRequest site path
fmap ((thread,) . JSONPosts.posts) <$> httpSiteGetRequest site path
where
path = Boards.pathpart board </> "res" </> (show (Threads.board_thread_id thread) ++ ".json")
path = Boards.pathpart board
</> "res"
</> (show (Threads.board_thread_id thread) ++ ".json")
saveNewThreads
@ -60,7 +62,7 @@ saveNewThreads
-> [ JSON.Thread ]
-> ExceptT ProgramException IO [ Threads.Thread ]
saveNewThreads settings board web_threads = do
db_threads <- liftHttpIO $
existing_threads <- liftHttpIO $
Client.getThreads
settings
(Boards.board_id board)
@ -69,7 +71,7 @@ saveNewThreads settings board web_threads = do
let
archived_board_thread_ids :: Set.Set Int
archived_board_thread_ids =
Set.fromList $ map Threads.board_thread_id db_threads
Set.fromList $ map Threads.board_thread_id existing_threads
threads_to_create :: [ JSON.Thread ]
threads_to_create =
@ -84,4 +86,4 @@ saveNewThreads settings board web_threads = do
settings
(map (Lib.apiThreadToArchiveThread board_id) threads_to_create)
return $ db_threads ++ new_threads
return $ existing_threads ++ new_threads

View File

@ -51,6 +51,7 @@ threadMain :: S.ConsumerJSONSettings -> QE.BoardQueueElem -> IO QE.BoardQueueEle
threadMain csmr_settings board_elem = do
putStrLn $ Board.pathpart $ QE.board board_elem
-- this is essentially the same as Lib.processBoard
thread_results <- runExceptT $ do
catalog_results <- Lib2.httpGetCatalogJSON (QE.site board_elem) (QE.board board_elem)
@ -66,7 +67,10 @@ threadMain csmr_settings board_elem = do
liftIO $ print changed_threads
Lib2.saveNewThreads settings (QE.board board_elem) changed_threads
threads <- Lib2.saveNewThreads settings (QE.board board_elem) changed_threads
mapM_ (Lib2.httpGetPostsJSON (QE.site board_elem) (QE.board board_elem)) threads
print thread_results
return board_elem
@ -237,12 +241,12 @@ syncWebsites csmr_settings = do
-- - ensure that sites in the settings exist in the database! ✓
-- - ensure that boards per site in the settings exist in the database! ✓
-- - finish using ExceptT and use sites, latest_posts_per_board to populate
-- our PriorityQueue
-- our PriorityQueue
-- - write event loop that
-- - get pq from stm shared value ✓
-- - uses the pq (there was something about the timestamps in the pq having to be reversed btw) ✓
-- - ensures threads
-- - ensures threads
-- - has a value that should be added to the pq
-- - uses stm to update pq shared value
-- - uses stm to update pq shared value
--
--