From 518467c7eb7cf0924a1643ba781a9f343a964965 Mon Sep 17 00:00:00 2001 From: towards-a-new-leftypol Date: Wed, 29 Jan 2025 22:50:28 -0500 Subject: [PATCH] Add sql function to get the last post on each board --- consumer_settings.json | 22 ++++++---- sql/archive_tests2.sql | 97 ++++++++++++++++++++++++++++++++++++++++++ sql/initialize.sql | 57 +++++++++++++++++++------ src/PriorityQueue.hs | 4 +- 4 files changed, 157 insertions(+), 23 deletions(-) diff --git a/consumer_settings.json b/consumer_settings.json index 54a5398..352b0ff 100644 --- a/consumer_settings.json +++ b/consumer_settings.json @@ -1,13 +1,17 @@ { - "websites": { - "name": "example", - "root_url": "https://example.net", - "boards": [ - "tech", - "meta" - ] - }, + "websites": [ + { + "name": "example", + "root_url": "https://example.net", + "boards": [ + "tech", + "meta" + ] + } + ], "postgrest_url": "http://localhost:3000", "jwt": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJyb2xlIjoiY2hhbl9hcmNoaXZlciJ9.rGIKZokTDKTuQLIv8138bUby5PELfDipYYIDpJzH02c", - "media_root_path": "/home/phil/linixy/tmp/chan_archive_media2/archive" + "media_root_path": "/home/phil/linixy/tmp/chan_archive_media_repaired/archive", + "http_fill_all": false, + "http_sync_continously": false } diff --git a/sql/archive_tests2.sql b/sql/archive_tests2.sql index 690c81e..cf43e43 100644 --- a/sql/archive_tests2.sql +++ b/sql/archive_tests2.sql @@ -222,3 +222,100 @@ SELECT * FROM boards; SELECT * FROM threads WHERE thread_id = 11314; ANALYZE posts; + +SELECT count(*) from attachments; + +SELECT * FROM attachments WHERE post_id = 253383; +SELECT * from attachments WHERE board_filename = '1722466065515'; +SELECT count(*) attachments WHERE attachment_id < (SELECT attachment_id FROM attachments WHERE board_filename = '1722466065515'); +SELECT max(attachment_id) FROM attachments a; +SELECT pg_get_serial_sequence('attachments', 'attachment_id'); +SELECT setval(pg_get_serial_sequence('attachments', 'attachment_id'), COALESCE(198853, 1), true); + + +UPDATE attachments SET thumb_extension = 'png' +WHERE + attachment_id IN + ( + SELECT a.attachment_id + FROM attachments a + JOIN posts p ON a.post_id = p.post_id + JOIN threads t ON p.thread_id = t.thread_id + JOIN boards b ON t.board_id = b.board_id + JOIN sites s ON b.site_id = s.site_id + WHERE s.name = 'leftychan' + AND a.thumb_extension = 'jpg' + ); + + +SELECT * FROM posts WHERE board_post_id = 129; +SELECT * FROM attachments WHERE post_id = 461287; + +SELECT count(a.*) +FROM attachments a +JOIN posts p ON a.post_id = p.post_id +JOIN threads t ON p.thread_id = t.thread_id +JOIN boards b ON t.board_id = b.board_id +JOIN sites s ON b.site_id = s.site_id +WHERE s.name = 'leftychan' +AND a.thumb_extension = 'jpg'; + + +SELECT * FROM posts +JOIN threads ON threads.thread_id = posts.thread_id +JOIN boards ON boards.board_id = threads.board_id +WHERE boards.pathpart = 'leftypol' + AND boards.site_id = 1 +ORDER BY posts.creation_time DESC +LIMIT 1; + +SELECT * FROM posts +ORDER BY posts.creation_time DESC +LIMIT 1; + +SELECT boards.board_id, boards.pathpart, sites.name FROM boards JOIN sites ON sites.site_id = boards.site_id; + +SELECT DISTINCT ON (b.board_id) + b.board_id, + b.site_id, + b.pathpart, + p.post_id, + p.board_post_id, + p.creation_time, + p.body, + t.thread_id, + t.board_thread_id + FROM boards b + JOIN threads t ON t.board_id = b.board_id + JOIN posts p ON p.thread_id = t.thread_id + ORDER BY b.board_id, p.creation_time DESC; + +CREATE OR REPLACE FUNCTION get_latest_posts_per_board() +RETURNS TABLE ( + board_id int, + site_id int, + pathpart text, + post_id bigint, + board_post_id bigint, + creation_time timestamp with time zone, + body text, + thread_id bigint, + board_thread_id bigint +) AS $$ + SELECT DISTINCT ON (b.board_id) + b.board_id, + b.site_id, + b.pathpart, + p.post_id, + p.board_post_id, + p.creation_time, + p.body, + t.thread_id, + t.board_thread_id + FROM boards b + JOIN threads t ON t.board_id = b.board_id + JOIN posts p ON p.thread_id = t.thread_id + ORDER BY b.board_id, p.creation_time DESC; +$$ LANGUAGE sql STABLE; + +SELECT * FROM get_latest_posts_per_board(); \ No newline at end of file diff --git a/sql/initialize.sql b/sql/initialize.sql index af36995..c3024a9 100644 --- a/sql/initialize.sql +++ b/sql/initialize.sql @@ -21,6 +21,7 @@ DROP TYPE IF EXISTS post_key CASCADE; DROP FUNCTION IF EXISTS update_post_body_search_index; DROP FUNCTION IF EXISTS fetch_top_threads; DROP FUNCTION IF EXISTS fetch_catalog; +DROP FUNCTION IF EXISTS get_latest_posts_per_board; -- It won't let us drop roles otherwise and the IFs are to keep this script idempotent. @@ -222,7 +223,7 @@ CREATE OR REPLACE FUNCTION fetch_top_threads( lookback INT DEFAULT 10000 ) RETURNS TABLE(bump_time TIMESTAMPTZ, post_count BIGINT, thread_id BIGINT, where_to_leave_off TIMESTAMPTZ) -LANGUAGE sql +LANGUAGE sql STABLE AS $$ SELECT max(creation_time) as bump_time, @@ -308,7 +309,7 @@ RETURNS SETOF catalog_grid_result AS $$ JOIN sites ON sites.site_id = boards.site_id LEFT OUTER JOIN attachments ON attachments.post_id = tall_posts.post_id AND attachments.attachment_idx = 1 ORDER BY bump_time DESC; -$$ LANGUAGE sql; +$$ LANGUAGE sql STABLE; -- Function: search_posts @@ -400,6 +401,35 @@ RETURNS SETOF catalog_grid_result AS $$ $$ LANGUAGE sql STABLE; +CREATE OR REPLACE FUNCTION get_latest_posts_per_board() +RETURNS TABLE ( + board_id int, + site_id int, + pathpart text, + post_id bigint, + board_post_id bigint, + creation_time timestamp with time zone, + body text, + thread_id bigint, + board_thread_id bigint +) AS $$ + SELECT DISTINCT ON (b.board_id) + b.board_id, + b.site_id, + b.pathpart, + p.post_id, + p.board_post_id, + p.creation_time, + p.body, + t.thread_id, + t.board_thread_id + FROM boards b + JOIN threads t ON t.board_id = b.board_id + JOIN posts p ON p.thread_id = t.thread_id + ORDER BY b.board_id, p.creation_time DESC; +$$ LANGUAGE sql STABLE; + + /* * Permissions */ @@ -409,18 +439,20 @@ REVOKE EXECUTE ON FUNCTION fetch_catalog FROM PUBLIC; REVOKE EXECUTE ON FUNCTION search_posts FROM PUBLIC; REVOKE EXECUTE ON FUNCTION update_post_body_search_index FROM PUBLIC; REVOKE EXECUTE ON FUNCTION get_posts FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION get_latest_posts_per_board FROM PUBLIC; CREATE ROLE chan_archive_anon nologin; -GRANT CONNECT ON DATABASE chan_archives TO chan_archive_anon; -GRANT SELECT ON sites TO chan_archive_anon; -GRANT SELECT ON boards TO chan_archive_anon; -GRANT SELECT ON threads TO chan_archive_anon; -GRANT SELECT ON posts TO chan_archive_anon; -GRANT SELECT ON attachments TO chan_archive_anon; -GRANT EXECUTE ON FUNCTION fetch_catalog TO chan_archive_anon; -GRANT EXECUTE ON FUNCTION fetch_top_threads TO chan_archive_anon; -GRANT EXECUTE ON FUNCTION search_posts TO chan_archive_anon; -GRANT EXECUTE ON FUNCTION get_posts TO chan_archive_anon; +GRANT CONNECT ON DATABASE chan_archives TO chan_archive_anon; +GRANT SELECT ON sites TO chan_archive_anon; +GRANT SELECT ON boards TO chan_archive_anon; +GRANT SELECT ON threads TO chan_archive_anon; +GRANT SELECT ON posts TO chan_archive_anon; +GRANT SELECT ON attachments TO chan_archive_anon; +GRANT EXECUTE ON FUNCTION fetch_catalog TO chan_archive_anon; +GRANT EXECUTE ON FUNCTION fetch_top_threads TO chan_archive_anon; +GRANT EXECUTE ON FUNCTION search_posts TO chan_archive_anon; +GRANT EXECUTE ON FUNCTION get_posts TO chan_archive_anon; +GRANT EXECUTE ON FUNCTION get_latest_posts_per_board TO chan_archive_anon; -- GRANT usage, select ON SEQUENCE sites_site_id_seq TO chan_archive_anon; -- GRANT usage, select ON SEQUENCE boards_board_id_seq TO chan_archive_anon; @@ -441,6 +473,7 @@ GRANT EXECUTE ON FUNCTION fetch_top_threads TO chan_archiver; GRANT EXECUTE ON FUNCTION fetch_catalog TO chan_archiver; GRANT EXECUTE ON FUNCTION search_posts TO chan_archiver; GRANT EXECUTE ON FUNCTION get_posts TO chan_archiver; +GRANT EXECUTE ON FUNCTION get_latest_posts_per_board TO chan_archiver; GRANT usage, select ON SEQUENCE sites_site_id_seq TO chan_archiver; GRANT usage, select ON SEQUENCE boards_board_id_seq TO chan_archiver; GRANT usage, select ON SEQUENCE threads_thread_id_seq TO chan_archiver; diff --git a/src/PriorityQueue.hs b/src/PriorityQueue.hs index 1de30c3..7935369 100644 --- a/src/PriorityQueue.hs +++ b/src/PriorityQueue.hs @@ -87,7 +87,7 @@ main = do where pf :: (Show a, Show b) => (a, b) -> IO () - pf (a, b) = putStrLn $ (show a) ++ "," ++ (show b) + pf (a, b) = putStrLn $ show a ++ "," ++ show b f _ (xs, gen) = let (x, newgen) = selectSkewedIndex (size q) gen @@ -97,5 +97,5 @@ main = do q = fromList [ Elem i undefined | i <- [1..100] ] countOccurrences :: (Eq a, Ord a) => [a] -> [(a, Int)] -countOccurrences rolls = map (\x -> (head x, length x)) . group . sort $ rolls +countOccurrences = map (\x -> (head x, length x)) . group . sort