chan-delorean/sql/archive_tests2.sql

321 lines
8.3 KiB
PL/PgSQL

CREATE OR REPLACE FUNCTION fetch_top_threads(
p_start_time TIMESTAMPTZ,
lookback INT DEFAULT 10000
)
RETURNS TABLE(bump_time TIMESTAMPTZ, post_count BIGINT, thread_id BIGINT, where_to_leave_off TIMESTAMPTZ)
LANGUAGE sql
AS $$
SELECT
max(creation_time) as bump_time,
count(*),
thread_id,
min(creation_time) as where_to_leave_off
FROM
(
SELECT thread_id, creation_time
FROM posts
WHERE creation_time < p_start_time
ORDER BY creation_time DESC
LIMIT LEAST(lookback, 250000) -- capping the lookback to 250k
) as t
GROUP BY thread_id
ORDER BY bump_time DESC;
$$;
SELECT * FROM fetch_top_threads(NOW(), 1000);
SELECT * FROM fetch_top_threads(NOW() - INTERVAL '10d') top JOIN posts ON top.thread_id = posts.thread_id;
SELECT
ordered_posts.thread_id,
count(*),
array_agg(ordered_posts.body)
FROM (
SELECT top.thread_id, posts.body
FROM fetch_top_threads(NOW() - INTERVAL '10d', 2000) top
JOIN posts ON top.thread_id = posts.thread_id
ORDER BY posts.board_post_id ASC
) AS ordered_posts
GROUP BY ordered_posts.thread_id;
WITH
top AS
(
SELECT * FROM fetch_top_threads(NOW()) top
),
tall_posts AS
(
SELECT top.post_count as estimated_post_count, posts.*
FROM top
JOIN posts ON top.thread_id = posts.thread_id
),
op_posts AS
(
SELECT DISTINCT ON (t.thread_id)
*
FROM tall_posts t
ORDER BY t.thread_id, t.board_post_id
)
SELECT * FROM op_posts;
WITH
top AS
(
SELECT * FROM fetch_top_threads(NOW()) top
),
tall_posts AS
(
SELECT top.post_count as estimated_post_count, posts.*
FROM top
JOIN posts ON top.thread_id = posts.thread_id
),
op_posts AS
(
SELECT DISTINCT ON (t.thread_id)
*
FROM tall_posts t
ORDER BY t.thread_id, t.board_post_id
),
post_counts AS
(
SELECT thread_id, count(*) as post_count FROM
tall_posts
GROUP BY thread_id
)
SELECT post_counts.post_count, op_posts.* FROM op_posts JOIN post_counts ON op_posts.thread_id = post_counts.thread_id;
-- 71ms!
WITH
top AS
(
SELECT * FROM fetch_top_threads(NOW() - INTERVAL '1y', 1000) top
),
tall_posts AS
(
SELECT
top.post_count as estimated_post_count,
top.bump_time,
posts.post_id,
posts.board_post_id,
posts.creation_time,
posts.body,
posts.thread_id
FROM top
JOIN posts ON top.thread_id = posts.thread_id
),
op_posts AS
(
SELECT DISTINCT ON (t.thread_id)
*
FROM tall_posts t
ORDER BY t.thread_id, t.board_post_id
),
post_counts AS
(
SELECT thread_id, count(*) as post_count FROM
tall_posts
GROUP BY thread_id
)
SELECT
post_counts.post_count,
op_posts.*,
threads.board_thread_id,
boards.pathpart,
sites."name"
FROM op_posts
JOIN post_counts ON op_posts.thread_id = post_counts.thread_id
JOIN threads ON op_posts.thread_id = threads.thread_id
JOIN boards ON threads.board_id = boards.board_id
JOIN sites ON sites.site_id = boards.site_id;
CREATE OR REPLACE FUNCTION fetch_catalog(max_time timestamptz, max_row_read int DEFAULT 10000)
RETURNS TABLE (
post_count bigint,
estimated_post_count bigint,
post_id bigint,
board_post_id bigint,
creation_time timestamptz,
bump_time timestamptz,
body text,
subject text,
thread_id bigint,
board_thread_id bigint,
pathpart text,
name text
) AS $$
WITH
top AS
(
SELECT * FROM fetch_top_threads(max_time, max_row_read) AS top
),
tall_posts AS
(
SELECT
top.post_count as estimated_post_count,
posts.post_id,
posts.board_post_id,
posts.creation_time,
top.bump_time,
posts.body,
posts.subject,
posts.thread_id
FROM top
JOIN posts ON top.thread_id = posts.thread_id
WHERE creation_time < max_time
),
op_posts AS
(
SELECT DISTINCT ON (t.thread_id)
*
FROM tall_posts t
ORDER BY t.thread_id, t.board_post_id
),
post_counts AS
(
SELECT thread_id, count(*) as post_count FROM
tall_posts
GROUP BY thread_id
)
SELECT
post_counts.post_count,
op_posts.*,
threads.board_thread_id,
boards.pathpart,
sites."name"
FROM op_posts
JOIN post_counts ON op_posts.thread_id = post_counts.thread_id
JOIN threads ON op_posts.thread_id = threads.thread_id
JOIN boards ON threads.board_id = boards.board_id
JOIN sites ON sites.site_id = boards.site_id
ORDER BY bump_time DESC;
$$ LANGUAGE sql;
SELECT * FROM fetch_catalog(NOW() - INTERVAL '1y', 1001);
SELECT * FROM fetch_catalog(NOW(), 1000);
SELECT count(*) FROM posts;
-- CREATE INDEX idx_posts_thread_board ON posts (thread_id, board_post_id);
ANALYZE posts;
SELECT DISTINCT ON (top.thread_id)
top.thread_id,
posts.post_id,
posts.body,
top.post_count,
top.where_to_leave_off
FROM fetch_top_threads(NOW() - INTERVAL '10d') top
JOIN posts ON top.thread_id = posts.thread_id
ORDER BY top.thread_id, posts.board_post_id;
SELECT * FROM threads WHERE thread_id = 3110;
SELECT * FROM posts WHERE thread_id = 3110 ORDER BY board_post_id ASC;
SELECT * FROM boards;
SELECT * FROM threads WHERE thread_id = 11314;
ANALYZE posts;
SELECT count(*) from attachments;
SELECT * FROM attachments WHERE post_id = 253383;
SELECT * from attachments WHERE board_filename = '1722466065515';
SELECT count(*) attachments WHERE attachment_id < (SELECT attachment_id FROM attachments WHERE board_filename = '1722466065515');
SELECT max(attachment_id) FROM attachments a;
SELECT pg_get_serial_sequence('attachments', 'attachment_id');
SELECT setval(pg_get_serial_sequence('attachments', 'attachment_id'), COALESCE(198853, 1), true);
UPDATE attachments SET thumb_extension = 'png'
WHERE
attachment_id IN
(
SELECT a.attachment_id
FROM attachments a
JOIN posts p ON a.post_id = p.post_id
JOIN threads t ON p.thread_id = t.thread_id
JOIN boards b ON t.board_id = b.board_id
JOIN sites s ON b.site_id = s.site_id
WHERE s.name = 'leftychan'
AND a.thumb_extension = 'jpg'
);
SELECT * FROM posts WHERE board_post_id = 129;
SELECT * FROM attachments WHERE post_id = 461287;
SELECT count(a.*)
FROM attachments a
JOIN posts p ON a.post_id = p.post_id
JOIN threads t ON p.thread_id = t.thread_id
JOIN boards b ON t.board_id = b.board_id
JOIN sites s ON b.site_id = s.site_id
WHERE s.name = 'leftychan'
AND a.thumb_extension = 'jpg';
SELECT * FROM posts
JOIN threads ON threads.thread_id = posts.thread_id
JOIN boards ON boards.board_id = threads.board_id
WHERE boards.pathpart = 'leftypol'
AND boards.site_id = 1
ORDER BY posts.creation_time DESC
LIMIT 1;
SELECT * FROM posts
ORDER BY posts.creation_time DESC
LIMIT 1;
SELECT boards.board_id, boards.pathpart, sites.name FROM boards JOIN sites ON sites.site_id = boards.site_id;
SELECT DISTINCT ON (b.board_id)
b.board_id,
b.site_id,
b.pathpart,
p.post_id,
p.board_post_id,
p.creation_time,
p.body,
t.thread_id,
t.board_thread_id
FROM boards b
JOIN threads t ON t.board_id = b.board_id
JOIN posts p ON p.thread_id = t.thread_id
ORDER BY b.board_id, p.creation_time DESC;
CREATE OR REPLACE FUNCTION get_latest_posts_per_board()
RETURNS TABLE (
board_id int,
site_id int,
pathpart text,
post_id bigint,
board_post_id bigint,
creation_time timestamp with time zone,
body text,
thread_id bigint,
board_thread_id bigint
) AS $$
SELECT DISTINCT ON (b.board_id)
b.board_id,
b.site_id,
b.pathpart,
p.post_id,
p.board_post_id,
p.creation_time,
p.body,
t.thread_id,
t.board_thread_id
FROM boards b
JOIN threads t ON t.board_id = b.board_id
JOIN posts p ON p.thread_id = t.thread_id
ORDER BY b.board_id, p.creation_time DESC;
$$ LANGUAGE sql STABLE;
SELECT * FROM get_latest_posts_per_board();