Compare commits
2 Commits
615ef6e80b
...
c297bae697
Author | SHA1 | Date |
---|---|---|
|
c297bae697 | |
|
7b8b1b2255 |
|
@ -0,0 +1,593 @@
|
|||
select count(*) from posts;
|
||||
|
||||
select board_id, count(*) as post_count from posts natural join threads group by board_id;
|
||||
|
||||
select sum(post_count) from (select board_id, count(*) as post_count from posts join threads on posts.thread_id = threads.thread_id group by threads.board_id) as t;
|
||||
|
||||
select board_id, count(*) from threads natural join boards group by board_id;
|
||||
|
||||
select count(*) from posts;
|
||||
|
||||
SELECT
|
||||
threads.thread_id,
|
||||
MAX(posts.creation_time) AS bump_time,
|
||||
-- FIRST_VALUE(posts.subject) OVER (PARTITION BY threads.thread_id ORDER BY posts.creation_time) AS first_post_subject,
|
||||
FIRST_VALUE(posts.body) OVER (PARTITION BY threads.thread_id ORDER BY posts.creation_time) AS first_post_body,
|
||||
COUNT(*) AS post_count
|
||||
FROM
|
||||
threads
|
||||
JOIN
|
||||
posts ON threads.thread_id = posts.thread_id
|
||||
WHERE
|
||||
posts.creation_time < NOW() -- Specify your desired time here
|
||||
GROUP BY
|
||||
threads.thread_id
|
||||
ORDER BY
|
||||
bump_time DESC
|
||||
LIMIT
|
||||
100;
|
||||
|
||||
|
||||
SELECT
|
||||
thread_data.thread_id,
|
||||
thread_data.bump_time,
|
||||
thread_data.first_post_subject,
|
||||
thread_data.first_post_body,
|
||||
COUNT(*) AS post_count
|
||||
FROM (
|
||||
SELECT
|
||||
threads.thread_id,
|
||||
MAX(posts.creation_time) AS bump_time,
|
||||
-- FIRST_VALUE(posts.subject) OVER (PARTITION BY threads.thread_id ORDER BY posts.creation_time) AS first_post_subject,
|
||||
FIRST_VALUE(posts.body) OVER (PARTITION BY threads.thread_id ORDER BY posts.creation_time) AS first_post_body,
|
||||
posts.creation_time -- This can be removed if it's not needed in outer query
|
||||
FROM
|
||||
threads
|
||||
JOIN
|
||||
posts ON threads.thread_id = posts.thread_id
|
||||
WHERE
|
||||
posts.creation_time < NOW() -- Specify your desired time here
|
||||
) AS thread_data
|
||||
GROUP BY
|
||||
thread_data.thread_id, thread_data.bump_time, thread_data.first_post_subject, thread_data.first_post_body
|
||||
ORDER BY
|
||||
thread_data.bump_time DESC
|
||||
LIMIT
|
||||
100;
|
||||
|
||||
|
||||
explain ANALYZE select
|
||||
|
||||
-- runs in 1.5s
|
||||
SELECT
|
||||
thread_data.thread_id,
|
||||
MAX(thread_data.creation_time) AS bump_time,
|
||||
MIN(thread_data.first_post_body) AS first_post_body, -- Getting the first post's body per thread using MIN as a trick since FIRST_VALUE is the same for all rows per thread in the subquery
|
||||
COUNT(*) AS post_count
|
||||
FROM (
|
||||
SELECT
|
||||
threads.thread_id,
|
||||
posts.creation_time,
|
||||
FIRST_VALUE(posts.body) OVER (PARTITION BY threads.thread_id ORDER BY posts.creation_time) AS first_post_body
|
||||
FROM
|
||||
threads
|
||||
JOIN
|
||||
posts ON threads.thread_id = posts.thread_id
|
||||
WHERE
|
||||
posts.creation_time < NOW() -- Specify your desired time here
|
||||
) AS thread_data
|
||||
GROUP BY
|
||||
thread_data.thread_id
|
||||
ORDER BY
|
||||
bump_time DESC
|
||||
LIMIT
|
||||
100;
|
||||
|
||||
|
||||
-- runs in 640ms
|
||||
WITH LatestPosts AS (
|
||||
-- Get the latest post time per thread
|
||||
SELECT
|
||||
p.thread_id,
|
||||
MAX(p.creation_time) AS bump_time
|
||||
FROM
|
||||
posts p
|
||||
WHERE
|
||||
p.creation_time < NOW() - INTERVAL '365 day'
|
||||
GROUP BY
|
||||
p.thread_id
|
||||
ORDER BY
|
||||
bump_time DESC
|
||||
LIMIT 100
|
||||
),
|
||||
FirstPost AS (
|
||||
-- Get the first post's body per thread
|
||||
SELECT
|
||||
p.thread_id,
|
||||
p.body AS first_post_body
|
||||
FROM
|
||||
posts p
|
||||
JOIN
|
||||
LatestPosts lp ON p.thread_id = lp.thread_id
|
||||
WHERE
|
||||
p.creation_time = (
|
||||
SELECT MIN(p2.creation_time)
|
||||
FROM posts p2
|
||||
WHERE p2.thread_id = p.thread_id
|
||||
)
|
||||
)
|
||||
SELECT
|
||||
lp.thread_id,
|
||||
lp.bump_time,
|
||||
fp.first_post_body,
|
||||
COUNT(p.post_id) AS post_count
|
||||
FROM
|
||||
LatestPosts lp
|
||||
JOIN
|
||||
FirstPost fp ON lp.thread_id = fp.thread_id
|
||||
JOIN
|
||||
posts p ON lp.thread_id = p.thread_id
|
||||
GROUP BY
|
||||
lp.thread_id, lp.bump_time, fp.first_post_body
|
||||
ORDER BY
|
||||
lp.bump_time DESC;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
-- runs in 1.2s
|
||||
WITH LatestPosts AS (
|
||||
-- Get the latest post time per thread
|
||||
SELECT
|
||||
p.thread_id,
|
||||
MAX(p.creation_time) AS bump_time
|
||||
FROM
|
||||
posts p
|
||||
WHERE
|
||||
p.creation_time < NOW() - INTERVAL '365 day'
|
||||
GROUP BY
|
||||
p.thread_id
|
||||
ORDER BY
|
||||
bump_time DESC
|
||||
LIMIT 100
|
||||
),
|
||||
FirstPost AS (
|
||||
-- Get the first post's body per thread
|
||||
SELECT
|
||||
p.thread_id,
|
||||
p.body AS first_post_body
|
||||
FROM (
|
||||
SELECT
|
||||
thread_id,
|
||||
body,
|
||||
RANK() OVER(PARTITION BY thread_id ORDER BY creation_time) as rnk
|
||||
FROM
|
||||
posts
|
||||
) p
|
||||
WHERE
|
||||
rnk = 1
|
||||
AND
|
||||
p.thread_id IN (SELECT thread_id FROM LatestPosts) -- Optional: limit rows to those from LatestPosts CTE
|
||||
)
|
||||
SELECT
|
||||
lp.thread_id,
|
||||
lp.bump_time,
|
||||
fp.first_post_body,
|
||||
COUNT(p.post_id) AS post_count
|
||||
FROM
|
||||
LatestPosts lp
|
||||
JOIN
|
||||
FirstPost fp ON lp.thread_id = fp.thread_id
|
||||
JOIN
|
||||
posts p ON lp.thread_id = p.thread_id
|
||||
GROUP BY
|
||||
lp.thread_id, lp.bump_time, fp.first_post_body
|
||||
ORDER BY
|
||||
lp.bump_time DESC;
|
||||
|
||||
|
||||
|
||||
-- runs in ~420ms
|
||||
WITH LatestPosts AS (
|
||||
-- Get the latest post time per thread
|
||||
SELECT
|
||||
p.thread_id,
|
||||
MAX(p.creation_time) AS bump_time
|
||||
FROM
|
||||
posts p
|
||||
WHERE
|
||||
p.creation_time < NOW() - interval '365 day'
|
||||
GROUP BY
|
||||
p.thread_id
|
||||
ORDER BY
|
||||
bump_time DESC
|
||||
LIMIT 100
|
||||
)
|
||||
-- Main Query
|
||||
SELECT
|
||||
lp.thread_id,
|
||||
lp.bump_time,
|
||||
fp.first_post_body,
|
||||
COUNT(p.post_id) AS post_count
|
||||
FROM
|
||||
LatestPosts lp
|
||||
JOIN LATERAL (
|
||||
SELECT p.body AS first_post_body
|
||||
FROM posts p
|
||||
WHERE p.thread_id = lp.thread_id
|
||||
ORDER BY p.creation_time
|
||||
LIMIT 1
|
||||
) fp ON TRUE
|
||||
JOIN
|
||||
posts p ON lp.thread_id = p.thread_id
|
||||
GROUP BY
|
||||
lp.thread_id, lp.bump_time, fp.first_post_body
|
||||
ORDER BY
|
||||
lp.bump_time DESC;
|
||||
|
||||
|
||||
select sum(counts) from
|
||||
(
|
||||
select max(creation_time) as bump_time, count(*) as counts, thread_id
|
||||
from
|
||||
(
|
||||
select thread_id, creation_time from posts
|
||||
where creation_time < NOW()
|
||||
order by creation_time desc
|
||||
) as t
|
||||
group by thread_id
|
||||
order by bump_time desc
|
||||
limit 2
|
||||
) as t;
|
||||
|
||||
|
||||
|
||||
-- actually we don't need these, because "first" would not be the OP it would be the most recent post. We should just join the results with post
|
||||
-- to get the OPs
|
||||
|
||||
-- Create a function that always returns the first non-NULL value:
|
||||
CREATE OR REPLACE FUNCTION public.first_agg (anyelement, anyelement)
|
||||
RETURNS anyelement
|
||||
LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE AS
|
||||
'SELECT $1';
|
||||
|
||||
-- Then wrap an aggregate around it:
|
||||
CREATE AGGREGATE public.first (anyelement) (
|
||||
SFUNC = public.first_agg
|
||||
, STYPE = anyelement
|
||||
, PARALLEL = safe
|
||||
);
|
||||
|
||||
------------
|
||||
|
||||
|
||||
(
|
||||
select max(creation_time) as bump_time, count(*), thread_id, min(creation_time) as where_to_leave_off
|
||||
from
|
||||
(
|
||||
select thread_id, creation_time, body from posts
|
||||
where creation_time < NOW() - interval '365 day'
|
||||
order by creation_time desc limit 1500
|
||||
|
||||
) as t
|
||||
group by thread_id
|
||||
order by bump_time desc
|
||||
)
|
||||
|
||||
union all
|
||||
|
||||
(
|
||||
select max(creation_time) as bump_time, count(*), thread_id, min(creation_time) as where_to_leave_off
|
||||
from
|
||||
(
|
||||
select thread_id, creation_time, body from posts
|
||||
where creation_time < '2022-10-04 12:31:25.000 -0400'
|
||||
order by creation_time desc limit 500
|
||||
) as t
|
||||
group by thread_id
|
||||
order by bump_time desc
|
||||
);
|
||||
|
||||
|
||||
-- runs in 8ms
|
||||
select max(creation_time) as bump_time, count(*), thread_id, min(creation_time) as where_to_leave_off
|
||||
from
|
||||
(
|
||||
(select thread_id, creation_time, body from posts
|
||||
where creation_time < NOW() - interval '365 day'
|
||||
order by creation_time desc limit 1500)
|
||||
union all
|
||||
(select thread_id, creation_time, body from posts
|
||||
where creation_time < '2022-10-04 12:31:25.000 -0400'
|
||||
order by creation_time desc limit 500)
|
||||
|
||||
) as t
|
||||
group by thread_id
|
||||
order by bump_time desc;
|
||||
|
||||
|
||||
select max(creation_time) as bump_time, count(*), thread_id, min(creation_time) as where_to_leave_off
|
||||
from
|
||||
(
|
||||
select thread_id, creation_time, body from posts
|
||||
where creation_time < NOW() - interval '365 day'
|
||||
order by creation_time desc limit 10000
|
||||
) as t
|
||||
group by thread_id
|
||||
order by bump_time desc;
|
||||
|
||||
|
||||
|
||||
explain analyze SELECT
|
||||
p.thread_id,
|
||||
MAX(p.creation_time) AS bump_time
|
||||
FROM
|
||||
posts p
|
||||
WHERE
|
||||
p.creation_time < NOW() - interval '365 day'
|
||||
GROUP BY
|
||||
p.thread_id
|
||||
ORDER BY
|
||||
bump_time DESC
|
||||
LIMIT 100
|
||||
|
||||
|
||||
-- Create temp table if not exists
|
||||
DROP TABLE IF EXISTS temp_results;
|
||||
|
||||
|
||||
/*
|
||||
* This function scans backwards from p_start_time until we get the desired number of threads.
|
||||
* Since the posts are ordered by creation time, the first post encountered for a particular
|
||||
* thread will have that thread's bump time. Any posts after that will have a timestamp
|
||||
* further back in time, so thread ids that we encounter will not change order the
|
||||
* more we scan.
|
||||
*
|
||||
* This is to avoid a sequence scan on the entire posts table...
|
||||
*/
|
||||
CREATE OR REPLACE FUNCTION fetch_top_threads(p_start_time TIMESTAMPTZ, p_desired_threads INT)
|
||||
RETURNS TABLE(bump_time TIMESTAMPTZ, post_count INT, thread_id INT)
|
||||
LANGUAGE plpgsql
|
||||
AS $$
|
||||
DECLARE
|
||||
limit_size INT := 2000; -- Start with 2000 posts
|
||||
max_iterations INT := 10; -- Maximum iterations to avoid endless loop in case of errors
|
||||
result_count INT := 0;
|
||||
last_min_time TIMESTAMP;
|
||||
BEGIN
|
||||
CREATE TEMP TABLE IF NOT EXISTS temp_results (bump_time TIMESTAMPTZ, post_count INT, thread_id INT, last_fetched_time TIMESTAMP);
|
||||
TRUNCATE temp_results; -- clear the table
|
||||
|
||||
FOR i IN 1..max_iterations LOOP
|
||||
-- Fetch posts starting from the previous min time
|
||||
INSERT INTO temp_results(bump_time, post_count, thread_id, last_fetched_time)
|
||||
SELECT max(t.creation_time) as bump_time, count(*) as post_count, t.thread_id, min(t.creation_time) as last_fetched_time
|
||||
FROM (
|
||||
SELECT p.thread_id, p.creation_time
|
||||
FROM posts p
|
||||
WHERE p.creation_time < COALESCE(last_min_time, p_start_time)
|
||||
ORDER BY p.creation_time DESC
|
||||
LIMIT limit_size
|
||||
) as t
|
||||
GROUP BY t.thread_id;
|
||||
|
||||
-- Check if we have enough threads
|
||||
SELECT COUNT(DISTINCT temp_results.thread_id) INTO result_count FROM temp_results;
|
||||
|
||||
IF result_count >= p_desired_threads THEN
|
||||
EXIT;
|
||||
END IF;
|
||||
|
||||
-- Get the last min time from the temp table for the next iteration
|
||||
SELECT MIN(last_fetched_time) INTO last_min_time FROM temp_results;
|
||||
|
||||
-- Double the limit for the next iteration
|
||||
limit_size := limit_size * 2;
|
||||
|
||||
END LOOP;
|
||||
|
||||
-- Return the results
|
||||
RETURN QUERY SELECT temp_results.bump_time, temp_results.post_count, temp_results.thread_id FROM temp_results ORDER BY temp_results.bump_time DESC;
|
||||
|
||||
DROP TABLE IF EXISTS temp_results;
|
||||
END;
|
||||
$$;
|
||||
|
||||
|
||||
select * FROM fetch_top_threads(NOW(), 100);
|
||||
|
||||
WITH TopThreads AS (
|
||||
SELECT * FROM fetch_top_threads(NOW() - interval '365d', 100)
|
||||
)
|
||||
SELECT
|
||||
tt.thread_id,
|
||||
tt.bump_time,
|
||||
tt.post_count,
|
||||
p.creation_time,
|
||||
p.body,
|
||||
p.post_id
|
||||
FROM
|
||||
TopThreads tt
|
||||
JOIN
|
||||
posts p ON tt.thread_id = p.thread_id
|
||||
ORDER BY
|
||||
p.creation_time asc;
|
||||
|
||||
|
||||
SELECT *
|
||||
FROM
|
||||
fetch_top_threads(NOW() - interval '365d', 100) top
|
||||
JOIN
|
||||
posts ON posts.thread_id = top.thread_id
|
||||
ORDER BY posts.creation_time ASC;
|
||||
|
||||
|
||||
SELECT
|
||||
post_id,
|
||||
board_post_id,
|
||||
posts.thread_id,
|
||||
bump_time,
|
||||
creation_time,
|
||||
body
|
||||
FROM
|
||||
fetch_top_threads(NOW() - interval '365d', 100) top
|
||||
JOIN
|
||||
posts ON posts.thread_id = top.thread_id
|
||||
ORDER BY posts.creation_time ASC;
|
||||
|
||||
|
||||
VACUUM ANALYZE posts;
|
||||
|
||||
SELECT posts.post_id, posts.thread_id, posts.body
|
||||
FROM
|
||||
fetch_top_threads(NOW(), 100) top
|
||||
JOIN
|
||||
posts ON posts.thread_id = top.thread_id;
|
||||
|
||||
|
||||
WITH
|
||||
top AS (
|
||||
SELECT * FROM fetch_top_threads(NOW(), 100) top
|
||||
),
|
||||
joined AS (
|
||||
SELECT posts.post_id, posts.thread_id, posts.body
|
||||
FROM top JOIN posts ON posts.thread_id = top.thread_id
|
||||
ORDER BY posts.creation_time DESC
|
||||
),
|
||||
grouped AS (
|
||||
SELECT min(joined.post_id) op_post_id, joined.thread_id FROM joined
|
||||
GROUP BY joined.thread_id
|
||||
) SELECT * FROM grouped JOIN joined ON op_post_id = joined.post_id;
|
||||
|
||||
|
||||
WITH
|
||||
top AS (
|
||||
SELECT * FROM fetch_top_threads(NOW(), 100) OFFSET 0
|
||||
),
|
||||
joined AS (
|
||||
SELECT posts.post_id, posts.thread_id, posts.body, posts.creation_time
|
||||
FROM top JOIN posts ON posts.thread_id = top.thread_id
|
||||
ORDER BY posts.creation_time DESC
|
||||
),
|
||||
grouped AS (
|
||||
SELECT min(joined.creation_time) op_creation_time, joined.thread_id, count(*) AS post_count FROM joined
|
||||
GROUP BY joined.thread_id
|
||||
) SELECT joined.*, grouped.post_count
|
||||
FROM grouped JOIN joined ON op_creation_time = joined.creation_time AND grouped.thread_id = joined.thread_id;
|
||||
|
||||
|
||||
WITH
|
||||
top AS (
|
||||
SELECT * FROM fetch_top_threads(NOW(), 200) OFFSET 0
|
||||
),
|
||||
joined AS (
|
||||
SELECT
|
||||
posts.post_id,
|
||||
posts.thread_id,
|
||||
posts.body,
|
||||
posts.creation_time,
|
||||
ROW_NUMBER() OVER(PARTITION BY posts.thread_id ORDER BY posts.creation_time, posts.post_id) as rn
|
||||
FROM top
|
||||
JOIN posts ON posts.thread_id = top.thread_id
|
||||
),
|
||||
grouped AS (
|
||||
SELECT joined.post_id, joined.thread_id
|
||||
FROM joined
|
||||
WHERE joined.rn = 1
|
||||
)
|
||||
SELECT joined.*
|
||||
FROM grouped
|
||||
JOIN joined ON grouped.post_id = joined.post_id;
|
||||
|
||||
|
||||
|
||||
SELECT
|
||||
sub.post_id,
|
||||
sub.thread_id,
|
||||
sub.body,
|
||||
sub.creation_time
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
posts.post_id,
|
||||
posts.thread_id,
|
||||
posts.body,
|
||||
posts.creation_time,
|
||||
ROW_NUMBER() OVER(PARTITION BY posts.thread_id ORDER BY posts.creation_time, posts.post_id) as rn
|
||||
FROM fetch_top_threads(NOW(), 200) AS top
|
||||
JOIN posts ON posts.thread_id = top.thread_id
|
||||
) AS sub
|
||||
WHERE
|
||||
sub.rn = 1;
|
||||
|
||||
|
||||
SELECT
|
||||
sub.post_id,
|
||||
sub.thread_id,
|
||||
sub.body,
|
||||
sub.creation_time
|
||||
FROM
|
||||
(
|
||||
SELECT
|
||||
posts.post_id,
|
||||
posts.thread_id,
|
||||
posts.body,
|
||||
posts.creation_time,
|
||||
ROW_NUMBER() OVER(PARTITION BY posts.thread_id ORDER BY posts.creation_time, posts.post_id) as rn
|
||||
FROM
|
||||
(SELECT * FROM fetch_top_threads(NOW(), 200) OFFSET 0) AS top
|
||||
JOIN posts ON posts.thread_id = top.thread_id
|
||||
) AS sub
|
||||
WHERE
|
||||
sub.rn = 1;
|
||||
|
||||
SELECT count(*) FROM fetch_catalog(NOW(), 400, 0);
|
||||
|
||||
SELECT * FROM fetch_top_threads(NOW(), 200);
|
||||
|
||||
|
||||
SELECT count(*) FROM
|
||||
(
|
||||
SELECT top.thread_id, count(*) as post_count FROM fetch_top_threads(NOW(), 200) top JOIN posts ON posts.thread_id = top.thread_id
|
||||
GROUP BY top.thread_id
|
||||
) t;
|
||||
|
||||
-- how do I
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION fetch_catalog(max_time timestamptz, wanted_result_count integer, offset_count integer)
|
||||
RETURNS TABLE (
|
||||
op_post_id bigint,
|
||||
thread_id bigint,
|
||||
post_count bigint,
|
||||
body text,
|
||||
creation_time timestamptz
|
||||
)
|
||||
LANGUAGE SQL
|
||||
AS $$
|
||||
WITH
|
||||
top AS (
|
||||
SELECT * FROM fetch_top_threads(max_time, wanted_result_count) OFFSET offset_count
|
||||
),
|
||||
joined AS (
|
||||
SELECT posts.post_id, posts.thread_id, posts.body, posts.creation_time
|
||||
FROM top JOIN posts ON posts.thread_id = top.thread_id
|
||||
ORDER BY posts.creation_time DESC
|
||||
),
|
||||
grouped AS (
|
||||
SELECT min(joined.post_id) op_post_id, joined.thread_id, count(*) AS post_count FROM joined
|
||||
GROUP BY joined.thread_id
|
||||
) SELECT grouped.*, joined.body, joined.creation_time FROM grouped JOIN joined ON op_post_id = joined.post_id;
|
||||
$$;
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,216 @@
|
|||
CREATE OR REPLACE FUNCTION fetch_top_threads(
|
||||
p_start_time TIMESTAMPTZ,
|
||||
lookback INT DEFAULT 10000
|
||||
)
|
||||
RETURNS TABLE(bump_time TIMESTAMPTZ, post_count BIGINT, thread_id BIGINT, where_to_leave_off TIMESTAMPTZ)
|
||||
LANGUAGE sql
|
||||
AS $$
|
||||
SELECT
|
||||
max(creation_time) as bump_time,
|
||||
count(*),
|
||||
thread_id,
|
||||
min(creation_time) as where_to_leave_off
|
||||
FROM
|
||||
(
|
||||
SELECT thread_id, creation_time
|
||||
FROM posts
|
||||
WHERE creation_time < p_start_time
|
||||
ORDER BY creation_time DESC
|
||||
LIMIT LEAST(lookback, 250000) -- capping the lookback to 250k
|
||||
) as t
|
||||
GROUP BY thread_id
|
||||
ORDER BY bump_time DESC;
|
||||
$$;
|
||||
|
||||
|
||||
SELECT * FROM fetch_top_threads(NOW() - INTERVAL '10d');
|
||||
|
||||
SELECT * FROM fetch_top_threads(NOW() - INTERVAL '10d') top JOIN posts ON top.thread_id = posts.thread_id;
|
||||
|
||||
|
||||
SELECT
|
||||
ordered_posts.thread_id,
|
||||
count(*),
|
||||
array_agg(ordered_posts.body)
|
||||
FROM (
|
||||
SELECT top.thread_id, posts.body
|
||||
FROM fetch_top_threads(NOW() - INTERVAL '10d', 2000) top
|
||||
JOIN posts ON top.thread_id = posts.thread_id
|
||||
ORDER BY posts.board_post_id ASC
|
||||
) AS ordered_posts
|
||||
GROUP BY ordered_posts.thread_id;
|
||||
|
||||
WITH
|
||||
top AS
|
||||
(
|
||||
SELECT * FROM fetch_top_threads(NOW()) top
|
||||
),
|
||||
tall_posts AS
|
||||
(
|
||||
SELECT top.post_count as estimated_post_count, posts.*
|
||||
FROM top
|
||||
JOIN posts ON top.thread_id = posts.thread_id
|
||||
),
|
||||
op_posts AS
|
||||
(
|
||||
SELECT DISTINCT ON (t.thread_id)
|
||||
*
|
||||
FROM tall_posts t
|
||||
ORDER BY t.thread_id, t.board_post_id
|
||||
)
|
||||
SELECT * FROM op_posts;
|
||||
|
||||
|
||||
WITH
|
||||
top AS
|
||||
(
|
||||
SELECT * FROM fetch_top_threads(NOW()) top
|
||||
),
|
||||
tall_posts AS
|
||||
(
|
||||
SELECT top.post_count as estimated_post_count, posts.*
|
||||
FROM top
|
||||
JOIN posts ON top.thread_id = posts.thread_id
|
||||
),
|
||||
op_posts AS
|
||||
(
|
||||
SELECT DISTINCT ON (t.thread_id)
|
||||
*
|
||||
FROM tall_posts t
|
||||
ORDER BY t.thread_id, t.board_post_id
|
||||
),
|
||||
post_counts AS
|
||||
(
|
||||
SELECT thread_id, count(*) as post_count FROM
|
||||
tall_posts
|
||||
GROUP BY thread_id
|
||||
)
|
||||
SELECT post_counts.post_count, op_posts.* FROM op_posts JOIN post_counts ON op_posts.thread_id = post_counts.thread_id;
|
||||
|
||||
|
||||
-- 71ms!
|
||||
WITH
|
||||
top AS
|
||||
(
|
||||
SELECT * FROM fetch_top_threads(NOW() - INTERVAL '1y', 1000) top
|
||||
),
|
||||
tall_posts AS
|
||||
(
|
||||
SELECT
|
||||
top.post_count as estimated_post_count,
|
||||
top.bump_time,
|
||||
posts.post_id,
|
||||
posts.board_post_id,
|
||||
posts.creation_time,
|
||||
posts.body,
|
||||
posts.thread_id
|
||||
FROM top
|
||||
JOIN posts ON top.thread_id = posts.thread_id
|
||||
),
|
||||
op_posts AS
|
||||
(
|
||||
SELECT DISTINCT ON (t.thread_id)
|
||||
*
|
||||
FROM tall_posts t
|
||||
ORDER BY t.thread_id, t.board_post_id
|
||||
),
|
||||
post_counts AS
|
||||
(
|
||||
SELECT thread_id, count(*) as post_count FROM
|
||||
tall_posts
|
||||
GROUP BY thread_id
|
||||
)
|
||||
SELECT
|
||||
post_counts.post_count,
|
||||
op_posts.*,
|
||||
threads.board_thread_id,
|
||||
boards.pathpart,
|
||||
sites."name"
|
||||
FROM op_posts
|
||||
JOIN post_counts ON op_posts.thread_id = post_counts.thread_id
|
||||
JOIN threads ON op_posts.thread_id = threads.thread_id
|
||||
JOIN boards ON threads.board_id = boards.board_id
|
||||
JOIN sites ON sites.site_id = boards.site_id;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION fetch_catalog(max_time timestamptz, max_row_read int DEFAULT 10000)
|
||||
RETURNS TABLE (
|
||||
post_count bigint,
|
||||
estimated_post_count bigint,
|
||||
post_id bigint,
|
||||
board_post_id bigint,
|
||||
creation_time timestamptz,
|
||||
body text,
|
||||
thread_id bigint,
|
||||
board_thread_id bigint,
|
||||
pathpart text,
|
||||
name text
|
||||
) AS $$
|
||||
WITH
|
||||
top AS
|
||||
(
|
||||
SELECT * FROM fetch_top_threads(max_time, max_row_read) AS top
|
||||
),
|
||||
tall_posts AS
|
||||
(
|
||||
SELECT
|
||||
top.post_count as estimated_post_count,
|
||||
posts.post_id,
|
||||
posts.board_post_id,
|
||||
posts.creation_time,
|
||||
posts.body,
|
||||
posts.thread_id
|
||||
FROM top
|
||||
JOIN posts ON top.thread_id = posts.thread_id
|
||||
WHERE creation_time < max_time
|
||||
),
|
||||
op_posts AS
|
||||
(
|
||||
SELECT DISTINCT ON (t.thread_id)
|
||||
*
|
||||
FROM tall_posts t
|
||||
ORDER BY t.thread_id, t.board_post_id
|
||||
),
|
||||
post_counts AS
|
||||
(
|
||||
SELECT thread_id, count(*) as post_count FROM
|
||||
tall_posts
|
||||
GROUP BY thread_id
|
||||
)
|
||||
SELECT
|
||||
post_counts.post_count,
|
||||
op_posts.*,
|
||||
threads.board_thread_id,
|
||||
boards.pathpart,
|
||||
sites."name"
|
||||
FROM op_posts
|
||||
JOIN post_counts ON op_posts.thread_id = post_counts.thread_id
|
||||
JOIN threads ON op_posts.thread_id = threads.thread_id
|
||||
JOIN boards ON threads.board_id = boards.board_id
|
||||
JOIN sites ON sites.site_id = boards.site_id;
|
||||
$$ LANGUAGE sql;
|
||||
|
||||
SELECT * FROM fetch_catalog(NOW() - INTERVAL '1y', 1001);
|
||||
|
||||
|
||||
-- CREATE INDEX idx_posts_thread_board ON posts (thread_id, board_post_id);
|
||||
ANALYZE posts;
|
||||
|
||||
|
||||
SELECT DISTINCT ON (top.thread_id)
|
||||
top.thread_id,
|
||||
posts.post_id,
|
||||
posts.body,
|
||||
top.post_count,
|
||||
top.where_to_leave_off
|
||||
FROM fetch_top_threads(NOW() - INTERVAL '10d') top
|
||||
JOIN posts ON top.thread_id = posts.thread_id
|
||||
ORDER BY top.thread_id, posts.board_post_id;
|
||||
|
||||
|
||||
SELECT * FROM threads WHERE thread_id = 3110;
|
||||
SELECT * FROM posts WHERE thread_id = 3110 ORDER BY board_post_id ASC;
|
||||
SELECT * FROM boards;
|
||||
|
||||
SELECT * FROM threads WHERE thread_id = 11314;
|
||||
ANALYZE posts;
|
|
@ -16,6 +16,9 @@ DROP TABLE IF EXISTS threads CASCADE;
|
|||
DROP TABLE IF EXISTS posts CASCADE;
|
||||
DROP TABLE IF EXISTS attachments CASCADE;
|
||||
DROP FUNCTION IF EXISTS update_post_body_search_index;
|
||||
DROP FUNCTION IF EXISTS fetch_top_threads;
|
||||
DROP FUNCTION IF EXISTS fetch_catalog;
|
||||
|
||||
|
||||
-- It won't let us drop roles otherwise and the IFs are to keep this script idempotent.
|
||||
DO
|
||||
|
@ -72,9 +75,8 @@ CREATE INDEX posts_creation_time_idx ON posts (creation_time);
|
|||
CREATE INDEX posts_body_search_idx ON posts USING GIN (body_search_index);
|
||||
CREATE INDEX posts_thread_id_idx ON posts (thread_id);
|
||||
CREATE INDEX posts_board_post_id_idx ON posts (board_post_id);
|
||||
|
||||
-- This is to optimize joins on thread_id and filtering/sorting by creation_time in 'posts' table.
|
||||
CREATE INDEX posts_thread_id_creation_time_idx ON posts (thread_id, creation_time);
|
||||
CREATE INDEX posts_thread_id_creation_time_idx ON posts (creation_time, thread_id);
|
||||
--CREATE INDEX posts_thread_id_board_post_id_idx ON posts (thread_id, board_post_id);
|
||||
|
||||
CREATE OR REPLACE FUNCTION update_post_body_search_index() RETURNS trigger AS $$
|
||||
BEGIN
|
||||
|
@ -169,6 +171,89 @@ $$ LANGUAGE sql;
|
|||
-- 1:21 for full db (nothing inserted)
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION fetch_top_threads(
|
||||
p_start_time TIMESTAMPTZ,
|
||||
lookback INT DEFAULT 10000
|
||||
)
|
||||
RETURNS TABLE(bump_time TIMESTAMPTZ, post_count BIGINT, thread_id BIGINT, where_to_leave_off TIMESTAMPTZ)
|
||||
LANGUAGE sql
|
||||
AS $$
|
||||
SELECT
|
||||
max(creation_time) as bump_time,
|
||||
count(*),
|
||||
thread_id,
|
||||
min(creation_time) as where_to_leave_off
|
||||
FROM
|
||||
(
|
||||
SELECT thread_id, creation_time
|
||||
FROM posts
|
||||
WHERE creation_time < p_start_time
|
||||
ORDER BY creation_time DESC
|
||||
LIMIT LEAST(lookback, 250000) -- capping the lookback to 250k
|
||||
) as t
|
||||
GROUP BY thread_id
|
||||
ORDER BY bump_time DESC;
|
||||
$$;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION fetch_catalog(max_time timestamptz, max_row_read int DEFAULT 10000)
|
||||
RETURNS TABLE (
|
||||
post_count bigint,
|
||||
estimated_post_count bigint,
|
||||
post_id bigint,
|
||||
board_post_id bigint,
|
||||
creation_time timestamptz,
|
||||
body text,
|
||||
thread_id bigint,
|
||||
board_thread_id bigint,
|
||||
pathpart text,
|
||||
name text
|
||||
) AS $$
|
||||
WITH
|
||||
top AS
|
||||
(
|
||||
SELECT * FROM fetch_top_threads(max_time, max_row_read) AS top
|
||||
),
|
||||
tall_posts AS
|
||||
(
|
||||
SELECT
|
||||
top.post_count as estimated_post_count,
|
||||
posts.post_id,
|
||||
posts.board_post_id,
|
||||
posts.creation_time,
|
||||
posts.body,
|
||||
posts.thread_id
|
||||
FROM top
|
||||
JOIN posts ON top.thread_id = posts.thread_id
|
||||
WHERE creation_time < max_time
|
||||
),
|
||||
op_posts AS
|
||||
(
|
||||
SELECT DISTINCT ON (t.thread_id)
|
||||
*
|
||||
FROM tall_posts t
|
||||
ORDER BY t.thread_id, t.board_post_id
|
||||
),
|
||||
post_counts AS
|
||||
(
|
||||
SELECT thread_id, count(*) as post_count FROM
|
||||
tall_posts
|
||||
GROUP BY thread_id
|
||||
)
|
||||
SELECT
|
||||
post_counts.post_count,
|
||||
op_posts.*,
|
||||
threads.board_thread_id,
|
||||
boards.pathpart,
|
||||
sites."name"
|
||||
FROM op_posts
|
||||
JOIN post_counts ON op_posts.thread_id = post_counts.thread_id
|
||||
JOIN threads ON op_posts.thread_id = threads.thread_id
|
||||
JOIN boards ON threads.board_id = boards.board_id
|
||||
JOIN sites ON sites.site_id = boards.site_id;
|
||||
$$ LANGUAGE sql;
|
||||
|
||||
|
||||
/*
|
||||
* Permissions
|
||||
*/
|
||||
|
@ -194,6 +279,8 @@ GRANT ALL ON posts TO chan_archiver;
|
|||
GRANT ALL ON attachments TO chan_archiver;
|
||||
GRANT EXECUTE ON FUNCTION update_post_body_search_index TO chan_archiver;
|
||||
GRANT EXECUTE ON FUNCTION insert_posts_and_return_ids TO chan_archiver;
|
||||
GRANT EXECUTE ON FUNCTION fetch_top_threads TO chan_archiver;
|
||||
GRANT EXECUTE ON FUNCTION fetch_catalog TO chan_archiver;
|
||||
GRANT usage, select ON SEQUENCE sites_site_id_seq TO chan_archiver;
|
||||
GRANT usage, select ON SEQUENCE boards_board_id_seq TO chan_archiver;
|
||||
GRANT usage, select ON SEQUENCE threads_thread_id_seq TO chan_archiver;
|
||||
|
|
Loading…
Reference in New Issue