Compare commits

...

2 Commits

Author SHA1 Message Date
towards-a-new-leftypol c297bae697 - Change fetch_catalog and fetch_top_threads functions
- neither was correct
    - fetch_top_threads doesn't need to be a loop, it will be up to the
      application whether or not to load more results. This simplifies
      the function, which was sometimes returning more than one row for
      each thread_id anyway
    - fetch_catalog is now fairly performant, and fetches all relevant
      information
2023-10-26 18:19:32 -04:00
towards-a-new-leftypol 7b8b1b2255 commit some sql experiments 2023-10-17 08:19:33 -04:00
3 changed files with 899 additions and 3 deletions

593
sql/archive_tests.sql Normal file
View File

@ -0,0 +1,593 @@
select count(*) from posts;
select board_id, count(*) as post_count from posts natural join threads group by board_id;
select sum(post_count) from (select board_id, count(*) as post_count from posts join threads on posts.thread_id = threads.thread_id group by threads.board_id) as t;
select board_id, count(*) from threads natural join boards group by board_id;
select count(*) from posts;
SELECT
threads.thread_id,
MAX(posts.creation_time) AS bump_time,
-- FIRST_VALUE(posts.subject) OVER (PARTITION BY threads.thread_id ORDER BY posts.creation_time) AS first_post_subject,
FIRST_VALUE(posts.body) OVER (PARTITION BY threads.thread_id ORDER BY posts.creation_time) AS first_post_body,
COUNT(*) AS post_count
FROM
threads
JOIN
posts ON threads.thread_id = posts.thread_id
WHERE
posts.creation_time < NOW() -- Specify your desired time here
GROUP BY
threads.thread_id
ORDER BY
bump_time DESC
LIMIT
100;
SELECT
thread_data.thread_id,
thread_data.bump_time,
thread_data.first_post_subject,
thread_data.first_post_body,
COUNT(*) AS post_count
FROM (
SELECT
threads.thread_id,
MAX(posts.creation_time) AS bump_time,
-- FIRST_VALUE(posts.subject) OVER (PARTITION BY threads.thread_id ORDER BY posts.creation_time) AS first_post_subject,
FIRST_VALUE(posts.body) OVER (PARTITION BY threads.thread_id ORDER BY posts.creation_time) AS first_post_body,
posts.creation_time -- This can be removed if it's not needed in outer query
FROM
threads
JOIN
posts ON threads.thread_id = posts.thread_id
WHERE
posts.creation_time < NOW() -- Specify your desired time here
) AS thread_data
GROUP BY
thread_data.thread_id, thread_data.bump_time, thread_data.first_post_subject, thread_data.first_post_body
ORDER BY
thread_data.bump_time DESC
LIMIT
100;
explain ANALYZE select
-- runs in 1.5s
SELECT
thread_data.thread_id,
MAX(thread_data.creation_time) AS bump_time,
MIN(thread_data.first_post_body) AS first_post_body, -- Getting the first post's body per thread using MIN as a trick since FIRST_VALUE is the same for all rows per thread in the subquery
COUNT(*) AS post_count
FROM (
SELECT
threads.thread_id,
posts.creation_time,
FIRST_VALUE(posts.body) OVER (PARTITION BY threads.thread_id ORDER BY posts.creation_time) AS first_post_body
FROM
threads
JOIN
posts ON threads.thread_id = posts.thread_id
WHERE
posts.creation_time < NOW() -- Specify your desired time here
) AS thread_data
GROUP BY
thread_data.thread_id
ORDER BY
bump_time DESC
LIMIT
100;
-- runs in 640ms
WITH LatestPosts AS (
-- Get the latest post time per thread
SELECT
p.thread_id,
MAX(p.creation_time) AS bump_time
FROM
posts p
WHERE
p.creation_time < NOW() - INTERVAL '365 day'
GROUP BY
p.thread_id
ORDER BY
bump_time DESC
LIMIT 100
),
FirstPost AS (
-- Get the first post's body per thread
SELECT
p.thread_id,
p.body AS first_post_body
FROM
posts p
JOIN
LatestPosts lp ON p.thread_id = lp.thread_id
WHERE
p.creation_time = (
SELECT MIN(p2.creation_time)
FROM posts p2
WHERE p2.thread_id = p.thread_id
)
)
SELECT
lp.thread_id,
lp.bump_time,
fp.first_post_body,
COUNT(p.post_id) AS post_count
FROM
LatestPosts lp
JOIN
FirstPost fp ON lp.thread_id = fp.thread_id
JOIN
posts p ON lp.thread_id = p.thread_id
GROUP BY
lp.thread_id, lp.bump_time, fp.first_post_body
ORDER BY
lp.bump_time DESC;
-- runs in 1.2s
WITH LatestPosts AS (
-- Get the latest post time per thread
SELECT
p.thread_id,
MAX(p.creation_time) AS bump_time
FROM
posts p
WHERE
p.creation_time < NOW() - INTERVAL '365 day'
GROUP BY
p.thread_id
ORDER BY
bump_time DESC
LIMIT 100
),
FirstPost AS (
-- Get the first post's body per thread
SELECT
p.thread_id,
p.body AS first_post_body
FROM (
SELECT
thread_id,
body,
RANK() OVER(PARTITION BY thread_id ORDER BY creation_time) as rnk
FROM
posts
) p
WHERE
rnk = 1
AND
p.thread_id IN (SELECT thread_id FROM LatestPosts) -- Optional: limit rows to those from LatestPosts CTE
)
SELECT
lp.thread_id,
lp.bump_time,
fp.first_post_body,
COUNT(p.post_id) AS post_count
FROM
LatestPosts lp
JOIN
FirstPost fp ON lp.thread_id = fp.thread_id
JOIN
posts p ON lp.thread_id = p.thread_id
GROUP BY
lp.thread_id, lp.bump_time, fp.first_post_body
ORDER BY
lp.bump_time DESC;
-- runs in ~420ms
WITH LatestPosts AS (
-- Get the latest post time per thread
SELECT
p.thread_id,
MAX(p.creation_time) AS bump_time
FROM
posts p
WHERE
p.creation_time < NOW() - interval '365 day'
GROUP BY
p.thread_id
ORDER BY
bump_time DESC
LIMIT 100
)
-- Main Query
SELECT
lp.thread_id,
lp.bump_time,
fp.first_post_body,
COUNT(p.post_id) AS post_count
FROM
LatestPosts lp
JOIN LATERAL (
SELECT p.body AS first_post_body
FROM posts p
WHERE p.thread_id = lp.thread_id
ORDER BY p.creation_time
LIMIT 1
) fp ON TRUE
JOIN
posts p ON lp.thread_id = p.thread_id
GROUP BY
lp.thread_id, lp.bump_time, fp.first_post_body
ORDER BY
lp.bump_time DESC;
select sum(counts) from
(
select max(creation_time) as bump_time, count(*) as counts, thread_id
from
(
select thread_id, creation_time from posts
where creation_time < NOW()
order by creation_time desc
) as t
group by thread_id
order by bump_time desc
limit 2
) as t;
-- actually we don't need these, because "first" would not be the OP it would be the most recent post. We should just join the results with post
-- to get the OPs
-- Create a function that always returns the first non-NULL value:
CREATE OR REPLACE FUNCTION public.first_agg (anyelement, anyelement)
RETURNS anyelement
LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE AS
'SELECT $1';
-- Then wrap an aggregate around it:
CREATE AGGREGATE public.first (anyelement) (
SFUNC = public.first_agg
, STYPE = anyelement
, PARALLEL = safe
);
------------
(
select max(creation_time) as bump_time, count(*), thread_id, min(creation_time) as where_to_leave_off
from
(
select thread_id, creation_time, body from posts
where creation_time < NOW() - interval '365 day'
order by creation_time desc limit 1500
) as t
group by thread_id
order by bump_time desc
)
union all
(
select max(creation_time) as bump_time, count(*), thread_id, min(creation_time) as where_to_leave_off
from
(
select thread_id, creation_time, body from posts
where creation_time < '2022-10-04 12:31:25.000 -0400'
order by creation_time desc limit 500
) as t
group by thread_id
order by bump_time desc
);
-- runs in 8ms
select max(creation_time) as bump_time, count(*), thread_id, min(creation_time) as where_to_leave_off
from
(
(select thread_id, creation_time, body from posts
where creation_time < NOW() - interval '365 day'
order by creation_time desc limit 1500)
union all
(select thread_id, creation_time, body from posts
where creation_time < '2022-10-04 12:31:25.000 -0400'
order by creation_time desc limit 500)
) as t
group by thread_id
order by bump_time desc;
select max(creation_time) as bump_time, count(*), thread_id, min(creation_time) as where_to_leave_off
from
(
select thread_id, creation_time, body from posts
where creation_time < NOW() - interval '365 day'
order by creation_time desc limit 10000
) as t
group by thread_id
order by bump_time desc;
explain analyze SELECT
p.thread_id,
MAX(p.creation_time) AS bump_time
FROM
posts p
WHERE
p.creation_time < NOW() - interval '365 day'
GROUP BY
p.thread_id
ORDER BY
bump_time DESC
LIMIT 100
-- Create temp table if not exists
DROP TABLE IF EXISTS temp_results;
/*
* This function scans backwards from p_start_time until we get the desired number of threads.
* Since the posts are ordered by creation time, the first post encountered for a particular
* thread will have that thread's bump time. Any posts after that will have a timestamp
* further back in time, so thread ids that we encounter will not change order the
* more we scan.
*
* This is to avoid a sequence scan on the entire posts table...
*/
CREATE OR REPLACE FUNCTION fetch_top_threads(p_start_time TIMESTAMPTZ, p_desired_threads INT)
RETURNS TABLE(bump_time TIMESTAMPTZ, post_count INT, thread_id INT)
LANGUAGE plpgsql
AS $$
DECLARE
limit_size INT := 2000; -- Start with 2000 posts
max_iterations INT := 10; -- Maximum iterations to avoid endless loop in case of errors
result_count INT := 0;
last_min_time TIMESTAMP;
BEGIN
CREATE TEMP TABLE IF NOT EXISTS temp_results (bump_time TIMESTAMPTZ, post_count INT, thread_id INT, last_fetched_time TIMESTAMP);
TRUNCATE temp_results; -- clear the table
FOR i IN 1..max_iterations LOOP
-- Fetch posts starting from the previous min time
INSERT INTO temp_results(bump_time, post_count, thread_id, last_fetched_time)
SELECT max(t.creation_time) as bump_time, count(*) as post_count, t.thread_id, min(t.creation_time) as last_fetched_time
FROM (
SELECT p.thread_id, p.creation_time
FROM posts p
WHERE p.creation_time < COALESCE(last_min_time, p_start_time)
ORDER BY p.creation_time DESC
LIMIT limit_size
) as t
GROUP BY t.thread_id;
-- Check if we have enough threads
SELECT COUNT(DISTINCT temp_results.thread_id) INTO result_count FROM temp_results;
IF result_count >= p_desired_threads THEN
EXIT;
END IF;
-- Get the last min time from the temp table for the next iteration
SELECT MIN(last_fetched_time) INTO last_min_time FROM temp_results;
-- Double the limit for the next iteration
limit_size := limit_size * 2;
END LOOP;
-- Return the results
RETURN QUERY SELECT temp_results.bump_time, temp_results.post_count, temp_results.thread_id FROM temp_results ORDER BY temp_results.bump_time DESC;
DROP TABLE IF EXISTS temp_results;
END;
$$;
select * FROM fetch_top_threads(NOW(), 100);
WITH TopThreads AS (
SELECT * FROM fetch_top_threads(NOW() - interval '365d', 100)
)
SELECT
tt.thread_id,
tt.bump_time,
tt.post_count,
p.creation_time,
p.body,
p.post_id
FROM
TopThreads tt
JOIN
posts p ON tt.thread_id = p.thread_id
ORDER BY
p.creation_time asc;
SELECT *
FROM
fetch_top_threads(NOW() - interval '365d', 100) top
JOIN
posts ON posts.thread_id = top.thread_id
ORDER BY posts.creation_time ASC;
SELECT
post_id,
board_post_id,
posts.thread_id,
bump_time,
creation_time,
body
FROM
fetch_top_threads(NOW() - interval '365d', 100) top
JOIN
posts ON posts.thread_id = top.thread_id
ORDER BY posts.creation_time ASC;
VACUUM ANALYZE posts;
SELECT posts.post_id, posts.thread_id, posts.body
FROM
fetch_top_threads(NOW(), 100) top
JOIN
posts ON posts.thread_id = top.thread_id;
WITH
top AS (
SELECT * FROM fetch_top_threads(NOW(), 100) top
),
joined AS (
SELECT posts.post_id, posts.thread_id, posts.body
FROM top JOIN posts ON posts.thread_id = top.thread_id
ORDER BY posts.creation_time DESC
),
grouped AS (
SELECT min(joined.post_id) op_post_id, joined.thread_id FROM joined
GROUP BY joined.thread_id
) SELECT * FROM grouped JOIN joined ON op_post_id = joined.post_id;
WITH
top AS (
SELECT * FROM fetch_top_threads(NOW(), 100) OFFSET 0
),
joined AS (
SELECT posts.post_id, posts.thread_id, posts.body, posts.creation_time
FROM top JOIN posts ON posts.thread_id = top.thread_id
ORDER BY posts.creation_time DESC
),
grouped AS (
SELECT min(joined.creation_time) op_creation_time, joined.thread_id, count(*) AS post_count FROM joined
GROUP BY joined.thread_id
) SELECT joined.*, grouped.post_count
FROM grouped JOIN joined ON op_creation_time = joined.creation_time AND grouped.thread_id = joined.thread_id;
WITH
top AS (
SELECT * FROM fetch_top_threads(NOW(), 200) OFFSET 0
),
joined AS (
SELECT
posts.post_id,
posts.thread_id,
posts.body,
posts.creation_time,
ROW_NUMBER() OVER(PARTITION BY posts.thread_id ORDER BY posts.creation_time, posts.post_id) as rn
FROM top
JOIN posts ON posts.thread_id = top.thread_id
),
grouped AS (
SELECT joined.post_id, joined.thread_id
FROM joined
WHERE joined.rn = 1
)
SELECT joined.*
FROM grouped
JOIN joined ON grouped.post_id = joined.post_id;
SELECT
sub.post_id,
sub.thread_id,
sub.body,
sub.creation_time
FROM
(
SELECT
posts.post_id,
posts.thread_id,
posts.body,
posts.creation_time,
ROW_NUMBER() OVER(PARTITION BY posts.thread_id ORDER BY posts.creation_time, posts.post_id) as rn
FROM fetch_top_threads(NOW(), 200) AS top
JOIN posts ON posts.thread_id = top.thread_id
) AS sub
WHERE
sub.rn = 1;
SELECT
sub.post_id,
sub.thread_id,
sub.body,
sub.creation_time
FROM
(
SELECT
posts.post_id,
posts.thread_id,
posts.body,
posts.creation_time,
ROW_NUMBER() OVER(PARTITION BY posts.thread_id ORDER BY posts.creation_time, posts.post_id) as rn
FROM
(SELECT * FROM fetch_top_threads(NOW(), 200) OFFSET 0) AS top
JOIN posts ON posts.thread_id = top.thread_id
) AS sub
WHERE
sub.rn = 1;
SELECT count(*) FROM fetch_catalog(NOW(), 400, 0);
SELECT * FROM fetch_top_threads(NOW(), 200);
SELECT count(*) FROM
(
SELECT top.thread_id, count(*) as post_count FROM fetch_top_threads(NOW(), 200) top JOIN posts ON posts.thread_id = top.thread_id
GROUP BY top.thread_id
) t;
-- how do I
CREATE OR REPLACE FUNCTION fetch_catalog(max_time timestamptz, wanted_result_count integer, offset_count integer)
RETURNS TABLE (
op_post_id bigint,
thread_id bigint,
post_count bigint,
body text,
creation_time timestamptz
)
LANGUAGE SQL
AS $$
WITH
top AS (
SELECT * FROM fetch_top_threads(max_time, wanted_result_count) OFFSET offset_count
),
joined AS (
SELECT posts.post_id, posts.thread_id, posts.body, posts.creation_time
FROM top JOIN posts ON posts.thread_id = top.thread_id
ORDER BY posts.creation_time DESC
),
grouped AS (
SELECT min(joined.post_id) op_post_id, joined.thread_id, count(*) AS post_count FROM joined
GROUP BY joined.thread_id
) SELECT grouped.*, joined.body, joined.creation_time FROM grouped JOIN joined ON op_post_id = joined.post_id;
$$;

216
sql/archive_tests2.sql Normal file
View File

@ -0,0 +1,216 @@
CREATE OR REPLACE FUNCTION fetch_top_threads(
p_start_time TIMESTAMPTZ,
lookback INT DEFAULT 10000
)
RETURNS TABLE(bump_time TIMESTAMPTZ, post_count BIGINT, thread_id BIGINT, where_to_leave_off TIMESTAMPTZ)
LANGUAGE sql
AS $$
SELECT
max(creation_time) as bump_time,
count(*),
thread_id,
min(creation_time) as where_to_leave_off
FROM
(
SELECT thread_id, creation_time
FROM posts
WHERE creation_time < p_start_time
ORDER BY creation_time DESC
LIMIT LEAST(lookback, 250000) -- capping the lookback to 250k
) as t
GROUP BY thread_id
ORDER BY bump_time DESC;
$$;
SELECT * FROM fetch_top_threads(NOW() - INTERVAL '10d');
SELECT * FROM fetch_top_threads(NOW() - INTERVAL '10d') top JOIN posts ON top.thread_id = posts.thread_id;
SELECT
ordered_posts.thread_id,
count(*),
array_agg(ordered_posts.body)
FROM (
SELECT top.thread_id, posts.body
FROM fetch_top_threads(NOW() - INTERVAL '10d', 2000) top
JOIN posts ON top.thread_id = posts.thread_id
ORDER BY posts.board_post_id ASC
) AS ordered_posts
GROUP BY ordered_posts.thread_id;
WITH
top AS
(
SELECT * FROM fetch_top_threads(NOW()) top
),
tall_posts AS
(
SELECT top.post_count as estimated_post_count, posts.*
FROM top
JOIN posts ON top.thread_id = posts.thread_id
),
op_posts AS
(
SELECT DISTINCT ON (t.thread_id)
*
FROM tall_posts t
ORDER BY t.thread_id, t.board_post_id
)
SELECT * FROM op_posts;
WITH
top AS
(
SELECT * FROM fetch_top_threads(NOW()) top
),
tall_posts AS
(
SELECT top.post_count as estimated_post_count, posts.*
FROM top
JOIN posts ON top.thread_id = posts.thread_id
),
op_posts AS
(
SELECT DISTINCT ON (t.thread_id)
*
FROM tall_posts t
ORDER BY t.thread_id, t.board_post_id
),
post_counts AS
(
SELECT thread_id, count(*) as post_count FROM
tall_posts
GROUP BY thread_id
)
SELECT post_counts.post_count, op_posts.* FROM op_posts JOIN post_counts ON op_posts.thread_id = post_counts.thread_id;
-- 71ms!
WITH
top AS
(
SELECT * FROM fetch_top_threads(NOW() - INTERVAL '1y', 1000) top
),
tall_posts AS
(
SELECT
top.post_count as estimated_post_count,
top.bump_time,
posts.post_id,
posts.board_post_id,
posts.creation_time,
posts.body,
posts.thread_id
FROM top
JOIN posts ON top.thread_id = posts.thread_id
),
op_posts AS
(
SELECT DISTINCT ON (t.thread_id)
*
FROM tall_posts t
ORDER BY t.thread_id, t.board_post_id
),
post_counts AS
(
SELECT thread_id, count(*) as post_count FROM
tall_posts
GROUP BY thread_id
)
SELECT
post_counts.post_count,
op_posts.*,
threads.board_thread_id,
boards.pathpart,
sites."name"
FROM op_posts
JOIN post_counts ON op_posts.thread_id = post_counts.thread_id
JOIN threads ON op_posts.thread_id = threads.thread_id
JOIN boards ON threads.board_id = boards.board_id
JOIN sites ON sites.site_id = boards.site_id;
CREATE OR REPLACE FUNCTION fetch_catalog(max_time timestamptz, max_row_read int DEFAULT 10000)
RETURNS TABLE (
post_count bigint,
estimated_post_count bigint,
post_id bigint,
board_post_id bigint,
creation_time timestamptz,
body text,
thread_id bigint,
board_thread_id bigint,
pathpart text,
name text
) AS $$
WITH
top AS
(
SELECT * FROM fetch_top_threads(max_time, max_row_read) AS top
),
tall_posts AS
(
SELECT
top.post_count as estimated_post_count,
posts.post_id,
posts.board_post_id,
posts.creation_time,
posts.body,
posts.thread_id
FROM top
JOIN posts ON top.thread_id = posts.thread_id
WHERE creation_time < max_time
),
op_posts AS
(
SELECT DISTINCT ON (t.thread_id)
*
FROM tall_posts t
ORDER BY t.thread_id, t.board_post_id
),
post_counts AS
(
SELECT thread_id, count(*) as post_count FROM
tall_posts
GROUP BY thread_id
)
SELECT
post_counts.post_count,
op_posts.*,
threads.board_thread_id,
boards.pathpart,
sites."name"
FROM op_posts
JOIN post_counts ON op_posts.thread_id = post_counts.thread_id
JOIN threads ON op_posts.thread_id = threads.thread_id
JOIN boards ON threads.board_id = boards.board_id
JOIN sites ON sites.site_id = boards.site_id;
$$ LANGUAGE sql;
SELECT * FROM fetch_catalog(NOW() - INTERVAL '1y', 1001);
-- CREATE INDEX idx_posts_thread_board ON posts (thread_id, board_post_id);
ANALYZE posts;
SELECT DISTINCT ON (top.thread_id)
top.thread_id,
posts.post_id,
posts.body,
top.post_count,
top.where_to_leave_off
FROM fetch_top_threads(NOW() - INTERVAL '10d') top
JOIN posts ON top.thread_id = posts.thread_id
ORDER BY top.thread_id, posts.board_post_id;
SELECT * FROM threads WHERE thread_id = 3110;
SELECT * FROM posts WHERE thread_id = 3110 ORDER BY board_post_id ASC;
SELECT * FROM boards;
SELECT * FROM threads WHERE thread_id = 11314;
ANALYZE posts;

View File

@ -16,6 +16,9 @@ DROP TABLE IF EXISTS threads CASCADE;
DROP TABLE IF EXISTS posts CASCADE;
DROP TABLE IF EXISTS attachments CASCADE;
DROP FUNCTION IF EXISTS update_post_body_search_index;
DROP FUNCTION IF EXISTS fetch_top_threads;
DROP FUNCTION IF EXISTS fetch_catalog;
-- It won't let us drop roles otherwise and the IFs are to keep this script idempotent.
DO
@ -72,9 +75,8 @@ CREATE INDEX posts_creation_time_idx ON posts (creation_time);
CREATE INDEX posts_body_search_idx ON posts USING GIN (body_search_index);
CREATE INDEX posts_thread_id_idx ON posts (thread_id);
CREATE INDEX posts_board_post_id_idx ON posts (board_post_id);
-- This is to optimize joins on thread_id and filtering/sorting by creation_time in 'posts' table.
CREATE INDEX posts_thread_id_creation_time_idx ON posts (thread_id, creation_time);
CREATE INDEX posts_thread_id_creation_time_idx ON posts (creation_time, thread_id);
--CREATE INDEX posts_thread_id_board_post_id_idx ON posts (thread_id, board_post_id);
CREATE OR REPLACE FUNCTION update_post_body_search_index() RETURNS trigger AS $$
BEGIN
@ -169,6 +171,89 @@ $$ LANGUAGE sql;
-- 1:21 for full db (nothing inserted)
CREATE OR REPLACE FUNCTION fetch_top_threads(
p_start_time TIMESTAMPTZ,
lookback INT DEFAULT 10000
)
RETURNS TABLE(bump_time TIMESTAMPTZ, post_count BIGINT, thread_id BIGINT, where_to_leave_off TIMESTAMPTZ)
LANGUAGE sql
AS $$
SELECT
max(creation_time) as bump_time,
count(*),
thread_id,
min(creation_time) as where_to_leave_off
FROM
(
SELECT thread_id, creation_time
FROM posts
WHERE creation_time < p_start_time
ORDER BY creation_time DESC
LIMIT LEAST(lookback, 250000) -- capping the lookback to 250k
) as t
GROUP BY thread_id
ORDER BY bump_time DESC;
$$;
CREATE OR REPLACE FUNCTION fetch_catalog(max_time timestamptz, max_row_read int DEFAULT 10000)
RETURNS TABLE (
post_count bigint,
estimated_post_count bigint,
post_id bigint,
board_post_id bigint,
creation_time timestamptz,
body text,
thread_id bigint,
board_thread_id bigint,
pathpart text,
name text
) AS $$
WITH
top AS
(
SELECT * FROM fetch_top_threads(max_time, max_row_read) AS top
),
tall_posts AS
(
SELECT
top.post_count as estimated_post_count,
posts.post_id,
posts.board_post_id,
posts.creation_time,
posts.body,
posts.thread_id
FROM top
JOIN posts ON top.thread_id = posts.thread_id
WHERE creation_time < max_time
),
op_posts AS
(
SELECT DISTINCT ON (t.thread_id)
*
FROM tall_posts t
ORDER BY t.thread_id, t.board_post_id
),
post_counts AS
(
SELECT thread_id, count(*) as post_count FROM
tall_posts
GROUP BY thread_id
)
SELECT
post_counts.post_count,
op_posts.*,
threads.board_thread_id,
boards.pathpart,
sites."name"
FROM op_posts
JOIN post_counts ON op_posts.thread_id = post_counts.thread_id
JOIN threads ON op_posts.thread_id = threads.thread_id
JOIN boards ON threads.board_id = boards.board_id
JOIN sites ON sites.site_id = boards.site_id;
$$ LANGUAGE sql;
/*
* Permissions
*/
@ -194,6 +279,8 @@ GRANT ALL ON posts TO chan_archiver;
GRANT ALL ON attachments TO chan_archiver;
GRANT EXECUTE ON FUNCTION update_post_body_search_index TO chan_archiver;
GRANT EXECUTE ON FUNCTION insert_posts_and_return_ids TO chan_archiver;
GRANT EXECUTE ON FUNCTION fetch_top_threads TO chan_archiver;
GRANT EXECUTE ON FUNCTION fetch_catalog TO chan_archiver;
GRANT usage, select ON SEQUENCE sites_site_id_seq TO chan_archiver;
GRANT usage, select ON SEQUENCE boards_board_id_seq TO chan_archiver;
GRANT usage, select ON SEQUENCE threads_thread_id_seq TO chan_archiver;