- Change fetch_catalog and fetch_top_threads functions
- neither was correct - fetch_top_threads doesn't need to be a loop, it will be up to the application whether or not to load more results. This simplifies the function, which was sometimes returning more than one row for each thread_id anyway - fetch_catalog is now fairly performant, and fetches all relevant information
This commit is contained in:
parent
7b8b1b2255
commit
c297bae697
|
@ -28,7 +28,7 @@ LIMIT
|
||||||
100;
|
100;
|
||||||
|
|
||||||
|
|
||||||
SELECT
|
SELECT
|
||||||
thread_data.thread_id,
|
thread_data.thread_id,
|
||||||
thread_data.bump_time,
|
thread_data.bump_time,
|
||||||
thread_data.first_post_subject,
|
thread_data.first_post_subject,
|
||||||
|
@ -313,6 +313,16 @@ group by thread_id
|
||||||
order by bump_time desc;
|
order by bump_time desc;
|
||||||
|
|
||||||
|
|
||||||
|
select max(creation_time) as bump_time, count(*), thread_id, min(creation_time) as where_to_leave_off
|
||||||
|
from
|
||||||
|
(
|
||||||
|
select thread_id, creation_time, body from posts
|
||||||
|
where creation_time < NOW() - interval '365 day'
|
||||||
|
order by creation_time desc limit 10000
|
||||||
|
) as t
|
||||||
|
group by thread_id
|
||||||
|
order by bump_time desc;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
explain analyze SELECT
|
explain analyze SELECT
|
||||||
|
@ -330,8 +340,8 @@ LIMIT 100
|
||||||
|
|
||||||
|
|
||||||
-- Create temp table if not exists
|
-- Create temp table if not exists
|
||||||
drop table if exists temp_results;
|
DROP TABLE IF EXISTS temp_results;
|
||||||
CREATE TEMP TABLE IF NOT EXISTS temp_results (bump_time TIMESTAMPTZ, post_count INT, thread_id INT, last_fetched_time TIMESTAMP);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This function scans backwards from p_start_time until we get the desired number of threads.
|
* This function scans backwards from p_start_time until we get the desired number of threads.
|
||||||
|
@ -351,7 +361,8 @@ DECLARE
|
||||||
max_iterations INT := 10; -- Maximum iterations to avoid endless loop in case of errors
|
max_iterations INT := 10; -- Maximum iterations to avoid endless loop in case of errors
|
||||||
result_count INT := 0;
|
result_count INT := 0;
|
||||||
last_min_time TIMESTAMP;
|
last_min_time TIMESTAMP;
|
||||||
BEGIN
|
BEGIN
|
||||||
|
CREATE TEMP TABLE IF NOT EXISTS temp_results (bump_time TIMESTAMPTZ, post_count INT, thread_id INT, last_fetched_time TIMESTAMP);
|
||||||
TRUNCATE temp_results; -- clear the table
|
TRUNCATE temp_results; -- clear the table
|
||||||
|
|
||||||
FOR i IN 1..max_iterations LOOP
|
FOR i IN 1..max_iterations LOOP
|
||||||
|
@ -368,9 +379,7 @@ BEGIN
|
||||||
GROUP BY t.thread_id;
|
GROUP BY t.thread_id;
|
||||||
|
|
||||||
-- Check if we have enough threads
|
-- Check if we have enough threads
|
||||||
--SELECT temp_results.thread_id INTO result_count FROM temp_results;
|
SELECT COUNT(DISTINCT temp_results.thread_id) INTO result_count FROM temp_results;
|
||||||
SELECT COUNT(DISTINCT temp_results.thread_id) INTO result_count FROM temp_results; -- specify the table name here to avoid ambiguity
|
|
||||||
|
|
||||||
|
|
||||||
IF result_count >= p_desired_threads THEN
|
IF result_count >= p_desired_threads THEN
|
||||||
EXIT;
|
EXIT;
|
||||||
|
@ -387,13 +396,14 @@ BEGIN
|
||||||
-- Return the results
|
-- Return the results
|
||||||
RETURN QUERY SELECT temp_results.bump_time, temp_results.post_count, temp_results.thread_id FROM temp_results ORDER BY temp_results.bump_time DESC;
|
RETURN QUERY SELECT temp_results.bump_time, temp_results.post_count, temp_results.thread_id FROM temp_results ORDER BY temp_results.bump_time DESC;
|
||||||
|
|
||||||
|
DROP TABLE IF EXISTS temp_results;
|
||||||
END;
|
END;
|
||||||
$$;
|
$$;
|
||||||
|
|
||||||
|
|
||||||
select * FROM fetch_top_threads(NOW(), 300) offset 0;
|
select * FROM fetch_top_threads(NOW(), 100);
|
||||||
|
|
||||||
explain analyze WITH TopThreads AS (
|
WITH TopThreads AS (
|
||||||
SELECT * FROM fetch_top_threads(NOW() - interval '365d', 100)
|
SELECT * FROM fetch_top_threads(NOW() - interval '365d', 100)
|
||||||
)
|
)
|
||||||
SELECT
|
SELECT
|
||||||
|
@ -409,3 +419,175 @@ JOIN
|
||||||
posts p ON tt.thread_id = p.thread_id
|
posts p ON tt.thread_id = p.thread_id
|
||||||
ORDER BY
|
ORDER BY
|
||||||
p.creation_time asc;
|
p.creation_time asc;
|
||||||
|
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
fetch_top_threads(NOW() - interval '365d', 100) top
|
||||||
|
JOIN
|
||||||
|
posts ON posts.thread_id = top.thread_id
|
||||||
|
ORDER BY posts.creation_time ASC;
|
||||||
|
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
post_id,
|
||||||
|
board_post_id,
|
||||||
|
posts.thread_id,
|
||||||
|
bump_time,
|
||||||
|
creation_time,
|
||||||
|
body
|
||||||
|
FROM
|
||||||
|
fetch_top_threads(NOW() - interval '365d', 100) top
|
||||||
|
JOIN
|
||||||
|
posts ON posts.thread_id = top.thread_id
|
||||||
|
ORDER BY posts.creation_time ASC;
|
||||||
|
|
||||||
|
|
||||||
|
VACUUM ANALYZE posts;
|
||||||
|
|
||||||
|
SELECT posts.post_id, posts.thread_id, posts.body
|
||||||
|
FROM
|
||||||
|
fetch_top_threads(NOW(), 100) top
|
||||||
|
JOIN
|
||||||
|
posts ON posts.thread_id = top.thread_id;
|
||||||
|
|
||||||
|
|
||||||
|
WITH
|
||||||
|
top AS (
|
||||||
|
SELECT * FROM fetch_top_threads(NOW(), 100) top
|
||||||
|
),
|
||||||
|
joined AS (
|
||||||
|
SELECT posts.post_id, posts.thread_id, posts.body
|
||||||
|
FROM top JOIN posts ON posts.thread_id = top.thread_id
|
||||||
|
ORDER BY posts.creation_time DESC
|
||||||
|
),
|
||||||
|
grouped AS (
|
||||||
|
SELECT min(joined.post_id) op_post_id, joined.thread_id FROM joined
|
||||||
|
GROUP BY joined.thread_id
|
||||||
|
) SELECT * FROM grouped JOIN joined ON op_post_id = joined.post_id;
|
||||||
|
|
||||||
|
|
||||||
|
WITH
|
||||||
|
top AS (
|
||||||
|
SELECT * FROM fetch_top_threads(NOW(), 100) OFFSET 0
|
||||||
|
),
|
||||||
|
joined AS (
|
||||||
|
SELECT posts.post_id, posts.thread_id, posts.body, posts.creation_time
|
||||||
|
FROM top JOIN posts ON posts.thread_id = top.thread_id
|
||||||
|
ORDER BY posts.creation_time DESC
|
||||||
|
),
|
||||||
|
grouped AS (
|
||||||
|
SELECT min(joined.creation_time) op_creation_time, joined.thread_id, count(*) AS post_count FROM joined
|
||||||
|
GROUP BY joined.thread_id
|
||||||
|
) SELECT joined.*, grouped.post_count
|
||||||
|
FROM grouped JOIN joined ON op_creation_time = joined.creation_time AND grouped.thread_id = joined.thread_id;
|
||||||
|
|
||||||
|
|
||||||
|
WITH
|
||||||
|
top AS (
|
||||||
|
SELECT * FROM fetch_top_threads(NOW(), 200) OFFSET 0
|
||||||
|
),
|
||||||
|
joined AS (
|
||||||
|
SELECT
|
||||||
|
posts.post_id,
|
||||||
|
posts.thread_id,
|
||||||
|
posts.body,
|
||||||
|
posts.creation_time,
|
||||||
|
ROW_NUMBER() OVER(PARTITION BY posts.thread_id ORDER BY posts.creation_time, posts.post_id) as rn
|
||||||
|
FROM top
|
||||||
|
JOIN posts ON posts.thread_id = top.thread_id
|
||||||
|
),
|
||||||
|
grouped AS (
|
||||||
|
SELECT joined.post_id, joined.thread_id
|
||||||
|
FROM joined
|
||||||
|
WHERE joined.rn = 1
|
||||||
|
)
|
||||||
|
SELECT joined.*
|
||||||
|
FROM grouped
|
||||||
|
JOIN joined ON grouped.post_id = joined.post_id;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
sub.post_id,
|
||||||
|
sub.thread_id,
|
||||||
|
sub.body,
|
||||||
|
sub.creation_time
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
posts.post_id,
|
||||||
|
posts.thread_id,
|
||||||
|
posts.body,
|
||||||
|
posts.creation_time,
|
||||||
|
ROW_NUMBER() OVER(PARTITION BY posts.thread_id ORDER BY posts.creation_time, posts.post_id) as rn
|
||||||
|
FROM fetch_top_threads(NOW(), 200) AS top
|
||||||
|
JOIN posts ON posts.thread_id = top.thread_id
|
||||||
|
) AS sub
|
||||||
|
WHERE
|
||||||
|
sub.rn = 1;
|
||||||
|
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
sub.post_id,
|
||||||
|
sub.thread_id,
|
||||||
|
sub.body,
|
||||||
|
sub.creation_time
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
posts.post_id,
|
||||||
|
posts.thread_id,
|
||||||
|
posts.body,
|
||||||
|
posts.creation_time,
|
||||||
|
ROW_NUMBER() OVER(PARTITION BY posts.thread_id ORDER BY posts.creation_time, posts.post_id) as rn
|
||||||
|
FROM
|
||||||
|
(SELECT * FROM fetch_top_threads(NOW(), 200) OFFSET 0) AS top
|
||||||
|
JOIN posts ON posts.thread_id = top.thread_id
|
||||||
|
) AS sub
|
||||||
|
WHERE
|
||||||
|
sub.rn = 1;
|
||||||
|
|
||||||
|
SELECT count(*) FROM fetch_catalog(NOW(), 400, 0);
|
||||||
|
|
||||||
|
SELECT * FROM fetch_top_threads(NOW(), 200);
|
||||||
|
|
||||||
|
|
||||||
|
SELECT count(*) FROM
|
||||||
|
(
|
||||||
|
SELECT top.thread_id, count(*) as post_count FROM fetch_top_threads(NOW(), 200) top JOIN posts ON posts.thread_id = top.thread_id
|
||||||
|
GROUP BY top.thread_id
|
||||||
|
) t;
|
||||||
|
|
||||||
|
-- how do I
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION fetch_catalog(max_time timestamptz, wanted_result_count integer, offset_count integer)
|
||||||
|
RETURNS TABLE (
|
||||||
|
op_post_id bigint,
|
||||||
|
thread_id bigint,
|
||||||
|
post_count bigint,
|
||||||
|
body text,
|
||||||
|
creation_time timestamptz
|
||||||
|
)
|
||||||
|
LANGUAGE SQL
|
||||||
|
AS $$
|
||||||
|
WITH
|
||||||
|
top AS (
|
||||||
|
SELECT * FROM fetch_top_threads(max_time, wanted_result_count) OFFSET offset_count
|
||||||
|
),
|
||||||
|
joined AS (
|
||||||
|
SELECT posts.post_id, posts.thread_id, posts.body, posts.creation_time
|
||||||
|
FROM top JOIN posts ON posts.thread_id = top.thread_id
|
||||||
|
ORDER BY posts.creation_time DESC
|
||||||
|
),
|
||||||
|
grouped AS (
|
||||||
|
SELECT min(joined.post_id) op_post_id, joined.thread_id, count(*) AS post_count FROM joined
|
||||||
|
GROUP BY joined.thread_id
|
||||||
|
) SELECT grouped.*, joined.body, joined.creation_time FROM grouped JOIN joined ON op_post_id = joined.post_id;
|
||||||
|
$$;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,216 @@
|
||||||
|
CREATE OR REPLACE FUNCTION fetch_top_threads(
|
||||||
|
p_start_time TIMESTAMPTZ,
|
||||||
|
lookback INT DEFAULT 10000
|
||||||
|
)
|
||||||
|
RETURNS TABLE(bump_time TIMESTAMPTZ, post_count BIGINT, thread_id BIGINT, where_to_leave_off TIMESTAMPTZ)
|
||||||
|
LANGUAGE sql
|
||||||
|
AS $$
|
||||||
|
SELECT
|
||||||
|
max(creation_time) as bump_time,
|
||||||
|
count(*),
|
||||||
|
thread_id,
|
||||||
|
min(creation_time) as where_to_leave_off
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT thread_id, creation_time
|
||||||
|
FROM posts
|
||||||
|
WHERE creation_time < p_start_time
|
||||||
|
ORDER BY creation_time DESC
|
||||||
|
LIMIT LEAST(lookback, 250000) -- capping the lookback to 250k
|
||||||
|
) as t
|
||||||
|
GROUP BY thread_id
|
||||||
|
ORDER BY bump_time DESC;
|
||||||
|
$$;
|
||||||
|
|
||||||
|
|
||||||
|
SELECT * FROM fetch_top_threads(NOW() - INTERVAL '10d');
|
||||||
|
|
||||||
|
SELECT * FROM fetch_top_threads(NOW() - INTERVAL '10d') top JOIN posts ON top.thread_id = posts.thread_id;
|
||||||
|
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
ordered_posts.thread_id,
|
||||||
|
count(*),
|
||||||
|
array_agg(ordered_posts.body)
|
||||||
|
FROM (
|
||||||
|
SELECT top.thread_id, posts.body
|
||||||
|
FROM fetch_top_threads(NOW() - INTERVAL '10d', 2000) top
|
||||||
|
JOIN posts ON top.thread_id = posts.thread_id
|
||||||
|
ORDER BY posts.board_post_id ASC
|
||||||
|
) AS ordered_posts
|
||||||
|
GROUP BY ordered_posts.thread_id;
|
||||||
|
|
||||||
|
WITH
|
||||||
|
top AS
|
||||||
|
(
|
||||||
|
SELECT * FROM fetch_top_threads(NOW()) top
|
||||||
|
),
|
||||||
|
tall_posts AS
|
||||||
|
(
|
||||||
|
SELECT top.post_count as estimated_post_count, posts.*
|
||||||
|
FROM top
|
||||||
|
JOIN posts ON top.thread_id = posts.thread_id
|
||||||
|
),
|
||||||
|
op_posts AS
|
||||||
|
(
|
||||||
|
SELECT DISTINCT ON (t.thread_id)
|
||||||
|
*
|
||||||
|
FROM tall_posts t
|
||||||
|
ORDER BY t.thread_id, t.board_post_id
|
||||||
|
)
|
||||||
|
SELECT * FROM op_posts;
|
||||||
|
|
||||||
|
|
||||||
|
WITH
|
||||||
|
top AS
|
||||||
|
(
|
||||||
|
SELECT * FROM fetch_top_threads(NOW()) top
|
||||||
|
),
|
||||||
|
tall_posts AS
|
||||||
|
(
|
||||||
|
SELECT top.post_count as estimated_post_count, posts.*
|
||||||
|
FROM top
|
||||||
|
JOIN posts ON top.thread_id = posts.thread_id
|
||||||
|
),
|
||||||
|
op_posts AS
|
||||||
|
(
|
||||||
|
SELECT DISTINCT ON (t.thread_id)
|
||||||
|
*
|
||||||
|
FROM tall_posts t
|
||||||
|
ORDER BY t.thread_id, t.board_post_id
|
||||||
|
),
|
||||||
|
post_counts AS
|
||||||
|
(
|
||||||
|
SELECT thread_id, count(*) as post_count FROM
|
||||||
|
tall_posts
|
||||||
|
GROUP BY thread_id
|
||||||
|
)
|
||||||
|
SELECT post_counts.post_count, op_posts.* FROM op_posts JOIN post_counts ON op_posts.thread_id = post_counts.thread_id;
|
||||||
|
|
||||||
|
|
||||||
|
-- 71ms!
|
||||||
|
WITH
|
||||||
|
top AS
|
||||||
|
(
|
||||||
|
SELECT * FROM fetch_top_threads(NOW() - INTERVAL '1y', 1000) top
|
||||||
|
),
|
||||||
|
tall_posts AS
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
top.post_count as estimated_post_count,
|
||||||
|
top.bump_time,
|
||||||
|
posts.post_id,
|
||||||
|
posts.board_post_id,
|
||||||
|
posts.creation_time,
|
||||||
|
posts.body,
|
||||||
|
posts.thread_id
|
||||||
|
FROM top
|
||||||
|
JOIN posts ON top.thread_id = posts.thread_id
|
||||||
|
),
|
||||||
|
op_posts AS
|
||||||
|
(
|
||||||
|
SELECT DISTINCT ON (t.thread_id)
|
||||||
|
*
|
||||||
|
FROM tall_posts t
|
||||||
|
ORDER BY t.thread_id, t.board_post_id
|
||||||
|
),
|
||||||
|
post_counts AS
|
||||||
|
(
|
||||||
|
SELECT thread_id, count(*) as post_count FROM
|
||||||
|
tall_posts
|
||||||
|
GROUP BY thread_id
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
post_counts.post_count,
|
||||||
|
op_posts.*,
|
||||||
|
threads.board_thread_id,
|
||||||
|
boards.pathpart,
|
||||||
|
sites."name"
|
||||||
|
FROM op_posts
|
||||||
|
JOIN post_counts ON op_posts.thread_id = post_counts.thread_id
|
||||||
|
JOIN threads ON op_posts.thread_id = threads.thread_id
|
||||||
|
JOIN boards ON threads.board_id = boards.board_id
|
||||||
|
JOIN sites ON sites.site_id = boards.site_id;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION fetch_catalog(max_time timestamptz, max_row_read int DEFAULT 10000)
|
||||||
|
RETURNS TABLE (
|
||||||
|
post_count bigint,
|
||||||
|
estimated_post_count bigint,
|
||||||
|
post_id bigint,
|
||||||
|
board_post_id bigint,
|
||||||
|
creation_time timestamptz,
|
||||||
|
body text,
|
||||||
|
thread_id bigint,
|
||||||
|
board_thread_id bigint,
|
||||||
|
pathpart text,
|
||||||
|
name text
|
||||||
|
) AS $$
|
||||||
|
WITH
|
||||||
|
top AS
|
||||||
|
(
|
||||||
|
SELECT * FROM fetch_top_threads(max_time, max_row_read) AS top
|
||||||
|
),
|
||||||
|
tall_posts AS
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
top.post_count as estimated_post_count,
|
||||||
|
posts.post_id,
|
||||||
|
posts.board_post_id,
|
||||||
|
posts.creation_time,
|
||||||
|
posts.body,
|
||||||
|
posts.thread_id
|
||||||
|
FROM top
|
||||||
|
JOIN posts ON top.thread_id = posts.thread_id
|
||||||
|
WHERE creation_time < max_time
|
||||||
|
),
|
||||||
|
op_posts AS
|
||||||
|
(
|
||||||
|
SELECT DISTINCT ON (t.thread_id)
|
||||||
|
*
|
||||||
|
FROM tall_posts t
|
||||||
|
ORDER BY t.thread_id, t.board_post_id
|
||||||
|
),
|
||||||
|
post_counts AS
|
||||||
|
(
|
||||||
|
SELECT thread_id, count(*) as post_count FROM
|
||||||
|
tall_posts
|
||||||
|
GROUP BY thread_id
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
post_counts.post_count,
|
||||||
|
op_posts.*,
|
||||||
|
threads.board_thread_id,
|
||||||
|
boards.pathpart,
|
||||||
|
sites."name"
|
||||||
|
FROM op_posts
|
||||||
|
JOIN post_counts ON op_posts.thread_id = post_counts.thread_id
|
||||||
|
JOIN threads ON op_posts.thread_id = threads.thread_id
|
||||||
|
JOIN boards ON threads.board_id = boards.board_id
|
||||||
|
JOIN sites ON sites.site_id = boards.site_id;
|
||||||
|
$$ LANGUAGE sql;
|
||||||
|
|
||||||
|
SELECT * FROM fetch_catalog(NOW() - INTERVAL '1y', 1001);
|
||||||
|
|
||||||
|
|
||||||
|
-- CREATE INDEX idx_posts_thread_board ON posts (thread_id, board_post_id);
|
||||||
|
ANALYZE posts;
|
||||||
|
|
||||||
|
|
||||||
|
SELECT DISTINCT ON (top.thread_id)
|
||||||
|
top.thread_id,
|
||||||
|
posts.post_id,
|
||||||
|
posts.body,
|
||||||
|
top.post_count,
|
||||||
|
top.where_to_leave_off
|
||||||
|
FROM fetch_top_threads(NOW() - INTERVAL '10d') top
|
||||||
|
JOIN posts ON top.thread_id = posts.thread_id
|
||||||
|
ORDER BY top.thread_id, posts.board_post_id;
|
||||||
|
|
||||||
|
|
||||||
|
SELECT * FROM threads WHERE thread_id = 3110;
|
||||||
|
SELECT * FROM posts WHERE thread_id = 3110 ORDER BY board_post_id ASC;
|
||||||
|
SELECT * FROM boards;
|
||||||
|
|
||||||
|
SELECT * FROM threads WHERE thread_id = 11314;
|
||||||
|
ANALYZE posts;
|
|
@ -16,6 +16,9 @@ DROP TABLE IF EXISTS threads CASCADE;
|
||||||
DROP TABLE IF EXISTS posts CASCADE;
|
DROP TABLE IF EXISTS posts CASCADE;
|
||||||
DROP TABLE IF EXISTS attachments CASCADE;
|
DROP TABLE IF EXISTS attachments CASCADE;
|
||||||
DROP FUNCTION IF EXISTS update_post_body_search_index;
|
DROP FUNCTION IF EXISTS update_post_body_search_index;
|
||||||
|
DROP FUNCTION IF EXISTS fetch_top_threads;
|
||||||
|
DROP FUNCTION IF EXISTS fetch_catalog;
|
||||||
|
|
||||||
|
|
||||||
-- It won't let us drop roles otherwise and the IFs are to keep this script idempotent.
|
-- It won't let us drop roles otherwise and the IFs are to keep this script idempotent.
|
||||||
DO
|
DO
|
||||||
|
@ -73,6 +76,7 @@ CREATE INDEX posts_body_search_idx ON posts USING GIN (body_search_index);
|
||||||
CREATE INDEX posts_thread_id_idx ON posts (thread_id);
|
CREATE INDEX posts_thread_id_idx ON posts (thread_id);
|
||||||
CREATE INDEX posts_board_post_id_idx ON posts (board_post_id);
|
CREATE INDEX posts_board_post_id_idx ON posts (board_post_id);
|
||||||
CREATE INDEX posts_thread_id_creation_time_idx ON posts (creation_time, thread_id);
|
CREATE INDEX posts_thread_id_creation_time_idx ON posts (creation_time, thread_id);
|
||||||
|
--CREATE INDEX posts_thread_id_board_post_id_idx ON posts (thread_id, board_post_id);
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION update_post_body_search_index() RETURNS trigger AS $$
|
CREATE OR REPLACE FUNCTION update_post_body_search_index() RETURNS trigger AS $$
|
||||||
BEGIN
|
BEGIN
|
||||||
|
@ -167,6 +171,89 @@ $$ LANGUAGE sql;
|
||||||
-- 1:21 for full db (nothing inserted)
|
-- 1:21 for full db (nothing inserted)
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION fetch_top_threads(
|
||||||
|
p_start_time TIMESTAMPTZ,
|
||||||
|
lookback INT DEFAULT 10000
|
||||||
|
)
|
||||||
|
RETURNS TABLE(bump_time TIMESTAMPTZ, post_count BIGINT, thread_id BIGINT, where_to_leave_off TIMESTAMPTZ)
|
||||||
|
LANGUAGE sql
|
||||||
|
AS $$
|
||||||
|
SELECT
|
||||||
|
max(creation_time) as bump_time,
|
||||||
|
count(*),
|
||||||
|
thread_id,
|
||||||
|
min(creation_time) as where_to_leave_off
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT thread_id, creation_time
|
||||||
|
FROM posts
|
||||||
|
WHERE creation_time < p_start_time
|
||||||
|
ORDER BY creation_time DESC
|
||||||
|
LIMIT LEAST(lookback, 250000) -- capping the lookback to 250k
|
||||||
|
) as t
|
||||||
|
GROUP BY thread_id
|
||||||
|
ORDER BY bump_time DESC;
|
||||||
|
$$;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION fetch_catalog(max_time timestamptz, max_row_read int DEFAULT 10000)
|
||||||
|
RETURNS TABLE (
|
||||||
|
post_count bigint,
|
||||||
|
estimated_post_count bigint,
|
||||||
|
post_id bigint,
|
||||||
|
board_post_id bigint,
|
||||||
|
creation_time timestamptz,
|
||||||
|
body text,
|
||||||
|
thread_id bigint,
|
||||||
|
board_thread_id bigint,
|
||||||
|
pathpart text,
|
||||||
|
name text
|
||||||
|
) AS $$
|
||||||
|
WITH
|
||||||
|
top AS
|
||||||
|
(
|
||||||
|
SELECT * FROM fetch_top_threads(max_time, max_row_read) AS top
|
||||||
|
),
|
||||||
|
tall_posts AS
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
top.post_count as estimated_post_count,
|
||||||
|
posts.post_id,
|
||||||
|
posts.board_post_id,
|
||||||
|
posts.creation_time,
|
||||||
|
posts.body,
|
||||||
|
posts.thread_id
|
||||||
|
FROM top
|
||||||
|
JOIN posts ON top.thread_id = posts.thread_id
|
||||||
|
WHERE creation_time < max_time
|
||||||
|
),
|
||||||
|
op_posts AS
|
||||||
|
(
|
||||||
|
SELECT DISTINCT ON (t.thread_id)
|
||||||
|
*
|
||||||
|
FROM tall_posts t
|
||||||
|
ORDER BY t.thread_id, t.board_post_id
|
||||||
|
),
|
||||||
|
post_counts AS
|
||||||
|
(
|
||||||
|
SELECT thread_id, count(*) as post_count FROM
|
||||||
|
tall_posts
|
||||||
|
GROUP BY thread_id
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
post_counts.post_count,
|
||||||
|
op_posts.*,
|
||||||
|
threads.board_thread_id,
|
||||||
|
boards.pathpart,
|
||||||
|
sites."name"
|
||||||
|
FROM op_posts
|
||||||
|
JOIN post_counts ON op_posts.thread_id = post_counts.thread_id
|
||||||
|
JOIN threads ON op_posts.thread_id = threads.thread_id
|
||||||
|
JOIN boards ON threads.board_id = boards.board_id
|
||||||
|
JOIN sites ON sites.site_id = boards.site_id;
|
||||||
|
$$ LANGUAGE sql;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Permissions
|
* Permissions
|
||||||
*/
|
*/
|
||||||
|
@ -192,6 +279,8 @@ GRANT ALL ON posts TO chan_archiver;
|
||||||
GRANT ALL ON attachments TO chan_archiver;
|
GRANT ALL ON attachments TO chan_archiver;
|
||||||
GRANT EXECUTE ON FUNCTION update_post_body_search_index TO chan_archiver;
|
GRANT EXECUTE ON FUNCTION update_post_body_search_index TO chan_archiver;
|
||||||
GRANT EXECUTE ON FUNCTION insert_posts_and_return_ids TO chan_archiver;
|
GRANT EXECUTE ON FUNCTION insert_posts_and_return_ids TO chan_archiver;
|
||||||
|
GRANT EXECUTE ON FUNCTION fetch_top_threads TO chan_archiver;
|
||||||
|
GRANT EXECUTE ON FUNCTION fetch_catalog TO chan_archiver;
|
||||||
GRANT usage, select ON SEQUENCE sites_site_id_seq TO chan_archiver;
|
GRANT usage, select ON SEQUENCE sites_site_id_seq TO chan_archiver;
|
||||||
GRANT usage, select ON SEQUENCE boards_board_id_seq TO chan_archiver;
|
GRANT usage, select ON SEQUENCE boards_board_id_seq TO chan_archiver;
|
||||||
GRANT usage, select ON SEQUENCE threads_thread_id_seq TO chan_archiver;
|
GRANT usage, select ON SEQUENCE threads_thread_id_seq TO chan_archiver;
|
||||||
|
|
Loading…
Reference in New Issue