Speed up fetch_catalog

- also add limit to number of rows search returns (to let front-end
  decide how many to display)
This commit is contained in:
towards-a-new-leftypol 2024-03-02 03:02:31 -05:00
parent 09bb17f19c
commit 1e0289750d
4 changed files with 78 additions and 81 deletions

11
sql/add_post_idx.sql Normal file
View File

@ -0,0 +1,11 @@
WITH indexed_posts AS (
SELECT
post_id,
ROW_NUMBER() OVER (PARTITION BY thread_id ORDER BY board_post_id) AS index
FROM
posts
)
UPDATE posts
SET local_idx = indexed_posts.index
FROM indexed_posts
WHERE posts.post_id = indexed_posts.post_id;

View File

@ -25,7 +25,7 @@ DROP FUNCTION IF EXISTS search_posts;
-- file_thumb_extension text -- file_thumb_extension text
-- ); -- );
CREATE OR REPLACE FUNCTION search_posts(search_text text) CREATE OR REPLACE FUNCTION search_posts(search_text text, max_rows integer DEFAULT 1000)
RETURNS SETOF catalog_grid_result AS $$ RETURNS SETOF catalog_grid_result AS $$
WITH WITH
query AS ( query AS (
@ -54,7 +54,7 @@ RETURNS SETOF catalog_grid_result AS $$
AND attachments.attachment_idx = 1 AND attachments.attachment_idx = 1
, query , query
WHERE p.body_search_index @@ query.query WHERE p.body_search_index @@ query.query
LIMIT 2000 LIMIT max_rows
) )
SELECT SELECT
0 AS estimated_post_count, 0 AS estimated_post_count,

View File

@ -74,14 +74,17 @@ CREATE TABLE IF NOT EXISTS posts
, body_search_index tsvector , body_search_index tsvector
, thread_id bigint NOT NULL , thread_id bigint NOT NULL
, embed text , embed text
, local_idx int NOT NULL
, CONSTRAINT unique_thread_board_id_constraint UNIQUE (thread_id, board_post_id) , CONSTRAINT unique_thread_board_id_constraint UNIQUE (thread_id, board_post_id)
, CONSTRAINT thread_fk FOREIGN KEY (thread_id) REFERENCES threads (thread_id) ON DELETE CASCADE , CONSTRAINT thread_fk FOREIGN KEY (thread_id) REFERENCES threads (thread_id) ON DELETE CASCADE
, CONSTRAINT unique_thread_local_idx UNIQUE (thread_id, local_idx)
); );
CREATE INDEX posts_creation_time_idx ON posts (creation_time); CREATE INDEX posts_creation_time_idx ON posts (creation_time);
CREATE INDEX posts_body_search_idx ON posts USING GIN (body_search_index); CREATE INDEX posts_body_search_idx ON posts USING GIN (body_search_index);
CREATE INDEX posts_thread_id_idx ON posts (thread_id); CREATE INDEX posts_thread_id_idx ON posts (thread_id);
CREATE INDEX posts_board_post_id_idx ON posts (board_post_id); CREATE INDEX posts_board_post_id_idx ON posts (board_post_id);
CREATE INDEX posts_thread_id_creation_time_idx ON posts (creation_time, thread_id); CREATE INDEX posts_thread_id_creation_time_idx ON posts (creation_time, thread_id);
CREATE INDEX posts_local_idx_idx ON posts (local_idx);
--CREATE INDEX posts_thread_id_board_post_id_idx ON posts (thread_id, board_post_id); --CREATE INDEX posts_thread_id_board_post_id_idx ON posts (thread_id, board_post_id);
CREATE OR REPLACE FUNCTION update_post_body_search_index() RETURNS trigger AS $$ CREATE OR REPLACE FUNCTION update_post_body_search_index() RETURNS trigger AS $$
@ -96,6 +99,8 @@ BEGIN
END; END;
$$ LANGUAGE plpgsql; $$ LANGUAGE plpgsql;
-- WARNING: maybe disable this before doing full table operations on the posts table,
-- like populating a new column, since this will cause it to rebuild the entire text search index
CREATE TRIGGER trigger_update_post_body_search_index CREATE TRIGGER trigger_update_post_body_search_index
BEFORE INSERT OR UPDATE BEFORE INSERT OR UPDATE
ON posts ON posts
@ -243,64 +248,58 @@ CREATE TYPE catalog_grid_result AS
); );
CREATE OR REPLACE FUNCTION fetch_catalog(max_time timestamptz, max_row_read int DEFAULT 10000) CREATE OR REPLACE FUNCTION search_posts(search_text text, max_rows integer DEFAULT 1000)
RETURNS SETOF catalog_grid_result AS $$ RETURNS SETOF catalog_grid_result AS $$
WITH WITH
top AS query AS (
( SELECT websearch_to_tsquery('english', search_text) AS query
SELECT * FROM fetch_top_threads(max_time, max_row_read) AS top
), ),
tall_posts AS result_set AS (
(
SELECT SELECT
top.post_count as estimated_post_count, p.*,
posts.post_id, threads.board_thread_id,
posts.board_post_id, pathpart,
posts.creation_time, sites.name AS site_name,
top.bump_time, attachments.mimetype as file_mimetype,
posts.body, attachments.illegal as file_illegal,
posts.subject, -- attachments.resolution as file_resolution,
posts.thread_id, attachments.board_filename as file_name,
posts.embed attachments.file_extension,
FROM top attachments.thumb_extension as file_thumb_extension,
JOIN posts ON top.thread_id = posts.thread_id ts_rank(p.body_search_index, query.query)
WHERE creation_time < max_time / (1 + EXTRACT(EPOCH FROM AGE(p.creation_time)) / (3600 * 24)) AS relevance
), FROM posts p
op_posts AS JOIN threads ON threads.thread_id = p.thread_id
( JOIN boards ON boards.board_id = threads.board_id
SELECT DISTINCT ON (t.thread_id) JOIN sites ON sites.site_id = boards.site_id
* LEFT OUTER JOIN attachments
FROM tall_posts t ON attachments.post_id = p.post_id
ORDER BY t.thread_id, t.board_post_id AND attachments.attachment_idx = 1
)-- , , query
-- post_counts AS WHERE p.body_search_index @@ query.query
-- ( LIMIT max_rows
-- SELECT thread_id, count(*) as post_count FROM )
-- tall_posts
-- GROUP BY thread_id
-- )
SELECT SELECT
-- post_counts.post_count, 0 AS estimated_post_count,
op_posts.*, result_set.post_id,
threads.board_thread_id, -- this should be part of the url path when creating links, not thread_id (that's internal) result_set.board_post_id,
boards.pathpart, result_set.creation_time,
sites."name", result_set.creation_time AS bump_time,
-- sites.site_id, result_set.body,
attachments.mimetype as file_mimetype, result_set.subject,
attachments.illegal as file_illegal, result_set.thread_id,
-- attachments.resolution as file_resolution, result_set.embed,
attachments.board_filename as file_name, result_set.board_thread_id,
attachments.file_extension, result_set.pathpart,
attachments.thumb_extension as file_thumb_extension result_set.site_name,
FROM op_posts result_set.file_mimetype,
-- JOIN post_counts ON op_posts.thread_id = post_counts.thread_idh result_set.file_illegal,
JOIN threads ON op_posts.thread_id = threads.thread_id result_set.file_name,
JOIN boards ON threads.board_id = boards.board_id result_set.file_extension,
JOIN sites ON sites.site_id = boards.site_id result_set.file_thumb_extension
LEFT OUTER JOIN attachments ON attachments.post_id = op_posts.post_id AND attachments.attachment_idx = 1 FROM result_set
ORDER BY bump_time DESC; ORDER BY result_set.relevance DESC;
$$ LANGUAGE sql; $$ LANGUAGE sql STABLE;
-- Function: search_posts -- Function: search_posts
-- --

View File

@ -3,6 +3,7 @@ BEGIN TRANSACTION;
DROP TYPE IF EXISTS catalog_grid_result CASCADE; DROP TYPE IF EXISTS catalog_grid_result CASCADE;
DROP FUNCTION IF EXISTS fetch_catalog; DROP FUNCTION IF EXISTS fetch_catalog;
-- OLD: 121ms
CREATE TYPE catalog_grid_result AS CREATE TYPE catalog_grid_result AS
( (
-- post_count bigint, -- post_count bigint,
@ -37,7 +38,7 @@ RETURNS SETOF catalog_grid_result AS $$
tall_posts AS tall_posts AS
( (
SELECT SELECT
top.post_count as estimated_post_count, top.post_count AS estimated_post_count,
posts.post_id, posts.post_id,
posts.board_post_id, posts.board_post_id,
posts.creation_time, posts.creation_time,
@ -47,41 +48,27 @@ RETURNS SETOF catalog_grid_result AS $$
posts.thread_id, posts.thread_id,
posts.embed posts.embed
FROM top FROM top
JOIN posts ON top.thread_id = posts.thread_id JOIN posts ON top.thread_id = posts.thread_id AND posts.local_idx = 1
WHERE creation_time < max_time WHERE creation_time < max_time
), )
op_posts AS
(
SELECT DISTINCT ON (t.thread_id)
*
FROM tall_posts t
ORDER BY t.thread_id, t.board_post_id
)-- ,
-- post_counts AS
-- (
-- SELECT thread_id, count(*) as post_count FROM
-- tall_posts
-- GROUP BY thread_id
-- )
SELECT SELECT
-- post_counts.post_count, -- post_counts.post_count,
op_posts.*, tall_posts.*,
threads.board_thread_id, -- this should be part of the url path when creating links, not thread_id (that's internal) threads.board_thread_id, -- this should be part of the url path when creating links, not thread_id (that's internal)
boards.pathpart, boards.pathpart,
sites."name", sites."name",
-- sites.site_id, -- sites.site_id,
attachments.mimetype as file_mimetype, attachments.mimetype AS file_mimetype,
attachments.illegal as file_illegal, attachments.illegal AS file_illegal,
-- attachments.resolution as file_resolution, -- attachments.resolution AS file_resolution,
attachments.board_filename as file_name, attachments.board_filename AS file_name,
attachments.file_extension, attachments.file_extension,
attachments.thumb_extension as file_thumb_extension attachments.thumb_extension AS file_thumb_extension
FROM op_posts FROM tall_posts
-- JOIN post_counts ON op_posts.thread_id = post_counts.thread_id JOIN threads ON tall_posts.thread_id = threads.thread_id
JOIN threads ON op_posts.thread_id = threads.thread_id
JOIN boards ON threads.board_id = boards.board_id JOIN boards ON threads.board_id = boards.board_id
JOIN sites ON sites.site_id = boards.site_id JOIN sites ON sites.site_id = boards.site_id
LEFT OUTER JOIN attachments ON attachments.post_id = op_posts.post_id AND attachments.attachment_idx = 1 LEFT OUTER JOIN attachments ON attachments.post_id = tall_posts.post_id AND attachments.attachment_idx = 1
ORDER BY bump_time DESC; ORDER BY bump_time DESC;
$$ LANGUAGE sql; $$ LANGUAGE sql;