From 1e0289750d88eed010a020049ab509579a6753b1 Mon Sep 17 00:00:00 2001 From: towards-a-new-leftypol Date: Sat, 2 Mar 2024 03:02:31 -0500 Subject: [PATCH] Speed up fetch_catalog - also add limit to number of rows search returns (to let front-end decide how many to display) --- sql/add_post_idx.sql | 11 ++++ sql/fix_search_posts.sql | 4 +- sql/initialize.sql | 105 +++++++++++++++++------------------ sql/remake_fetch_catalog.sql | 39 +++++-------- 4 files changed, 78 insertions(+), 81 deletions(-) create mode 100644 sql/add_post_idx.sql diff --git a/sql/add_post_idx.sql b/sql/add_post_idx.sql new file mode 100644 index 0000000..9940724 --- /dev/null +++ b/sql/add_post_idx.sql @@ -0,0 +1,11 @@ +WITH indexed_posts AS ( + SELECT + post_id, + ROW_NUMBER() OVER (PARTITION BY thread_id ORDER BY board_post_id) AS index + FROM + posts +) +UPDATE posts +SET local_idx = indexed_posts.index +FROM indexed_posts +WHERE posts.post_id = indexed_posts.post_id; diff --git a/sql/fix_search_posts.sql b/sql/fix_search_posts.sql index d778bfc..c7b8986 100644 --- a/sql/fix_search_posts.sql +++ b/sql/fix_search_posts.sql @@ -25,7 +25,7 @@ DROP FUNCTION IF EXISTS search_posts; -- file_thumb_extension text -- ); -CREATE OR REPLACE FUNCTION search_posts(search_text text) +CREATE OR REPLACE FUNCTION search_posts(search_text text, max_rows integer DEFAULT 1000) RETURNS SETOF catalog_grid_result AS $$ WITH query AS ( @@ -54,7 +54,7 @@ RETURNS SETOF catalog_grid_result AS $$ AND attachments.attachment_idx = 1 , query WHERE p.body_search_index @@ query.query - LIMIT 2000 + LIMIT max_rows ) SELECT 0 AS estimated_post_count, diff --git a/sql/initialize.sql b/sql/initialize.sql index 8fb046d..26c593a 100644 --- a/sql/initialize.sql +++ b/sql/initialize.sql @@ -74,14 +74,17 @@ CREATE TABLE IF NOT EXISTS posts , body_search_index tsvector , thread_id bigint NOT NULL , embed text + , local_idx int NOT NULL , CONSTRAINT unique_thread_board_id_constraint UNIQUE (thread_id, board_post_id) , CONSTRAINT thread_fk FOREIGN KEY (thread_id) REFERENCES threads (thread_id) ON DELETE CASCADE + , CONSTRAINT unique_thread_local_idx UNIQUE (thread_id, local_idx) ); CREATE INDEX posts_creation_time_idx ON posts (creation_time); CREATE INDEX posts_body_search_idx ON posts USING GIN (body_search_index); CREATE INDEX posts_thread_id_idx ON posts (thread_id); CREATE INDEX posts_board_post_id_idx ON posts (board_post_id); CREATE INDEX posts_thread_id_creation_time_idx ON posts (creation_time, thread_id); +CREATE INDEX posts_local_idx_idx ON posts (local_idx); --CREATE INDEX posts_thread_id_board_post_id_idx ON posts (thread_id, board_post_id); CREATE OR REPLACE FUNCTION update_post_body_search_index() RETURNS trigger AS $$ @@ -96,6 +99,8 @@ BEGIN END; $$ LANGUAGE plpgsql; +-- WARNING: maybe disable this before doing full table operations on the posts table, +-- like populating a new column, since this will cause it to rebuild the entire text search index CREATE TRIGGER trigger_update_post_body_search_index BEFORE INSERT OR UPDATE ON posts @@ -243,64 +248,58 @@ CREATE TYPE catalog_grid_result AS ); -CREATE OR REPLACE FUNCTION fetch_catalog(max_time timestamptz, max_row_read int DEFAULT 10000) +CREATE OR REPLACE FUNCTION search_posts(search_text text, max_rows integer DEFAULT 1000) RETURNS SETOF catalog_grid_result AS $$ WITH - top AS - ( - SELECT * FROM fetch_top_threads(max_time, max_row_read) AS top + query AS ( + SELECT websearch_to_tsquery('english', search_text) AS query ), - tall_posts AS - ( + result_set AS ( SELECT - top.post_count as estimated_post_count, - posts.post_id, - posts.board_post_id, - posts.creation_time, - top.bump_time, - posts.body, - posts.subject, - posts.thread_id, - posts.embed - FROM top - JOIN posts ON top.thread_id = posts.thread_id - WHERE creation_time < max_time - ), - op_posts AS - ( - SELECT DISTINCT ON (t.thread_id) - * - FROM tall_posts t - ORDER BY t.thread_id, t.board_post_id - )-- , - -- post_counts AS - -- ( - -- SELECT thread_id, count(*) as post_count FROM - -- tall_posts - -- GROUP BY thread_id - -- ) + p.*, + threads.board_thread_id, + pathpart, + sites.name AS site_name, + attachments.mimetype as file_mimetype, + attachments.illegal as file_illegal, + -- attachments.resolution as file_resolution, + attachments.board_filename as file_name, + attachments.file_extension, + attachments.thumb_extension as file_thumb_extension, + ts_rank(p.body_search_index, query.query) + / (1 + EXTRACT(EPOCH FROM AGE(p.creation_time)) / (3600 * 24)) AS relevance + FROM posts p + JOIN threads ON threads.thread_id = p.thread_id + JOIN boards ON boards.board_id = threads.board_id + JOIN sites ON sites.site_id = boards.site_id + LEFT OUTER JOIN attachments + ON attachments.post_id = p.post_id + AND attachments.attachment_idx = 1 + , query + WHERE p.body_search_index @@ query.query + LIMIT max_rows + ) SELECT - -- post_counts.post_count, - op_posts.*, - threads.board_thread_id, -- this should be part of the url path when creating links, not thread_id (that's internal) - boards.pathpart, - sites."name", - -- sites.site_id, - attachments.mimetype as file_mimetype, - attachments.illegal as file_illegal, - -- attachments.resolution as file_resolution, - attachments.board_filename as file_name, - attachments.file_extension, - attachments.thumb_extension as file_thumb_extension - FROM op_posts - -- JOIN post_counts ON op_posts.thread_id = post_counts.thread_idh - JOIN threads ON op_posts.thread_id = threads.thread_id - JOIN boards ON threads.board_id = boards.board_id - JOIN sites ON sites.site_id = boards.site_id - LEFT OUTER JOIN attachments ON attachments.post_id = op_posts.post_id AND attachments.attachment_idx = 1 - ORDER BY bump_time DESC; -$$ LANGUAGE sql; - + 0 AS estimated_post_count, + result_set.post_id, + result_set.board_post_id, + result_set.creation_time, + result_set.creation_time AS bump_time, + result_set.body, + result_set.subject, + result_set.thread_id, + result_set.embed, + result_set.board_thread_id, + result_set.pathpart, + result_set.site_name, + result_set.file_mimetype, + result_set.file_illegal, + result_set.file_name, + result_set.file_extension, + result_set.file_thumb_extension + FROM result_set + ORDER BY result_set.relevance DESC; +$$ LANGUAGE sql STABLE; -- Function: search_posts -- diff --git a/sql/remake_fetch_catalog.sql b/sql/remake_fetch_catalog.sql index ab40aaf..35fbefc 100644 --- a/sql/remake_fetch_catalog.sql +++ b/sql/remake_fetch_catalog.sql @@ -3,6 +3,7 @@ BEGIN TRANSACTION; DROP TYPE IF EXISTS catalog_grid_result CASCADE; DROP FUNCTION IF EXISTS fetch_catalog; +-- OLD: 121ms CREATE TYPE catalog_grid_result AS ( -- post_count bigint, @@ -37,7 +38,7 @@ RETURNS SETOF catalog_grid_result AS $$ tall_posts AS ( SELECT - top.post_count as estimated_post_count, + top.post_count AS estimated_post_count, posts.post_id, posts.board_post_id, posts.creation_time, @@ -47,41 +48,27 @@ RETURNS SETOF catalog_grid_result AS $$ posts.thread_id, posts.embed FROM top - JOIN posts ON top.thread_id = posts.thread_id + JOIN posts ON top.thread_id = posts.thread_id AND posts.local_idx = 1 WHERE creation_time < max_time - ), - op_posts AS - ( - SELECT DISTINCT ON (t.thread_id) - * - FROM tall_posts t - ORDER BY t.thread_id, t.board_post_id - )-- , - -- post_counts AS - -- ( - -- SELECT thread_id, count(*) as post_count FROM - -- tall_posts - -- GROUP BY thread_id - -- ) + ) SELECT -- post_counts.post_count, - op_posts.*, + tall_posts.*, threads.board_thread_id, -- this should be part of the url path when creating links, not thread_id (that's internal) boards.pathpart, sites."name", -- sites.site_id, - attachments.mimetype as file_mimetype, - attachments.illegal as file_illegal, - -- attachments.resolution as file_resolution, - attachments.board_filename as file_name, + attachments.mimetype AS file_mimetype, + attachments.illegal AS file_illegal, + -- attachments.resolution AS file_resolution, + attachments.board_filename AS file_name, attachments.file_extension, - attachments.thumb_extension as file_thumb_extension - FROM op_posts - -- JOIN post_counts ON op_posts.thread_id = post_counts.thread_id - JOIN threads ON op_posts.thread_id = threads.thread_id + attachments.thumb_extension AS file_thumb_extension + FROM tall_posts + JOIN threads ON tall_posts.thread_id = threads.thread_id JOIN boards ON threads.board_id = boards.board_id JOIN sites ON sites.site_id = boards.site_id - LEFT OUTER JOIN attachments ON attachments.post_id = op_posts.post_id AND attachments.attachment_idx = 1 + LEFT OUTER JOIN attachments ON attachments.post_id = tall_posts.post_id AND attachments.attachment_idx = 1 ORDER BY bump_time DESC; $$ LANGUAGE sql;