sql: improve search_posts function
This commit is contained in:
parent
96546a8f7e
commit
9e3c054c53
|
@ -592,12 +592,15 @@ SELECT * FROM posts WHERE body_search_index @@ websearch_to_tsquery('english', '
|
||||||
|
|
||||||
|
|
||||||
WITH query AS (
|
WITH query AS (
|
||||||
SELECT websearch_to_tsquery('english', 'Anonymous') AS query
|
SELECT websearch_to_tsquery('english', 'sound cringe irl') AS query
|
||||||
)
|
)
|
||||||
SELECT p.*, ts_rank(p.body_search_index, query.query) AS relevance
|
SELECT p.*, pathpart, board_thread_id,
|
||||||
FROM posts p, query
|
ts_rank(p.body_search_index, query.query)
|
||||||
|
/ (1 + EXTRACT(EPOCH FROM AGE(p.creation_time)) / (3600 * 24)) AS relevance
|
||||||
|
FROM posts p JOIN threads ON threads.thread_id = p.thread_id JOIN boards ON boards.board_id = threads.board_id, query
|
||||||
WHERE p.body_search_index @@ query.query
|
WHERE p.body_search_index @@ query.query
|
||||||
ORDER BY relevance DESC;
|
ORDER BY relevance
|
||||||
|
DESC;
|
||||||
|
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION search_posts(search_text TEXT)
|
CREATE OR REPLACE FUNCTION search_posts(search_text TEXT)
|
||||||
|
@ -606,11 +609,15 @@ AS $$
|
||||||
SELECT p.*
|
SELECT p.*
|
||||||
FROM posts p
|
FROM posts p
|
||||||
WHERE p.body_search_index @@ websearch_to_tsquery('english', search_text)
|
WHERE p.body_search_index @@ websearch_to_tsquery('english', search_text)
|
||||||
ORDER BY ts_rank(p.body_search_index, websearch_to_tsquery('english', search_text)) DESC;
|
ORDER BY
|
||||||
|
ts_rank(p.body_search_index, websearch_to_tsquery('english', search_text))
|
||||||
|
/
|
||||||
|
(1 + EXTRACT(EPOCH FROM AGE(p.creation_time)) / (3600 * 24))
|
||||||
|
DESC;
|
||||||
$$ LANGUAGE sql STABLE;
|
$$ LANGUAGE sql STABLE;
|
||||||
|
|
||||||
|
|
||||||
SELECT * FROM search_posts('alt chans') s
|
SELECT * FROM search_posts('archive') s
|
||||||
JOIN threads ON threads.thread_id = s.thread_id
|
JOIN threads ON threads.thread_id = s.thread_id
|
||||||
JOIN boards ON boards.board_id = threads.board_id;
|
JOIN boards ON boards.board_id = threads.board_id;
|
||||||
|
|
||||||
|
@ -641,3 +648,8 @@ SET body_search_index = (
|
||||||
)
|
)
|
||||||
WHERE board_post_id = 476524;
|
WHERE board_post_id = 476524;
|
||||||
|
|
||||||
|
SELECT count(*)
|
||||||
|
FROM posts
|
||||||
|
WHERE creation_time >= CURRENT_DATE AT TIME ZONE 'UTC'
|
||||||
|
AND creation_time < (CURRENT_DATE + INTERVAL '1 day') AT TIME ZONE 'UTC';
|
||||||
|
|
||||||
|
|
|
@ -23,7 +23,7 @@ AS $$
|
||||||
$$;
|
$$;
|
||||||
|
|
||||||
|
|
||||||
SELECT * FROM fetch_top_threads(NOW() - INTERVAL '10d');
|
SELECT * FROM fetch_top_threads(NOW(), 1000);
|
||||||
|
|
||||||
SELECT * FROM fetch_top_threads(NOW() - INTERVAL '10d') top JOIN posts ON top.thread_id = posts.thread_id;
|
SELECT * FROM fetch_top_threads(NOW() - INTERVAL '10d') top JOIN posts ON top.thread_id = posts.thread_id;
|
||||||
|
|
||||||
|
@ -197,7 +197,7 @@ $$ LANGUAGE sql;
|
||||||
|
|
||||||
SELECT * FROM fetch_catalog(NOW() - INTERVAL '1y', 1001);
|
SELECT * FROM fetch_catalog(NOW() - INTERVAL '1y', 1001);
|
||||||
|
|
||||||
SELECT * FROM fetch_catalog(NOW(), 5000);
|
SELECT * FROM fetch_catalog(NOW(), 1000);
|
||||||
|
|
||||||
SELECT count(*) FROM posts;
|
SELECT count(*) FROM posts;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
BEGIN TRANSACTION;
|
||||||
|
|
||||||
|
DROP FUNCTION IF EXISTS search_posts;
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION search_posts(search_text text)
|
||||||
|
RETURNS TABLE (
|
||||||
|
post posts,
|
||||||
|
pathpart text,
|
||||||
|
board_thread_id bigint,
|
||||||
|
relevance double precision
|
||||||
|
) AS $$
|
||||||
|
WITH query AS (
|
||||||
|
SELECT websearch_to_tsquery('english', search_text) AS query
|
||||||
|
)
|
||||||
|
SELECT p::posts AS post, pathpart, board_thread_id,
|
||||||
|
ts_rank(p.body_search_index, query.query)
|
||||||
|
/ (1 + EXTRACT(EPOCH FROM AGE(p.creation_time)) / (3600 * 24)) AS relevance
|
||||||
|
FROM posts p JOIN threads ON threads.thread_id = p.thread_id JOIN boards ON boards.board_id = threads.board_id, query
|
||||||
|
WHERE p.body_search_index @@ query.query
|
||||||
|
ORDER BY relevance
|
||||||
|
DESC
|
||||||
|
$$ LANGUAGE sql STABLE;
|
||||||
|
|
||||||
|
GRANT EXECUTE ON FUNCTION search_posts TO chan_archive_anon;
|
||||||
|
GRANT EXECUTE ON FUNCTION search_posts TO chan_archiver;
|
||||||
|
REVOKE EXECUTE ON FUNCTION search_posts FROM PUBLIC;
|
||||||
|
|
||||||
|
ROLLBACK;
|
|
@ -320,13 +320,23 @@ $$ LANGUAGE sql;
|
||||||
-- Example:
|
-- Example:
|
||||||
-- -- To search for posts related to 'quantum computing':
|
-- -- To search for posts related to 'quantum computing':
|
||||||
-- SELECT * FROM search_posts('quantum computing');
|
-- SELECT * FROM search_posts('quantum computing');
|
||||||
CREATE OR REPLACE FUNCTION search_posts(search_text TEXT)
|
CREATE OR REPLACE FUNCTION search_posts(search_text text)
|
||||||
RETURNS SETOF posts
|
RETURNS TABLE (
|
||||||
AS $$
|
post posts,
|
||||||
SELECT p.*
|
pathpart text,
|
||||||
FROM posts p
|
board_thread_id bigint,
|
||||||
WHERE p.body_search_index @@ websearch_to_tsquery('english', search_text)
|
relevance double precision
|
||||||
ORDER BY ts_rank(p.body_search_index, websearch_to_tsquery('english', search_text)) DESC;
|
) AS $$
|
||||||
|
WITH query AS (
|
||||||
|
SELECT websearch_to_tsquery('english', search_text) AS query
|
||||||
|
)
|
||||||
|
SELECT p::posts AS post, pathpart, board_thread_id,
|
||||||
|
ts_rank(p.body_search_index, query.query)
|
||||||
|
/ (1 + EXTRACT(EPOCH FROM AGE(p.creation_time)) / (3600 * 24)) AS relevance
|
||||||
|
FROM posts p JOIN threads ON threads.thread_id = p.thread_id JOIN boards ON boards.board_id = threads.board_id, query
|
||||||
|
WHERE p.body_search_index @@ query.query
|
||||||
|
ORDER BY relevance
|
||||||
|
DESC
|
||||||
$$ LANGUAGE sql STABLE;
|
$$ LANGUAGE sql STABLE;
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
BEGIN TRANSACTION;
|
||||||
|
|
||||||
|
DROP TYPE IF EXISTS dimension CASCADE;
|
||||||
|
DROP TABLE IF EXISTS attachments CASCADE;
|
||||||
|
|
||||||
|
CREATE TYPE dimension AS
|
||||||
|
( width int
|
||||||
|
, height int
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS attachments
|
||||||
|
( attachment_id bigserial primary key
|
||||||
|
, mimetype text NOT NULL
|
||||||
|
, creation_time timestamp with time zone NOT NULL
|
||||||
|
, sha256_hash text NOT NULL
|
||||||
|
, phash bigint
|
||||||
|
, illegal boolean NOT NULL DEFAULT false
|
||||||
|
, post_id bigint NOT NULL
|
||||||
|
, resolution dimension
|
||||||
|
, file_extension text
|
||||||
|
, thumb_extension text
|
||||||
|
, original_filename text
|
||||||
|
, board_filename text NOT NULL
|
||||||
|
, spoiler boolean NOT NULL DEFAULT true
|
||||||
|
, file_size_bytes int
|
||||||
|
, CONSTRAINT post_fk FOREIGN KEY (post_id) REFERENCES posts (post_id) ON DELETE CASCADE
|
||||||
|
);
|
||||||
|
CREATE INDEX attachments_creation_time_idx ON attachments (creation_time);
|
||||||
|
CREATE INDEX attachments_post_id_idx ON attachments (post_id);
|
||||||
|
CREATE INDEX attachments_sha256_hash_idx ON attachments (sha256_hash);
|
||||||
|
--
|
||||||
|
-- Index using the bktree extension for quickly getting the closest phashes
|
||||||
|
CREATE INDEX attachments_phash_bktree_index ON attachments USING spgist (phash bktree_ops);
|
||||||
|
|
||||||
|
|
||||||
|
GRANT SELECT ON attachments TO chan_archive_anon;
|
||||||
|
GRANT ALL ON attachments TO chan_archiver;
|
||||||
|
GRANT usage, select ON SEQUENCE attachments_attachment_id_seq TO chan_archiver;
|
||||||
|
|
||||||
|
COMMIT;
|
|
@ -0,0 +1,87 @@
|
||||||
|
BEGIN TRANSACTION;
|
||||||
|
|
||||||
|
DROP FUNCTION IF EXISTS fetch_catalog;
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION fetch_catalog(max_time timestamptz, max_row_read int DEFAULT 10000)
|
||||||
|
RETURNS TABLE (
|
||||||
|
-- post_count bigint,
|
||||||
|
estimated_post_count bigint,
|
||||||
|
post_id bigint,
|
||||||
|
board_post_id bigint,
|
||||||
|
creation_time timestamptz,
|
||||||
|
bump_time timestamptz,
|
||||||
|
body text,
|
||||||
|
subject text,
|
||||||
|
thread_id bigint,
|
||||||
|
board_thread_id bigint,
|
||||||
|
pathpart text,
|
||||||
|
site_name text,
|
||||||
|
file_mimetype text,
|
||||||
|
file_illegal boolean,
|
||||||
|
-- file_resolution dimension,
|
||||||
|
file_name text,
|
||||||
|
file_extension text,
|
||||||
|
file_thumb_extension text
|
||||||
|
) AS $$
|
||||||
|
WITH
|
||||||
|
top AS
|
||||||
|
(
|
||||||
|
SELECT * FROM fetch_top_threads(max_time, max_row_read) AS top
|
||||||
|
),
|
||||||
|
tall_posts AS
|
||||||
|
(
|
||||||
|
SELECT
|
||||||
|
top.post_count as estimated_post_count,
|
||||||
|
posts.post_id,
|
||||||
|
posts.board_post_id,
|
||||||
|
posts.creation_time,
|
||||||
|
top.bump_time,
|
||||||
|
posts.body,
|
||||||
|
posts.subject,
|
||||||
|
posts.thread_id
|
||||||
|
FROM top
|
||||||
|
JOIN posts ON top.thread_id = posts.thread_id
|
||||||
|
WHERE creation_time < max_time
|
||||||
|
),
|
||||||
|
op_posts AS
|
||||||
|
(
|
||||||
|
SELECT DISTINCT ON (t.thread_id)
|
||||||
|
*
|
||||||
|
FROM tall_posts t
|
||||||
|
ORDER BY t.thread_id, t.board_post_id
|
||||||
|
)-- ,
|
||||||
|
-- post_counts AS
|
||||||
|
-- (
|
||||||
|
-- SELECT thread_id, count(*) as post_count FROM
|
||||||
|
-- tall_posts
|
||||||
|
-- GROUP BY thread_id
|
||||||
|
-- )
|
||||||
|
SELECT
|
||||||
|
-- post_counts.post_count,
|
||||||
|
op_posts.*,
|
||||||
|
threads.board_thread_id, -- this should be part of the url path when creating links, not thread_id (that's internal)
|
||||||
|
boards.pathpart,
|
||||||
|
sites."name",
|
||||||
|
-- sites.site_id,
|
||||||
|
attachments.mimetype as file_mimetype,
|
||||||
|
attachments.illegal as file_illegal,
|
||||||
|
-- attachments.resolution as file_resolution,
|
||||||
|
attachments.board_filename as file_name,
|
||||||
|
attachments.file_extension,
|
||||||
|
attachments.thumb_extension as file_thumb_extension
|
||||||
|
FROM op_posts
|
||||||
|
-- JOIN post_counts ON op_posts.thread_id = post_counts.thread_id
|
||||||
|
JOIN threads ON op_posts.thread_id = threads.thread_id
|
||||||
|
JOIN boards ON threads.board_id = boards.board_id
|
||||||
|
JOIN sites ON sites.site_id = boards.site_id
|
||||||
|
LEFT OUTER JOIN attachments ON attachments.post_id = op_posts.post_id
|
||||||
|
ORDER BY bump_time DESC;
|
||||||
|
$$ LANGUAGE sql;
|
||||||
|
|
||||||
|
|
||||||
|
REVOKE EXECUTE ON FUNCTION fetch_catalog FROM PUBLIC;
|
||||||
|
GRANT EXECUTE ON FUNCTION fetch_catalog TO chan_archive_anon;
|
||||||
|
GRANT EXECUTE ON FUNCTION fetch_catalog TO chan_archiver;
|
||||||
|
|
||||||
|
-- ROLLBACK;
|
||||||
|
COMMIT;
|
Loading…
Reference in New Issue