sql: improve search_posts function
This commit is contained in:
parent
96546a8f7e
commit
9e3c054c53
|
@ -592,12 +592,15 @@ SELECT * FROM posts WHERE body_search_index @@ websearch_to_tsquery('english', '
|
|||
|
||||
|
||||
WITH query AS (
|
||||
SELECT websearch_to_tsquery('english', 'Anonymous') AS query
|
||||
SELECT websearch_to_tsquery('english', 'sound cringe irl') AS query
|
||||
)
|
||||
SELECT p.*, ts_rank(p.body_search_index, query.query) AS relevance
|
||||
FROM posts p, query
|
||||
SELECT p.*, pathpart, board_thread_id,
|
||||
ts_rank(p.body_search_index, query.query)
|
||||
/ (1 + EXTRACT(EPOCH FROM AGE(p.creation_time)) / (3600 * 24)) AS relevance
|
||||
FROM posts p JOIN threads ON threads.thread_id = p.thread_id JOIN boards ON boards.board_id = threads.board_id, query
|
||||
WHERE p.body_search_index @@ query.query
|
||||
ORDER BY relevance DESC;
|
||||
ORDER BY relevance
|
||||
DESC;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION search_posts(search_text TEXT)
|
||||
|
@ -606,11 +609,15 @@ AS $$
|
|||
SELECT p.*
|
||||
FROM posts p
|
||||
WHERE p.body_search_index @@ websearch_to_tsquery('english', search_text)
|
||||
ORDER BY ts_rank(p.body_search_index, websearch_to_tsquery('english', search_text)) DESC;
|
||||
ORDER BY
|
||||
ts_rank(p.body_search_index, websearch_to_tsquery('english', search_text))
|
||||
/
|
||||
(1 + EXTRACT(EPOCH FROM AGE(p.creation_time)) / (3600 * 24))
|
||||
DESC;
|
||||
$$ LANGUAGE sql STABLE;
|
||||
|
||||
|
||||
SELECT * FROM search_posts('alt chans') s
|
||||
SELECT * FROM search_posts('archive') s
|
||||
JOIN threads ON threads.thread_id = s.thread_id
|
||||
JOIN boards ON boards.board_id = threads.board_id;
|
||||
|
||||
|
@ -641,3 +648,8 @@ SET body_search_index = (
|
|||
)
|
||||
WHERE board_post_id = 476524;
|
||||
|
||||
SELECT count(*)
|
||||
FROM posts
|
||||
WHERE creation_time >= CURRENT_DATE AT TIME ZONE 'UTC'
|
||||
AND creation_time < (CURRENT_DATE + INTERVAL '1 day') AT TIME ZONE 'UTC';
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ AS $$
|
|||
$$;
|
||||
|
||||
|
||||
SELECT * FROM fetch_top_threads(NOW() - INTERVAL '10d');
|
||||
SELECT * FROM fetch_top_threads(NOW(), 1000);
|
||||
|
||||
SELECT * FROM fetch_top_threads(NOW() - INTERVAL '10d') top JOIN posts ON top.thread_id = posts.thread_id;
|
||||
|
||||
|
@ -197,7 +197,7 @@ $$ LANGUAGE sql;
|
|||
|
||||
SELECT * FROM fetch_catalog(NOW() - INTERVAL '1y', 1001);
|
||||
|
||||
SELECT * FROM fetch_catalog(NOW(), 5000);
|
||||
SELECT * FROM fetch_catalog(NOW(), 1000);
|
||||
|
||||
SELECT count(*) FROM posts;
|
||||
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
BEGIN TRANSACTION;
|
||||
|
||||
DROP FUNCTION IF EXISTS search_posts;
|
||||
|
||||
CREATE OR REPLACE FUNCTION search_posts(search_text text)
|
||||
RETURNS TABLE (
|
||||
post posts,
|
||||
pathpart text,
|
||||
board_thread_id bigint,
|
||||
relevance double precision
|
||||
) AS $$
|
||||
WITH query AS (
|
||||
SELECT websearch_to_tsquery('english', search_text) AS query
|
||||
)
|
||||
SELECT p::posts AS post, pathpart, board_thread_id,
|
||||
ts_rank(p.body_search_index, query.query)
|
||||
/ (1 + EXTRACT(EPOCH FROM AGE(p.creation_time)) / (3600 * 24)) AS relevance
|
||||
FROM posts p JOIN threads ON threads.thread_id = p.thread_id JOIN boards ON boards.board_id = threads.board_id, query
|
||||
WHERE p.body_search_index @@ query.query
|
||||
ORDER BY relevance
|
||||
DESC
|
||||
$$ LANGUAGE sql STABLE;
|
||||
|
||||
GRANT EXECUTE ON FUNCTION search_posts TO chan_archive_anon;
|
||||
GRANT EXECUTE ON FUNCTION search_posts TO chan_archiver;
|
||||
REVOKE EXECUTE ON FUNCTION search_posts FROM PUBLIC;
|
||||
|
||||
ROLLBACK;
|
|
@ -320,13 +320,23 @@ $$ LANGUAGE sql;
|
|||
-- Example:
|
||||
-- -- To search for posts related to 'quantum computing':
|
||||
-- SELECT * FROM search_posts('quantum computing');
|
||||
CREATE OR REPLACE FUNCTION search_posts(search_text TEXT)
|
||||
RETURNS SETOF posts
|
||||
AS $$
|
||||
SELECT p.*
|
||||
FROM posts p
|
||||
WHERE p.body_search_index @@ websearch_to_tsquery('english', search_text)
|
||||
ORDER BY ts_rank(p.body_search_index, websearch_to_tsquery('english', search_text)) DESC;
|
||||
CREATE OR REPLACE FUNCTION search_posts(search_text text)
|
||||
RETURNS TABLE (
|
||||
post posts,
|
||||
pathpart text,
|
||||
board_thread_id bigint,
|
||||
relevance double precision
|
||||
) AS $$
|
||||
WITH query AS (
|
||||
SELECT websearch_to_tsquery('english', search_text) AS query
|
||||
)
|
||||
SELECT p::posts AS post, pathpart, board_thread_id,
|
||||
ts_rank(p.body_search_index, query.query)
|
||||
/ (1 + EXTRACT(EPOCH FROM AGE(p.creation_time)) / (3600 * 24)) AS relevance
|
||||
FROM posts p JOIN threads ON threads.thread_id = p.thread_id JOIN boards ON boards.board_id = threads.board_id, query
|
||||
WHERE p.body_search_index @@ query.query
|
||||
ORDER BY relevance
|
||||
DESC
|
||||
$$ LANGUAGE sql STABLE;
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
BEGIN TRANSACTION;
|
||||
|
||||
DROP TYPE IF EXISTS dimension CASCADE;
|
||||
DROP TABLE IF EXISTS attachments CASCADE;
|
||||
|
||||
CREATE TYPE dimension AS
|
||||
( width int
|
||||
, height int
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS attachments
|
||||
( attachment_id bigserial primary key
|
||||
, mimetype text NOT NULL
|
||||
, creation_time timestamp with time zone NOT NULL
|
||||
, sha256_hash text NOT NULL
|
||||
, phash bigint
|
||||
, illegal boolean NOT NULL DEFAULT false
|
||||
, post_id bigint NOT NULL
|
||||
, resolution dimension
|
||||
, file_extension text
|
||||
, thumb_extension text
|
||||
, original_filename text
|
||||
, board_filename text NOT NULL
|
||||
, spoiler boolean NOT NULL DEFAULT true
|
||||
, file_size_bytes int
|
||||
, CONSTRAINT post_fk FOREIGN KEY (post_id) REFERENCES posts (post_id) ON DELETE CASCADE
|
||||
);
|
||||
CREATE INDEX attachments_creation_time_idx ON attachments (creation_time);
|
||||
CREATE INDEX attachments_post_id_idx ON attachments (post_id);
|
||||
CREATE INDEX attachments_sha256_hash_idx ON attachments (sha256_hash);
|
||||
--
|
||||
-- Index using the bktree extension for quickly getting the closest phashes
|
||||
CREATE INDEX attachments_phash_bktree_index ON attachments USING spgist (phash bktree_ops);
|
||||
|
||||
|
||||
GRANT SELECT ON attachments TO chan_archive_anon;
|
||||
GRANT ALL ON attachments TO chan_archiver;
|
||||
GRANT usage, select ON SEQUENCE attachments_attachment_id_seq TO chan_archiver;
|
||||
|
||||
COMMIT;
|
|
@ -0,0 +1,87 @@
|
|||
BEGIN TRANSACTION;
|
||||
|
||||
DROP FUNCTION IF EXISTS fetch_catalog;
|
||||
|
||||
CREATE OR REPLACE FUNCTION fetch_catalog(max_time timestamptz, max_row_read int DEFAULT 10000)
|
||||
RETURNS TABLE (
|
||||
-- post_count bigint,
|
||||
estimated_post_count bigint,
|
||||
post_id bigint,
|
||||
board_post_id bigint,
|
||||
creation_time timestamptz,
|
||||
bump_time timestamptz,
|
||||
body text,
|
||||
subject text,
|
||||
thread_id bigint,
|
||||
board_thread_id bigint,
|
||||
pathpart text,
|
||||
site_name text,
|
||||
file_mimetype text,
|
||||
file_illegal boolean,
|
||||
-- file_resolution dimension,
|
||||
file_name text,
|
||||
file_extension text,
|
||||
file_thumb_extension text
|
||||
) AS $$
|
||||
WITH
|
||||
top AS
|
||||
(
|
||||
SELECT * FROM fetch_top_threads(max_time, max_row_read) AS top
|
||||
),
|
||||
tall_posts AS
|
||||
(
|
||||
SELECT
|
||||
top.post_count as estimated_post_count,
|
||||
posts.post_id,
|
||||
posts.board_post_id,
|
||||
posts.creation_time,
|
||||
top.bump_time,
|
||||
posts.body,
|
||||
posts.subject,
|
||||
posts.thread_id
|
||||
FROM top
|
||||
JOIN posts ON top.thread_id = posts.thread_id
|
||||
WHERE creation_time < max_time
|
||||
),
|
||||
op_posts AS
|
||||
(
|
||||
SELECT DISTINCT ON (t.thread_id)
|
||||
*
|
||||
FROM tall_posts t
|
||||
ORDER BY t.thread_id, t.board_post_id
|
||||
)-- ,
|
||||
-- post_counts AS
|
||||
-- (
|
||||
-- SELECT thread_id, count(*) as post_count FROM
|
||||
-- tall_posts
|
||||
-- GROUP BY thread_id
|
||||
-- )
|
||||
SELECT
|
||||
-- post_counts.post_count,
|
||||
op_posts.*,
|
||||
threads.board_thread_id, -- this should be part of the url path when creating links, not thread_id (that's internal)
|
||||
boards.pathpart,
|
||||
sites."name",
|
||||
-- sites.site_id,
|
||||
attachments.mimetype as file_mimetype,
|
||||
attachments.illegal as file_illegal,
|
||||
-- attachments.resolution as file_resolution,
|
||||
attachments.board_filename as file_name,
|
||||
attachments.file_extension,
|
||||
attachments.thumb_extension as file_thumb_extension
|
||||
FROM op_posts
|
||||
-- JOIN post_counts ON op_posts.thread_id = post_counts.thread_id
|
||||
JOIN threads ON op_posts.thread_id = threads.thread_id
|
||||
JOIN boards ON threads.board_id = boards.board_id
|
||||
JOIN sites ON sites.site_id = boards.site_id
|
||||
LEFT OUTER JOIN attachments ON attachments.post_id = op_posts.post_id
|
||||
ORDER BY bump_time DESC;
|
||||
$$ LANGUAGE sql;
|
||||
|
||||
|
||||
REVOKE EXECUTE ON FUNCTION fetch_catalog FROM PUBLIC;
|
||||
GRANT EXECUTE ON FUNCTION fetch_catalog TO chan_archive_anon;
|
||||
GRANT EXECUTE ON FUNCTION fetch_catalog TO chan_archiver;
|
||||
|
||||
-- ROLLBACK;
|
||||
COMMIT;
|
Loading…
Reference in New Issue