diff --git a/fetch_catalog_pgrest_test.sh b/fetch_catalog_pgrest_test.sh index 908802c..bf165f1 100644 --- a/fetch_catalog_pgrest_test.sh +++ b/fetch_catalog_pgrest_test.sh @@ -2,5 +2,5 @@ time curl \ -v \ -H "Content-Type: application/json" \ - -d '{ "max_time": "2023-10-26", "max_row_read": 1001 }' \ + -d '{ "max_time": "2023-11-02", "max_row_read": 1001 }' \ -X POST http://localhost:3000/rpc/fetch_catalog diff --git a/search_test.sh b/search_test.sh new file mode 100644 index 0000000..ee8a36a --- /dev/null +++ b/search_test.sh @@ -0,0 +1,6 @@ +#--header "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJyb2xlIjoiY2hhbl9hcmNoaXZlciJ9.rGIKZokTDKTuQLIv8138bUby5PELfDipYYIDpJzH02c" \ +time curl \ + -v \ + -H "Content-Type: application/json" \ + -d '{ "search_text": "found this board lainchan" }' \ + -X POST http://localhost:3000/rpc/search_posts diff --git a/sql/archive_tests.sql b/sql/archive_tests.sql index 4d09d1c..7a9f471 100644 --- a/sql/archive_tests.sql +++ b/sql/archive_tests.sql @@ -589,12 +589,32 @@ $$; SELECT * FROM posts WHERE body_search_index @@ websearch_to_tsquery('english', 'holocaust'); -SELECT *, - ts_rank(body_search_index, websearch_to_tsquery('english', 'holocaust')) AS relevance -FROM posts -WHERE body_search_index @@ websearch_to_tsquery('english', 'holocaust') + + +WITH query AS ( + SELECT websearch_to_tsquery('english', 'Anonymous') AS query +) +SELECT p.*, ts_rank(p.body_search_index, query.query) AS relevance +FROM posts p, query +WHERE p.body_search_index @@ query.query ORDER BY relevance DESC; + +CREATE OR REPLACE FUNCTION search_posts(search_text TEXT) +RETURNS SETOF posts +AS $$ +SELECT p.* +FROM posts p +WHERE p.body_search_index @@ websearch_to_tsquery('english', search_text) +ORDER BY ts_rank(p.body_search_index, websearch_to_tsquery('english', search_text)) DESC; +$$ LANGUAGE sql STABLE; + +SELECT * FROM search_posts('found this board lainchan'); + +SELECT * FROM information_schema.role_routine_grants; + + + SELECT to_tsvector('english', body) FROM posts WHERE board_post_id = 476524; SELECT (setweight(to_tsvector('english', COALESCE(subject, '')), 'A') || setweight(to_tsvector('english', COALESCE(name, '')), 'B') || diff --git a/sql/initialize.sql b/sql/initialize.sql index aa1be8b..184659d 100644 --- a/sql/initialize.sql +++ b/sql/initialize.sql @@ -267,17 +267,66 @@ RETURNS TABLE ( $$ LANGUAGE sql; + +-- Function: search_posts +-- +-- This function performs a full-text search on the `posts` table using PostgreSQL's text search features. +-- It takes a single argument `search_text`, which represents the text to search for within the `posts` body content. +-- The function is designed to return rows from the `posts` table where the `body_search_index` tsvector column matches +-- the websearch to tsquery conversion of the input `search_text`. Results are ranked by their relevance to the search +-- text, with the most relevant posts appearing first. +-- +-- The function uses the `websearch_to_tsquery` function for parsing the provided search text into a tsquery object +-- using the 'english' configuration. It then ranks the results using the `ts_rank` function based on the match +-- between the `body_search_index` column and the tsquery object, ordering the posts from most to least relevant. +-- +-- Note that the `relevance` score is used only for ordering the results; it is not included in the function's return set. +-- +-- Parameters: +-- - search_text: TEXT, the text to be searched in the `posts` body. +-- +-- Returns: +-- - A SETOF rows from the `posts` table ordered by the relevance of the full-text search. +-- +-- Usage: +-- SELECT * FROM search_posts('Desired search text'); +-- +-- The function is marked as STABLE, indicating that it does not modify the database and always returns the same +-- results for the same input when the underlying data does not change. +-- +-- Example: +-- -- To search for posts related to 'quantum computing': +-- SELECT * FROM search_posts('quantum computing'); +CREATE OR REPLACE FUNCTION search_posts(search_text TEXT) +RETURNS SETOF posts +AS $$ +SELECT p.* +FROM posts p +WHERE p.body_search_index @@ websearch_to_tsquery('english', search_text) +ORDER BY ts_rank(p.body_search_index, websearch_to_tsquery('english', search_text)) DESC; +$$ LANGUAGE sql STABLE; + + /* * Permissions */ +REVOKE EXECUTE ON FUNCTION insert_posts_and_return_ids FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION fetch_top_threads FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION fetch_catalog FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION search_posts FROM PUBLIC; +REVOKE EXECUTE ON FUNCTION update_post_body_search_index FROM PUBLIC; CREATE ROLE chan_archive_anon nologin; -GRANT CONNECT ON DATABASE chan_archives TO chan_archive_anon; -GRANT SELECT ON sites TO chan_archive_anon; -GRANT SELECT ON boards TO chan_archive_anon; -GRANT SELECT ON threads TO chan_archive_anon; -GRANT SELECT ON posts TO chan_archive_anon; -GRANT SELECT ON attachments TO chan_archive_anon; +GRANT CONNECT ON DATABASE chan_archives TO chan_archive_anon; +GRANT SELECT ON sites TO chan_archive_anon; +GRANT SELECT ON boards TO chan_archive_anon; +GRANT SELECT ON threads TO chan_archive_anon; +GRANT SELECT ON posts TO chan_archive_anon; +GRANT SELECT ON attachments TO chan_archive_anon; +GRANT EXECUTE ON FUNCTION fetch_catalog TO chan_archive_anon; +GRANT EXECUTE ON FUNCTION fetch_top_threads TO chan_archive_anon; +GRANT EXECUTE ON FUNCTION search_posts TO chan_archive_anon; + -- GRANT usage, select ON SEQUENCE sites_site_id_seq TO chan_archive_anon; -- GRANT usage, select ON SEQUENCE boards_board_id_seq TO chan_archive_anon; GRANT chan_archive_anon TO admin; @@ -294,6 +343,7 @@ GRANT EXECUTE ON FUNCTION update_post_body_search_index TO chan_archiver; GRANT EXECUTE ON FUNCTION insert_posts_and_return_ids TO chan_archiver; GRANT EXECUTE ON FUNCTION fetch_top_threads TO chan_archiver; GRANT EXECUTE ON FUNCTION fetch_catalog TO chan_archiver; +GRANT EXECUTE ON FUNCTION search_posts TO chan_archiver; GRANT usage, select ON SEQUENCE sites_site_id_seq TO chan_archiver; GRANT usage, select ON SEQUENCE boards_board_id_seq TO chan_archiver; GRANT usage, select ON SEQUENCE threads_thread_id_seq TO chan_archiver;