diff --git a/fetch_catalog_pgrest_test.sh b/fetch_catalog_pgrest_test.sh new file mode 100644 index 0000000..908802c --- /dev/null +++ b/fetch_catalog_pgrest_test.sh @@ -0,0 +1,6 @@ +#--header "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJyb2xlIjoiY2hhbl9hcmNoaXZlciJ9.rGIKZokTDKTuQLIv8138bUby5PELfDipYYIDpJzH02c" \ +time curl \ + -v \ + -H "Content-Type: application/json" \ + -d '{ "max_time": "2023-10-26", "max_row_read": 1001 }' \ + -X POST http://localhost:3000/rpc/fetch_catalog diff --git a/sql/archive_tests.sql b/sql/archive_tests.sql index 83a451b..333ab69 100644 --- a/sql/archive_tests.sql +++ b/sql/archive_tests.sql @@ -588,6 +588,20 @@ grouped AS ( $$; +SELECT * FROM posts WHERE body_search_index @@ websearch_to_tsquery('english', 'TRUE CHRISTIAN'); +SELECT to_tsvector('english', body) FROM posts WHERE board_post_id = 476524; +SELECT (setweight(to_tsvector('english', COALESCE(subject, '')), 'A') || + setweight(to_tsvector('english', COALESCE(name, '')), 'B') || + setweight(to_tsvector('english', COALESCE(body, '')), 'C')) FROM posts WHERE board_post_id = 476524; +SELECT * FROM posts WHERE board_post_id = 476524; +UPDATE posts SET subject = NULL WHERE board_post_id = 476524; +UPDATE posts +SET body_search_index = ( + setweight(to_tsvector('english', COALESCE(subject, '')), 'A') || + setweight(to_tsvector('english', COALESCE(name, '')), 'B') || + setweight(to_tsvector('english', COALESCE(body, '')), 'C') +) +WHERE board_post_id = 476524; diff --git a/sql/archive_tests2.sql b/sql/archive_tests2.sql index ad66fa5..be828ea 100644 --- a/sql/archive_tests2.sql +++ b/sql/archive_tests2.sql @@ -192,6 +192,7 @@ $$ LANGUAGE sql; SELECT * FROM fetch_catalog(NOW() - INTERVAL '1y', 1001); +SELECT * FROM fetch_catalog(NOW(), 2000); -- CREATE INDEX idx_posts_thread_board ON posts (thread_id, board_post_id); ANALYZE posts; diff --git a/sql/initialize.sql b/sql/initialize.sql index d68ddbf..1f50ce0 100644 --- a/sql/initialize.sql +++ b/sql/initialize.sql @@ -66,6 +66,9 @@ CREATE TABLE IF NOT EXISTS posts , board_post_id bigint NOT NULL , creation_time timestamp with time zone NOT NULL , body text + , subject text + , name text + , email text , body_search_index tsvector , thread_id bigint NOT NULL , CONSTRAINT unique_thread_board_id_constraint UNIQUE (thread_id, board_post_id) @@ -80,7 +83,12 @@ CREATE INDEX posts_thread_id_creation_time_idx ON posts (creation_time, thread_i CREATE OR REPLACE FUNCTION update_post_body_search_index() RETURNS trigger AS $$ BEGIN - NEW.body_search_index := to_tsvector('english', NEW.body); + NEW.body_search_index := + ( + setweight(to_tsvector('english', COALESCE(NEW.subject, '')), 'A') || + setweight(to_tsvector('english', COALESCE(NEW.name, '')), 'B') || + setweight(to_tsvector('english', COALESCE(NEW.body, '')), 'C') + ); RETURN NEW; END; $$ LANGUAGE plpgsql; @@ -157,8 +165,8 @@ to_insert AS ( WHERE s.post_id IS NULL ), inserted AS ( - INSERT INTO posts (board_post_id, creation_time, body, thread_id) - SELECT board_post_id, creation_time, body, thread_id + INSERT INTO posts (board_post_id, creation_time, body, subject, name, email, thread_id) + SELECT board_post_id, creation_time, body, subject, name, email, thread_id FROM to_insert RETURNING post_id, board_post_id, thread_id ) @@ -204,6 +212,7 @@ RETURNS TABLE ( board_post_id bigint, creation_time timestamptz, body text, + subject text, thread_id bigint, board_thread_id bigint, pathpart text, @@ -222,6 +231,7 @@ RETURNS TABLE ( posts.board_post_id, posts.creation_time, posts.body, + posts.subject, posts.thread_id FROM top JOIN posts ON top.thread_id = posts.thread_id diff --git a/src/Backfill.hs b/src/Backfill.hs index aaddce0..7976ac0 100644 --- a/src/Backfill.hs +++ b/src/Backfill.hs @@ -199,6 +199,8 @@ apiPostToArchivePost thread post = , Posts.board_post_id = JSONPosts.no post , Posts.creation_time = posixSecondsToUTCTime (realToFrac $ JSONPosts.time post) , Posts.body = JSONPosts.com post + , Posts.name = JSONPosts.name post + , Posts.subject = JSONPosts.sub post , Posts.thread_id = Threads.thread_id thread } diff --git a/src/JSONParsing.hs b/src/JSONParsing.hs index cf3bf19..cbc8bd4 100644 --- a/src/JSONParsing.hs +++ b/src/JSONParsing.hs @@ -52,11 +52,6 @@ instance FromJSON Catalog parseJSONCatalog :: FilePath -> IO (Either String [Catalog]) parseJSONCatalog path = B.readFile path >>= return . eitherDecode -{- -parsePosts :: FilePath -> IO (Either String Post.PostWrapper) -parsePosts path = B.readFile path >>= return . eitherDecode --} - parsePosts :: FilePath -> IO (Either String Post.PostWrapper) parsePosts path = do diff --git a/src/JSONPost.hs b/src/JSONPost.hs index 5179398..2451664 100644 --- a/src/JSONPost.hs +++ b/src/JSONPost.hs @@ -13,6 +13,7 @@ data Post = Post { no :: Int64 , com :: Maybe Text , name :: Maybe Text + , sub :: Maybe Text , time :: Int , omitted_posts :: Maybe Int , omitted_images :: Maybe Int diff --git a/src/PostsType.hs b/src/PostsType.hs index bb6ff66..a018369 100644 --- a/src/PostsType.hs +++ b/src/PostsType.hs @@ -15,5 +15,7 @@ data Post = Post , board_post_id :: Int64 , creation_time :: UTCTime , body :: Maybe Text + , name :: Maybe Text + , subject :: Maybe Text , thread_id :: Int } deriving (Show, Generic, FromJSON, ToJSON)