From 103bf8601718176ce227de3b11a6d3c2a2a13293 Mon Sep 17 00:00:00 2001
From: towards-a-new-leftypol
Date: Tue, 16 Jan 2024 17:13:55 -0500
Subject: [PATCH] WIP: actually adding attachments

- attachments table was already defined
- change the hash type to sha256
- define sql function to upsert attachment rows
---
 backfill_settings.json |  1 +
 sql/initialize.sql     | 39 ++++++++++++++++++++++++++++++++++++++-
 src/AttachmentType.hs  | 24 ++++++++++++++++++++++++
 src/JSONSettings.hs    |  1 +
 4 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 src/AttachmentType.hs

diff --git a/backfill_settings.json b/backfill_settings.json
index 31ff7cf..cfe5fc2 100644
--- a/backfill_settings.json
+++ b/backfill_settings.json
@@ -2,6 +2,7 @@
     "postgrest_url": "http://localhost:3000",
     "jwt": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJyb2xlIjoiY2hhbl9hcmNoaXZlciJ9.rGIKZokTDKTuQLIv8138bUby5PELfDipYYIDpJzH02c",
     "backup_read_root": "/home/phil/linixy/tmp/leftypol_back/lainchan.leftypol.org",
+    "media_root_path": "/home/phil/linixy/tmp/chan_archive_media",
     "site_name": "leftychan",
     "site_url": "https://leftychan.net"
 }
diff --git a/sql/initialize.sql b/sql/initialize.sql
index c5d7dc4..71f0ba2 100644
--- a/sql/initialize.sql
+++ b/sql/initialize.sql
@@ -103,7 +103,7 @@ CREATE TABLE IF NOT EXISTS attachments
     ( attachment_id bigserial primary key
     , mimetype text NOT NULL
     , creation_time timestamp with time zone NOT NULL
-    , sha256_hash text NOT NULL
+    , sha256_hash text NOT NULL UNIQUE
     , phash bigint
     , illegal boolean NOT NULL DEFAULT false
     , post_id bigint NOT NULL
@@ -181,6 +181,43 @@ $$ LANGUAGE sql;
 
 -- 1:21 for full db (nothing inserted)
 
+-- Given candidate attachment rows, insert the ones whose sha256_hash is not
+-- stored yet and return (attachment_id, post_id, sha256_hash) both for the
+-- rows inserted by this call and for the rows that already existed with one
+-- of the payload's hashes.
+CREATE OR REPLACE FUNCTION insert_attachments_and_return_ids(
+    attachments_payload attachments[])
+RETURNS TABLE (attachment_id bigint, post_id bigint, sha256_hash text) AS $$
+WITH
+selected AS (
+    SELECT attachment_id, post_id, sha256_hash
+    FROM attachments
+    WHERE sha256_hash IN (
+        SELECT sha256_hash FROM unnest(attachments_payload)
+    )
+),
+to_insert AS (
+    SELECT new_a.*
+    FROM unnest(attachments_payload) AS new_a
+    LEFT OUTER JOIN selected s
+        ON new_a.sha256_hash = s.sha256_hash
+    WHERE s.attachment_id IS NULL
+),
+inserted AS (
+    INSERT INTO attachments (mimetype, creation_time, sha256_hash, phash, illegal, post_id)
+    SELECT mimetype, creation_time, sha256_hash, phash, illegal, post_id
+    FROM to_insert
+    -- The payload itself may repeat a hash; skip the repeats instead of
+    -- aborting the whole batch on the UNIQUE (sha256_hash) constraint.
+    ON CONFLICT (sha256_hash) DO NOTHING
+    RETURNING attachment_id, post_id, sha256_hash
+)
+SELECT * FROM inserted
+UNION ALL
+SELECT * FROM selected;
+$$ LANGUAGE sql;
+
+
 CREATE OR REPLACE FUNCTION fetch_top_threads(
     p_start_time TIMESTAMPTZ,
     lookback INT DEFAULT 10000
diff --git a/src/AttachmentType.hs b/src/AttachmentType.hs
new file mode 100644
index 0000000..6ef5c74
--- /dev/null
+++ b/src/AttachmentType.hs
@@ -0,0 +1,24 @@
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE DeriveGeneric #-}
+module AttachmentType
+( Attachment (..)
+) where
+
+import GHC.Generics
+import Data.Int (Int64)
+import Data.Aeson (FromJSON)
+import Data.Text (Text)
+import Data.Time.Clock (UTCTime)
+
+-- | Mirrors a row of the @attachments@ table defined in sql/initialize.sql.
+-- Field names are the column names, which is what the Generic-derived
+-- 'FromJSON' instance uses as JSON keys.
+data Attachment = Attachment
+    { attachment_id :: Maybe Int64 -- ^ @bigserial@ primary key column
+    , mimetype :: Text
+    , creation_time :: UTCTime
+    , sha256_hash :: Text -- ^ the column is @text@, not a number
+    , phash :: Maybe Int64 -- ^ the column is a nullable @bigint@
+    , illegal :: Bool
+    , post_id :: Int64
+    } deriving (Show, Generic, FromJSON)
diff --git a/src/JSONSettings.hs b/src/JSONSettings.hs
index 6e794db..144b52d 100644
--- a/src/JSONSettings.hs
+++ b/src/JSONSettings.hs
@@ -9,6 +9,7 @@ data JSONSettings = JSONSettings
     { postgrest_url :: String
     , jwt :: String
     , backup_read_root :: FilePath
+    , media_root_path :: FilePath
     , site_name :: String
     , site_url :: String
     } deriving (Show, Generic)