Compare commits
3 Commits
34753c176a
...
1113539321
Author | SHA1 | Date |
---|---|---|
|
1113539321 | |
|
518467c7eb | |
|
7bf61c0dd2 |
|
@ -80,6 +80,7 @@ executable chan-delorean
|
|||
Data.WordUtil
|
||||
Network.DataClient
|
||||
Network.DataClientTypes
|
||||
Network.GetLatestPostsPerBoardResponse
|
||||
Common.Server.JSONSettings
|
||||
Common.Server.ConsumerSettings
|
||||
|
||||
|
|
|
@ -1,13 +1,17 @@
|
|||
{
|
||||
"websites": {
|
||||
"name": "example",
|
||||
"root_url": "https://example.net",
|
||||
"boards": [
|
||||
"tech",
|
||||
"meta"
|
||||
]
|
||||
},
|
||||
"websites": [
|
||||
{
|
||||
"name": "example",
|
||||
"root_url": "https://example.net",
|
||||
"boards": [
|
||||
"tech",
|
||||
"meta"
|
||||
]
|
||||
}
|
||||
],
|
||||
"postgrest_url": "http://localhost:3000",
|
||||
"jwt": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJyb2xlIjoiY2hhbl9hcmNoaXZlciJ9.rGIKZokTDKTuQLIv8138bUby5PELfDipYYIDpJzH02c",
|
||||
"media_root_path": "/home/phil/linixy/tmp/chan_archive_media2/archive"
|
||||
"media_root_path": "/home/phil/linixy/tmp/chan_archive_media_repaired/archive",
|
||||
"http_fill_all": false,
|
||||
"http_sync_continously": true
|
||||
}
|
||||
|
|
|
@ -222,3 +222,99 @@ SELECT * FROM boards;
|
|||
|
||||
SELECT * FROM threads WHERE thread_id = 11314;
|
||||
ANALYZE posts;
|
||||
|
||||
SELECT count(*) from attachments;
|
||||
|
||||
SELECT * FROM attachments WHERE post_id = 253383;
|
||||
SELECT * from attachments WHERE board_filename = '1722466065515';
|
||||
-- Count the attachments whose attachment_id is lower than that of the attachment with this board_filename.
SELECT count(*) FROM attachments WHERE attachment_id < (SELECT attachment_id FROM attachments WHERE board_filename = '1722466065515');
|
||||
SELECT max(attachment_id) FROM attachments a;
|
||||
SELECT pg_get_serial_sequence('attachments', 'attachment_id');
|
||||
SELECT setval(pg_get_serial_sequence('attachments', 'attachment_id'), COALESCE(198853, 1), true);
|
||||
|
||||
|
||||
UPDATE attachments SET thumb_extension = 'png'
|
||||
WHERE
|
||||
attachment_id IN
|
||||
(
|
||||
SELECT a.attachment_id
|
||||
FROM attachments a
|
||||
JOIN posts p ON a.post_id = p.post_id
|
||||
JOIN threads t ON p.thread_id = t.thread_id
|
||||
JOIN boards b ON t.board_id = b.board_id
|
||||
JOIN sites s ON b.site_id = s.site_id
|
||||
WHERE s.name = 'leftychan'
|
||||
AND a.thumb_extension = 'jpg'
|
||||
);
|
||||
|
||||
|
||||
SELECT * FROM posts WHERE board_post_id = 129;
|
||||
SELECT * FROM attachments WHERE post_id = 461287;
|
||||
|
||||
SELECT count(a.*)
|
||||
FROM attachments a
|
||||
JOIN posts p ON a.post_id = p.post_id
|
||||
JOIN threads t ON p.thread_id = t.thread_id
|
||||
JOIN boards b ON t.board_id = b.board_id
|
||||
JOIN sites s ON b.site_id = s.site_id
|
||||
WHERE s.name = 'leftychan'
|
||||
AND a.thumb_extension = 'jpg';
|
||||
|
||||
|
||||
SELECT * FROM posts
|
||||
JOIN threads ON threads.thread_id = posts.thread_id
|
||||
JOIN boards ON boards.board_id = threads.board_id
|
||||
WHERE boards.pathpart = 'leftypol'
|
||||
AND boards.site_id = 1
|
||||
ORDER BY posts.creation_time DESC
|
||||
LIMIT 1;
|
||||
|
||||
SELECT * FROM posts
|
||||
ORDER BY posts.creation_time DESC
|
||||
LIMIT 1;
|
||||
|
||||
SELECT boards.board_id, boards.pathpart, sites.name FROM boards JOIN sites ON sites.site_id = boards.site_id;
|
||||
|
||||
SELECT DISTINCT ON (b.board_id)
|
||||
b.board_id,
|
||||
b.site_id,
|
||||
b.pathpart,
|
||||
p.post_id,
|
||||
p.board_post_id,
|
||||
p.creation_time,
|
||||
p.body,
|
||||
t.thread_id,
|
||||
t.board_thread_id
|
||||
FROM boards b
|
||||
JOIN threads t ON t.board_id = b.board_id
|
||||
JOIN posts p ON p.thread_id = t.thread_id
|
||||
ORDER BY b.board_id, p.creation_time DESC;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION get_latest_posts_per_board()
|
||||
RETURNS TABLE (
|
||||
board_id int,
|
||||
site_id int,
|
||||
pathpart text,
|
||||
post_id bigint,
|
||||
board_post_id bigint,
|
||||
creation_time timestamp with time zone,
|
||||
thread_id bigint,
|
||||
board_thread_id bigint
|
||||
) AS $$
|
||||
SELECT DISTINCT ON (b.board_id)
|
||||
b.board_id,
|
||||
b.site_id,
|
||||
b.pathpart,
|
||||
p.post_id,
|
||||
p.board_post_id,
|
||||
p.creation_time,
|
||||
t.thread_id,
|
||||
t.board_thread_id
|
||||
FROM boards b
|
||||
JOIN threads t ON t.board_id = b.board_id
|
||||
JOIN posts p ON p.thread_id = t.thread_id
|
||||
ORDER BY b.board_id, p.creation_time DESC;
|
||||
$$ LANGUAGE sql STABLE;
|
||||
|
||||
SELECT * FROM get_latest_posts_per_board();
|
||||
|
|
|
@ -21,6 +21,7 @@ DROP TYPE IF EXISTS post_key CASCADE;
|
|||
DROP FUNCTION IF EXISTS update_post_body_search_index;
|
||||
DROP FUNCTION IF EXISTS fetch_top_threads;
|
||||
DROP FUNCTION IF EXISTS fetch_catalog;
|
||||
DROP FUNCTION IF EXISTS get_latest_posts_per_board;
|
||||
|
||||
|
||||
-- It won't let us drop roles otherwise and the IFs are to keep this script idempotent.
|
||||
|
@ -222,7 +223,7 @@ CREATE OR REPLACE FUNCTION fetch_top_threads(
|
|||
lookback INT DEFAULT 10000
|
||||
)
|
||||
RETURNS TABLE(bump_time TIMESTAMPTZ, post_count BIGINT, thread_id BIGINT, where_to_leave_off TIMESTAMPTZ)
|
||||
LANGUAGE sql
|
||||
LANGUAGE sql STABLE
|
||||
AS $$
|
||||
SELECT
|
||||
max(creation_time) as bump_time,
|
||||
|
@ -266,6 +267,51 @@ CREATE TYPE catalog_grid_result AS
|
|||
);
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION fetch_catalog(max_time timestamptz, max_row_read int DEFAULT 10000)
|
||||
RETURNS SETOF catalog_grid_result AS $$
|
||||
WITH
|
||||
top AS
|
||||
(
|
||||
SELECT * FROM fetch_top_threads(max_time, max_row_read) AS top
|
||||
),
|
||||
tall_posts AS
|
||||
(
|
||||
SELECT
|
||||
top.post_count AS estimated_post_count,
|
||||
posts.post_id,
|
||||
posts.board_post_id,
|
||||
posts.creation_time,
|
||||
top.bump_time,
|
||||
posts.body,
|
||||
posts.subject,
|
||||
posts.thread_id,
|
||||
posts.embed
|
||||
FROM top
|
||||
JOIN posts ON top.thread_id = posts.thread_id AND posts.local_idx = 1
|
||||
WHERE creation_time < max_time
|
||||
)
|
||||
SELECT
|
||||
-- post_counts.post_count,
|
||||
tall_posts.*,
|
||||
threads.board_thread_id, -- this should be part of the url path when creating links, not thread_id (that's internal)
|
||||
boards.pathpart,
|
||||
sites."name",
|
||||
-- sites.site_id,
|
||||
attachments.mimetype AS file_mimetype,
|
||||
attachments.illegal AS file_illegal,
|
||||
-- attachments.resolution AS file_resolution,
|
||||
attachments.board_filename AS file_name,
|
||||
attachments.file_extension,
|
||||
attachments.thumb_extension AS file_thumb_extension
|
||||
FROM tall_posts
|
||||
JOIN threads ON tall_posts.thread_id = threads.thread_id
|
||||
JOIN boards ON threads.board_id = boards.board_id
|
||||
JOIN sites ON sites.site_id = boards.site_id
|
||||
LEFT OUTER JOIN attachments ON attachments.post_id = tall_posts.post_id AND attachments.attachment_idx = 1
|
||||
ORDER BY bump_time DESC;
|
||||
$$ LANGUAGE sql STABLE;
|
||||
|
||||
|
||||
-- Function: search_posts
|
||||
--
|
||||
-- This function performs a full-text search on the `posts` table using PostgreSQL's text search features.
|
||||
|
@ -355,6 +401,33 @@ RETURNS SETOF catalog_grid_result AS $$
|
|||
$$ LANGUAGE sql STABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION get_latest_posts_per_board()
|
||||
RETURNS TABLE (
|
||||
board_id int,
|
||||
site_id int,
|
||||
pathpart text,
|
||||
post_id bigint,
|
||||
board_post_id bigint,
|
||||
creation_time timestamp with time zone,
|
||||
thread_id bigint,
|
||||
board_thread_id bigint
|
||||
) AS $$
|
||||
SELECT DISTINCT ON (b.board_id)
|
||||
b.board_id,
|
||||
b.site_id,
|
||||
b.pathpart,
|
||||
p.post_id,
|
||||
p.board_post_id,
|
||||
p.creation_time,
|
||||
t.thread_id,
|
||||
t.board_thread_id
|
||||
FROM boards b
|
||||
JOIN threads t ON t.board_id = b.board_id
|
||||
JOIN posts p ON p.thread_id = t.thread_id
|
||||
ORDER BY b.board_id, p.creation_time DESC;
|
||||
$$ LANGUAGE sql STABLE;
|
||||
|
||||
|
||||
/*
|
||||
* Permissions
|
||||
*/
|
||||
|
@ -364,18 +437,20 @@ REVOKE EXECUTE ON FUNCTION fetch_catalog FROM PUBLIC;
|
|||
REVOKE EXECUTE ON FUNCTION search_posts FROM PUBLIC;
|
||||
REVOKE EXECUTE ON FUNCTION update_post_body_search_index FROM PUBLIC;
|
||||
REVOKE EXECUTE ON FUNCTION get_posts FROM PUBLIC;
|
||||
REVOKE EXECUTE ON FUNCTION get_latest_posts_per_board FROM PUBLIC;
|
||||
|
||||
CREATE ROLE chan_archive_anon nologin;
|
||||
GRANT CONNECT ON DATABASE chan_archives TO chan_archive_anon;
|
||||
GRANT SELECT ON sites TO chan_archive_anon;
|
||||
GRANT SELECT ON boards TO chan_archive_anon;
|
||||
GRANT SELECT ON threads TO chan_archive_anon;
|
||||
GRANT SELECT ON posts TO chan_archive_anon;
|
||||
GRANT SELECT ON attachments TO chan_archive_anon;
|
||||
GRANT EXECUTE ON FUNCTION fetch_catalog TO chan_archive_anon;
|
||||
GRANT EXECUTE ON FUNCTION fetch_top_threads TO chan_archive_anon;
|
||||
GRANT EXECUTE ON FUNCTION search_posts TO chan_archive_anon;
|
||||
GRANT EXECUTE ON FUNCTION get_posts TO chan_archive_anon;
|
||||
GRANT CONNECT ON DATABASE chan_archives TO chan_archive_anon;
|
||||
GRANT SELECT ON sites TO chan_archive_anon;
|
||||
GRANT SELECT ON boards TO chan_archive_anon;
|
||||
GRANT SELECT ON threads TO chan_archive_anon;
|
||||
GRANT SELECT ON posts TO chan_archive_anon;
|
||||
GRANT SELECT ON attachments TO chan_archive_anon;
|
||||
GRANT EXECUTE ON FUNCTION fetch_catalog TO chan_archive_anon;
|
||||
GRANT EXECUTE ON FUNCTION fetch_top_threads TO chan_archive_anon;
|
||||
GRANT EXECUTE ON FUNCTION search_posts TO chan_archive_anon;
|
||||
GRANT EXECUTE ON FUNCTION get_posts TO chan_archive_anon;
|
||||
GRANT EXECUTE ON FUNCTION get_latest_posts_per_board TO chan_archive_anon;
|
||||
|
||||
-- GRANT usage, select ON SEQUENCE sites_site_id_seq TO chan_archive_anon;
|
||||
-- GRANT usage, select ON SEQUENCE boards_board_id_seq TO chan_archive_anon;
|
||||
|
@ -396,6 +471,7 @@ GRANT EXECUTE ON FUNCTION fetch_top_threads TO chan_archiver;
|
|||
GRANT EXECUTE ON FUNCTION fetch_catalog TO chan_archiver;
|
||||
GRANT EXECUTE ON FUNCTION search_posts TO chan_archiver;
|
||||
GRANT EXECUTE ON FUNCTION get_posts TO chan_archiver;
|
||||
GRANT EXECUTE ON FUNCTION get_latest_posts_per_board TO chan_archiver;
|
||||
GRANT usage, select ON SEQUENCE sites_site_id_seq TO chan_archiver;
|
||||
GRANT usage, select ON SEQUENCE boards_board_id_seq TO chan_archiver;
|
||||
GRANT usage, select ON SEQUENCE threads_thread_id_seq TO chan_archiver;
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE DeriveAnyClass #-}
|
||||
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
|
||||
{-# HLINT ignore "Use <&>" #-}
|
||||
|
||||
module Network.DataClient
|
||||
( HttpError(..)
|
||||
|
@ -19,6 +21,7 @@ module Network.DataClient
|
|||
, postAttachments
|
||||
, getJSON
|
||||
, getFile
|
||||
, getLatestPostsPerBoard
|
||||
) where
|
||||
|
||||
import Control.Monad (forM)
|
||||
|
@ -49,6 +52,7 @@ import qualified Common.AttachmentType as Attachments
|
|||
import qualified Common.PostsType as Posts
|
||||
import Common.Network.HttpClient
|
||||
import qualified Network.DataClientTypes as T
|
||||
import qualified Network.GetLatestPostsPerBoardResponse as GLPPBR
|
||||
|
||||
|
||||
data PostId = PostId
|
||||
|
@ -233,14 +237,20 @@ getFile url = do
|
|||
case result of
|
||||
Left (err :: HttpError) -> do
|
||||
putStrLn $ "getFile " ++ url ++ " Error!"
|
||||
putStrLn $ show err
|
||||
print err
|
||||
return Nothing
|
||||
Right lbs -> do
|
||||
putStrLn $ "getFile " ++ url ++ " SUCCESS!"
|
||||
tmp_root <- getCanonicalTemporaryDirectory
|
||||
(tmp_filepath, tmp_filehandle) <- openBinaryTempFile tmp_root "chan.attachment"
|
||||
putStrLn $ "Created " ++ tmp_filepath
|
||||
putStrLn $ "Writing attachment..."
|
||||
putStrLn "Writing attachment..."
|
||||
LBS.hPut tmp_filehandle lbs
|
||||
hClose tmp_filehandle
|
||||
return $ Just tmp_filepath
|
||||
|
||||
|
||||
-- | Fetch the latest post of every board by calling the @/rpc/get_latest_posts_per_board@ endpoint and decoding the response.
|
||||
getLatestPostsPerBoard :: T.JSONSettings -> IO (Either HttpError [ GLPPBR.GetLatestPostsPerBoardResponse ])
|
||||
getLatestPostsPerBoard settings =
|
||||
post settings "/rpc/get_latest_posts_per_board" mempty False >>= return . eitherDecodeResponse
|
||||
|
|
|
@ -10,4 +10,3 @@ data ThreadMaxIdx = ThreadMaxIdx
|
|||
{ thread_id :: Int64
|
||||
, max_idx :: Int
|
||||
} deriving (Show, Generic, FromJSON)
|
||||
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
{-# LANGUAGE DeriveAnyClass #-}
|
||||
|
||||
module Network.GetLatestPostsPerBoardResponse
|
||||
where
|
||||
|
||||
import Data.Int (Int64)
|
||||
import Data.Time.Clock (UTCTime)
|
||||
import Data.Aeson (FromJSON)
|
||||
import GHC.Generics
|
||||
|
||||
data GetLatestPostsPerBoardResponse = GetLatestPostsPerBoardResponse
|
||||
{ board_id :: Int
|
||||
, site_id :: Int
|
||||
, pathpart :: String
|
||||
, post_id :: Maybe Int64
|
||||
, board_post_id :: Int64
|
||||
, creation_time :: UTCTime
|
||||
, thread_id :: Int64
|
||||
, board_thread_id :: Integer
|
||||
} deriving (Show, Generic, FromJSON)
|
|
@ -87,7 +87,7 @@ main = do
|
|||
|
||||
where
|
||||
pf :: (Show a, Show b) => (a, b) -> IO ()
|
||||
pf (a, b) = putStrLn $ (show a) ++ "," ++ (show b)
|
||||
pf (a, b) = putStrLn $ show a ++ "," ++ show b
|
||||
|
||||
f _ (xs, gen) =
|
||||
let (x, newgen) = selectSkewedIndex (size q) gen
|
||||
|
@ -97,5 +97,5 @@ main = do
|
|||
q = fromList [ Elem i undefined | i <- [1..100] ]
|
||||
|
||||
countOccurrences :: (Eq a, Ord a) => [a] -> [(a, Int)]
|
||||
countOccurrences rolls = map (\x -> (head x, length x)) . group . sort $ rolls
|
||||
countOccurrences = map (\x -> (head x, length x)) . group . sort
|
||||
|
||||
|
|
32
src/Sync.hs
32
src/Sync.hs
|
@ -1,21 +1,31 @@
|
|||
{-# LANGUAGE RecordWildCards #-}
|
||||
|
||||
module Sync where
|
||||
|
||||
import Common.Server.ConsumerSettings
|
||||
import Lib (getBoards, toClientSettings)
|
||||
import SitesType (Site)
|
||||
import BoardsType (Board)
|
||||
import Common.Server.ConsumerSettings as Settings
|
||||
import Common.Server.JSONSettings as JSONSettings
|
||||
import Network.DataClient (getLatestPostsPerBoard)
|
||||
|
||||
getSiteBoards :: ConsumerJSONSettings -> JSONSiteSettings -> IO (Site, [ Board ])
|
||||
getSiteBoards settings site_settings =
|
||||
let client_settings = toClientSettings settings site_settings
|
||||
in getBoards
|
||||
client_settings
|
||||
(boards site_settings)
|
||||
consumerSettingsToPartialJSONSettings :: Settings.ConsumerJSONSettings -> JSONSettings.JSONSettings
|
||||
consumerSettingsToPartialJSONSettings ConsumerJSONSettings {..} =
|
||||
JSONSettings
|
||||
{ JSONSettings.postgrest_url = postgrest_url
|
||||
, JSONSettings.jwt = jwt
|
||||
, backup_read_root = undefined
|
||||
, JSONSettings.media_root_path
|
||||
, site_name = undefined
|
||||
, site_url = undefined
|
||||
}
|
||||
|
||||
syncWebsites :: ConsumerJSONSettings -> IO ()
|
||||
syncWebsites _ = do
|
||||
syncWebsites consumer_settings = do
|
||||
putStrLn "Starting channel web synchronization."
|
||||
|
||||
let json_settings = consumerSettingsToPartialJSONSettings consumer_settings
|
||||
|
||||
asdf <- getLatestPostsPerBoard json_settings
|
||||
|
||||
print asdf
|
||||
-- first we need all the (Site, Board) tuples
|
||||
-- perhaps we even want all (Site, Board, Thread) pairs
|
||||
-- But then we don't load the posts of each thread, instead only do
|
||||
|
|
Loading…
Reference in New Issue