Fix ensuring correct local_idx for posts
- query each thread's max local_idx before processing
This commit is contained in:
parent
335be14f7a
commit
7c7c6df955
|
@ -67,7 +67,6 @@ executable chan-delorean
|
||||||
-- Modules included in this executable, other than Main.
|
-- Modules included in this executable, other than Main.
|
||||||
other-modules:
|
other-modules:
|
||||||
JSONParsing
|
JSONParsing
|
||||||
DataClient
|
|
||||||
JSONSettings
|
JSONSettings
|
||||||
SitesType
|
SitesType
|
||||||
BoardsType
|
BoardsType
|
||||||
|
@ -79,6 +78,8 @@ executable chan-delorean
|
||||||
Common.Network.HttpClient
|
Common.Network.HttpClient
|
||||||
Hash
|
Hash
|
||||||
Data.WordUtil
|
Data.WordUtil
|
||||||
|
Network.DataClient
|
||||||
|
Network.DataClientTypes
|
||||||
|
|
||||||
-- LANGUAGE extensions used by modules in this package.
|
-- LANGUAGE extensions used by modules in this package.
|
||||||
-- other-extensions:
|
-- other-extensions:
|
||||||
|
|
|
@ -381,7 +381,8 @@ GRANT EXECUTE ON FUNCTION get_posts TO chan_archive_anon;
|
||||||
-- GRANT usage, select ON SEQUENCE boards_board_id_seq TO chan_archive_anon;
|
-- GRANT usage, select ON SEQUENCE boards_board_id_seq TO chan_archive_anon;
|
||||||
GRANT chan_archive_anon TO admin;
|
GRANT chan_archive_anon TO admin;
|
||||||
|
|
||||||
CREATE ROLE chan_archiver noinherit login password 'test_password';
|
CREATE ROLE chan_archiver noinherit login password 'test_password'
|
||||||
|
SET pgrst.db_aggregates_enabled = 'true';
|
||||||
GRANT CONNECT ON DATABASE chan_archives TO chan_archiver;
|
GRANT CONNECT ON DATABASE chan_archives TO chan_archiver;
|
||||||
GRANT chan_archive_anon TO chan_archiver;
|
GRANT chan_archive_anon TO chan_archiver;
|
||||||
GRANT ALL ON sites TO chan_archiver;
|
GRANT ALL ON sites TO chan_archiver;
|
||||||
|
|
|
@ -36,7 +36,7 @@ import JSONParsing
|
||||||
import JSONSettings
|
import JSONSettings
|
||||||
import qualified JSONCommonTypes as JS
|
import qualified JSONCommonTypes as JS
|
||||||
import qualified JSONPost as JSONPosts
|
import qualified JSONPost as JSONPosts
|
||||||
import qualified DataClient as Client
|
import qualified Network.DataClient as Client
|
||||||
import qualified SitesType as Sites
|
import qualified SitesType as Sites
|
||||||
import qualified BoardsType as Boards
|
import qualified BoardsType as Boards
|
||||||
import qualified ThreadType as Threads
|
import qualified ThreadType as Threads
|
||||||
|
@ -45,6 +45,8 @@ import qualified Common.PostsType as Posts
|
||||||
import qualified Hash as Hash
|
import qualified Hash as Hash
|
||||||
import qualified Data.WordUtil as Words
|
import qualified Data.WordUtil as Words
|
||||||
|
|
||||||
|
import Debug.Trace (trace, traceShowId)
|
||||||
|
|
||||||
newtype SettingsCLI = SettingsCLI
|
newtype SettingsCLI = SettingsCLI
|
||||||
{ jsonFile :: FilePath
|
{ jsonFile :: FilePath
|
||||||
} deriving (Show, Data, Typeable)
|
} deriving (Show, Data, Typeable)
|
||||||
|
@ -202,7 +204,11 @@ readPosts settings board thread = do
|
||||||
|
|
||||||
|
|
||||||
apiPostToPostKey :: Threads.Thread -> JSONPosts.Post -> Client.PostId
|
apiPostToPostKey :: Threads.Thread -> JSONPosts.Post -> Client.PostId
|
||||||
apiPostToPostKey thread post = Client.PostId (Threads.thread_id thread) (JSONPosts.no post)
|
apiPostToPostKey thread post =
|
||||||
|
Client.PostId
|
||||||
|
{ Client.thread_id = (Threads.thread_id thread)
|
||||||
|
, Client.board_post_id = (JSONPosts.no post)
|
||||||
|
}
|
||||||
|
|
||||||
-- Convert Post to DbPost
|
-- Convert Post to DbPost
|
||||||
apiPostToArchivePost :: Int -> Threads.Thread -> JSONPosts.Post -> Posts.Post
|
apiPostToArchivePost :: Int -> Threads.Thread -> JSONPosts.Post -> Posts.Post
|
||||||
|
@ -531,18 +537,20 @@ createNewPosts
|
||||||
-> [ (Threads.Thread, JSONPosts.Post, Client.PostId) ]
|
-> [ (Threads.Thread, JSONPosts.Post, Client.PostId) ]
|
||||||
-> IO [ Posts.Post ]
|
-> IO [ Posts.Post ]
|
||||||
createNewPosts settings tuples = do
|
createNewPosts settings tuples = do
|
||||||
existing_post_results <- Client.getPosts settings (map (\(_, _, c) -> c) tuples)
|
existing_post_results <- Client.getPosts settings $ map (\(_, _, c) -> c) tuples
|
||||||
existing_posts <- either handleError return existing_post_results
|
existing_posts <- either handleError return existing_post_results
|
||||||
|
|
||||||
|
thread_max_local_idx_result <- Client.getThreadMaxLocalIdx settings thread_ids
|
||||||
|
thread_max_local_idxs <- either handleError return thread_max_local_idx_result
|
||||||
|
|
||||||
let existing_set :: Set (Int64, Int64) = Set.fromList (map (\x -> (Posts.thread_id x, Posts.board_post_id x)) existing_posts)
|
let existing_set :: Set (Int64, Int64) = Set.fromList (map (\x -> (Posts.thread_id x, Posts.board_post_id x)) existing_posts)
|
||||||
|
|
||||||
let to_insert_list :: [ (Threads.Thread, JSONPosts.Post, Client.PostId) ] =
|
let to_insert_list :: [ (Threads.Thread, JSONPosts.Post, Client.PostId) ] =
|
||||||
sortBy (comparing $ \(_, _, p) -> Client.board_post_id p) $
|
sortBy (comparing $ \(_, _, p) -> Client.board_post_id p) $
|
||||||
determineNew tuples existing_set
|
newPosts tuples existing_set
|
||||||
|
|
||||||
-- Map of thread_id to the largest local_idx value (which would be the number of the last post in the thread)
|
-- Map of thread_id to the largest local_idx value (which would be the number of the last post in the thread)
|
||||||
let local_idx :: Map.Map Int64 Int = foldr (Map.unionWith max) Map.empty $
|
let local_idx :: Map.Map Int64 Int = Map.fromList thread_max_local_idxs
|
||||||
map (\x -> Map.singleton (Posts.thread_id x) (Posts.local_idx x)) existing_posts
|
|
||||||
|
|
||||||
let insert_posts = fst $ foldl' foldFn ([], local_idx) to_insert_list
|
let insert_posts = fst $ foldl' foldFn ([], local_idx) to_insert_list
|
||||||
|
|
||||||
|
@ -555,6 +563,7 @@ createNewPosts settings tuples = do
|
||||||
-- - need to compare posts already in the map with another post and keep the max local_idx ✓
|
-- - need to compare posts already in the map with another post and keep the max local_idx ✓
|
||||||
-- to get the new local_idx, we must order the to_insert_list by board_post_id, and look up each entry ✓
|
-- to get the new local_idx, we must order the to_insert_list by board_post_id, and look up each entry ✓
|
||||||
|
|
||||||
|
print insert_posts
|
||||||
posts_result <- Client.postPosts settings insert_posts
|
posts_result <- Client.postPosts settings insert_posts
|
||||||
new_posts <- either handleError return posts_result
|
new_posts <- either handleError return posts_result
|
||||||
return $ existing_posts ++ new_posts
|
return $ existing_posts ++ new_posts
|
||||||
|
@ -562,14 +571,17 @@ createNewPosts settings tuples = do
|
||||||
where
|
where
|
||||||
handleError err = print err >> exitFailure
|
handleError err = print err >> exitFailure
|
||||||
|
|
||||||
determineNew :: [(Threads.Thread, JSONPosts.Post, Client.PostId)] -> Set (Int64, Int64) -> [(Threads.Thread, JSONPosts.Post, Client.PostId)]
|
thread_ids :: [ Int64 ]
|
||||||
determineNew ts existing_set = filter (\(_, _, c) -> Set.notMember (Client.thread_id c, Client.board_post_id c) existing_set) ts
|
thread_ids = Set.elems $ Set.fromList $ map (\(t, _, _) -> Threads.thread_id t) tuples
|
||||||
|
|
||||||
|
newPosts :: [(Threads.Thread, JSONPosts.Post, Client.PostId)] -> Set (Int64, Int64) -> [(Threads.Thread, JSONPosts.Post, Client.PostId)]
|
||||||
|
newPosts ts existing_set = filter (\(_, _, c) -> Set.notMember (Client.thread_id c, Client.board_post_id c) existing_set) ts
|
||||||
|
|
||||||
foldFn :: ([Posts.Post], Map.Map Int64 Int) -> (Threads.Thread, JSONPosts.Post, Client.PostId) -> ([Posts.Post], Map.Map Int64 Int)
|
foldFn :: ([Posts.Post], Map.Map Int64 Int) -> (Threads.Thread, JSONPosts.Post, Client.PostId) -> ([Posts.Post], Map.Map Int64 Int)
|
||||||
foldFn (posts, idx_map) (t, p, c) =
|
foldFn (posts, idx_map) (t, p, c) = trace "foldFn" $
|
||||||
case Map.lookup thread_id idx_map of
|
case Map.lookup thread_id (trace ("map size: " ++ (show $ length idx_map)) idx_map) of
|
||||||
Nothing -> ((post 1) : posts, Map.insert thread_id 1 idx_map)
|
Nothing -> (traceShowId (post 1) : posts, trace ("inserting_1 " ++ show thread_id ++ ": 1")$ Map.insert thread_id 1 idx_map)
|
||||||
Just i -> ((post (i + 1)) : posts, Map.insert thread_id (i + 1) idx_map)
|
Just i -> (traceShowId (post (i + 1)) : posts, trace ("inserting_2 " ++ show thread_id ++ ": " ++ show (i + 1)) $ Map.insert thread_id (i + 1) idx_map)
|
||||||
|
|
||||||
where
|
where
|
||||||
post :: Int -> Posts.Post
|
post :: Int -> Posts.Post
|
||||||
|
@ -666,4 +678,6 @@ main = do
|
||||||
Nothing -> do
|
Nothing -> do
|
||||||
putStrLn "Error: Invalid JSON format."
|
putStrLn "Error: Invalid JSON format."
|
||||||
exitFailure
|
exitFailure
|
||||||
Just settings -> processBackupDirectory settings
|
Just settings -> do
|
||||||
|
processBackupDirectory settings
|
||||||
|
putStrLn "Done!"
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
Subproject commit aed106f40e24125d9057d3a9dda934a41c6d68a0
|
Subproject commit ffe952e22939b21f8576d79752c863a95c208e1e
|
|
@ -1,7 +1,7 @@
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
{-# LANGUAGE DeriveAnyClass #-}
|
{-# LANGUAGE DeriveAnyClass #-}
|
||||||
|
|
||||||
module DataClient
|
module Network.DataClient
|
||||||
( HttpError(..)
|
( HttpError(..)
|
||||||
, PostId (..)
|
, PostId (..)
|
||||||
, get
|
, get
|
||||||
|
@ -11,6 +11,7 @@ module DataClient
|
||||||
, post
|
, post
|
||||||
, postBoards
|
, postBoards
|
||||||
, getThreads
|
, getThreads
|
||||||
|
, getThreadMaxLocalIdx
|
||||||
, postThreads
|
, postThreads
|
||||||
, getPosts
|
, getPosts
|
||||||
, postPosts
|
, postPosts
|
||||||
|
@ -43,14 +44,14 @@ import qualified ThreadType as Threads
|
||||||
import qualified Common.AttachmentType as Attachments
|
import qualified Common.AttachmentType as Attachments
|
||||||
import qualified Common.PostsType as Posts
|
import qualified Common.PostsType as Posts
|
||||||
import Common.Network.HttpClient
|
import Common.Network.HttpClient
|
||||||
|
import qualified Network.DataClientTypes as T
|
||||||
|
|
||||||
|
|
||||||
data PostId = PostId
|
data PostId = PostId
|
||||||
{ board_post_id :: Int64
|
{ thread_id :: Int64
|
||||||
, thread_id :: Int64
|
, board_post_id :: Int64
|
||||||
} deriving (Show, Generic, ToJSON)
|
} deriving (Show, Generic, ToJSON)
|
||||||
|
|
||||||
|
|
||||||
getSiteBoards :: T.JSONSettings -> Int -> IO (Either HttpError [ Boards.Board ])
|
getSiteBoards :: T.JSONSettings -> Int -> IO (Either HttpError [ Boards.Board ])
|
||||||
getSiteBoards settings site_id_ = get settings path >>= return . eitherDecodeResponse
|
getSiteBoards settings site_id_ = get settings path >>= return . eitherDecodeResponse
|
||||||
where
|
where
|
||||||
|
@ -116,6 +117,19 @@ getThreads settings board_id board_thread_ids =
|
||||||
ids :: String = intercalate "," $ map show board_thread_ids
|
ids :: String = intercalate "," $ map show board_thread_ids
|
||||||
|
|
||||||
|
|
||||||
|
getThreadMaxLocalIdx :: T.JSONSettings -> [ Int64 ] -> IO (Either HttpError [(Int64, Int)])
|
||||||
|
getThreadMaxLocalIdx settings thread_ids = do
|
||||||
|
result :: Either HttpError [ T.ThreadMaxIdx ] <- get settings path >>= return . eitherDecodeResponse
|
||||||
|
|
||||||
|
let results = result >>= \x -> return $ map (\t -> (T.thread_id t, T.max_idx t)) x
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
where
|
||||||
|
path = "/posts?select=thread_id,max_idx:local_idx.max()&thread_id=in.(" ++ ids ++ ")"
|
||||||
|
ids :: String = intercalate "," $ map show thread_ids
|
||||||
|
|
||||||
|
|
||||||
-- | Splits a list into chunks of a given size.
|
-- | Splits a list into chunks of a given size.
|
||||||
chunkList :: Int -> [a] -> [[a]]
|
chunkList :: Int -> [a] -> [[a]]
|
||||||
chunkList _ [] = []
|
chunkList _ [] = []
|
||||||
|
@ -172,6 +186,7 @@ getPosts :: T.JSONSettings -> [ PostId ] -> IO (Either HttpError [Posts.Post])
|
||||||
getPosts settings xs = do
|
getPosts settings xs = do
|
||||||
results <- forM (chunkList chunkSize xs) (getPostsChunk settings)
|
results <- forM (chunkList chunkSize xs) (getPostsChunk settings)
|
||||||
return $ combineResults results
|
return $ combineResults results
|
||||||
|
|
||||||
where
|
where
|
||||||
chunkSize = 1000
|
chunkSize = 1000
|
||||||
|
|
||||||
|
@ -180,7 +195,7 @@ postPosts
|
||||||
:: T.JSONSettings
|
:: T.JSONSettings
|
||||||
-> [ Posts.Post ]
|
-> [ Posts.Post ]
|
||||||
-> IO (Either HttpError [ Posts.Post ])
|
-> IO (Either HttpError [ Posts.Post ])
|
||||||
postPosts settings posts = do
|
postPosts settings posts =
|
||||||
post settings "/posts" payload True >>= return . eitherDecodeResponse
|
post settings "/posts" payload True >>= return . eitherDecodeResponse
|
||||||
|
|
||||||
where
|
where
|
|
@ -0,0 +1,13 @@
|
||||||
|
{-# LANGUAGE DeriveAnyClass #-}
|
||||||
|
|
||||||
|
module Network.DataClientTypes where
|
||||||
|
|
||||||
|
import Data.Int (Int64)
|
||||||
|
import Data.Aeson (FromJSON)
|
||||||
|
import GHC.Generics
|
||||||
|
|
||||||
|
data ThreadMaxIdx = ThreadMaxIdx
|
||||||
|
{ thread_id :: Int64
|
||||||
|
, max_idx :: Int
|
||||||
|
} deriving (Show, Generic, FromJSON)
|
||||||
|
|
Loading…
Reference in New Issue