Copy files to destination
- build destination filepath from site, board, thread and old board filename
This commit is contained in:
parent
762a60f399
commit
acb6f5ddb4
141
src/Backfill.hs
141
src/Backfill.hs
|
@ -11,7 +11,12 @@ import Control.Monad (filterM)
|
||||||
import Data.Aeson (decode)
|
import Data.Aeson (decode)
|
||||||
import qualified Data.ByteString.Lazy as B
|
import qualified Data.ByteString.Lazy as B
|
||||||
import System.Console.CmdArgs
|
import System.Console.CmdArgs
|
||||||
import System.Directory (listDirectory, doesFileExist)
|
import System.Directory
|
||||||
|
( listDirectory
|
||||||
|
, doesFileExist
|
||||||
|
, copyFile
|
||||||
|
, createDirectoryIfMissing
|
||||||
|
)
|
||||||
import System.FilePath ((</>))
|
import System.FilePath ((</>))
|
||||||
import Data.List (find, isSuffixOf, foldl')
|
import Data.List (find, isSuffixOf, foldl')
|
||||||
import qualified Data.Set as Set
|
import qualified Data.Set as Set
|
||||||
|
@ -224,15 +229,18 @@ concatMapM op = foldr f (pure [])
|
||||||
pure $ x_ ++ xs_
|
pure $ x_ ++ xs_
|
||||||
|
|
||||||
|
|
||||||
setPostIdInPosts :: [(Sites.Site, Threads.Thread, JSONPosts.Post, Posts.Post)] -> [ Client.PostId ] -> [(Sites.Site, Threads.Thread, JSONPosts.Post, Posts.Post)]
|
setPostIdInPosts
|
||||||
|
:: [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)]
|
||||||
|
-> [ Client.PostId ]
|
||||||
|
-> [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)]
|
||||||
setPostIdInPosts tuples ids = map f ids
|
setPostIdInPosts tuples ids = map f ids
|
||||||
where
|
where
|
||||||
post_map :: Map.Map (Int64, Int64) (Sites.Site, Threads.Thread, JSONPosts.Post, Posts.Post)
|
post_map :: Map.Map (Int64, Int64) (Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)
|
||||||
post_map = Map.fromList (map (\(a, b, i, j) -> ((Posts.thread_id j, Posts.board_post_id j), (a, b, i, j))) tuples)
|
post_map = Map.fromList (map (\(a, b, c, i, j) -> ((Posts.thread_id j, Posts.board_post_id j), (a, b, c, i, j))) tuples)
|
||||||
|
|
||||||
f :: Client.PostId -> (Sites.Site, Threads.Thread, JSONPosts.Post, Posts.Post)
|
f :: Client.PostId -> (Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)
|
||||||
f (Client.PostId { Client.post_id = asdf1, Client.thread_id = asdf2, Client.board_post_id = asdf3 }) =
|
f (Client.PostId { Client.post_id = asdf1, Client.thread_id = asdf2, Client.board_post_id = asdf3 }) =
|
||||||
(\(a, b, i, j) -> (a, b, i, j { Posts.post_id = Just asdf1 })) (post_map Map.! (asdf2, asdf3))
|
(\(a, b, c, i, j) -> (a, b, c, i, j { Posts.post_id = Just asdf1 })) (post_map Map.! (asdf2, asdf3))
|
||||||
|
|
||||||
|
|
||||||
fileToAttachment :: Posts.Post -> JS.File -> At.Attachment
|
fileToAttachment :: Posts.Post -> JS.File -> At.Attachment
|
||||||
|
@ -274,15 +282,57 @@ phash_mimetypes = Set.fromList
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
copyFiles :: JSONSettings -> (At.Paths, At.Attachment) -> IO ()
|
copyFiles :: JSONSettings -> (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO ()
|
||||||
copyFiles settings (p, _) = undefined
|
copyFiles settings (site, board, thread, _, path, attachment) = do
|
||||||
|
destination_exists <- doesFileExist dest
|
||||||
|
|
||||||
|
if not destination_exists
|
||||||
|
then do
|
||||||
|
src_exists <- doesFileExist src
|
||||||
|
|
||||||
|
createDirectoryIfMissing True common_dest
|
||||||
|
|
||||||
|
if src_exists
|
||||||
|
then copyFile src dest
|
||||||
|
else return ()
|
||||||
|
|
||||||
|
thumb_exists <- doesFileExist thumb_src
|
||||||
|
|
||||||
|
if thumb_exists
|
||||||
|
then copyFile thumb_src thumb_dest
|
||||||
|
else return ()
|
||||||
|
|
||||||
|
else return ()
|
||||||
|
|
||||||
-- src = (At.file_path | At.thumb_path)
|
-- src = (At.file_path | At.thumb_path)
|
||||||
-- dest = <media_root>/<website_name>/<boardpart>/<board_thread_id>/<sha>.<ext>
|
-- dest = <media_root>/<website_name>/<boardpart>/<board_thread_id>/<sha>.<ext>
|
||||||
|
|
||||||
|
where
|
||||||
|
src :: FilePath
|
||||||
|
src = At.file_path path
|
||||||
|
|
||||||
processFiles :: JSONSettings -> [(Sites.Site, Threads.Thread, JSONPosts.Post, Posts.Post)] -> IO ()
|
thumb_src :: FilePath
|
||||||
|
thumb_src = At.file_path path
|
||||||
|
|
||||||
|
dest :: FilePath
|
||||||
|
dest = common_dest
|
||||||
|
<> "/" <> (unpack $ At.board_filename attachment)
|
||||||
|
|
||||||
|
thumb_dest :: FilePath
|
||||||
|
thumb_dest = common_dest
|
||||||
|
<> "/thumbnail_" <> (unpack $ At.board_filename attachment)
|
||||||
|
|
||||||
|
common_dest :: FilePath
|
||||||
|
common_dest
|
||||||
|
= (JSONSettings.media_root_path settings)
|
||||||
|
<> "/" <> Sites.name site
|
||||||
|
<> "/" <> Boards.pathpart board
|
||||||
|
<> "/" <> (show $ Threads.board_thread_id thread)
|
||||||
|
|
||||||
|
|
||||||
|
processFiles :: JSONSettings -> [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)] -> IO ()
|
||||||
processFiles settings tuples = do -- perfect just means that our posts have ids, they're already inserted into the db
|
processFiles settings tuples = do -- perfect just means that our posts have ids, they're already inserted into the db
|
||||||
let ps = map (\(_, _, _, x) -> x) tuples
|
let ps = map (\(_, _, _, _, x) -> x) tuples
|
||||||
|
|
||||||
existing_attachments_result <- Client.getAttachments settings (map (fromJust . Posts.post_id) ps)
|
existing_attachments_result <- Client.getAttachments settings (map (fromJust . Posts.post_id) ps)
|
||||||
|
|
||||||
|
@ -291,28 +341,28 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
|
||||||
putStrLn $ "Error fetching attachments: " ++ show err
|
putStrLn $ "Error fetching attachments: " ++ show err
|
||||||
exitFailure
|
exitFailure
|
||||||
Right existing_attachments -> do
|
Right existing_attachments -> do
|
||||||
let map_existing :: Map.Map Int64 [ At.Attachment ] =
|
let map_existing :: Map.Map (Int64, Text) [ At.Attachment ] =
|
||||||
foldl' (insertRecord At.post_id) Map.empty existing_attachments
|
foldl'
|
||||||
|
(insertRecord (\a -> (At.post_id a, At.board_filename a)))
|
||||||
|
Map.empty
|
||||||
|
existing_attachments
|
||||||
|
|
||||||
let attachments_on_board :: [(Sites.Site, Threads.Thread, Posts.Post, At.Paths, At.Attachment)] =
|
let attachments_on_board :: [(Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)] =
|
||||||
concatMap parseAttachments tuples
|
concatMap parseAttachments tuples
|
||||||
-- attachments_on_board are the only files that can be copied into the archive dir right now
|
-- attachments_on_board are the only files that can be copied into the archive dir right now
|
||||||
-- since that's where we have the src filename. except here the Attachment doesn't have a sha hash yet
|
-- since that's where we have the src filename. except here the Attachment doesn't have a sha hash yet
|
||||||
-- so we can't build the destination filename.
|
-- so we can't build the destination filename.
|
||||||
|
|
||||||
let map_should_exist :: Map.Map Int64 [(Sites.Site, Threads.Thread, Posts.Post, At.Paths, At.Attachment)] =
|
let map_should_exist :: Map.Map (Int64, Text) [(Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)] =
|
||||||
foldl' (insertRecord (At.post_id . (\(_, _, _, _, a) -> a))) Map.empty attachments_on_board
|
foldl'
|
||||||
|
(insertRecord (\(_, _, _, _, _, a) -> (At.post_id a, At.board_filename a)))
|
||||||
|
Map.empty
|
||||||
|
attachments_on_board
|
||||||
|
|
||||||
-- basically we have a map of the post_id to attachments for that post_id
|
let to_insert_map =
|
||||||
-- compare the number of attachments under each post_id in the map
|
Map.filterWithKey
|
||||||
-- assembled from the API with the map assembled from data in the db.
|
(\k _ -> not $ k `Map.member` map_existing)
|
||||||
-- if the number of files differ, then likely there are new attachments.
|
map_should_exist
|
||||||
-- This isn't fool-proof (there are probably weird cases where the
|
|
||||||
-- number of files for a post is the same but the files themselves
|
|
||||||
-- are different) but it's a quicker way of seeing which attachments
|
|
||||||
-- need to be saved than finding the sha256 hash of every single existing
|
|
||||||
-- file.
|
|
||||||
let to_insert_map = Map.filterWithKey (compareAttMaps map_existing) map_should_exist
|
|
||||||
|
|
||||||
let to_insert = foldr (++) [] $ Map.elems to_insert_map
|
let to_insert = foldr (++) [] $ Map.elems to_insert_map
|
||||||
|
|
||||||
|
@ -320,12 +370,7 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
|
||||||
|
|
||||||
with_hashes <- mapM computeAttachmentHash to_insert_exist
|
with_hashes <- mapM computeAttachmentHash to_insert_exist
|
||||||
|
|
||||||
let existing_hashes :: Set.Set (Int64, Text) =
|
attachments_result <- Client.postAttachments settings with_hashes
|
||||||
Set.fromList $ map (\x -> (At.post_id x, At.sha256_hash x)) existing_attachments
|
|
||||||
|
|
||||||
let to_insert_ = filter ((`Set.notMember` existing_hashes) . (\x -> (At.post_id x , At.sha256_hash x))) with_hashes
|
|
||||||
|
|
||||||
attachments_result <- Client.postAttachments settings to_insert_
|
|
||||||
|
|
||||||
case attachments_result of
|
case attachments_result of
|
||||||
Left err -> do
|
Left err -> do
|
||||||
|
@ -334,13 +379,14 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
|
||||||
|
|
||||||
Right saved -> do
|
Right saved -> do
|
||||||
putStrLn $ "Saved " ++ (show $ length saved) ++ " attachments!"
|
putStrLn $ "Saved " ++ (show $ length saved) ++ " attachments!"
|
||||||
|
mapM_ (copyFiles settings) attachments_on_board
|
||||||
|
|
||||||
where
|
where
|
||||||
attachmentFileExists :: (Sites.Site, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO Bool
|
attachmentFileExists :: (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO Bool
|
||||||
attachmentFileExists (_, _, _, p, _) = doesFileExist (At.file_path p)
|
attachmentFileExists (_, _, _, _, p, _) = doesFileExist (At.file_path p)
|
||||||
|
|
||||||
computeAttachmentHash :: (Sites.Site, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO At.Attachment
|
computeAttachmentHash :: (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO At.Attachment
|
||||||
computeAttachmentHash (_, _, _, p, q) = do
|
computeAttachmentHash (_, _, _, _, p, q) = do
|
||||||
let f = At.file_path p
|
let f = At.file_path p
|
||||||
|
|
||||||
putStrLn $ "Reading " ++ f
|
putStrLn $ "Reading " ++ f
|
||||||
|
@ -412,18 +458,21 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
|
||||||
return (p, attachment)
|
return (p, attachment)
|
||||||
|
|
||||||
|
|
||||||
notDeleted :: (a, b, c, At.Paths, At.Attachment) -> Bool
|
notDeleted :: (a, b, c, d, At.Paths, At.Attachment) -> Bool
|
||||||
notDeleted (_, _, _, p, _) = not $ "deleted" `isSuffixOf` (At.file_path p)
|
notDeleted (_, _, _, _, p, _) = not $ "deleted" `isSuffixOf` (At.file_path p)
|
||||||
|
|
||||||
|
|
||||||
withPathPrefix :: Text -> FilePath
|
withPathPrefix :: Text -> FilePath
|
||||||
withPathPrefix = ((<>) $ backup_read_root settings) . unpack
|
withPathPrefix = ((<>) $ backup_read_root settings) . unpack
|
||||||
|
|
||||||
parseAttachments :: (Sites.Site, Threads.Thread, JSONPosts.Post, Posts.Post) -> [(Sites.Site, Threads.Thread, Posts.Post, At.Paths, At.Attachment)]
|
parseAttachments
|
||||||
parseAttachments (site, thread, p, q) = filter notDeleted $
|
:: (Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)
|
||||||
|
-> [(Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)]
|
||||||
|
parseAttachments (site, board, thread, p, q) = filter notDeleted $
|
||||||
case JSONPosts.files p of
|
case JSONPosts.files p of
|
||||||
Just files -> map (\x ->
|
Just files -> map (\x ->
|
||||||
( site
|
( site
|
||||||
|
, board
|
||||||
, thread
|
, thread
|
||||||
, q
|
, q
|
||||||
, At.Paths (withPathPrefix $ JS.file_path x) (withPathPrefix $ JS.thumb_path x)
|
, At.Paths (withPathPrefix $ JS.file_path x) (withPathPrefix $ JS.thumb_path x)
|
||||||
|
@ -437,6 +486,7 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
|
||||||
dim = (JSONPosts.w p) >>= \w -> ((JSONPosts.h p) >>= \h -> Just $ At.Dimension w h)
|
dim = (JSONPosts.w p) >>= \w -> ((JSONPosts.h p) >>= \h -> Just $ At.Dimension w h)
|
||||||
in
|
in
|
||||||
( site
|
( site
|
||||||
|
, board
|
||||||
, thread
|
, thread
|
||||||
, q
|
, q
|
||||||
, paths
|
, paths
|
||||||
|
@ -458,13 +508,6 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
|
||||||
l = Map.findWithDefault [] pid accMap
|
l = Map.findWithDefault [] pid accMap
|
||||||
in Map.insert pid (x : l) accMap
|
in Map.insert pid (x : l) accMap
|
||||||
|
|
||||||
compareAttMaps
|
|
||||||
:: Map.Map Int64 [ At.Attachment ]
|
|
||||||
-> Int64
|
|
||||||
-> [((Sites.Site, Threads.Thread, Posts.Post, At.Paths, At.Attachment))]
|
|
||||||
-> Bool
|
|
||||||
compareAttMaps existing k v
|
|
||||||
= (maybe (-1) length (Map.lookup k existing)) /= length v
|
|
||||||
|
|
||||||
processBoard :: JSONSettings -> Sites.Site -> Boards.Board -> IO ()
|
processBoard :: JSONSettings -> Sites.Site -> Boards.Board -> IO ()
|
||||||
processBoard settings site board = do
|
processBoard settings site board = do
|
||||||
|
@ -481,11 +524,11 @@ processBoard settings site board = do
|
||||||
|
|
||||||
all_posts_on_board :: [(Threads.Thread, [ JSONPosts.Post ])] <- mapM (readPosts settings board) all_threads_for_board
|
all_posts_on_board :: [(Threads.Thread, [ JSONPosts.Post ])] <- mapM (readPosts settings board) all_threads_for_board
|
||||||
|
|
||||||
let tuples :: [(Sites.Site, Threads.Thread, JSONPosts.Post, Posts.Post)] = concatMap
|
let tuples :: [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)] = concatMap
|
||||||
(\(t, posts) -> map (\p -> (site, t, p, apiPostToArchivePost t p)) posts)
|
(\(t, posts) -> map (\p -> (site, board, t, p, apiPostToArchivePost t p)) posts)
|
||||||
all_posts_on_board
|
all_posts_on_board
|
||||||
|
|
||||||
posts_result <- Client.postPosts settings (map (\(_, _, _, d) -> d) tuples)
|
posts_result <- Client.postPosts settings (map (\(_, _, _, _, d) -> d) tuples)
|
||||||
|
|
||||||
case posts_result of
|
case posts_result of
|
||||||
Left err -> print err
|
Left err -> print err
|
||||||
|
|
Loading…
Reference in New Issue