Abstract out everywhere that references reading src files from local dir

- Now we have two binaries: one to read from local dir one from http
    - both work on the same json api files
This commit is contained in:
towards-a-new-leftypol 2024-04-09 19:40:29 -04:00
parent 67870cab36
commit fc321d8531
2 changed files with 72 additions and 45 deletions

View File

@ -24,7 +24,7 @@ import Data.Set (Set)
import Data.Time.Clock.POSIX (posixSecondsToUTCTime) import Data.Time.Clock.POSIX (posixSecondsToUTCTime)
import Data.Time.Clock (UTCTime) import Data.Time.Clock (UTCTime)
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.Maybe (fromJust) import Data.Maybe (fromJust, catMaybes)
import Data.Text (Text, unpack) import Data.Text (Text, unpack)
import qualified Data.Text as T import qualified Data.Text as T
import Data.Text.Encoding (decodeUtf8) import Data.Text.Encoding (decodeUtf8)
@ -300,33 +300,9 @@ phash_mimetypes = Set.fromList
] ]
-- TODO: This will need to be a move or copy copyOrMoveFiles :: JSONSettings -> FileGetters -> Details -> IO ()
-- - move in the case of downloading from network copyOrMoveFiles settings fgs (site, board, thread, _, path, attachment) = do
-- - first download to temporary file (copyOrMove fgs) common_dest (src, dest) (thumb_src, thumb_dest)
-- - then in this function instead of copyFile move it.
-- - copy file if loading from filesystem (eg. from backup), since we want
-- to keep the original.
copyFiles :: JSONSettings -> (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO ()
copyFiles settings (site, board, thread, _, path, attachment) = do
destination_exists <- doesFileExist dest
if not destination_exists
then do
src_exists <- doesFileExist src
createDirectoryIfMissing True common_dest
if src_exists
then putStrLn ("Copying " ++ src) >> copyFile src dest
else return ()
thumb_exists <- doesFileExist thumb_src
if thumb_exists
then putStrLn ("Copying " ++ thumb_src) >> copyFile thumb_src thumb_dest
else return ()
else return ()
-- src = (At.file_path | At.thumb_path) -- src = (At.file_path | At.thumb_path)
-- dest = <media_root>/<website_name>/<boardpart>/<board_thread_id>/<sha>.<ext> -- dest = <media_root>/<website_name>/<boardpart>/<board_thread_id>/<sha>.<ext>
@ -356,8 +332,15 @@ copyFiles settings (site, board, thread, _, path, attachment) = do
</> (show $ Threads.board_thread_id thread) </> (show $ Threads.board_thread_id thread)
processFiles :: JSONSettings -> [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)] -> IO () type Details = (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)
processFiles settings tuples = do -- perfect just means that our posts have ids, they're already inserted into the db
processFiles
:: JSONSettings
-> FileGetters
-> [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)]
-> IO ()
processFiles settings fgs tuples = do -- perfect just means that our posts have ids, they're already inserted into the db
let ps = map (\(_, _, _, _, x) -> x) tuples let ps = map (\(_, _, _, _, x) -> x) tuples
existing_attachments_result <- Client.getAttachments settings (map (fromJust . Posts.post_id) ps) existing_attachments_result <- Client.getAttachments settings (map (fromJust . Posts.post_id) ps)
@ -373,13 +356,13 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
Map.empty Map.empty
existing_attachments existing_attachments
let attachments_on_board :: [(Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)] = let attachments_on_board :: [ Details ] =
concatMap parseAttachments tuples concatMap parseAttachments tuples
-- attachments_on_board are the only files that can be copied into the archive dir right now -- attachments_on_board are the only files that can be copied into the archive dir right now
-- since that's where we have the src filename. except here the Attachment doesn't have a sha hash yet -- since that's where we have the src filename. except here the Attachment doesn't have a sha hash yet
-- so we can't build the destination filename. -- so we can't build the destination filename.
let map_should_exist :: Map.Map (Int64, Text) [(Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)] = let map_should_exist :: Map.Map (Int64, Text) [ Details ] =
foldl' foldl'
(insertRecord (\(_, _, _, _, _, a) -> (At.post_id a, At.board_filename a))) (insertRecord (\(_, _, _, _, _, a) -> (At.post_id a, At.board_filename a)))
Map.empty Map.empty
@ -392,7 +375,10 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
let to_insert = foldr (++) [] $ Map.elems to_insert_map let to_insert = foldr (++) [] $ Map.elems to_insert_map
to_insert_exist <- filterM attachmentFileExists to_insert to_insert_ <- mapM ensureAttachmentExists to_insert
let to_insert_exist = catMaybes to_insert_
-- to_insert_exist <- filterM attachmentFileExists to_insert
with_hashes <- mapM computeAttachmentHash to_insert_exist with_hashes <- mapM computeAttachmentHash to_insert_exist
@ -405,14 +391,15 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
Right saved -> do Right saved -> do
putStrLn $ "Saved " ++ (show $ length saved) ++ " attachments!" putStrLn $ "Saved " ++ (show $ length saved) ++ " attachments!"
mapM_ (copyFiles settings) attachments_on_board mapM_ (copyOrMoveFiles settings fgs) attachments_on_board
where where
attachmentFileExists :: (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO Bool ensureAttachmentExists :: Details -> IO (Maybe Details)
attachmentFileExists (_, _, _, _, p, _) = doesFileExist (At.file_path p) ensureAttachmentExists (a, b, c, d, p, f) =
-- TODO: this actually has to download a file in the consumer case (attachmentPaths fgs) p >>=
return . (maybe Nothing (\x -> Just (a, b, c, d, x, f)))
computeAttachmentHash :: (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO At.Attachment computeAttachmentHash :: Details -> IO At.Attachment
computeAttachmentHash (_, _, _, _, p, q) = do computeAttachmentHash (_, _, _, _, p, q) = do
let f = At.file_path p let f = At.file_path p
@ -490,15 +477,12 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
notDeleted :: (a, b, c, d, At.Paths, At.Attachment) -> Bool notDeleted :: (a, b, c, d, At.Paths, At.Attachment) -> Bool
notDeleted (_, _, _, _, p, _) = not $ "deleted" `isSuffixOf` (At.file_path p) notDeleted (_, _, _, _, p, _) = not $ "deleted" `isSuffixOf` (At.file_path p)
-- TODO: we don't have backup_read_root if reading from network.
-- - solution? one should obviously be a URL then.
withPathPrefix :: Text -> FilePath withPathPrefix :: Text -> FilePath
withPathPrefix = ((<>) $ backup_read_root settings) . unpack withPathPrefix = (addPathPrefix fgs) . unpack
parseAttachments parseAttachments
:: (Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post) :: (Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)
-> [(Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)] -> [ Details ]
parseAttachments (site, board, thread, p, q) = filter notDeleted $ parseAttachments (site, board, thread, p, q) = filter notDeleted $
case JSONPosts.files p of case JSONPosts.files p of
Just files -> map Just files -> map
@ -606,6 +590,9 @@ createNewPosts settings tuples = do
data FileGetters = FileGetters data FileGetters = FileGetters
{ getJSONCatalog :: Sites.Site -> String -> IO (Either String [ Catalog ]) { getJSONCatalog :: Sites.Site -> String -> IO (Either String [ Catalog ])
, getJSONPosts :: Sites.Site -> String -> IO (Either String JSONPosts.PostWrapper) , getJSONPosts :: Sites.Site -> String -> IO (Either String JSONPosts.PostWrapper)
, addPathPrefix :: String -> String
, attachmentPaths :: At.Paths -> IO (Maybe At.Paths)
, copyOrMove :: String -> (String, String) -> (String, String) -> IO ()
} }
@ -613,6 +600,30 @@ localFileGetters :: JSONSettings -> FileGetters
localFileGetters settings = FileGetters localFileGetters settings = FileGetters
{ getJSONCatalog = const $ parseJSONCatalog . withRoot { getJSONCatalog = const $ parseJSONCatalog . withRoot
, getJSONPosts = const $ parsePosts . withRoot , getJSONPosts = const $ parsePosts . withRoot
, addPathPrefix = ((++) $ backup_read_root settings)
, attachmentPaths = \p -> do
exists <- doesFileExist (At.file_path p)
if exists then return (Just p) else return Nothing
, copyOrMove = \common_dest (src, dest) (thumb_src, thumb_dest) -> do
destination_exists <- doesFileExist dest
if not destination_exists
then do
src_exists <- doesFileExist src
createDirectoryIfMissing True common_dest
if src_exists
then putStrLn ("Copying " ++ src) >> copyFile src dest
else return ()
thumb_exists <- doesFileExist thumb_src
if thumb_exists
then putStrLn ("Copying " ++ thumb_src) >> copyFile thumb_src thumb_dest
else return ()
else return ()
} }
where where
@ -647,7 +658,7 @@ processBoard settings fgs@FileGetters {..} site board = do
let perfect_post_pairs = addPostsToTuples tuples posts_result let perfect_post_pairs = addPostsToTuples tuples posts_result
processFiles settings perfect_post_pairs processFiles settings fgs perfect_post_pairs
Left errMsg -> Left errMsg ->
putStrLn $ "Failed to parse the JSON file in directory: " putStrLn $ "Failed to parse the JSON file in directory: "

View File

@ -9,6 +9,7 @@ import Data.Aeson (decode)
import System.FilePath ((</>)) import System.FilePath ((</>))
import Control.Concurrent.Async (mapConcurrently) import Control.Concurrent.Async (mapConcurrently)
import Data.Aeson (FromJSON) import Data.Aeson (FromJSON)
import System.Directory (createDirectoryIfMissing, renameFile)
import qualified SitesType as Sites import qualified SitesType as Sites
import Common.Server.ConsumerSettings import Common.Server.ConsumerSettings
@ -18,6 +19,7 @@ import Lib
, FileGetters (..) , FileGetters (..)
) )
import qualified Network.DataClient as Client import qualified Network.DataClient as Client
import qualified Common.AttachmentType as At
newtype CliArgs = CliArgs newtype CliArgs = CliArgs
{ settingsFile :: String { settingsFile :: String
@ -53,9 +55,23 @@ getSettings = do
Just settings -> return settings Just settings -> return settings
httpFileGetters :: JSONSettings -> FileGetters httpFileGetters :: JSONSettings -> FileGetters
httpFileGetters _ = FileGetters httpFileGetters settings = FileGetters
{ getJSONCatalog = httpGetJSON { getJSONCatalog = httpGetJSON
, getJSONPosts = httpGetJSON , getJSONPosts = httpGetJSON
, addPathPrefix = ((++) $ site_url settings)
-- attachmentPaths here actually doesn't get the paths of the attachment,
-- it downloads them into a temporary file and gets that path of that.
, attachmentPaths = \paths -> do
filepath <- Client.getFile (At.file_path paths)
thumbpath <- Client.getFile (At.thumbnail_path paths)
return $ filepath >>= \fp ->
thumbpath >>= \tp ->
return (At.Paths fp tp)
, copyOrMove = \common_dest (src, dest) (thumb_src, thumb_dest) -> do
createDirectoryIfMissing True common_dest
renameFile src dest
renameFile thumb_src thumb_dest
} }
httpGetJSON :: (FromJSON a) => Sites.Site -> String -> IO (Either String a) httpGetJSON :: (FromJSON a) => Sites.Site -> String -> IO (Either String a)