Abstract out everywhere that references reading src files from local dir
- Now we have two binaries: one to read from local dir one from http - both work on the same json api files
This commit is contained in:
parent
67870cab36
commit
fc321d8531
99
src/Lib.hs
99
src/Lib.hs
|
@ -24,7 +24,7 @@ import Data.Set (Set)
|
||||||
import Data.Time.Clock.POSIX (posixSecondsToUTCTime)
|
import Data.Time.Clock.POSIX (posixSecondsToUTCTime)
|
||||||
import Data.Time.Clock (UTCTime)
|
import Data.Time.Clock (UTCTime)
|
||||||
import qualified Data.Map as Map
|
import qualified Data.Map as Map
|
||||||
import Data.Maybe (fromJust)
|
import Data.Maybe (fromJust, catMaybes)
|
||||||
import Data.Text (Text, unpack)
|
import Data.Text (Text, unpack)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import Data.Text.Encoding (decodeUtf8)
|
import Data.Text.Encoding (decodeUtf8)
|
||||||
|
@ -300,33 +300,9 @@ phash_mimetypes = Set.fromList
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
-- TODO: This will need to be a move or copy
|
copyOrMoveFiles :: JSONSettings -> FileGetters -> Details -> IO ()
|
||||||
-- - move in the case of downloading from network
|
copyOrMoveFiles settings fgs (site, board, thread, _, path, attachment) = do
|
||||||
-- - first download to temporary file
|
(copyOrMove fgs) common_dest (src, dest) (thumb_src, thumb_dest)
|
||||||
-- - then in this function instead of copyFile move it.
|
|
||||||
-- - copy file if loading from filesystem (eg. from backup), since we want
|
|
||||||
-- to keep the original.
|
|
||||||
copyFiles :: JSONSettings -> (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO ()
|
|
||||||
copyFiles settings (site, board, thread, _, path, attachment) = do
|
|
||||||
destination_exists <- doesFileExist dest
|
|
||||||
|
|
||||||
if not destination_exists
|
|
||||||
then do
|
|
||||||
src_exists <- doesFileExist src
|
|
||||||
|
|
||||||
createDirectoryIfMissing True common_dest
|
|
||||||
|
|
||||||
if src_exists
|
|
||||||
then putStrLn ("Copying " ++ src) >> copyFile src dest
|
|
||||||
else return ()
|
|
||||||
|
|
||||||
thumb_exists <- doesFileExist thumb_src
|
|
||||||
|
|
||||||
if thumb_exists
|
|
||||||
then putStrLn ("Copying " ++ thumb_src) >> copyFile thumb_src thumb_dest
|
|
||||||
else return ()
|
|
||||||
|
|
||||||
else return ()
|
|
||||||
|
|
||||||
-- src = (At.file_path | At.thumb_path)
|
-- src = (At.file_path | At.thumb_path)
|
||||||
-- dest = <media_root>/<website_name>/<boardpart>/<board_thread_id>/<sha>.<ext>
|
-- dest = <media_root>/<website_name>/<boardpart>/<board_thread_id>/<sha>.<ext>
|
||||||
|
@ -356,8 +332,15 @@ copyFiles settings (site, board, thread, _, path, attachment) = do
|
||||||
</> (show $ Threads.board_thread_id thread)
|
</> (show $ Threads.board_thread_id thread)
|
||||||
|
|
||||||
|
|
||||||
processFiles :: JSONSettings -> [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)] -> IO ()
|
type Details = (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)
|
||||||
processFiles settings tuples = do -- perfect just means that our posts have ids, they're already inserted into the db
|
|
||||||
|
|
||||||
|
processFiles
|
||||||
|
:: JSONSettings
|
||||||
|
-> FileGetters
|
||||||
|
-> [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)]
|
||||||
|
-> IO ()
|
||||||
|
processFiles settings fgs tuples = do -- perfect just means that our posts have ids, they're already inserted into the db
|
||||||
let ps = map (\(_, _, _, _, x) -> x) tuples
|
let ps = map (\(_, _, _, _, x) -> x) tuples
|
||||||
|
|
||||||
existing_attachments_result <- Client.getAttachments settings (map (fromJust . Posts.post_id) ps)
|
existing_attachments_result <- Client.getAttachments settings (map (fromJust . Posts.post_id) ps)
|
||||||
|
@ -373,13 +356,13 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
|
||||||
Map.empty
|
Map.empty
|
||||||
existing_attachments
|
existing_attachments
|
||||||
|
|
||||||
let attachments_on_board :: [(Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)] =
|
let attachments_on_board :: [ Details ] =
|
||||||
concatMap parseAttachments tuples
|
concatMap parseAttachments tuples
|
||||||
-- attachments_on_board are the only files that can be copied into the archive dir right now
|
-- attachments_on_board are the only files that can be copied into the archive dir right now
|
||||||
-- since that's where we have the src filename. except here the Attachment doesn't have a sha hash yet
|
-- since that's where we have the src filename. except here the Attachment doesn't have a sha hash yet
|
||||||
-- so we can't build the destination filename.
|
-- so we can't build the destination filename.
|
||||||
|
|
||||||
let map_should_exist :: Map.Map (Int64, Text) [(Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)] =
|
let map_should_exist :: Map.Map (Int64, Text) [ Details ] =
|
||||||
foldl'
|
foldl'
|
||||||
(insertRecord (\(_, _, _, _, _, a) -> (At.post_id a, At.board_filename a)))
|
(insertRecord (\(_, _, _, _, _, a) -> (At.post_id a, At.board_filename a)))
|
||||||
Map.empty
|
Map.empty
|
||||||
|
@ -392,7 +375,10 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
|
||||||
|
|
||||||
let to_insert = foldr (++) [] $ Map.elems to_insert_map
|
let to_insert = foldr (++) [] $ Map.elems to_insert_map
|
||||||
|
|
||||||
to_insert_exist <- filterM attachmentFileExists to_insert
|
to_insert_ <- mapM ensureAttachmentExists to_insert
|
||||||
|
|
||||||
|
let to_insert_exist = catMaybes to_insert_
|
||||||
|
-- to_insert_exist <- filterM attachmentFileExists to_insert
|
||||||
|
|
||||||
with_hashes <- mapM computeAttachmentHash to_insert_exist
|
with_hashes <- mapM computeAttachmentHash to_insert_exist
|
||||||
|
|
||||||
|
@ -405,14 +391,15 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
|
||||||
|
|
||||||
Right saved -> do
|
Right saved -> do
|
||||||
putStrLn $ "Saved " ++ (show $ length saved) ++ " attachments!"
|
putStrLn $ "Saved " ++ (show $ length saved) ++ " attachments!"
|
||||||
mapM_ (copyFiles settings) attachments_on_board
|
mapM_ (copyOrMoveFiles settings fgs) attachments_on_board
|
||||||
|
|
||||||
where
|
where
|
||||||
attachmentFileExists :: (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO Bool
|
ensureAttachmentExists :: Details -> IO (Maybe Details)
|
||||||
attachmentFileExists (_, _, _, _, p, _) = doesFileExist (At.file_path p)
|
ensureAttachmentExists (a, b, c, d, p, f) =
|
||||||
-- TODO: this actually has to download a file in the consumer case
|
(attachmentPaths fgs) p >>=
|
||||||
|
return . (maybe Nothing (\x -> Just (a, b, c, d, x, f)))
|
||||||
|
|
||||||
computeAttachmentHash :: (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO At.Attachment
|
computeAttachmentHash :: Details -> IO At.Attachment
|
||||||
computeAttachmentHash (_, _, _, _, p, q) = do
|
computeAttachmentHash (_, _, _, _, p, q) = do
|
||||||
let f = At.file_path p
|
let f = At.file_path p
|
||||||
|
|
||||||
|
@ -490,15 +477,12 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
|
||||||
notDeleted :: (a, b, c, d, At.Paths, At.Attachment) -> Bool
|
notDeleted :: (a, b, c, d, At.Paths, At.Attachment) -> Bool
|
||||||
notDeleted (_, _, _, _, p, _) = not $ "deleted" `isSuffixOf` (At.file_path p)
|
notDeleted (_, _, _, _, p, _) = not $ "deleted" `isSuffixOf` (At.file_path p)
|
||||||
|
|
||||||
|
|
||||||
-- TODO: we don't have backup_read_root if reading from network.
|
|
||||||
-- - solution? one should obviously be a URL then.
|
|
||||||
withPathPrefix :: Text -> FilePath
|
withPathPrefix :: Text -> FilePath
|
||||||
withPathPrefix = ((<>) $ backup_read_root settings) . unpack
|
withPathPrefix = (addPathPrefix fgs) . unpack
|
||||||
|
|
||||||
parseAttachments
|
parseAttachments
|
||||||
:: (Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)
|
:: (Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)
|
||||||
-> [(Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)]
|
-> [ Details ]
|
||||||
parseAttachments (site, board, thread, p, q) = filter notDeleted $
|
parseAttachments (site, board, thread, p, q) = filter notDeleted $
|
||||||
case JSONPosts.files p of
|
case JSONPosts.files p of
|
||||||
Just files -> map
|
Just files -> map
|
||||||
|
@ -606,6 +590,9 @@ createNewPosts settings tuples = do
|
||||||
data FileGetters = FileGetters
|
data FileGetters = FileGetters
|
||||||
{ getJSONCatalog :: Sites.Site -> String -> IO (Either String [ Catalog ])
|
{ getJSONCatalog :: Sites.Site -> String -> IO (Either String [ Catalog ])
|
||||||
, getJSONPosts :: Sites.Site -> String -> IO (Either String JSONPosts.PostWrapper)
|
, getJSONPosts :: Sites.Site -> String -> IO (Either String JSONPosts.PostWrapper)
|
||||||
|
, addPathPrefix :: String -> String
|
||||||
|
, attachmentPaths :: At.Paths -> IO (Maybe At.Paths)
|
||||||
|
, copyOrMove :: String -> (String, String) -> (String, String) -> IO ()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -613,6 +600,30 @@ localFileGetters :: JSONSettings -> FileGetters
|
||||||
localFileGetters settings = FileGetters
|
localFileGetters settings = FileGetters
|
||||||
{ getJSONCatalog = const $ parseJSONCatalog . withRoot
|
{ getJSONCatalog = const $ parseJSONCatalog . withRoot
|
||||||
, getJSONPosts = const $ parsePosts . withRoot
|
, getJSONPosts = const $ parsePosts . withRoot
|
||||||
|
, addPathPrefix = ((++) $ backup_read_root settings)
|
||||||
|
, attachmentPaths = \p -> do
|
||||||
|
exists <- doesFileExist (At.file_path p)
|
||||||
|
if exists then return (Just p) else return Nothing
|
||||||
|
, copyOrMove = \common_dest (src, dest) (thumb_src, thumb_dest) -> do
|
||||||
|
destination_exists <- doesFileExist dest
|
||||||
|
|
||||||
|
if not destination_exists
|
||||||
|
then do
|
||||||
|
src_exists <- doesFileExist src
|
||||||
|
|
||||||
|
createDirectoryIfMissing True common_dest
|
||||||
|
|
||||||
|
if src_exists
|
||||||
|
then putStrLn ("Copying " ++ src) >> copyFile src dest
|
||||||
|
else return ()
|
||||||
|
|
||||||
|
thumb_exists <- doesFileExist thumb_src
|
||||||
|
|
||||||
|
if thumb_exists
|
||||||
|
then putStrLn ("Copying " ++ thumb_src) >> copyFile thumb_src thumb_dest
|
||||||
|
else return ()
|
||||||
|
|
||||||
|
else return ()
|
||||||
}
|
}
|
||||||
|
|
||||||
where
|
where
|
||||||
|
@ -647,7 +658,7 @@ processBoard settings fgs@FileGetters {..} site board = do
|
||||||
|
|
||||||
let perfect_post_pairs = addPostsToTuples tuples posts_result
|
let perfect_post_pairs = addPostsToTuples tuples posts_result
|
||||||
|
|
||||||
processFiles settings perfect_post_pairs
|
processFiles settings fgs perfect_post_pairs
|
||||||
|
|
||||||
Left errMsg ->
|
Left errMsg ->
|
||||||
putStrLn $ "Failed to parse the JSON file in directory: "
|
putStrLn $ "Failed to parse the JSON file in directory: "
|
||||||
|
|
18
src/Main.hs
18
src/Main.hs
|
@ -9,6 +9,7 @@ import Data.Aeson (decode)
|
||||||
import System.FilePath ((</>))
|
import System.FilePath ((</>))
|
||||||
import Control.Concurrent.Async (mapConcurrently)
|
import Control.Concurrent.Async (mapConcurrently)
|
||||||
import Data.Aeson (FromJSON)
|
import Data.Aeson (FromJSON)
|
||||||
|
import System.Directory (createDirectoryIfMissing, renameFile)
|
||||||
|
|
||||||
import qualified SitesType as Sites
|
import qualified SitesType as Sites
|
||||||
import Common.Server.ConsumerSettings
|
import Common.Server.ConsumerSettings
|
||||||
|
@ -18,6 +19,7 @@ import Lib
|
||||||
, FileGetters (..)
|
, FileGetters (..)
|
||||||
)
|
)
|
||||||
import qualified Network.DataClient as Client
|
import qualified Network.DataClient as Client
|
||||||
|
import qualified Common.AttachmentType as At
|
||||||
|
|
||||||
newtype CliArgs = CliArgs
|
newtype CliArgs = CliArgs
|
||||||
{ settingsFile :: String
|
{ settingsFile :: String
|
||||||
|
@ -53,9 +55,23 @@ getSettings = do
|
||||||
Just settings -> return settings
|
Just settings -> return settings
|
||||||
|
|
||||||
httpFileGetters :: JSONSettings -> FileGetters
|
httpFileGetters :: JSONSettings -> FileGetters
|
||||||
httpFileGetters _ = FileGetters
|
httpFileGetters settings = FileGetters
|
||||||
{ getJSONCatalog = httpGetJSON
|
{ getJSONCatalog = httpGetJSON
|
||||||
, getJSONPosts = httpGetJSON
|
, getJSONPosts = httpGetJSON
|
||||||
|
, addPathPrefix = ((++) $ site_url settings)
|
||||||
|
-- attachmentPaths here actually doesn't get the paths of the attachment,
|
||||||
|
-- it downloads them into a temporary file and gets that path of that.
|
||||||
|
, attachmentPaths = \paths -> do
|
||||||
|
filepath <- Client.getFile (At.file_path paths)
|
||||||
|
thumbpath <- Client.getFile (At.thumbnail_path paths)
|
||||||
|
|
||||||
|
return $ filepath >>= \fp ->
|
||||||
|
thumbpath >>= \tp ->
|
||||||
|
return (At.Paths fp tp)
|
||||||
|
, copyOrMove = \common_dest (src, dest) (thumb_src, thumb_dest) -> do
|
||||||
|
createDirectoryIfMissing True common_dest
|
||||||
|
renameFile src dest
|
||||||
|
renameFile thumb_src thumb_dest
|
||||||
}
|
}
|
||||||
|
|
||||||
httpGetJSON :: (FromJSON a) => Sites.Site -> String -> IO (Either String a)
|
httpGetJSON :: (FromJSON a) => Sites.Site -> String -> IO (Either String a)
|
||||||
|
|
Loading…
Reference in New Issue