Compare commits

..

No commits in common. "fc321d85317e39c3499578dd351ced7672c1f760" and "1c6c1250e33af8a748b361e610ef65dcbed705fc" have entirely different histories.

4 changed files with 54 additions and 114 deletions

View File

@ -102,8 +102,7 @@ executable chan-delorean
cryptonite, cryptonite,
memory, memory,
mime-types, mime-types,
perceptual-hash, perceptual-hash
temporary
-- Directories containing source files. -- Directories containing source files.
hs-source-dirs: src hs-source-dirs: src
@ -158,8 +157,7 @@ executable chan-delorean-consoomer
memory, memory,
mime-types, mime-types,
perceptual-hash, perceptual-hash,
async, async
temporary
-- Directories containing source files. -- Directories containing source files.
hs-source-dirs: src hs-source-dirs: src

View File

@ -24,7 +24,7 @@ import Data.Set (Set)
import Data.Time.Clock.POSIX (posixSecondsToUTCTime) import Data.Time.Clock.POSIX (posixSecondsToUTCTime)
import Data.Time.Clock (UTCTime) import Data.Time.Clock (UTCTime)
import qualified Data.Map as Map import qualified Data.Map as Map
import Data.Maybe (fromJust, catMaybes) import Data.Maybe (fromJust)
import Data.Text (Text, unpack) import Data.Text (Text, unpack)
import qualified Data.Text as T import qualified Data.Text as T
import Data.Text.Encoding (decodeUtf8) import Data.Text.Encoding (decodeUtf8)
@ -182,24 +182,27 @@ ensureThreads settings board all_threads = do
readPosts readPosts
:: FileGetters :: JSONSettings
-> Sites.Site -> FileGetters
-> Boards.Board -> Boards.Board
-> Threads.Thread -> Threads.Thread
-> IO (Threads.Thread, [ JSONPosts.Post ]) -> IO (Threads.Thread, [ JSONPosts.Post ])
readPosts FileGetters {..} site board thread = do readPosts settings fgs board thread = do
result <- getJSONPosts site relative_path -- parsePosts :: FilePath -> IO (Either String Post.PostWrapper)
result <- parsePosts thread_filename
case result of case result of
Left err -> do Left err -> do
putStrLn $ "Failed to parse the JSON file " ++ relative_path ++ " error: " ++ err putStrLn $ "Failed to parse the JSON file " ++ thread_filename ++ " error: " ++ err
putStrLn $ "Site: " ++ show site
return (thread, []) return (thread, [])
Right posts_wrapper -> return (thread, JSONPosts.posts posts_wrapper) Right posts_wrapper -> return (thread, JSONPosts.posts posts_wrapper)
where where
relative_path :: FilePath thread_filename :: FilePath
relative_path = Boards.pathpart board </> "res" </> (show (Threads.board_thread_id thread) ++ ".json") thread_filename = backupDir </> "res" </> (show (Threads.board_thread_id thread) ++ ".json")
backupDir :: FilePath
backupDir = backup_read_root settings </> Boards.pathpart board
apiPostToPostKey :: Threads.Thread -> JSONPosts.Post -> Client.PostId apiPostToPostKey :: Threads.Thread -> JSONPosts.Post -> Client.PostId
@ -209,7 +212,6 @@ apiPostToPostKey thread post =
, Client.board_post_id = (JSONPosts.no post) , Client.board_post_id = (JSONPosts.no post)
} }
-- Convert Post to DbPost -- Convert Post to DbPost
apiPostToArchivePost :: Int -> Threads.Thread -> JSONPosts.Post -> Posts.Post apiPostToArchivePost :: Int -> Threads.Thread -> JSONPosts.Post -> Posts.Post
apiPostToArchivePost local_idx thread post = apiPostToArchivePost local_idx thread post =
@ -226,7 +228,6 @@ apiPostToArchivePost local_idx thread post =
, Posts.local_idx = local_idx , Posts.local_idx = local_idx
} }
-- | A version of 'concatMap' that works with a monadic predicate. -- | A version of 'concatMap' that works with a monadic predicate.
-- Stolen from package extra Control.Monad.Extra -- Stolen from package extra Control.Monad.Extra
concatMapM :: Monad m => (a -> m [b]) -> [a] -> m [b] concatMapM :: Monad m => (a -> m [b]) -> [a] -> m [b]
@ -300,9 +301,27 @@ phash_mimetypes = Set.fromList
] ]
copyOrMoveFiles :: JSONSettings -> FileGetters -> Details -> IO () copyFiles :: JSONSettings -> (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO ()
copyOrMoveFiles settings fgs (site, board, thread, _, path, attachment) = do copyFiles settings (site, board, thread, _, path, attachment) = do
(copyOrMove fgs) common_dest (src, dest) (thumb_src, thumb_dest) destination_exists <- doesFileExist dest
if not destination_exists
then do
src_exists <- doesFileExist src
createDirectoryIfMissing True common_dest
if src_exists
then putStrLn ("Copying " ++ src) >> copyFile src dest
else return ()
thumb_exists <- doesFileExist thumb_src
if thumb_exists
then putStrLn ("Copying " ++ thumb_src) >> copyFile thumb_src thumb_dest
else return ()
else return ()
-- src = (At.file_path | At.thumb_path) -- src = (At.file_path | At.thumb_path)
-- dest = <media_root>/<website_name>/<boardpart>/<board_thread_id>/<sha>.<ext> -- dest = <media_root>/<website_name>/<boardpart>/<board_thread_id>/<sha>.<ext>
@ -332,15 +351,8 @@ copyOrMoveFiles settings fgs (site, board, thread, _, path, attachment) = do
</> (show $ Threads.board_thread_id thread) </> (show $ Threads.board_thread_id thread)
type Details = (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) processFiles :: JSONSettings -> [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)] -> IO ()
processFiles settings tuples = do -- perfect just means that our posts have ids, they're already inserted into the db
processFiles
:: JSONSettings
-> FileGetters
-> [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)]
-> IO ()
processFiles settings fgs tuples = do -- perfect just means that our posts have ids, they're already inserted into the db
let ps = map (\(_, _, _, _, x) -> x) tuples let ps = map (\(_, _, _, _, x) -> x) tuples
existing_attachments_result <- Client.getAttachments settings (map (fromJust . Posts.post_id) ps) existing_attachments_result <- Client.getAttachments settings (map (fromJust . Posts.post_id) ps)
@ -356,13 +368,13 @@ processFiles settings fgs tuples = do -- perfect just means that our posts have
Map.empty Map.empty
existing_attachments existing_attachments
let attachments_on_board :: [ Details ] = let attachments_on_board :: [(Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)] =
concatMap parseAttachments tuples concatMap parseAttachments tuples
-- attachments_on_board are the only files that can be copied into the archive dir right now -- attachments_on_board are the only files that can be copied into the archive dir right now
-- since that's where we have the src filename. except here the Attachment doesn't have a sha hash yet -- since that's where we have the src filename. except here the Attachment doesn't have a sha hash yet
-- so we can't build the destination filename. -- so we can't build the destination filename.
let map_should_exist :: Map.Map (Int64, Text) [ Details ] = let map_should_exist :: Map.Map (Int64, Text) [(Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)] =
foldl' foldl'
(insertRecord (\(_, _, _, _, _, a) -> (At.post_id a, At.board_filename a))) (insertRecord (\(_, _, _, _, _, a) -> (At.post_id a, At.board_filename a)))
Map.empty Map.empty
@ -375,10 +387,7 @@ processFiles settings fgs tuples = do -- perfect just means that our posts have
let to_insert = foldr (++) [] $ Map.elems to_insert_map let to_insert = foldr (++) [] $ Map.elems to_insert_map
to_insert_ <- mapM ensureAttachmentExists to_insert to_insert_exist <- filterM attachmentFileExists to_insert
let to_insert_exist = catMaybes to_insert_
-- to_insert_exist <- filterM attachmentFileExists to_insert
with_hashes <- mapM computeAttachmentHash to_insert_exist with_hashes <- mapM computeAttachmentHash to_insert_exist
@ -391,15 +400,13 @@ processFiles settings fgs tuples = do -- perfect just means that our posts have
Right saved -> do Right saved -> do
putStrLn $ "Saved " ++ (show $ length saved) ++ " attachments!" putStrLn $ "Saved " ++ (show $ length saved) ++ " attachments!"
mapM_ (copyOrMoveFiles settings fgs) attachments_on_board mapM_ (copyFiles settings) attachments_on_board
where where
ensureAttachmentExists :: Details -> IO (Maybe Details) attachmentFileExists :: (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO Bool
ensureAttachmentExists (a, b, c, d, p, f) = attachmentFileExists (_, _, _, _, p, _) = doesFileExist (At.file_path p)
(attachmentPaths fgs) p >>=
return . (maybe Nothing (\x -> Just (a, b, c, d, x, f)))
computeAttachmentHash :: Details -> IO At.Attachment computeAttachmentHash :: (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO At.Attachment
computeAttachmentHash (_, _, _, _, p, q) = do computeAttachmentHash (_, _, _, _, p, q) = do
let f = At.file_path p let f = At.file_path p
@ -477,12 +484,13 @@ processFiles settings fgs tuples = do -- perfect just means that our posts have
notDeleted :: (a, b, c, d, At.Paths, At.Attachment) -> Bool notDeleted :: (a, b, c, d, At.Paths, At.Attachment) -> Bool
notDeleted (_, _, _, _, p, _) = not $ "deleted" `isSuffixOf` (At.file_path p) notDeleted (_, _, _, _, p, _) = not $ "deleted" `isSuffixOf` (At.file_path p)
withPathPrefix :: Text -> FilePath withPathPrefix :: Text -> FilePath
withPathPrefix = (addPathPrefix fgs) . unpack withPathPrefix = ((<>) $ backup_read_root settings) . unpack
parseAttachments parseAttachments
:: (Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post) :: (Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)
-> [ Details ] -> [(Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)]
parseAttachments (site, board, thread, p, q) = filter notDeleted $ parseAttachments (site, board, thread, p, q) = filter notDeleted $
case JSONPosts.files p of case JSONPosts.files p of
Just files -> map Just files -> map
@ -589,46 +597,14 @@ createNewPosts settings tuples = do
data FileGetters = FileGetters data FileGetters = FileGetters
{ getJSONCatalog :: Sites.Site -> String -> IO (Either String [ Catalog ]) { getJSONCatalog :: Sites.Site -> String -> IO (Either String [ Catalog ])
, getJSONPosts :: Sites.Site -> String -> IO (Either String JSONPosts.PostWrapper)
, addPathPrefix :: String -> String
, attachmentPaths :: At.Paths -> IO (Maybe At.Paths)
, copyOrMove :: String -> (String, String) -> (String, String) -> IO ()
} }
localFileGetters :: JSONSettings -> FileGetters localFileGetters :: JSONSettings -> FileGetters
localFileGetters settings = FileGetters localFileGetters settings = FileGetters
{ getJSONCatalog = const $ parseJSONCatalog . withRoot { getJSONCatalog = const $ parseJSONCatalog . (backup_read_root settings </>)
, getJSONPosts = const $ parsePosts . withRoot
, addPathPrefix = ((++) $ backup_read_root settings)
, attachmentPaths = \p -> do
exists <- doesFileExist (At.file_path p)
if exists then return (Just p) else return Nothing
, copyOrMove = \common_dest (src, dest) (thumb_src, thumb_dest) -> do
destination_exists <- doesFileExist dest
if not destination_exists
then do
src_exists <- doesFileExist src
createDirectoryIfMissing True common_dest
if src_exists
then putStrLn ("Copying " ++ src) >> copyFile src dest
else return ()
thumb_exists <- doesFileExist thumb_src
if thumb_exists
then putStrLn ("Copying " ++ thumb_src) >> copyFile thumb_src thumb_dest
else return ()
else return ()
} }
where
withRoot = (backup_read_root settings </>)
processBoard :: JSONSettings -> FileGetters -> Sites.Site -> Boards.Board -> IO () processBoard :: JSONSettings -> FileGetters -> Sites.Site -> Boards.Board -> IO ()
processBoard settings fgs@FileGetters {..} site board = do processBoard settings fgs@FileGetters {..} site board = do
@ -643,7 +619,8 @@ processBoard settings fgs@FileGetters {..} site board = do
all_threads_for_board :: [ Threads.Thread ] <- ensureThreads settings board threads_on_board all_threads_for_board :: [ Threads.Thread ] <- ensureThreads settings board threads_on_board
all_posts_on_board :: [(Threads.Thread, [ JSONPosts.Post ])] <- mapM (readPosts fgs site board) all_threads_for_board all_posts_on_board :: [(Threads.Thread, [ JSONPosts.Post ])] <- mapM (readPosts settings fgs board) all_threads_for_board
let tuples :: [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post)] = concatMap let tuples :: [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post)] = concatMap
(\(t, posts) -> map (\p -> (site, board, t, p)) posts) (\(t, posts) -> map (\p -> (site, board, t, p)) posts)
@ -658,7 +635,7 @@ processBoard settings fgs@FileGetters {..} site board = do
let perfect_post_pairs = addPostsToTuples tuples posts_result let perfect_post_pairs = addPostsToTuples tuples posts_result
processFiles settings fgs perfect_post_pairs processFiles settings perfect_post_pairs
Left errMsg -> Left errMsg ->
putStrLn $ "Failed to parse the JSON file in directory: " putStrLn $ "Failed to parse the JSON file in directory: "

View File

@ -8,8 +8,6 @@ import System.Console.CmdArgs (cmdArgs, Data, Typeable)
import Data.Aeson (decode) import Data.Aeson (decode)
import System.FilePath ((</>)) import System.FilePath ((</>))
import Control.Concurrent.Async (mapConcurrently) import Control.Concurrent.Async (mapConcurrently)
import Data.Aeson (FromJSON)
import System.Directory (createDirectoryIfMissing, renameFile)
import qualified SitesType as Sites import qualified SitesType as Sites
import Common.Server.ConsumerSettings import Common.Server.ConsumerSettings
@ -18,8 +16,8 @@ import Lib
( processBoards ( processBoards
, FileGetters (..) , FileGetters (..)
) )
import JSONParsing (Catalog)
import qualified Network.DataClient as Client import qualified Network.DataClient as Client
import qualified Common.AttachmentType as At
newtype CliArgs = CliArgs newtype CliArgs = CliArgs
{ settingsFile :: String { settingsFile :: String
@ -55,26 +53,11 @@ getSettings = do
Just settings -> return settings Just settings -> return settings
httpFileGetters :: JSONSettings -> FileGetters httpFileGetters :: JSONSettings -> FileGetters
httpFileGetters settings = FileGetters httpFileGetters _ = FileGetters
{ getJSONCatalog = httpGetJSON { getJSONCatalog = httpGetJSON
, getJSONPosts = httpGetJSON
, addPathPrefix = ((++) $ site_url settings)
-- attachmentPaths here actually doesn't get the paths of the attachment,
-- it downloads them into a temporary file and gets that path of that.
, attachmentPaths = \paths -> do
filepath <- Client.getFile (At.file_path paths)
thumbpath <- Client.getFile (At.thumbnail_path paths)
return $ filepath >>= \fp ->
thumbpath >>= \tp ->
return (At.Paths fp tp)
, copyOrMove = \common_dest (src, dest) (thumb_src, thumb_dest) -> do
createDirectoryIfMissing True common_dest
renameFile src dest
renameFile thumb_src thumb_dest
} }
httpGetJSON :: (FromJSON a) => Sites.Site -> String -> IO (Either String a) httpGetJSON :: Sites.Site -> String -> IO (Either String [Catalog])
httpGetJSON site path = (Client.getJSON $ Sites.url site </> path) httpGetJSON site path = (Client.getJSON $ Sites.url site </> path)
>>= getErrMsg >>= getErrMsg
where where

View File

@ -18,7 +18,6 @@ module Network.DataClient
, getAttachments , getAttachments
, postAttachments , postAttachments
, getJSON , getJSON
, getFile
) where ) where
import Control.Monad (forM) import Control.Monad (forM)
@ -38,7 +37,6 @@ import Data.Aeson
, Value , Value
) )
import GHC.Generics import GHC.Generics
import System.IO.Temp (openBinaryTempFile, getCanonicalTemporaryDirectory)
import qualified Common.Server.JSONSettings as T import qualified Common.Server.JSONSettings as T
import qualified SitesType as Sites import qualified SitesType as Sites
@ -222,19 +220,3 @@ eitherDecodeResponse (Right bs) =
getJSON :: (FromJSON a) => String -> IO (Either HttpError a) getJSON :: (FromJSON a) => String -> IO (Either HttpError a)
getJSON url = get_ url [] >>= return . eitherDecodeResponse getJSON url = get_ url [] >>= return . eitherDecodeResponse
getFile :: String -> IO (Maybe String)
getFile url = do
result <- get_ url []
case result of
Left (err :: HttpError) -> do
putStrLn $ show err
return Nothing
Right lbs -> do
tmp_root <- getCanonicalTemporaryDirectory
(tmp_filepath, tmp_filehandle) <- openBinaryTempFile tmp_root "chan.attachment"
LBS.writeFile tmp_filepath lbs
LBS.hPut tmp_filehandle lbs
return $ Just tmp_filepath