diff --git a/chan-delorean.cabal b/chan-delorean.cabal index 3dd2901..261d823 100644 --- a/chan-delorean.cabal +++ b/chan-delorean.cabal @@ -102,7 +102,8 @@ executable chan-delorean cryptonite, memory, mime-types, - perceptual-hash + perceptual-hash, + temporary -- Directories containing source files. hs-source-dirs: src @@ -157,7 +158,8 @@ executable chan-delorean-consoomer memory, mime-types, perceptual-hash, - async + async, + temporary -- Directories containing source files. hs-source-dirs: src diff --git a/src/Lib.hs b/src/Lib.hs index e0b53be..56ee134 100644 --- a/src/Lib.hs +++ b/src/Lib.hs @@ -182,27 +182,24 @@ ensureThreads settings board all_threads = do readPosts - :: JSONSettings - -> FileGetters + :: FileGetters + -> Sites.Site -> Boards.Board -> Threads.Thread -> IO (Threads.Thread, [ JSONPosts.Post ]) -readPosts settings fgs board thread = do --- parsePosts :: FilePath -> IO (Either String Post.PostWrapper) - result <- parsePosts thread_filename +readPosts FileGetters {..} site board thread = do + result <- getJSONPosts site relative_path case result of Left err -> do - putStrLn $ "Failed to parse the JSON file " ++ thread_filename ++ " error: " ++ err + putStrLn $ "Failed to parse the JSON file " ++ relative_path ++ " error: " ++ err + putStrLn $ "Site: " ++ show site return (thread, []) Right posts_wrapper -> return (thread, JSONPosts.posts posts_wrapper) where - thread_filename :: FilePath - thread_filename = backupDir "res" (show (Threads.board_thread_id thread) ++ ".json") - - backupDir :: FilePath - backupDir = backup_read_root settings Boards.pathpart board + relative_path :: FilePath + relative_path = Boards.pathpart board "res" (show (Threads.board_thread_id thread) ++ ".json") apiPostToPostKey :: Threads.Thread -> JSONPosts.Post -> Client.PostId @@ -212,6 +209,7 @@ apiPostToPostKey thread post = , Client.board_post_id = (JSONPosts.no post) } + -- Convert Post to DbPost apiPostToArchivePost :: Int -> Threads.Thread -> JSONPosts.Post -> Posts.Post apiPostToArchivePost local_idx thread post = @@ -228,6 +226,7 @@ apiPostToArchivePost local_idx thread post = , Posts.local_idx = local_idx } + -- | A version of 'concatMap' that works with a monadic predicate. -- Stolen from package extra Control.Monad.Extra concatMapM :: Monad m => (a -> m [b]) -> [a] -> m [b] @@ -301,6 +300,12 @@ phash_mimetypes = Set.fromList ] +-- TODO: This will need to be a move or copy +-- - move in the case of downloading from network +-- - first download to temporary file +-- - then in this function instead of copyFile move it. +-- - copy file if loading from filesystem (eg. from backup), since we want +-- to keep the original. copyFiles :: JSONSettings -> (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO () copyFiles settings (site, board, thread, _, path, attachment) = do destination_exists <- doesFileExist dest @@ -405,6 +410,7 @@ processFiles settings tuples = do -- perfect just means that our posts have ids, where attachmentFileExists :: (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO Bool attachmentFileExists (_, _, _, _, p, _) = doesFileExist (At.file_path p) + -- TODO: this actually has to download a file in the consumer case computeAttachmentHash :: (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO At.Attachment computeAttachmentHash (_, _, _, _, p, q) = do @@ -485,6 +491,8 @@ processFiles settings tuples = do -- perfect just means that our posts have ids, notDeleted (_, _, _, _, p, _) = not $ "deleted" `isSuffixOf` (At.file_path p) + -- TODO: we don't have backup_read_root if reading from network. + -- - solution? one should obviously be a URL then. withPathPrefix :: Text -> FilePath withPathPrefix = ((<>) $ backup_read_root settings) . unpack @@ -597,14 +605,19 @@ createNewPosts settings tuples = do data FileGetters = FileGetters { getJSONCatalog :: Sites.Site -> String -> IO (Either String [ Catalog ]) + , getJSONPosts :: Sites.Site -> String -> IO (Either String JSONPosts.PostWrapper) } localFileGetters :: JSONSettings -> FileGetters localFileGetters settings = FileGetters - { getJSONCatalog = const $ parseJSONCatalog . (backup_read_root settings ) + { getJSONCatalog = const $ parseJSONCatalog . withRoot + , getJSONPosts = const $ parsePosts . withRoot } + where + withRoot = (backup_read_root settings ) + processBoard :: JSONSettings -> FileGetters -> Sites.Site -> Boards.Board -> IO () processBoard settings fgs@FileGetters {..} site board = do @@ -619,8 +632,7 @@ processBoard settings fgs@FileGetters {..} site board = do all_threads_for_board :: [ Threads.Thread ] <- ensureThreads settings board threads_on_board - all_posts_on_board :: [(Threads.Thread, [ JSONPosts.Post ])] <- mapM (readPosts settings fgs board) all_threads_for_board - + all_posts_on_board :: [(Threads.Thread, [ JSONPosts.Post ])] <- mapM (readPosts fgs site board) all_threads_for_board let tuples :: [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post)] = concatMap (\(t, posts) -> map (\p -> (site, board, t, p)) posts) diff --git a/src/Main.hs b/src/Main.hs index 1de3ac8..84210ce 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -8,6 +8,7 @@ import System.Console.CmdArgs (cmdArgs, Data, Typeable) import Data.Aeson (decode) import System.FilePath (()) import Control.Concurrent.Async (mapConcurrently) +import Data.Aeson (FromJSON) import qualified SitesType as Sites import Common.Server.ConsumerSettings @@ -16,7 +17,6 @@ import Lib ( processBoards , FileGetters (..) ) -import JSONParsing (Catalog) import qualified Network.DataClient as Client newtype CliArgs = CliArgs @@ -55,9 +55,10 @@ getSettings = do httpFileGetters :: JSONSettings -> FileGetters httpFileGetters _ = FileGetters { getJSONCatalog = httpGetJSON + , getJSONPosts = httpGetJSON } -httpGetJSON :: Sites.Site -> String -> IO (Either String [Catalog]) +httpGetJSON :: (FromJSON a) => Sites.Site -> String -> IO (Either String a) httpGetJSON site path = (Client.getJSON $ Sites.url site path) >>= getErrMsg where diff --git a/src/Network/DataClient.hs b/src/Network/DataClient.hs index d5ebeab..7004cb2 100644 --- a/src/Network/DataClient.hs +++ b/src/Network/DataClient.hs @@ -18,6 +18,7 @@ module Network.DataClient , getAttachments , postAttachments , getJSON + , getFile ) where import Control.Monad (forM) @@ -37,6 +38,7 @@ import Data.Aeson , Value ) import GHC.Generics +import System.IO.Temp (openBinaryTempFile, getCanonicalTemporaryDirectory) import qualified Common.Server.JSONSettings as T import qualified SitesType as Sites @@ -220,3 +222,19 @@ eitherDecodeResponse (Right bs) = getJSON :: (FromJSON a) => String -> IO (Either HttpError a) getJSON url = get_ url [] >>= return . eitherDecodeResponse + + +getFile :: String -> IO (Maybe String) +getFile url = do + result <- get_ url [] + + case result of + Left (err :: HttpError) -> do + putStrLn $ show err + return Nothing + Right lbs -> do + tmp_root <- getCanonicalTemporaryDirectory + (tmp_filepath, tmp_filehandle) <- openBinaryTempFile tmp_root "chan.attachment" + LBS.writeFile tmp_filepath lbs + LBS.hPut tmp_filehandle lbs + return $ Just tmp_filepath