Add a way to save a file by doing a GET request over http to DataClient

This commit is contained in:
towards-a-new-leftypol 2024-04-08 02:14:06 -04:00
parent 1c6c1250e3
commit 67870cab36
4 changed files with 51 additions and 18 deletions

View File

@ -102,7 +102,8 @@ executable chan-delorean
cryptonite, cryptonite,
memory, memory,
mime-types, mime-types,
perceptual-hash perceptual-hash,
temporary
-- Directories containing source files. -- Directories containing source files.
hs-source-dirs: src hs-source-dirs: src
@ -157,7 +158,8 @@ executable chan-delorean-consoomer
memory, memory,
mime-types, mime-types,
perceptual-hash, perceptual-hash,
async async,
temporary
-- Directories containing source files. -- Directories containing source files.
hs-source-dirs: src hs-source-dirs: src

View File

@ -182,27 +182,24 @@ ensureThreads settings board all_threads = do
readPosts readPosts
:: JSONSettings :: FileGetters
-> FileGetters -> Sites.Site
-> Boards.Board -> Boards.Board
-> Threads.Thread -> Threads.Thread
-> IO (Threads.Thread, [ JSONPosts.Post ]) -> IO (Threads.Thread, [ JSONPosts.Post ])
readPosts settings fgs board thread = do readPosts FileGetters {..} site board thread = do
-- parsePosts :: FilePath -> IO (Either String Post.PostWrapper) result <- getJSONPosts site relative_path
result <- parsePosts thread_filename
case result of case result of
Left err -> do Left err -> do
putStrLn $ "Failed to parse the JSON file " ++ thread_filename ++ " error: " ++ err putStrLn $ "Failed to parse the JSON file " ++ relative_path ++ " error: " ++ err
putStrLn $ "Site: " ++ show site
return (thread, []) return (thread, [])
Right posts_wrapper -> return (thread, JSONPosts.posts posts_wrapper) Right posts_wrapper -> return (thread, JSONPosts.posts posts_wrapper)
where where
thread_filename :: FilePath relative_path :: FilePath
thread_filename = backupDir </> "res" </> (show (Threads.board_thread_id thread) ++ ".json") relative_path = Boards.pathpart board </> "res" </> (show (Threads.board_thread_id thread) ++ ".json")
backupDir :: FilePath
backupDir = backup_read_root settings </> Boards.pathpart board
apiPostToPostKey :: Threads.Thread -> JSONPosts.Post -> Client.PostId apiPostToPostKey :: Threads.Thread -> JSONPosts.Post -> Client.PostId
@ -212,6 +209,7 @@ apiPostToPostKey thread post =
, Client.board_post_id = (JSONPosts.no post) , Client.board_post_id = (JSONPosts.no post)
} }
-- Convert Post to DbPost -- Convert Post to DbPost
apiPostToArchivePost :: Int -> Threads.Thread -> JSONPosts.Post -> Posts.Post apiPostToArchivePost :: Int -> Threads.Thread -> JSONPosts.Post -> Posts.Post
apiPostToArchivePost local_idx thread post = apiPostToArchivePost local_idx thread post =
@ -228,6 +226,7 @@ apiPostToArchivePost local_idx thread post =
, Posts.local_idx = local_idx , Posts.local_idx = local_idx
} }
-- | A version of 'concatMap' that works with a monadic predicate. -- | A version of 'concatMap' that works with a monadic predicate.
-- Stolen from package extra Control.Monad.Extra -- Stolen from package extra Control.Monad.Extra
concatMapM :: Monad m => (a -> m [b]) -> [a] -> m [b] concatMapM :: Monad m => (a -> m [b]) -> [a] -> m [b]
@ -301,6 +300,12 @@ phash_mimetypes = Set.fromList
] ]
-- TODO: This will need to be a move or copy
-- - move in the case of downloading from network
-- - first download to temporary file
-- - then in this function instead of copyFile move it.
-- - copy file if loading from filesystem (eg. from backup), since we want
-- to keep the original.
copyFiles :: JSONSettings -> (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO () copyFiles :: JSONSettings -> (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO ()
copyFiles settings (site, board, thread, _, path, attachment) = do copyFiles settings (site, board, thread, _, path, attachment) = do
destination_exists <- doesFileExist dest destination_exists <- doesFileExist dest
@ -405,6 +410,7 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
where where
attachmentFileExists :: (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO Bool attachmentFileExists :: (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO Bool
attachmentFileExists (_, _, _, _, p, _) = doesFileExist (At.file_path p) attachmentFileExists (_, _, _, _, p, _) = doesFileExist (At.file_path p)
-- TODO: this actually has to download a file in the consumer case
computeAttachmentHash :: (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO At.Attachment computeAttachmentHash :: (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO At.Attachment
computeAttachmentHash (_, _, _, _, p, q) = do computeAttachmentHash (_, _, _, _, p, q) = do
@ -485,6 +491,8 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
notDeleted (_, _, _, _, p, _) = not $ "deleted" `isSuffixOf` (At.file_path p) notDeleted (_, _, _, _, p, _) = not $ "deleted" `isSuffixOf` (At.file_path p)
-- TODO: we don't have backup_read_root if reading from network.
-- - solution? one should obviously be a URL then.
withPathPrefix :: Text -> FilePath withPathPrefix :: Text -> FilePath
withPathPrefix = ((<>) $ backup_read_root settings) . unpack withPathPrefix = ((<>) $ backup_read_root settings) . unpack
@ -597,14 +605,19 @@ createNewPosts settings tuples = do
data FileGetters = FileGetters data FileGetters = FileGetters
{ getJSONCatalog :: Sites.Site -> String -> IO (Either String [ Catalog ]) { getJSONCatalog :: Sites.Site -> String -> IO (Either String [ Catalog ])
, getJSONPosts :: Sites.Site -> String -> IO (Either String JSONPosts.PostWrapper)
} }
localFileGetters :: JSONSettings -> FileGetters localFileGetters :: JSONSettings -> FileGetters
localFileGetters settings = FileGetters localFileGetters settings = FileGetters
{ getJSONCatalog = const $ parseJSONCatalog . (backup_read_root settings </>) { getJSONCatalog = const $ parseJSONCatalog . withRoot
, getJSONPosts = const $ parsePosts . withRoot
} }
where
withRoot = (backup_read_root settings </>)
processBoard :: JSONSettings -> FileGetters -> Sites.Site -> Boards.Board -> IO () processBoard :: JSONSettings -> FileGetters -> Sites.Site -> Boards.Board -> IO ()
processBoard settings fgs@FileGetters {..} site board = do processBoard settings fgs@FileGetters {..} site board = do
@ -619,8 +632,7 @@ processBoard settings fgs@FileGetters {..} site board = do
all_threads_for_board :: [ Threads.Thread ] <- ensureThreads settings board threads_on_board all_threads_for_board :: [ Threads.Thread ] <- ensureThreads settings board threads_on_board
all_posts_on_board :: [(Threads.Thread, [ JSONPosts.Post ])] <- mapM (readPosts settings fgs board) all_threads_for_board all_posts_on_board :: [(Threads.Thread, [ JSONPosts.Post ])] <- mapM (readPosts fgs site board) all_threads_for_board
let tuples :: [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post)] = concatMap let tuples :: [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post)] = concatMap
(\(t, posts) -> map (\p -> (site, board, t, p)) posts) (\(t, posts) -> map (\p -> (site, board, t, p)) posts)

View File

@ -8,6 +8,7 @@ import System.Console.CmdArgs (cmdArgs, Data, Typeable)
import Data.Aeson (decode) import Data.Aeson (decode)
import System.FilePath ((</>)) import System.FilePath ((</>))
import Control.Concurrent.Async (mapConcurrently) import Control.Concurrent.Async (mapConcurrently)
import Data.Aeson (FromJSON)
import qualified SitesType as Sites import qualified SitesType as Sites
import Common.Server.ConsumerSettings import Common.Server.ConsumerSettings
@ -16,7 +17,6 @@ import Lib
( processBoards ( processBoards
, FileGetters (..) , FileGetters (..)
) )
import JSONParsing (Catalog)
import qualified Network.DataClient as Client import qualified Network.DataClient as Client
newtype CliArgs = CliArgs newtype CliArgs = CliArgs
@ -55,9 +55,10 @@ getSettings = do
httpFileGetters :: JSONSettings -> FileGetters httpFileGetters :: JSONSettings -> FileGetters
httpFileGetters _ = FileGetters httpFileGetters _ = FileGetters
{ getJSONCatalog = httpGetJSON { getJSONCatalog = httpGetJSON
, getJSONPosts = httpGetJSON
} }
httpGetJSON :: Sites.Site -> String -> IO (Either String [Catalog]) httpGetJSON :: (FromJSON a) => Sites.Site -> String -> IO (Either String a)
httpGetJSON site path = (Client.getJSON $ Sites.url site </> path) httpGetJSON site path = (Client.getJSON $ Sites.url site </> path)
>>= getErrMsg >>= getErrMsg
where where

View File

@ -18,6 +18,7 @@ module Network.DataClient
, getAttachments , getAttachments
, postAttachments , postAttachments
, getJSON , getJSON
, getFile
) where ) where
import Control.Monad (forM) import Control.Monad (forM)
@ -37,6 +38,7 @@ import Data.Aeson
, Value , Value
) )
import GHC.Generics import GHC.Generics
import System.IO.Temp (openBinaryTempFile, getCanonicalTemporaryDirectory)
import qualified Common.Server.JSONSettings as T import qualified Common.Server.JSONSettings as T
import qualified SitesType as Sites import qualified SitesType as Sites
@ -220,3 +222,19 @@ eitherDecodeResponse (Right bs) =
getJSON :: (FromJSON a) => String -> IO (Either HttpError a) getJSON :: (FromJSON a) => String -> IO (Either HttpError a)
getJSON url = get_ url [] >>= return . eitherDecodeResponse getJSON url = get_ url [] >>= return . eitherDecodeResponse
getFile :: String -> IO (Maybe String)
getFile url = do
result <- get_ url []
case result of
Left (err :: HttpError) -> do
putStrLn $ show err
return Nothing
Right lbs -> do
tmp_root <- getCanonicalTemporaryDirectory
(tmp_filepath, tmp_filehandle) <- openBinaryTempFile tmp_root "chan.attachment"
LBS.writeFile tmp_filepath lbs
LBS.hPut tmp_filehandle lbs
return $ Just tmp_filepath