Compare commits

..

2 Commits

Author SHA1 Message Date
towards-a-new-leftypol fc321d8531 Abstract out everywhere that references reading src files from local dir
- Now we have two binaries: one to read from local dir one from http
    - both work on the same json api files
2024-04-09 19:40:29 -04:00
towards-a-new-leftypol 67870cab36 Add a way to save a file by doing a GET request over http to DataClient 2024-04-08 02:14:06 -04:00
4 changed files with 114 additions and 54 deletions

View File

@ -102,7 +102,8 @@ executable chan-delorean
cryptonite,
memory,
mime-types,
perceptual-hash
perceptual-hash,
temporary
-- Directories containing source files.
hs-source-dirs: src
@ -157,7 +158,8 @@ executable chan-delorean-consoomer
memory,
mime-types,
perceptual-hash,
async
async,
temporary
-- Directories containing source files.
hs-source-dirs: src

View File

@ -24,7 +24,7 @@ import Data.Set (Set)
import Data.Time.Clock.POSIX (posixSecondsToUTCTime)
import Data.Time.Clock (UTCTime)
import qualified Data.Map as Map
import Data.Maybe (fromJust)
import Data.Maybe (fromJust, catMaybes)
import Data.Text (Text, unpack)
import qualified Data.Text as T
import Data.Text.Encoding (decodeUtf8)
@ -182,27 +182,24 @@ ensureThreads settings board all_threads = do
readPosts
:: JSONSettings
-> FileGetters
:: FileGetters
-> Sites.Site
-> Boards.Board
-> Threads.Thread
-> IO (Threads.Thread, [ JSONPosts.Post ])
readPosts settings fgs board thread = do
-- parsePosts :: FilePath -> IO (Either String Post.PostWrapper)
result <- parsePosts thread_filename
readPosts FileGetters {..} site board thread = do
result <- getJSONPosts site relative_path
case result of
Left err -> do
putStrLn $ "Failed to parse the JSON file " ++ thread_filename ++ " error: " ++ err
putStrLn $ "Failed to parse the JSON file " ++ relative_path ++ " error: " ++ err
putStrLn $ "Site: " ++ show site
return (thread, [])
Right posts_wrapper -> return (thread, JSONPosts.posts posts_wrapper)
where
thread_filename :: FilePath
thread_filename = backupDir </> "res" </> (show (Threads.board_thread_id thread) ++ ".json")
backupDir :: FilePath
backupDir = backup_read_root settings </> Boards.pathpart board
relative_path :: FilePath
relative_path = Boards.pathpart board </> "res" </> (show (Threads.board_thread_id thread) ++ ".json")
apiPostToPostKey :: Threads.Thread -> JSONPosts.Post -> Client.PostId
@ -212,6 +209,7 @@ apiPostToPostKey thread post =
, Client.board_post_id = (JSONPosts.no post)
}
-- Convert Post to DbPost
apiPostToArchivePost :: Int -> Threads.Thread -> JSONPosts.Post -> Posts.Post
apiPostToArchivePost local_idx thread post =
@ -228,6 +226,7 @@ apiPostToArchivePost local_idx thread post =
, Posts.local_idx = local_idx
}
-- | A version of 'concatMap' that works with a monadic predicate.
-- Stolen from package extra Control.Monad.Extra
concatMapM :: Monad m => (a -> m [b]) -> [a] -> m [b]
@ -301,27 +300,9 @@ phash_mimetypes = Set.fromList
]
copyFiles :: JSONSettings -> (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO ()
copyFiles settings (site, board, thread, _, path, attachment) = do
destination_exists <- doesFileExist dest
if not destination_exists
then do
src_exists <- doesFileExist src
createDirectoryIfMissing True common_dest
if src_exists
then putStrLn ("Copying " ++ src) >> copyFile src dest
else return ()
thumb_exists <- doesFileExist thumb_src
if thumb_exists
then putStrLn ("Copying " ++ thumb_src) >> copyFile thumb_src thumb_dest
else return ()
else return ()
copyOrMoveFiles :: JSONSettings -> FileGetters -> Details -> IO ()
copyOrMoveFiles settings fgs (site, board, thread, _, path, attachment) = do
(copyOrMove fgs) common_dest (src, dest) (thumb_src, thumb_dest)
-- src = (At.file_path | At.thumb_path)
-- dest = <media_root>/<website_name>/<boardpart>/<board_thread_id>/<sha>.<ext>
@ -351,8 +332,15 @@ copyFiles settings (site, board, thread, _, path, attachment) = do
</> (show $ Threads.board_thread_id thread)
processFiles :: JSONSettings -> [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)] -> IO ()
processFiles settings tuples = do -- perfect just means that our posts have ids, they're already inserted into the db
type Details = (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)
processFiles
:: JSONSettings
-> FileGetters
-> [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)]
-> IO ()
processFiles settings fgs tuples = do -- perfect just means that our posts have ids, they're already inserted into the db
let ps = map (\(_, _, _, _, x) -> x) tuples
existing_attachments_result <- Client.getAttachments settings (map (fromJust . Posts.post_id) ps)
@ -368,13 +356,13 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
Map.empty
existing_attachments
let attachments_on_board :: [(Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)] =
let attachments_on_board :: [ Details ] =
concatMap parseAttachments tuples
-- attachments_on_board are the only files that can be copied into the archive dir right now
-- since that's where we have the src filename. except here the Attachment doesn't have a sha hash yet
-- so we can't build the destination filename.
let map_should_exist :: Map.Map (Int64, Text) [(Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)] =
let map_should_exist :: Map.Map (Int64, Text) [ Details ] =
foldl'
(insertRecord (\(_, _, _, _, _, a) -> (At.post_id a, At.board_filename a)))
Map.empty
@ -387,7 +375,10 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
let to_insert = foldr (++) [] $ Map.elems to_insert_map
to_insert_exist <- filterM attachmentFileExists to_insert
to_insert_ <- mapM ensureAttachmentExists to_insert
let to_insert_exist = catMaybes to_insert_
-- to_insert_exist <- filterM attachmentFileExists to_insert
with_hashes <- mapM computeAttachmentHash to_insert_exist
@ -400,13 +391,15 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
Right saved -> do
putStrLn $ "Saved " ++ (show $ length saved) ++ " attachments!"
mapM_ (copyFiles settings) attachments_on_board
mapM_ (copyOrMoveFiles settings fgs) attachments_on_board
where
attachmentFileExists :: (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO Bool
attachmentFileExists (_, _, _, _, p, _) = doesFileExist (At.file_path p)
ensureAttachmentExists :: Details -> IO (Maybe Details)
ensureAttachmentExists (a, b, c, d, p, f) =
(attachmentPaths fgs) p >>=
return . (maybe Nothing (\x -> Just (a, b, c, d, x, f)))
computeAttachmentHash :: (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO At.Attachment
computeAttachmentHash :: Details -> IO At.Attachment
computeAttachmentHash (_, _, _, _, p, q) = do
let f = At.file_path p
@ -484,13 +477,12 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
notDeleted :: (a, b, c, d, At.Paths, At.Attachment) -> Bool
notDeleted (_, _, _, _, p, _) = not $ "deleted" `isSuffixOf` (At.file_path p)
withPathPrefix :: Text -> FilePath
withPathPrefix = ((<>) $ backup_read_root settings) . unpack
withPathPrefix = (addPathPrefix fgs) . unpack
parseAttachments
:: (Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)
-> [(Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)]
-> [ Details ]
parseAttachments (site, board, thread, p, q) = filter notDeleted $
case JSONPosts.files p of
Just files -> map
@ -597,14 +589,46 @@ createNewPosts settings tuples = do
data FileGetters = FileGetters
{ getJSONCatalog :: Sites.Site -> String -> IO (Either String [ Catalog ])
, getJSONPosts :: Sites.Site -> String -> IO (Either String JSONPosts.PostWrapper)
, addPathPrefix :: String -> String
, attachmentPaths :: At.Paths -> IO (Maybe At.Paths)
, copyOrMove :: String -> (String, String) -> (String, String) -> IO ()
}
localFileGetters :: JSONSettings -> FileGetters
localFileGetters settings = FileGetters
{ getJSONCatalog = const $ parseJSONCatalog . (backup_read_root settings </>)
{ getJSONCatalog = const $ parseJSONCatalog . withRoot
, getJSONPosts = const $ parsePosts . withRoot
, addPathPrefix = ((++) $ backup_read_root settings)
, attachmentPaths = \p -> do
exists <- doesFileExist (At.file_path p)
if exists then return (Just p) else return Nothing
, copyOrMove = \common_dest (src, dest) (thumb_src, thumb_dest) -> do
destination_exists <- doesFileExist dest
if not destination_exists
then do
src_exists <- doesFileExist src
createDirectoryIfMissing True common_dest
if src_exists
then putStrLn ("Copying " ++ src) >> copyFile src dest
else return ()
thumb_exists <- doesFileExist thumb_src
if thumb_exists
then putStrLn ("Copying " ++ thumb_src) >> copyFile thumb_src thumb_dest
else return ()
else return ()
}
where
withRoot = (backup_read_root settings </>)
processBoard :: JSONSettings -> FileGetters -> Sites.Site -> Boards.Board -> IO ()
processBoard settings fgs@FileGetters {..} site board = do
@ -619,8 +643,7 @@ processBoard settings fgs@FileGetters {..} site board = do
all_threads_for_board :: [ Threads.Thread ] <- ensureThreads settings board threads_on_board
all_posts_on_board :: [(Threads.Thread, [ JSONPosts.Post ])] <- mapM (readPosts settings fgs board) all_threads_for_board
all_posts_on_board :: [(Threads.Thread, [ JSONPosts.Post ])] <- mapM (readPosts fgs site board) all_threads_for_board
let tuples :: [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post)] = concatMap
(\(t, posts) -> map (\p -> (site, board, t, p)) posts)
@ -635,7 +658,7 @@ processBoard settings fgs@FileGetters {..} site board = do
let perfect_post_pairs = addPostsToTuples tuples posts_result
processFiles settings perfect_post_pairs
processFiles settings fgs perfect_post_pairs
Left errMsg ->
putStrLn $ "Failed to parse the JSON file in directory: "

View File

@ -8,6 +8,8 @@ import System.Console.CmdArgs (cmdArgs, Data, Typeable)
import Data.Aeson (decode)
import System.FilePath ((</>))
import Control.Concurrent.Async (mapConcurrently)
import Data.Aeson (FromJSON)
import System.Directory (createDirectoryIfMissing, renameFile)
import qualified SitesType as Sites
import Common.Server.ConsumerSettings
@ -16,8 +18,8 @@ import Lib
( processBoards
, FileGetters (..)
)
import JSONParsing (Catalog)
import qualified Network.DataClient as Client
import qualified Common.AttachmentType as At
newtype CliArgs = CliArgs
{ settingsFile :: String
@ -53,11 +55,26 @@ getSettings = do
Just settings -> return settings
httpFileGetters :: JSONSettings -> FileGetters
httpFileGetters _ = FileGetters
httpFileGetters settings = FileGetters
{ getJSONCatalog = httpGetJSON
, getJSONPosts = httpGetJSON
, addPathPrefix = ((++) $ site_url settings)
-- attachmentPaths here actually doesn't get the paths of the attachment,
-- it downloads them into a temporary file and gets that path of that.
, attachmentPaths = \paths -> do
filepath <- Client.getFile (At.file_path paths)
thumbpath <- Client.getFile (At.thumbnail_path paths)
return $ filepath >>= \fp ->
thumbpath >>= \tp ->
return (At.Paths fp tp)
, copyOrMove = \common_dest (src, dest) (thumb_src, thumb_dest) -> do
createDirectoryIfMissing True common_dest
renameFile src dest
renameFile thumb_src thumb_dest
}
httpGetJSON :: Sites.Site -> String -> IO (Either String [Catalog])
httpGetJSON :: (FromJSON a) => Sites.Site -> String -> IO (Either String a)
httpGetJSON site path = (Client.getJSON $ Sites.url site </> path)
>>= getErrMsg
where

View File

@ -18,6 +18,7 @@ module Network.DataClient
, getAttachments
, postAttachments
, getJSON
, getFile
) where
import Control.Monad (forM)
@ -37,6 +38,7 @@ import Data.Aeson
, Value
)
import GHC.Generics
import System.IO.Temp (openBinaryTempFile, getCanonicalTemporaryDirectory)
import qualified Common.Server.JSONSettings as T
import qualified SitesType as Sites
@ -220,3 +222,19 @@ eitherDecodeResponse (Right bs) =
getJSON :: (FromJSON a) => String -> IO (Either HttpError a)
getJSON url = get_ url [] >>= return . eitherDecodeResponse
getFile :: String -> IO (Maybe String)
getFile url = do
result <- get_ url []
case result of
Left (err :: HttpError) -> do
putStrLn $ show err
return Nothing
Right lbs -> do
tmp_root <- getCanonicalTemporaryDirectory
(tmp_filepath, tmp_filehandle) <- openBinaryTempFile tmp_root "chan.attachment"
LBS.writeFile tmp_filepath lbs
LBS.hPut tmp_filehandle lbs
return $ Just tmp_filepath