Compare commits
2 Commits
1c6c1250e3
...
fc321d8531
Author | SHA1 | Date |
---|---|---|
towards-a-new-leftypol | fc321d8531 | |
towards-a-new-leftypol | 67870cab36 |
|
@ -102,7 +102,8 @@ executable chan-delorean
|
|||
cryptonite,
|
||||
memory,
|
||||
mime-types,
|
||||
perceptual-hash
|
||||
perceptual-hash,
|
||||
temporary
|
||||
|
||||
-- Directories containing source files.
|
||||
hs-source-dirs: src
|
||||
|
@ -157,7 +158,8 @@ executable chan-delorean-consoomer
|
|||
memory,
|
||||
mime-types,
|
||||
perceptual-hash,
|
||||
async
|
||||
async,
|
||||
temporary
|
||||
|
||||
-- Directories containing source files.
|
||||
hs-source-dirs: src
|
||||
|
|
121
src/Lib.hs
121
src/Lib.hs
|
@ -24,7 +24,7 @@ import Data.Set (Set)
|
|||
import Data.Time.Clock.POSIX (posixSecondsToUTCTime)
|
||||
import Data.Time.Clock (UTCTime)
|
||||
import qualified Data.Map as Map
|
||||
import Data.Maybe (fromJust)
|
||||
import Data.Maybe (fromJust, catMaybes)
|
||||
import Data.Text (Text, unpack)
|
||||
import qualified Data.Text as T
|
||||
import Data.Text.Encoding (decodeUtf8)
|
||||
|
@ -182,27 +182,24 @@ ensureThreads settings board all_threads = do
|
|||
|
||||
|
||||
readPosts
|
||||
:: JSONSettings
|
||||
-> FileGetters
|
||||
:: FileGetters
|
||||
-> Sites.Site
|
||||
-> Boards.Board
|
||||
-> Threads.Thread
|
||||
-> IO (Threads.Thread, [ JSONPosts.Post ])
|
||||
readPosts settings fgs board thread = do
|
||||
-- parsePosts :: FilePath -> IO (Either String Post.PostWrapper)
|
||||
result <- parsePosts thread_filename
|
||||
readPosts FileGetters {..} site board thread = do
|
||||
result <- getJSONPosts site relative_path
|
||||
|
||||
case result of
|
||||
Left err -> do
|
||||
putStrLn $ "Failed to parse the JSON file " ++ thread_filename ++ " error: " ++ err
|
||||
putStrLn $ "Failed to parse the JSON file " ++ relative_path ++ " error: " ++ err
|
||||
putStrLn $ "Site: " ++ show site
|
||||
return (thread, [])
|
||||
Right posts_wrapper -> return (thread, JSONPosts.posts posts_wrapper)
|
||||
|
||||
where
|
||||
thread_filename :: FilePath
|
||||
thread_filename = backupDir </> "res" </> (show (Threads.board_thread_id thread) ++ ".json")
|
||||
|
||||
backupDir :: FilePath
|
||||
backupDir = backup_read_root settings </> Boards.pathpart board
|
||||
relative_path :: FilePath
|
||||
relative_path = Boards.pathpart board </> "res" </> (show (Threads.board_thread_id thread) ++ ".json")
|
||||
|
||||
|
||||
apiPostToPostKey :: Threads.Thread -> JSONPosts.Post -> Client.PostId
|
||||
|
@ -212,6 +209,7 @@ apiPostToPostKey thread post =
|
|||
, Client.board_post_id = (JSONPosts.no post)
|
||||
}
|
||||
|
||||
|
||||
-- Convert Post to DbPost
|
||||
apiPostToArchivePost :: Int -> Threads.Thread -> JSONPosts.Post -> Posts.Post
|
||||
apiPostToArchivePost local_idx thread post =
|
||||
|
@ -228,6 +226,7 @@ apiPostToArchivePost local_idx thread post =
|
|||
, Posts.local_idx = local_idx
|
||||
}
|
||||
|
||||
|
||||
-- | A version of 'concatMap' that works with a monadic predicate.
|
||||
-- Stolen from package extra Control.Monad.Extra
|
||||
concatMapM :: Monad m => (a -> m [b]) -> [a] -> m [b]
|
||||
|
@ -301,27 +300,9 @@ phash_mimetypes = Set.fromList
|
|||
]
|
||||
|
||||
|
||||
copyFiles :: JSONSettings -> (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO ()
|
||||
copyFiles settings (site, board, thread, _, path, attachment) = do
|
||||
destination_exists <- doesFileExist dest
|
||||
|
||||
if not destination_exists
|
||||
then do
|
||||
src_exists <- doesFileExist src
|
||||
|
||||
createDirectoryIfMissing True common_dest
|
||||
|
||||
if src_exists
|
||||
then putStrLn ("Copying " ++ src) >> copyFile src dest
|
||||
else return ()
|
||||
|
||||
thumb_exists <- doesFileExist thumb_src
|
||||
|
||||
if thumb_exists
|
||||
then putStrLn ("Copying " ++ thumb_src) >> copyFile thumb_src thumb_dest
|
||||
else return ()
|
||||
|
||||
else return ()
|
||||
copyOrMoveFiles :: JSONSettings -> FileGetters -> Details -> IO ()
|
||||
copyOrMoveFiles settings fgs (site, board, thread, _, path, attachment) = do
|
||||
(copyOrMove fgs) common_dest (src, dest) (thumb_src, thumb_dest)
|
||||
|
||||
-- src = (At.file_path | At.thumb_path)
|
||||
-- dest = <media_root>/<website_name>/<boardpart>/<board_thread_id>/<sha>.<ext>
|
||||
|
@ -351,8 +332,15 @@ copyFiles settings (site, board, thread, _, path, attachment) = do
|
|||
</> (show $ Threads.board_thread_id thread)
|
||||
|
||||
|
||||
processFiles :: JSONSettings -> [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)] -> IO ()
|
||||
processFiles settings tuples = do -- perfect just means that our posts have ids, they're already inserted into the db
|
||||
type Details = (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)
|
||||
|
||||
|
||||
processFiles
|
||||
:: JSONSettings
|
||||
-> FileGetters
|
||||
-> [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)]
|
||||
-> IO ()
|
||||
processFiles settings fgs tuples = do -- perfect just means that our posts have ids, they're already inserted into the db
|
||||
let ps = map (\(_, _, _, _, x) -> x) tuples
|
||||
|
||||
existing_attachments_result <- Client.getAttachments settings (map (fromJust . Posts.post_id) ps)
|
||||
|
@ -368,13 +356,13 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
|
|||
Map.empty
|
||||
existing_attachments
|
||||
|
||||
let attachments_on_board :: [(Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)] =
|
||||
let attachments_on_board :: [ Details ] =
|
||||
concatMap parseAttachments tuples
|
||||
-- attachments_on_board are the only files that can be copied into the archive dir right now
|
||||
-- since that's where we have the src filename. except here the Attachment doesn't have a sha hash yet
|
||||
-- so we can't build the destination filename.
|
||||
|
||||
let map_should_exist :: Map.Map (Int64, Text) [(Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)] =
|
||||
let map_should_exist :: Map.Map (Int64, Text) [ Details ] =
|
||||
foldl'
|
||||
(insertRecord (\(_, _, _, _, _, a) -> (At.post_id a, At.board_filename a)))
|
||||
Map.empty
|
||||
|
@ -387,7 +375,10 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
|
|||
|
||||
let to_insert = foldr (++) [] $ Map.elems to_insert_map
|
||||
|
||||
to_insert_exist <- filterM attachmentFileExists to_insert
|
||||
to_insert_ <- mapM ensureAttachmentExists to_insert
|
||||
|
||||
let to_insert_exist = catMaybes to_insert_
|
||||
-- to_insert_exist <- filterM attachmentFileExists to_insert
|
||||
|
||||
with_hashes <- mapM computeAttachmentHash to_insert_exist
|
||||
|
||||
|
@ -400,13 +391,15 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
|
|||
|
||||
Right saved -> do
|
||||
putStrLn $ "Saved " ++ (show $ length saved) ++ " attachments!"
|
||||
mapM_ (copyFiles settings) attachments_on_board
|
||||
mapM_ (copyOrMoveFiles settings fgs) attachments_on_board
|
||||
|
||||
where
|
||||
attachmentFileExists :: (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO Bool
|
||||
attachmentFileExists (_, _, _, _, p, _) = doesFileExist (At.file_path p)
|
||||
ensureAttachmentExists :: Details -> IO (Maybe Details)
|
||||
ensureAttachmentExists (a, b, c, d, p, f) =
|
||||
(attachmentPaths fgs) p >>=
|
||||
return . (maybe Nothing (\x -> Just (a, b, c, d, x, f)))
|
||||
|
||||
computeAttachmentHash :: (Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment) -> IO At.Attachment
|
||||
computeAttachmentHash :: Details -> IO At.Attachment
|
||||
computeAttachmentHash (_, _, _, _, p, q) = do
|
||||
let f = At.file_path p
|
||||
|
||||
|
@ -484,13 +477,12 @@ processFiles settings tuples = do -- perfect just means that our posts have ids,
|
|||
notDeleted :: (a, b, c, d, At.Paths, At.Attachment) -> Bool
|
||||
notDeleted (_, _, _, _, p, _) = not $ "deleted" `isSuffixOf` (At.file_path p)
|
||||
|
||||
|
||||
withPathPrefix :: Text -> FilePath
|
||||
withPathPrefix = ((<>) $ backup_read_root settings) . unpack
|
||||
withPathPrefix = (addPathPrefix fgs) . unpack
|
||||
|
||||
parseAttachments
|
||||
:: (Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post, Posts.Post)
|
||||
-> [(Sites.Site, Boards.Board, Threads.Thread, Posts.Post, At.Paths, At.Attachment)]
|
||||
-> [ Details ]
|
||||
parseAttachments (site, board, thread, p, q) = filter notDeleted $
|
||||
case JSONPosts.files p of
|
||||
Just files -> map
|
||||
|
@ -597,14 +589,46 @@ createNewPosts settings tuples = do
|
|||
|
||||
data FileGetters = FileGetters
|
||||
{ getJSONCatalog :: Sites.Site -> String -> IO (Either String [ Catalog ])
|
||||
, getJSONPosts :: Sites.Site -> String -> IO (Either String JSONPosts.PostWrapper)
|
||||
, addPathPrefix :: String -> String
|
||||
, attachmentPaths :: At.Paths -> IO (Maybe At.Paths)
|
||||
, copyOrMove :: String -> (String, String) -> (String, String) -> IO ()
|
||||
}
|
||||
|
||||
|
||||
localFileGetters :: JSONSettings -> FileGetters
|
||||
localFileGetters settings = FileGetters
|
||||
{ getJSONCatalog = const $ parseJSONCatalog . (backup_read_root settings </>)
|
||||
{ getJSONCatalog = const $ parseJSONCatalog . withRoot
|
||||
, getJSONPosts = const $ parsePosts . withRoot
|
||||
, addPathPrefix = ((++) $ backup_read_root settings)
|
||||
, attachmentPaths = \p -> do
|
||||
exists <- doesFileExist (At.file_path p)
|
||||
if exists then return (Just p) else return Nothing
|
||||
, copyOrMove = \common_dest (src, dest) (thumb_src, thumb_dest) -> do
|
||||
destination_exists <- doesFileExist dest
|
||||
|
||||
if not destination_exists
|
||||
then do
|
||||
src_exists <- doesFileExist src
|
||||
|
||||
createDirectoryIfMissing True common_dest
|
||||
|
||||
if src_exists
|
||||
then putStrLn ("Copying " ++ src) >> copyFile src dest
|
||||
else return ()
|
||||
|
||||
thumb_exists <- doesFileExist thumb_src
|
||||
|
||||
if thumb_exists
|
||||
then putStrLn ("Copying " ++ thumb_src) >> copyFile thumb_src thumb_dest
|
||||
else return ()
|
||||
|
||||
else return ()
|
||||
}
|
||||
|
||||
where
|
||||
withRoot = (backup_read_root settings </>)
|
||||
|
||||
|
||||
processBoard :: JSONSettings -> FileGetters -> Sites.Site -> Boards.Board -> IO ()
|
||||
processBoard settings fgs@FileGetters {..} site board = do
|
||||
|
@ -619,8 +643,7 @@ processBoard settings fgs@FileGetters {..} site board = do
|
|||
|
||||
all_threads_for_board :: [ Threads.Thread ] <- ensureThreads settings board threads_on_board
|
||||
|
||||
all_posts_on_board :: [(Threads.Thread, [ JSONPosts.Post ])] <- mapM (readPosts settings fgs board) all_threads_for_board
|
||||
|
||||
all_posts_on_board :: [(Threads.Thread, [ JSONPosts.Post ])] <- mapM (readPosts fgs site board) all_threads_for_board
|
||||
|
||||
let tuples :: [(Sites.Site, Boards.Board, Threads.Thread, JSONPosts.Post)] = concatMap
|
||||
(\(t, posts) -> map (\p -> (site, board, t, p)) posts)
|
||||
|
@ -635,7 +658,7 @@ processBoard settings fgs@FileGetters {..} site board = do
|
|||
|
||||
let perfect_post_pairs = addPostsToTuples tuples posts_result
|
||||
|
||||
processFiles settings perfect_post_pairs
|
||||
processFiles settings fgs perfect_post_pairs
|
||||
|
||||
Left errMsg ->
|
||||
putStrLn $ "Failed to parse the JSON file in directory: "
|
||||
|
|
23
src/Main.hs
23
src/Main.hs
|
@ -8,6 +8,8 @@ import System.Console.CmdArgs (cmdArgs, Data, Typeable)
|
|||
import Data.Aeson (decode)
|
||||
import System.FilePath ((</>))
|
||||
import Control.Concurrent.Async (mapConcurrently)
|
||||
import Data.Aeson (FromJSON)
|
||||
import System.Directory (createDirectoryIfMissing, renameFile)
|
||||
|
||||
import qualified SitesType as Sites
|
||||
import Common.Server.ConsumerSettings
|
||||
|
@ -16,8 +18,8 @@ import Lib
|
|||
( processBoards
|
||||
, FileGetters (..)
|
||||
)
|
||||
import JSONParsing (Catalog)
|
||||
import qualified Network.DataClient as Client
|
||||
import qualified Common.AttachmentType as At
|
||||
|
||||
newtype CliArgs = CliArgs
|
||||
{ settingsFile :: String
|
||||
|
@ -53,11 +55,26 @@ getSettings = do
|
|||
Just settings -> return settings
|
||||
|
||||
httpFileGetters :: JSONSettings -> FileGetters
|
||||
httpFileGetters _ = FileGetters
|
||||
httpFileGetters settings = FileGetters
|
||||
{ getJSONCatalog = httpGetJSON
|
||||
, getJSONPosts = httpGetJSON
|
||||
, addPathPrefix = ((++) $ site_url settings)
|
||||
-- attachmentPaths here actually doesn't get the paths of the attachment,
|
||||
-- it downloads them into a temporary file and gets that path of that.
|
||||
, attachmentPaths = \paths -> do
|
||||
filepath <- Client.getFile (At.file_path paths)
|
||||
thumbpath <- Client.getFile (At.thumbnail_path paths)
|
||||
|
||||
return $ filepath >>= \fp ->
|
||||
thumbpath >>= \tp ->
|
||||
return (At.Paths fp tp)
|
||||
, copyOrMove = \common_dest (src, dest) (thumb_src, thumb_dest) -> do
|
||||
createDirectoryIfMissing True common_dest
|
||||
renameFile src dest
|
||||
renameFile thumb_src thumb_dest
|
||||
}
|
||||
|
||||
httpGetJSON :: Sites.Site -> String -> IO (Either String [Catalog])
|
||||
httpGetJSON :: (FromJSON a) => Sites.Site -> String -> IO (Either String a)
|
||||
httpGetJSON site path = (Client.getJSON $ Sites.url site </> path)
|
||||
>>= getErrMsg
|
||||
where
|
||||
|
|
|
@ -18,6 +18,7 @@ module Network.DataClient
|
|||
, getAttachments
|
||||
, postAttachments
|
||||
, getJSON
|
||||
, getFile
|
||||
) where
|
||||
|
||||
import Control.Monad (forM)
|
||||
|
@ -37,6 +38,7 @@ import Data.Aeson
|
|||
, Value
|
||||
)
|
||||
import GHC.Generics
|
||||
import System.IO.Temp (openBinaryTempFile, getCanonicalTemporaryDirectory)
|
||||
|
||||
import qualified Common.Server.JSONSettings as T
|
||||
import qualified SitesType as Sites
|
||||
|
@ -220,3 +222,19 @@ eitherDecodeResponse (Right bs) =
|
|||
|
||||
getJSON :: (FromJSON a) => String -> IO (Either HttpError a)
|
||||
getJSON url = get_ url [] >>= return . eitherDecodeResponse
|
||||
|
||||
|
||||
getFile :: String -> IO (Maybe String)
|
||||
getFile url = do
|
||||
result <- get_ url []
|
||||
|
||||
case result of
|
||||
Left (err :: HttpError) -> do
|
||||
putStrLn $ show err
|
||||
return Nothing
|
||||
Right lbs -> do
|
||||
tmp_root <- getCanonicalTemporaryDirectory
|
||||
(tmp_filepath, tmp_filehandle) <- openBinaryTempFile tmp_root "chan.attachment"
|
||||
LBS.writeFile tmp_filepath lbs
|
||||
LBS.hPut tmp_filehandle lbs
|
||||
return $ Just tmp_filepath
|
||||
|
|
Loading…
Reference in New Issue