WIP: Save Attachments

- nothing works well but the code to compute sha256 and perceptual hash
  on files is there...
This commit is contained in:
towards-a-new-leftypol 2024-01-20 21:04:13 -05:00
parent 576db35727
commit 29c8f4eedb
11 changed files with 185 additions and 11 deletions

View File

@ -77,6 +77,7 @@ executable chan-delorean
Common.PostsType Common.PostsType
AttachmentType AttachmentType
Hash Hash
Data.WordUtil
-- LANGUAGE extensions used by modules in this package. -- LANGUAGE extensions used by modules in this package.
-- other-extensions: -- other-extensions:
@ -90,13 +91,15 @@ executable chan-delorean
filepath, filepath,
containers, containers,
text, text,
http-client,
http-conduit, http-conduit,
safe-exceptions, safe-exceptions,
http-types, http-types,
time, time,
cryptonite, cryptonite,
memory, memory,
mime-types mime-types,
perceptual-hash
-- Directories containing source files. -- Directories containing source files.
hs-source-dirs: src hs-source-dirs: src

17
nix-support/hip.nix Normal file
View File

@ -0,0 +1,17 @@
{ nixpkgs ? import <nixpkgs> {} }:
let
haskellPackages = nixpkgs.haskellPackages.override {
overrides = self: super: {
repa = import ./repa.nix { inherit nixpkgs; };
};
};
hip = haskellPackages.callCabal2nix "hip" (nixpkgs.fetchFromGitHub {
owner = "lehins";
repo = "hip";
rev = "9f1111ea8e3f6d284404074cb6ac3e2ff164f0fe";
sha256 = "sha256-/GhspMhU5MjFu7B6AU4tkaqqNGpR3daoOJpuatHemlM=";
}) {};
in
hip

View File

@ -0,0 +1,13 @@
{ nixpkgs ? import <nixpkgs> {} }:
let
haskellPackages = nixpkgs.haskellPackages;
http-client = haskellPackages.callCabal2nix "http-client" (nixpkgs.fetchFromGitHub {
owner = "towards-a-new-leftypol";
repo = "http-client";
rev = "7900de1687fe1ec244abf9801009d27a619a3ba0";
sha256 = "";
} + "/http-client") {};
in
http-client

View File

@ -0,0 +1,35 @@
{ nixpkgs ? import <nixpkgs> {} }:
let
haskellPackages = nixpkgs.haskellPackages;
src = nixpkgs.fetchFromGitHub {
owner = "towards-a-new-leftypol";
repo = "http-client";
rev = "a32d92fff9171a8beb948b430e274a1667b3ca35";
sha256 = "sha256-LfBTsB2fHZ1z+wvt3mowowXL+Ta7jesj+L3Y8NqSmfI=";
};
http-client = haskellPackages.callCabal2nix "http-client" (src + "/http-client") {};
conduitPackages = nixpkgs.haskellPackages.override {
overrides = self: super: {
http-client = http-client;
};
};
http-conduit = conduitPackages.callCabal2nix "http-conduit" (src + "/http-conduit") {};
env = http-client.env.overrideAttrs (oldAttrs: {
buildInputs = oldAttrs.buildInputs ++ [
haskellPackages.cabal-install
];
});
in
{
http-conduit = http-conduit;
http-client = http-client;
env = env;
}

View File

@ -0,0 +1,17 @@
{ nixpkgs ? import <nixpkgs> {} }:
let
haskellPackages = nixpkgs.haskellPackages.override {
overrides = self: super: {
hip = import ./hip.nix { inherit nixpkgs; };
};
};
perceptual-hash = haskellPackages.callCabal2nix "perceptual-hash" (nixpkgs.fetchFromGitHub {
owner = "vmchale";
repo = "phash";
rev = "90fa59bec93b6adcad614d37033c1f2e059b6e46";
sha256 = "sha256-CysvgcgzIuvKFEwNnCYHB6MXGzQnBQN7Nuo6HADuavk=";
}) {};
in
perceptual-hash

View File

@ -0,0 +1,14 @@
{ nixpkgs ? import <nixpkgs> {} }:
let
haskellPackages = nixpkgs.haskellPackages;
repa = haskellPackages.callCabal2nix "repa" ./repa/repa {};
env = repa.env.overrideAttrs (oldAttrs: {
buildInputs = oldAttrs.buildInputs ++ [
haskellPackages.cabal-install
];
});
in
if nixpkgs.lib.inNixShell then env else repa

13
nix-support/repa.nix Normal file
View File

@ -0,0 +1,13 @@
{ nixpkgs ? import <nixpkgs> {} }:
let
haskellPackages = nixpkgs.haskellPackages;
repa = haskellPackages.callCabal2nix "repa" (nixpkgs.fetchFromGitHub {
owner = "towards-a-new-leftypol";
repo = "repa";
rev = "8d30d271d0dc87f39fdc8dd6d330a9c59174892e";
sha256 = "sha256-ECx7YhOFHQDy+/qlMCcM+EHud/r/3jVTPeWXVLUOawE=";
} + "/repa") {};
in
repa

View File

@ -3,9 +3,13 @@ let
inherit (nixpkgs) pkgs; inherit (nixpkgs) pkgs;
perceptual-hash = import ./nix-support/perceptual-hash.nix { inherit nixpkgs; };
http-conduit = import ./nix-support/http-conduit.nix { inherit nixpkgs; };
f = { mkDerivation, base, stdenv, cabal-install, f = { mkDerivation, base, stdenv, cabal-install,
aeson, safe-exceptions, bytestring, cmdargs, aeson, safe-exceptions, bytestring, cmdargs,
http-conduit, cryptonite, memory, mime-types http-conduit, cryptonite, memory, mime-types,
perceptual-hash
}: }:
mkDerivation { mkDerivation {
pname = "chan-delorean"; pname = "chan-delorean";
@ -15,7 +19,7 @@ let
isExecutable = true; isExecutable = true;
executableHaskellDepends = [ executableHaskellDepends = [
base safe-exceptions aeson bytestring cmdargs http-conduit base safe-exceptions aeson bytestring cmdargs http-conduit
cryptonite memory mime-types cryptonite memory mime-types perceptual-hash
]; ];
testHaskellDepends = [ cabal-install ]; testHaskellDepends = [ cabal-install ];
license = "unknown"; license = "unknown";
@ -27,7 +31,10 @@ let
variant = if doBenchmark then pkgs.haskell.lib.doBenchmark else pkgs.lib.id; variant = if doBenchmark then pkgs.haskell.lib.doBenchmark else pkgs.lib.id;
drv = variant (haskellPackages.callPackage f {}); drv = variant (haskellPackages.callPackage f {
perceptual-hash = perceptual-hash;
http-conduit = http-conduit.http-conduit;
});
enhancedDrv = if pkgs.lib.inNixShell enhancedDrv = if pkgs.lib.inNixShell
then drv.env.overrideAttrs (oldAttrs: { then drv.env.overrideAttrs (oldAttrs: {

View File

@ -23,6 +23,7 @@ import Data.Maybe (fromJust)
import Data.Text (Text, unpack) import Data.Text (Text, unpack)
import Data.Text.Encoding (decodeUtf8) import Data.Text.Encoding (decodeUtf8)
import Network.Mime (defaultMimeLookup, defaultMimeType) import Network.Mime (defaultMimeLookup, defaultMimeType)
import PerceptualHash (fileHash)
import JSONParsing import JSONParsing
import JSONSettings import JSONSettings
@ -35,6 +36,7 @@ import qualified ThreadType as Threads
import qualified AttachmentType as At import qualified AttachmentType as At
import qualified Common.PostsType as Posts import qualified Common.PostsType as Posts
import qualified Hash as Hash import qualified Hash as Hash
import qualified Data.WordUtil as Words
newtype SettingsCLI = SettingsCLI newtype SettingsCLI = SettingsCLI
{ jsonFile :: FilePath { jsonFile :: FilePath
@ -261,7 +263,7 @@ processFiles settings post_pairs = do -- perfect just means that our posts have
case existing_attachments_result of case existing_attachments_result of
Left err -> do Left err -> do
putStrLn $ "Error fetching boards: " ++ show err putStrLn $ "Error fetching attachments: " ++ show err
exitFailure exitFailure
Right existing_attachments -> do Right existing_attachments -> do
let map_existing :: Map.Map Int64 [ At.Attachment ] = let map_existing :: Map.Map Int64 [ At.Attachment ] =
@ -290,6 +292,8 @@ processFiles settings post_pairs = do -- perfect just means that our posts have
let to_insert_ = filter ((`Set.notMember` existing_hashes) . At.sha256_hash) have_hash let to_insert_ = filter ((`Set.notMember` existing_hashes) . At.sha256_hash) have_hash
_ <- Client.postAttachments settings to_insert_
-- TODO: Concat all values in to_insert_map and -- TODO: Concat all values in to_insert_map and
-- go ahead and compute sha256 and phashes on them. -- go ahead and compute sha256 and phashes on them.
-- ensure that the sha256 in to_insert_map is not in map_existing -- ensure that the sha256 in to_insert_map is not in map_existing
@ -300,12 +304,22 @@ processFiles settings post_pairs = do -- perfect just means that our posts have
computeAttachmentHash :: (At.Paths, At.Attachment) -> IO At.Attachment computeAttachmentHash :: (At.Paths, At.Attachment) -> IO At.Attachment
computeAttachmentHash (p, q) = do computeAttachmentHash (p, q) = do
let f = backup_read_root settings <> "/" <> unpack (At.file_path p) let f = backup_read_root settings <> "/" <> unpack (At.file_path p)
sha256_sum <- Hash.computeSHA256 f sha256_sum <- Hash.computeSHA256 f
return q { At.sha256_hash = sha256_sum } either_phash <- fileHash f
computeAttachmentPhash :: (At.Paths, At.Attachment) -> IO At.Attachment phash :: Int64 <- case either_phash of
computeAttachmentPhash = undefined Left err_str -> do
putStrLn $ "Failed to compute phash for file " ++ (unpack sha256_sum) ++ " " ++ f ++ " " ++ err_str
return (-1)
Right phash_w -> return $ Words.wordToSignedInt64 phash_w
return q
{ At.sha256_hash = sha256_sum
, At.phash = phash
}
parseLegacyPaths :: JSONPosts.Post -> Maybe At.Paths parseLegacyPaths :: JSONPosts.Post -> Maybe At.Paths
parseLegacyPaths post = do parseLegacyPaths post = do

18
src/Data/WordUtil.hs Normal file
View File

@ -0,0 +1,18 @@
-- These two functions were stolen from the module
-- Data.ProtoLens.Encoding.Bytes from the proto-lens library.
module Data.WordUtil
( signedInt64ToWord
, wordToSignedInt64
) where
import Data.Int (Int64)
import Data.Word (Word64)
import Data.Bits ((.&.), xor, shiftR, shiftL)
signedInt64ToWord :: Int64 -> Word64
signedInt64ToWord n = fromIntegral $ shiftL n 1 `xor` shiftR n 63
wordToSignedInt64 :: Word64 -> Int64
wordToSignedInt64 n
= fromIntegral (shiftR n 1) `xor` negate (fromIntegral $ n .&. 1)

View File

@ -14,10 +14,17 @@ module DataClient
, postThreads , postThreads
, postPosts , postPosts
, getAttachments , getAttachments
, postAttachments
) where ) where
import Data.Int (Int64) import Data.Int (Int64)
import Network.HTTP.Simple import Network.HTTP.Simple hiding (httpLbs)
import Network.HTTP.Client
( newManager
, managerSetMaxHeaderLength
, httpLbs
)
import Network.HTTP.Client.Conduit (defaultManagerSettings)
import qualified Data.ByteString.Lazy as LBS import qualified Data.ByteString.Lazy as LBS
import qualified Data.ByteString.Lazy.Char8 as LC8 import qualified Data.ByteString.Lazy.Char8 as LC8
import Network.HTTP.Types.Status (statusCode) import Network.HTTP.Types.Status (statusCode)
@ -66,7 +73,10 @@ get settings path = do
initReq <- parseRequest requestUrl initReq <- parseRequest requestUrl
let req = setRequestHeader "Authorization" [C8.pack $ "Bearer " ++ T.jwt settings] initReq let req = setRequestHeader "Authorization" [C8.pack $ "Bearer " ++ T.jwt settings] initReq
putStrLn $ "calling " ++ requestUrl putStrLn $ "calling " ++ requestUrl
handleHttp (httpLBS req)
let man_settings = managerSetMaxHeaderLength (16384 * 4) defaultManagerSettings
manager <- newManager man_settings
handleHttp (httpLbs req manager)
post post
@ -106,7 +116,9 @@ handleHttp action = do
in if 200 <= (statusCode $ getResponseStatus response) && (statusCode $ getResponseStatus response) < 300 in if 200 <= (statusCode $ getResponseStatus response) && (statusCode $ getResponseStatus response) < 300
then return $ Right responseBody then return $ Right responseBody
else return $ Left (StatusCodeError (statusCode $ getResponseStatus response) responseBody) else return $ Left (StatusCodeError (statusCode $ getResponseStatus response) responseBody)
Left e -> return $ Left $ HttpException e Left e -> do
putStrLn "Some nasty http exception must have occurred"
return $ Left $ HttpException e
getSiteBoards :: T.JSONSettings -> Int -> IO (Either HttpError [ Boards.Board ]) getSiteBoards :: T.JSONSettings -> Int -> IO (Either HttpError [ Boards.Board ])
@ -182,6 +194,17 @@ getAttachments settings post_ids =
hashes :: String = intercalate "," $ (map show post_ids) hashes :: String = intercalate "," $ (map show post_ids)
postAttachments
:: T.JSONSettings
-> [ Attachments.Attachment ]
-> IO (Either HttpError [ Attachments.Attachment ])
postAttachments settings attachments =
post settings "/attachments" payload True >>= return . eitherDecodeResponse
where
payload = encode attachments
postPosts postPosts
:: T.JSONSettings :: T.JSONSettings
-> [ Posts.Post ] -> [ Posts.Post ]