WIP: Save Attachments

- nothing works well but the code to compute sha256 and perceptual hash
  on files is there...
This commit is contained in:
towards-a-new-leftypol 2024-01-20 21:04:13 -05:00
parent 576db35727
commit 29c8f4eedb
11 changed files with 185 additions and 11 deletions

View File

@ -77,6 +77,7 @@ executable chan-delorean
Common.PostsType
AttachmentType
Hash
Data.WordUtil
-- LANGUAGE extensions used by modules in this package.
-- other-extensions:
@ -90,13 +91,15 @@ executable chan-delorean
filepath,
containers,
text,
http-client,
http-conduit,
safe-exceptions,
http-types,
time,
cryptonite,
memory,
mime-types
mime-types,
perceptual-hash
-- Directories containing source files.
hs-source-dirs: src

17
nix-support/hip.nix Normal file
View File

@ -0,0 +1,17 @@
{ nixpkgs ? import <nixpkgs> {} }:
let
haskellPackages = nixpkgs.haskellPackages.override {
overrides = self: super: {
repa = import ./repa.nix { inherit nixpkgs; };
};
};
hip = haskellPackages.callCabal2nix "hip" (nixpkgs.fetchFromGitHub {
owner = "lehins";
repo = "hip";
rev = "9f1111ea8e3f6d284404074cb6ac3e2ff164f0fe";
sha256 = "sha256-/GhspMhU5MjFu7B6AU4tkaqqNGpR3daoOJpuatHemlM=";
}) {};
in
hip

View File

@ -0,0 +1,13 @@
{ nixpkgs ? import <nixpkgs> {} }:
let
haskellPackages = nixpkgs.haskellPackages;
http-client = haskellPackages.callCabal2nix "http-client" (nixpkgs.fetchFromGitHub {
owner = "towards-a-new-leftypol";
repo = "http-client";
rev = "7900de1687fe1ec244abf9801009d27a619a3ba0";
sha256 = "";
} + "/http-client") {};
in
http-client

View File

@ -0,0 +1,35 @@
{ nixpkgs ? import <nixpkgs> {} }:
let
haskellPackages = nixpkgs.haskellPackages;
src = nixpkgs.fetchFromGitHub {
owner = "towards-a-new-leftypol";
repo = "http-client";
rev = "a32d92fff9171a8beb948b430e274a1667b3ca35";
sha256 = "sha256-LfBTsB2fHZ1z+wvt3mowowXL+Ta7jesj+L3Y8NqSmfI=";
};
http-client = haskellPackages.callCabal2nix "http-client" (src + "/http-client") {};
conduitPackages = nixpkgs.haskellPackages.override {
overrides = self: super: {
http-client = http-client;
};
};
http-conduit = conduitPackages.callCabal2nix "http-conduit" (src + "/http-conduit") {};
env = http-client.env.overrideAttrs (oldAttrs: {
buildInputs = oldAttrs.buildInputs ++ [
haskellPackages.cabal-install
];
});
in
{
http-conduit = http-conduit;
http-client = http-client;
env = env;
}

View File

@ -0,0 +1,17 @@
{ nixpkgs ? import <nixpkgs> {} }:
let
haskellPackages = nixpkgs.haskellPackages.override {
overrides = self: super: {
hip = import ./hip.nix { inherit nixpkgs; };
};
};
perceptual-hash = haskellPackages.callCabal2nix "perceptual-hash" (nixpkgs.fetchFromGitHub {
owner = "vmchale";
repo = "phash";
rev = "90fa59bec93b6adcad614d37033c1f2e059b6e46";
sha256 = "sha256-CysvgcgzIuvKFEwNnCYHB6MXGzQnBQN7Nuo6HADuavk=";
}) {};
in
perceptual-hash

View File

@ -0,0 +1,14 @@
{ nixpkgs ? import <nixpkgs> {} }:
let
haskellPackages = nixpkgs.haskellPackages;
repa = haskellPackages.callCabal2nix "repa" ./repa/repa {};
env = repa.env.overrideAttrs (oldAttrs: {
buildInputs = oldAttrs.buildInputs ++ [
haskellPackages.cabal-install
];
});
in
if nixpkgs.lib.inNixShell then env else repa

13
nix-support/repa.nix Normal file
View File

@ -0,0 +1,13 @@
{ nixpkgs ? import <nixpkgs> {} }:
let
haskellPackages = nixpkgs.haskellPackages;
repa = haskellPackages.callCabal2nix "repa" (nixpkgs.fetchFromGitHub {
owner = "towards-a-new-leftypol";
repo = "repa";
rev = "8d30d271d0dc87f39fdc8dd6d330a9c59174892e";
sha256 = "sha256-ECx7YhOFHQDy+/qlMCcM+EHud/r/3jVTPeWXVLUOawE=";
} + "/repa") {};
in
repa

View File

@ -3,9 +3,13 @@ let
inherit (nixpkgs) pkgs;
perceptual-hash = import ./nix-support/perceptual-hash.nix { inherit nixpkgs; };
http-conduit = import ./nix-support/http-conduit.nix { inherit nixpkgs; };
f = { mkDerivation, base, stdenv, cabal-install,
aeson, safe-exceptions, bytestring, cmdargs,
http-conduit, cryptonite, memory, mime-types
http-conduit, cryptonite, memory, mime-types,
perceptual-hash
}:
mkDerivation {
pname = "chan-delorean";
@ -15,7 +19,7 @@ let
isExecutable = true;
executableHaskellDepends = [
base safe-exceptions aeson bytestring cmdargs http-conduit
cryptonite memory mime-types
cryptonite memory mime-types perceptual-hash
];
testHaskellDepends = [ cabal-install ];
license = "unknown";
@ -27,7 +31,10 @@ let
variant = if doBenchmark then pkgs.haskell.lib.doBenchmark else pkgs.lib.id;
drv = variant (haskellPackages.callPackage f {});
drv = variant (haskellPackages.callPackage f {
perceptual-hash = perceptual-hash;
http-conduit = http-conduit.http-conduit;
});
enhancedDrv = if pkgs.lib.inNixShell
then drv.env.overrideAttrs (oldAttrs: {

View File

@ -23,6 +23,7 @@ import Data.Maybe (fromJust)
import Data.Text (Text, unpack)
import Data.Text.Encoding (decodeUtf8)
import Network.Mime (defaultMimeLookup, defaultMimeType)
import PerceptualHash (fileHash)
import JSONParsing
import JSONSettings
@ -35,6 +36,7 @@ import qualified ThreadType as Threads
import qualified AttachmentType as At
import qualified Common.PostsType as Posts
import qualified Hash as Hash
import qualified Data.WordUtil as Words
newtype SettingsCLI = SettingsCLI
{ jsonFile :: FilePath
@ -261,7 +263,7 @@ processFiles settings post_pairs = do -- perfect just means that our posts have
case existing_attachments_result of
Left err -> do
putStrLn $ "Error fetching boards: " ++ show err
putStrLn $ "Error fetching attachments: " ++ show err
exitFailure
Right existing_attachments -> do
let map_existing :: Map.Map Int64 [ At.Attachment ] =
@ -290,6 +292,8 @@ processFiles settings post_pairs = do -- perfect just means that our posts have
let to_insert_ = filter ((`Set.notMember` existing_hashes) . At.sha256_hash) have_hash
_ <- Client.postAttachments settings to_insert_
-- TODO: Concat all values in to_insert_map and
-- go ahead and compute sha256 and phashes on them.
-- ensure that the sha256 in to_insert_map is not in map_existing
@ -300,12 +304,22 @@ processFiles settings post_pairs = do -- perfect just means that our posts have
computeAttachmentHash :: (At.Paths, At.Attachment) -> IO At.Attachment
computeAttachmentHash (p, q) = do
let f = backup_read_root settings <> "/" <> unpack (At.file_path p)
sha256_sum <- Hash.computeSHA256 f
return q { At.sha256_hash = sha256_sum }
either_phash <- fileHash f
computeAttachmentPhash :: (At.Paths, At.Attachment) -> IO At.Attachment
computeAttachmentPhash = undefined
phash :: Int64 <- case either_phash of
Left err_str -> do
putStrLn $ "Failed to compute phash for file " ++ (unpack sha256_sum) ++ " " ++ f ++ " " ++ err_str
return (-1)
Right phash_w -> return $ Words.wordToSignedInt64 phash_w
return q
{ At.sha256_hash = sha256_sum
, At.phash = phash
}
parseLegacyPaths :: JSONPosts.Post -> Maybe At.Paths
parseLegacyPaths post = do

18
src/Data/WordUtil.hs Normal file
View File

@ -0,0 +1,18 @@
-- These two functions were stolen from the module
-- Data.ProtoLens.Encoding.Bytes from the proto-lens library.
module Data.WordUtil
( signedInt64ToWord
, wordToSignedInt64
) where
import Data.Int (Int64)
import Data.Word (Word64)
import Data.Bits ((.&.), xor, shiftR, shiftL)
signedInt64ToWord :: Int64 -> Word64
signedInt64ToWord n = fromIntegral $ shiftL n 1 `xor` shiftR n 63
wordToSignedInt64 :: Word64 -> Int64
wordToSignedInt64 n
= fromIntegral (shiftR n 1) `xor` negate (fromIntegral $ n .&. 1)

View File

@ -14,10 +14,17 @@ module DataClient
, postThreads
, postPosts
, getAttachments
, postAttachments
) where
import Data.Int (Int64)
import Network.HTTP.Simple
import Network.HTTP.Simple hiding (httpLbs)
import Network.HTTP.Client
( newManager
, managerSetMaxHeaderLength
, httpLbs
)
import Network.HTTP.Client.Conduit (defaultManagerSettings)
import qualified Data.ByteString.Lazy as LBS
import qualified Data.ByteString.Lazy.Char8 as LC8
import Network.HTTP.Types.Status (statusCode)
@ -66,7 +73,10 @@ get settings path = do
initReq <- parseRequest requestUrl
let req = setRequestHeader "Authorization" [C8.pack $ "Bearer " ++ T.jwt settings] initReq
putStrLn $ "calling " ++ requestUrl
handleHttp (httpLBS req)
let man_settings = managerSetMaxHeaderLength (16384 * 4) defaultManagerSettings
manager <- newManager man_settings
handleHttp (httpLbs req manager)
post
@ -106,7 +116,9 @@ handleHttp action = do
in if 200 <= (statusCode $ getResponseStatus response) && (statusCode $ getResponseStatus response) < 300
then return $ Right responseBody
else return $ Left (StatusCodeError (statusCode $ getResponseStatus response) responseBody)
Left e -> return $ Left $ HttpException e
Left e -> do
putStrLn "Some nasty http exception must have occurred"
return $ Left $ HttpException e
getSiteBoards :: T.JSONSettings -> Int -> IO (Either HttpError [ Boards.Board ])
@ -182,6 +194,17 @@ getAttachments settings post_ids =
hashes :: String = intercalate "," $ (map show post_ids)
postAttachments
:: T.JSONSettings
-> [ Attachments.Attachment ]
-> IO (Either HttpError [ Attachments.Attachment ])
postAttachments settings attachments =
post settings "/attachments" payload True >>= return . eitherDecodeResponse
where
payload = encode attachments
postPosts
:: T.JSONSettings
-> [ Posts.Post ]