Parse catalog JSON from file

This commit is contained in:
towards-a-new-leftypol 2023-10-05 02:25:00 -04:00
parent d6ce32187b
commit 26c57a3bc8
5 changed files with 127 additions and 12 deletions

1
.gitignore vendored
View File

@ -1,4 +1,3 @@
.dbpassword
sql/spamnoticer_init.sql
todo
dist-newstyle/

View File

@ -65,7 +65,8 @@ executable chan-delorean
main-is: Backfill.hs
-- Modules included in this executable, other than Main.
-- other-modules:
other-modules:
JSONParsing
-- LANGUAGE extensions used by modules in this package.
-- other-extensions:
@ -75,7 +76,10 @@ executable chan-delorean
aeson,
bytestring,
cmdargs,
directory
directory,
filepath,
containers,
text
-- Directories containing source files.
hs-source-dirs: src

View File

@ -72,6 +72,9 @@ CREATE INDEX posts_body_search_idx ON posts USING GIN (body_search_index);
-- Single-column index on posts.thread_id.
-- NOTE(review): thread_id is also the leading column of
-- posts_thread_id_creation_time_idx below, so this index may be redundant;
-- confirm against the query plans before dropping either one.
CREATE INDEX posts_thread_id_idx ON posts (thread_id);
-- Single-column index on posts.board_post_id.
CREATE INDEX posts_board_post_id_idx ON posts (board_post_id);
-- Composite index: this is to optimize joins on thread_id and
-- filtering/sorting by creation_time in the 'posts' table.
CREATE INDEX posts_thread_id_creation_time_idx ON posts (thread_id, creation_time);
CREATE OR REPLACE FUNCTION update_post_body_search_index() RETURNS trigger AS $$
BEGIN
NEW.body_search_index := to_tsvector('english', NEW.body);

View File

@ -5,11 +5,15 @@
module Main where
import System.Exit
import Control.Monad (filterM)
import Data.Aeson (FromJSON, decode)
import qualified Data.ByteString.Lazy as B
import System.Console.CmdArgs
import GHC.Generics
import System.Directory (listDirectory)
import System.Directory (listDirectory, doesFileExist)
import System.FilePath ((</>))
import JSONParsing
data SettingsCLI = SettingsCLI
{ jsonFile :: FilePath
@ -29,10 +33,39 @@ settingsCLI = SettingsCLI
} &= summary "Backfill v0.0.1"
-- | Print every entry found directly inside the configured
-- @backup_read_root@ directory, one per line.
--
-- Entries are emitted with 'print', so each name appears as a quoted
-- Haskell string literal.
listBackupContents :: JSONSettings -> IO ()
listBackupContents settings = do
    entries <- listDirectory (backup_read_root settings)
    mapM_ print entries
-- | List the entries of @backup_read_root@ that contain a @catalog.json@
-- file.
--
-- The returned names are relative to @backup_read_root@ (exactly as
-- produced by 'listDirectory'), not full paths.
listCatalogDirectories :: JSONSettings -> IO [FilePath]
listCatalogDirectories settings =
    listDirectory root >>= filterM containsCatalog
  where
    root :: FilePath
    root = backup_read_root settings

    -- True when @root/entry/catalog.json@ exists as a regular file.
    containsCatalog :: FilePath -> IO Bool
    containsCatalog entry = doesFileExist (root </> entry </> "catalog.json")
-- | Walk the backup root described by the settings and dump the thread
-- numbers found in every catalog.
--
-- Steps, in order:
--
-- 1. Announce that the settings were read and print them.
-- 2. Collect the sub-directories that contain a @catalog.json@ and print
--    their names.
-- 3. For each such directory, parse its @catalog.json@ and print the 'no'
--    field of every thread, or print a failure message if parsing fails.
processBackupDirectory :: JSONSettings -> IO ()
processBackupDirectory settings = do
    putStrLn "JSON successfully read!"
    print settings -- print the decoded JSON settings
    catalogDirs <- listCatalogDirectories settings
    mapM_ print catalogDirs
    mapM_ processDir catalogDirs
  where
    backupDir :: FilePath
    backupDir = backup_read_root settings

    -- Parse one directory's catalog and report the outcome on stdout.
    processDir :: FilePath -> IO ()
    processDir dir = do
        let catalogPath = backupDir </> dir </> "catalog.json"
        putStrLn ("catalog file path: " ++ catalogPath)
        parseJSONFile catalogPath >>= either reportFailure printThreadNumbers
      where
        reportFailure errMsg =
            putStrLn
                ( "Failed to parse the JSON file in directory: "
                    ++ dir ++ ". Error: " ++ errMsg
                )

        printThreadNumbers catalogs =
            mapM_ (\catalog -> mapM_ (print . no) (threads catalog)) catalogs
main :: IO ()
main = do
@ -49,7 +82,4 @@ main = do
Nothing -> do
putStrLn "Error: Invalid JSON format."
exitFailure
Just settings -> do
putStrLn "JSON successfully read!"
print settings -- print the decoded JSON settings
listBackupContents settings
Just settings -> processBackupDirectory settings

79
src/JSONParsing.hs Normal file
View File

@ -0,0 +1,79 @@
-- LANGUAGE pragmas are file-header pragmas: GHC only honours them when they
-- appear before the @module@ keyword, so they must stay at the very top.
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}

-- | Data types mirroring the catalog JSON ('Catalog', 'Thread', 'File') and
-- 'parseJSONFile', which decodes a list of catalogs from a file on disk.
module JSONParsing
    ( Thread(..)
    , File(..)
    , Catalog(..)
    , parseJSONFile
    ) where
import Data.Aeson
import GHC.Generics
import qualified Data.ByteString.Lazy as B
import qualified Data.Text as T
import Data.Aeson.Types (typeMismatch)
-- | Wrapper for the @cyclical@ field of a 'Thread'. The field is accepted
-- either as a JSON number or as a JSON string containing a decimal integer.
newtype Cyclical = Cyclical Int deriving (Show, Generic)

-- | Decode a 'Cyclical' from a JSON number or a numeric JSON string.
--
-- * Numbers are handed to aeson's own 'Int' parser, which rejects
--   fractional values and values outside the 'Int' range. Using 'floor'
--   here would silently truncate fractions, wrap out-of-range values, and
--   could build an enormous intermediate 'Integer' for inputs with a huge
--   exponent such as @1e1000000000@.
-- * Strings must consist entirely of an 'Int' literal as understood by
--   'reads'; any trailing characters cause a failure.
-- * Every other JSON value is a type mismatch.
instance FromJSON Cyclical where
    parseJSON v@(Number _) = Cyclical <$> parseJSON v
    parseJSON (String s) =
        case reads (T.unpack s) :: [(Int, String)] of
            [(n, "")] -> return $ Cyclical n
            _         -> typeMismatch "Int or String containing Int" (String s)
    parseJSON invalid = typeMismatch "Int or String" invalid
-- | One entry of a catalog page's @threads@ array.
--
-- The 'FromJSON' instance is derived generically, so every record field is
-- read from the JSON key with the same name. Fields typed 'Maybe' may be
-- absent from the JSON object; all other fields are required.
data Thread = Thread
    { no             :: Int             -- ^ presumably the thread's post number — confirm
    , sub            :: Maybe String    -- ^ presumably the subject line — confirm
    , com            :: Maybe String    -- ^ presumably the comment/body text — confirm
    , name           :: Maybe String
    , capcode        :: Maybe String
    , time           :: Int             -- ^ presumably a Unix timestamp — confirm
    , omitted_posts  :: Maybe Int
    , omitted_images :: Maybe Int
    , replies        :: Maybe Int
    , images         :: Maybe Int
    , sticky         :: Maybe Int
    , locked         :: Maybe Int
    , cyclical       :: Maybe Cyclical  -- ^ accepted as a number or a numeric string
    , last_modified  :: Int
    , board          :: String
    , files          :: Maybe [File]
    , resto          :: Int
    , unique_ips     :: Maybe Int
    } deriving (Show, Generic)
-- | One attachment listed in a thread's @files@ array.
--
-- The 'FromJSON' instance is derived generically, so every record field is
-- read from the JSON key with the same name. Fields typed 'Maybe' may be
-- absent from the JSON object; all other fields are required.
--
-- NOTE: the 'id' selector has the same name as the Prelude's @id@. Any
-- module that imports this one unqualified and then mentions a bare @id@
-- gets an ambiguous-occurrence error; qualify or hide one of the two there.
data File = File
    { id         :: String
    , mime       :: String
    , ext        :: String
    , h          :: Maybe Int    -- ^ presumably height in pixels — confirm
    , w          :: Maybe Int    -- ^ presumably width in pixels — confirm
    , fsize      :: Int          -- ^ presumably size in bytes — confirm
    , filename   :: String
    , spoiler    :: Maybe Bool
    , md5        :: String
    , file_path  :: String
    , thumb_path :: String
    } deriving (Show, Generic)
-- | One page of a catalog: the threads shown on it plus the page number.
--
-- Both fields are required when decoding; they are read from the JSON keys
-- @threads@ and @page@ by the generically derived 'FromJSON' instance.
data Catalog = Catalog
    { threads :: [Thread]
    , page    :: Int
    } deriving (Show, Generic)
-- Generic decoders with aeson's default options: each record field is read
-- from the JSON key of the same name. This spells out what an empty
-- instance body would use as its default 'parseJSON'.

instance FromJSON Thread where
    parseJSON = genericParseJSON defaultOptions
--instance ToJSON Thread

instance FromJSON File where
    parseJSON = genericParseJSON defaultOptions
--instance ToJSON File

instance FromJSON Catalog where
    parseJSON = genericParseJSON defaultOptions
--instance ToJSON Catalog
-- | Read the file at @path@ and decode its contents as a JSON array of
-- 'Catalog' values.
--
-- Returns @Right catalogs@ on success, or @Left message@ carrying aeson's
-- error text when the contents are not valid JSON of the expected shape.
--
-- The file is read strictly via 'eitherDecodeFileStrict', so it is fully
-- consumed and its handle released before this action returns. A lazy
-- 'Data.ByteString.Lazy.readFile' paired with a lazily returned decode
-- would keep the handle open until the caller forces the result.
-- Failures to open or read the file are not turned into 'Left'; they
-- propagate as I/O exceptions from the underlying read.
parseJSONFile :: FilePath -> IO (Either String [Catalog])
parseJSONFile path = eitherDecodeFileStrict path