From 04abd715826c3f59d02ab36d7bb1122a863517ce Mon Sep 17 00:00:00 2001 From: towards-a-new-leftypol Date: Tue, 16 Apr 2024 23:49:33 -0400 Subject: [PATCH] Implement a simple priority queue mechanism - pseudo-random index generator to that favours priority indices --- chan-delorean.cabal | 5 ++- src/Backfill.hs | 11 ----- src/Main.hs | 1 + src/PriorityQueue.hs | 101 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 106 insertions(+), 12 deletions(-) create mode 100644 src/PriorityQueue.hs diff --git a/chan-delorean.cabal b/chan-delorean.cabal index 261d823..27516b6 100644 --- a/chan-delorean.cabal +++ b/chan-delorean.cabal @@ -136,6 +136,7 @@ executable chan-delorean-consoomer Network.DataClientTypes Common.Server.ConsumerSettings Common.Server.JSONSettings + PriorityQueue -- LANGUAGE extensions used by modules in this package. -- other-extensions: @@ -159,7 +160,9 @@ executable chan-delorean-consoomer mime-types, perceptual-hash, async, - temporary + temporary, + stm, + random -- Directories containing source files. hs-source-dirs: src diff --git a/src/Backfill.hs b/src/Backfill.hs index f39e4b7..de51eb9 100644 --- a/src/Backfill.hs +++ b/src/Backfill.hs @@ -8,17 +8,6 @@ import System.Console.CmdArgs import Common.Server.JSONSettings import Lib --- TODO: detect saged threads by reading the bump time from the thread and comparing --- that time to the timestamp of the most recent post. If the post is newer --- - then the thread is being saged. Reasons it can be saged: --- - it's saged by a mod --- - the post has sage in the email field --- - the thread is full. --- --- Better to support all those flags via the api: saged, locked, cyclical?, sticky --- - deleted could be there too --- - edited could be there too - main :: IO () main = do settingsValue <- cmdArgs $ SettingsCLI "backfill_settings.json" diff --git a/src/Main.hs b/src/Main.hs index 0db5d42..bcaba2f 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -83,6 +83,7 @@ httpFileGetters settings = FileGetters putStrLn $ "Copy Or Move (Move) src: " ++ src ++ " dest: " ++ dest createDirectoryIfMissing True common_dest moveFile src dest + case m_thumb_src of Nothing -> return () Just thumb_src -> moveFile thumb_src thumb_dest diff --git a/src/PriorityQueue.hs b/src/PriorityQueue.hs new file mode 100644 index 0000000..2c7cced --- /dev/null +++ b/src/PriorityQueue.hs @@ -0,0 +1,101 @@ +{- + The purpose of this module is to provide a way to store things + in order (for example as most recent), take the nth item in + the order out, and put a new item in. In O(log n) time. + + There is also a function to choose a random integer, but favours lower ones. + Together, this lets us process data in a safe way asynchronously, + where if we keep choosing a skewed random item to process all of the items + eventually get processed. No two threads will process the same thing at the + same time, and more popular things get processed more often. +-} + +module PriorityQueue + ( Elem (..) + , Queue + , take + , put + , selectSkewedIndex + ) +where + +import Prelude hiding (splitAt, take) +import Data.Set hiding (take, foldr, map) +import Data.Ord (comparing) +import System.Random (StdGen, getStdGen, randomR) +import Data.List (sort, group) + + +data Elem a = Elem + { priority :: Int + , element :: a + } + +instance Ord (Elem a) where + compare = comparing priority + + +instance Eq (Elem a) where + (==) x y = priority x == priority y + + +type Queue a = Set (Elem a) + + +take :: Int -> Queue a -> (Elem a, Queue a) +take n set = + let (_, greater) = splitAt (size set - n - 1) set + elem = findMin greater + in (elem, delete elem set) + + +put :: Elem a -> Queue a -> Queue a +put = insert + + +-- Simplified function to generate a number linearly skewed towards the start of the range +linearSkewRandom :: Double -> Double -> StdGen -> (Double, StdGen) +linearSkewRandom min max rng = + let (u, rng') = randomR (0.0, 1.0) rng + -- skewedValue = min + (u ** 2) * (max - min) + skewedValue = (min - 0.5) + (u ** 2) * (max - min + 0.5) + -- skewedValue = (min - 0.5) + (1 - sqrt u) * (max - min + 0.5) + -- skewedValue = min + (1 - sqrt u) * (max - min) + in (skewedValue, rng') + + +-- Function to select an index from 0 to n-1 with a linear skew towards lower numbers +selectSkewedIndex :: Int -> StdGen -> (Int, StdGen) +selectSkewedIndex n rng = + let max = fromIntegral (n - 1) + (randValue, newRng) = linearSkewRandom 0 max rng + in (ceiling randValue, newRng) + + +main :: IO () +main = do + stdGen <- getStdGen + --putStrLn "Hello World" + + -- let i = fst $ selectSkewedIndex (size q) stdGen + -- let x = fst $ take i q + -- print (i, priority x) + + let rs = foldr f ([], stdGen) [1..100000] + mapM_ pf $ countOccurrences $ fst rs + + where + pf :: (Show a, Show b) => (a, b) -> IO () + pf (a, b) = putStrLn $ (show a) ++ "," ++ (show b) + + f _ (xs, gen) = + let (x, newgen) = selectSkewedIndex (size q) gen + in (x:xs, newgen) + + q :: Queue Int + q = fromList [ Elem i undefined | i <- [1..100] ] + +countOccurrences :: (Eq a, Ord a) => [a] -> [(a, Int)] +countOccurrences rolls = map (\x -> (head x, length x)) . group . sort $ rolls + +