#!/usr/bin/env stack {- stack --resolver lts-13.15 runghc --package aeson --package aeson-pretty --package pandoc --package protolude --package servant-client --package turtle -} {-# LANGUAGE DeriveAnyClass #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE KindSignatures #-} {-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TypeOperators #-} import Protolude hiding (FilePath, die, (<.>), (<>)) import Turtle hiding (Text, stdin, text) import qualified Turtle as Turtle import Data.Aeson import Data.Aeson.Encode.Pretty (encodePretty) import Network.HTTP.Client hiding (Proxy) import Network.HTTP.Client.TLS import Servant.API import Servant.Client import qualified Data.Char as Char import qualified Data.Text as Text import qualified Text.Pandoc as Pandoc import qualified System.IO as SystemIO import qualified System.IO.Error as SystemIO (tryIOError) newtype JobAd = JobAd Text deriving (Eq, Show) simpleHash :: Text -> Text simpleHash txt = show $ Text.foldl rgen 0 txt where coprime = 7 :: Int prime = 999983 :: Int rgen i c = (Char.ord c + i) * coprime `mod` prime - 1 -- | FIXME: there must be a better way to do this jobAdText :: JobAd -> Text jobAdText (JobAd txt) = txt jobAdTitle :: JobAd -> Text jobAdTitle jobAd = atDef "" (Text.lines (jobAdText jobAd)) 1 jobAdFilename :: JobAd -> FilePath jobAdFilename jobAd = fromText $ trimedTitle <> "_" <> hash <> ".md" where makeReadable c | (Char.isAlphaNum c) = c makeReadable c | (Char.isSeparator c) = '_' makeReadable c | otherwise = '_' trimedTitle = Text.map makeReadable $ Text.take 30 $ jobAdTitle jobAd hash = simpleHash $ jobAdText jobAd -- -- news.ycombinator.com -- data Item = Item { id :: Int , text :: Maybe Text , kids :: Maybe [Int] } deriving (Show, Generic, FromJSON) title :: Item -> Text title item = fromMaybe "[No Title]" $ headMay $ Text.lines $ fromMaybe "" $ text item url :: Item -> Text url item = "https://news.ycombinator.com/item?id=" <> (show $ id item) fromItem :: Item -> IO JobAd fromItem item = do let title = url item let txt = fromMaybe "" $ text item body <- Pandoc.runIOorExplode $ Pandoc.readHtml Pandoc.def txt >>= Pandoc.writeMarkdown Pandoc.def return $ JobAd $ "<" <> title <> ">\n" <> body newtype JobStories = JobStories [Text] deriving (Show, Eq, ToJSON, FromJSON, Generic) type HackerNewsAPI = "item" :> Capture "id" Int :> ".json" :> Get '[ JSON] Item :<|> "jobstories.json" :> Get '[ JSON] JobStories hackerNewsAPI :: Proxy HackerNewsAPI hackerNewsAPI = Proxy hackerNewsURL :: BaseUrl hackerNewsURL = BaseUrl Https "hacker-news.firebaseio.com" 443 "/v0" getItem :: Int -> ClientM Item getJobStories :: ClientM JobStories getItem :<|> getJobStories = client hackerNewsAPI -- -- Decisions -- data Choice = Accept | Reject getChar :: IO (Maybe Text) getChar = do key <- SystemIO.tryIOError SystemIO.getChar case key of Left _ -> return Nothing Right k -> return $ Just (toS [k]) decide :: JobAd -> IO Choice decide jobAd = do shells "clear" mempty putStrLn $ jobAdText jobAd putStrLn $ promptMsg cmd <- (fromMaybe "x") <$> getChar case (parse cmd) of Just choice -> return choice Nothing -> decide jobAd where promptMsg = "--------\nReject (k), Accept (l), or Exit (Ctrl+c)?" :: Text parse "l" = Just Accept parse "k" = Just Reject parse _ = Nothing -- -- Main -- data Command = PullHackerNewsJobPosts (Maybe FilePath, Int) | Decide (Maybe FilePath, Maybe FilePath, Maybe FilePath) defaultLocation :: FilePath defaultLocation = "./news.ycombinator.com" defaultAcceptLocation :: FilePath defaultAcceptLocation = "./accepted" defaultRejectLocation :: FilePath defaultRejectLocation = "./rejected" parser :: Turtle.Parser Command parser = fmap PullHackerNewsJobPosts (Turtle.subcommand "pull" "download Hacker News job postings" ((,) <$> (optional $ optPath "dir" 'd' "location to save Jobs. env: JOBS_DIR") <*> (argInt "Item ID" "Who is Hiring item ID i.e. https://news.ycombinator.com/item?id=[Item ID]"))) <|> fmap Decide (Turtle.subcommand "decide" "browse through Jobs in current directory" ((,,) <$> (optional $ optPath "dir" 'd' "location containing Jobs. env: JOBS_DIR.") <*> (optional $ optPath "accepted" 'a' "move rejected Jobs to this location. env: JOBS_ACCEPT.") <*> (optional $ optPath "rejected" 'r' "move accepted Jobs to this location. env: JOBS_REJECT."))) cmdDecide :: (Maybe FilePath, Maybe FilePath, Maybe FilePath) -> IO () cmdDecide (jobsDir, accepted, rejected) -- | Set up IO to make INPUT processing smoother = do SystemIO.hSetBuffering stdin SystemIO.NoBuffering SystemIO.hSetEcho stdin False -- Apply env. variables envJobsDir <- need "JOBS_DIR" envAccept <- need "JOBS_ACCEPT" envReject <- need "JOBS_REJECT" let jobsDir' = fromMaybe defaultLocation (jobsDir <|> (fromText <$> envJobsDir)) let accepted' = fromMaybe defaultAcceptLocation (accepted <|> (fromText <$> envAccept)) let rejected' = fromMaybe defaultRejectLocation (rejected <|> (fromText <$> envReject)) -- Create directories if not there mktree accepted' mktree rejected' files <- listFiles jobsDir' sequence_ $ foreach files $ \file -> do jobAd <- JobAd <$> Turtle.readTextFile file decision <- decide jobAd case decision of Accept -> mv file (accepted' (filename file)) Reject -> mv file (rejected' (filename file)) where listFiles dir = Turtle.sort $ Turtle.find (ends ".md") dir cmdPullhackerNewsJobs :: (Maybe FilePath, Int) -> IO () cmdPullhackerNewsJobs (jobsDir, itemID) = do manager' <- newManager tlsManagerSettings let clientEnv = mkClientEnv manager' hackerNewsURL -- Apply env. variables envJobsDir <- need "JOBS_DIR" let jobsDir' = fromMaybe defaultLocation ((fromText <$> envJobsDir) <> jobsDir) -- Check against already seen files files <- Turtle.sort $ Turtle.findtree (Turtle.ends ".md") "." prevPulled <- for files (\file -> JobAd <$> Turtle.readTextFile file) -- Create directories if not there mktree jobsDir' flip runClientM clientEnv $ do jobAds <- (fromMaybe [] . kids) <$> getItem itemID for_ jobAds $ \itemID -> (do item <- getItem itemID jobAd <- liftIO $ fromItem item unless (elem jobAd prevPulled) $ liftIO $ writeTextFile (jobsDir' <> jobAdFilename jobAd) (jobAdText jobAd)) `catchError` print return () main :: IO () main = do x <- Turtle.options "Pull and browse Jobs posted on Hacker News" parser case x of PullHackerNewsJobPosts arg -> cmdPullhackerNewsJobs arg Decide arg -> cmdDecide arg