diff options
-rw-r--r-- | README.md | 30 | ||||
-rwxr-xr-x | whoishiring.hs | 245 |
2 files changed, 275 insertions, 0 deletions
diff --git a/README.md b/README.md new file mode 100644 index 0000000..adce52d --- /dev/null +++ b/README.md @@ -0,0 +1,30 @@ +Who is Hiring? +-------------- + +Search and browse job posts from Hacker News 'Who is Hiring?' threads as +markdown files -offline-. + +'whoishiring.hs' downloads all root item posts on a Hacker News thread as +markdown files, allowing you to use -existing- filesystem tools to find the job +of your dreams. It also offers a Tinder-like option for browsing the markdown +files via the 'decide' subcommand. +``` +#./whoishiring.hs --help +Pull and browse Jobs posted on Hacker News + +Usage: whoishiring.hs (pull | decide) + +Available options: + -h,--help Show this help text + +Available commands: + pull download Hacker News job postings + decide browse through Jobs in current directory +``` + +Requirements +============ + + If stack is installed, it will pull all required dependencies. + + - [stack](https://docs.haskellstack.org/en/stable/README/) diff --git a/whoishiring.hs b/whoishiring.hs new file mode 100755 index 0000000..8cc0070 --- /dev/null +++ b/whoishiring.hs @@ -0,0 +1,245 @@ +#!/usr/bin/env stack +{- stack + --resolver lts-13.15 + runghc + --package aeson + --package aeson-pretty + --package pandoc + --package protolude + --package servant-client + --package turtle +-} +{-# LANGUAGE DeriveAnyClass #-} +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeOperators #-} + +import Protolude hiding (FilePath, die, (<.>), (<>)) +import Turtle hiding (Text, stdin, text) +import qualified Turtle as Turtle + +import Data.Aeson +import Data.Aeson.Encode.Pretty (encodePretty) +import Network.HTTP.Client hiding (Proxy) +import Network.HTTP.Client.TLS +import Servant.API +import Servant.Client + +import qualified Data.Char as Char +import qualified Data.Text as Text +import qualified Text.Pandoc as Pandoc + +import qualified System.IO as SystemIO +import qualified System.IO.Error as SystemIO (tryIOError) + +newtype JobAd = + JobAd Text + deriving (Eq, Show) + +simpleHash :: Text -> Text +simpleHash txt = show $ Text.foldl rgen 0 txt + where + coprime = 7 :: Int + prime = 999983 :: Int + rgen i c = (Char.ord c + i) * coprime `mod` prime - 1 + +-- | FIXME: there must be a better way to do this +jobAdText :: JobAd -> Text +jobAdText (JobAd txt) = txt + +jobAdTitle :: JobAd -> Text +jobAdTitle jobAd = atDef "" (Text.lines (jobAdText jobAd)) 1 + +jobAdFilename :: JobAd -> FilePath +jobAdFilename jobAd = fromText $ trimedTitle <> "_" <> hash <> ".md" + where + makeReadable c + | (Char.isAlphaNum c) = c + makeReadable c + | (Char.isSeparator c) = '_' + makeReadable c + | otherwise = '_' + trimedTitle = Text.map makeReadable $ Text.take 30 $ jobAdTitle jobAd + hash = simpleHash $ jobAdText jobAd + +-- +-- news.ycombinator.com +-- +data Item = Item + { id :: Int + , text :: Maybe Text + , kids :: Maybe [Int] + } deriving (Show, Generic, FromJSON) + +title :: Item -> Text +title item = + fromMaybe "[No Title]" $ headMay $ Text.lines $ fromMaybe "" $ text item + +url :: Item -> Text +url item = "https://news.ycombinator.com/item?id=" <> (show $ id item) + +fromItem :: Item -> IO JobAd +fromItem item = do + let title = url item + let txt = fromMaybe "" $ text item + body <- + Pandoc.runIOorExplode $ + Pandoc.readHtml Pandoc.def txt >>= Pandoc.writeMarkdown Pandoc.def + return $ JobAd $ "<" <> title <> ">\n" <> body + +newtype JobStories = + JobStories [Text] + deriving (Show, Eq, ToJSON, FromJSON, Generic) + +type HackerNewsAPI + = "item" :> Capture "id" Int :> ".json" :> Get '[ JSON] Item :<|> "jobstories.json" :> Get '[ JSON] JobStories + +hackerNewsAPI :: Proxy HackerNewsAPI +hackerNewsAPI = Proxy + +hackerNewsURL :: BaseUrl +hackerNewsURL = BaseUrl Https "hacker-news.firebaseio.com" 443 "/v0" + +getItem :: Int -> ClientM Item +getJobStories :: ClientM JobStories +getItem :<|> getJobStories = client hackerNewsAPI + +-- +-- Decisions +-- +data Choice + = Accept + | Reject + +getChar :: IO (Maybe Text) +getChar = do + key <- SystemIO.tryIOError SystemIO.getChar + case key of + Left _ -> return Nothing + Right k -> return $ Just (toS [k]) + +decide :: JobAd -> IO Choice +decide jobAd = do + shells "clear" mempty + putStrLn $ jobAdText jobAd + putStrLn $ promptMsg + cmd <- (fromMaybe "x") <$> getChar + case (parse cmd) of + Just choice -> return choice + Nothing -> decide jobAd + where + promptMsg = "--------\nReject (k), Accept (l), or Exit (Ctrl+c)?" :: Text + parse "l" = Just Accept + parse "k" = Just Reject + parse _ = Nothing + +-- +-- Main +-- +data Command + = PullHackerNewsJobPosts (Maybe FilePath, Int) + | Decide (Maybe FilePath, Maybe FilePath, Maybe FilePath) + +defaultLocation :: FilePath +defaultLocation = "./news.ycombinator.com" + +defaultAcceptLocation :: FilePath +defaultAcceptLocation = "./accepted" + +defaultRejectLocation :: FilePath +defaultRejectLocation = "./rejected" + +parser :: Turtle.Parser Command +parser = + fmap + PullHackerNewsJobPosts + (Turtle.subcommand + "pull" + "download Hacker News job postings" + ((,) <$> + (optional $ optPath "dir" 'd' "location to save Jobs. env: JOBS_DIR") <*> + (argInt + "Item ID" + "Who is Hiring item ID i.e. https://news.ycombinator.com/item?id=[Item ID]"))) <|> + fmap + Decide + (Turtle.subcommand + "decide" + "browse through Jobs in current directory" + ((,,) <$> + (optional $ optPath "dir" 'd' "location containing Jobs. env: JOBS_DIR.") <*> + (optional $ + optPath + "accepted" + 'a' + "move rejected Jobs to this location. env: JOBS_ACCEPT.") <*> + (optional $ + optPath + "rejected" + 'r' + "move accepted Jobs to this location. env: JOBS_REJECT."))) + +cmdDecide :: (Maybe FilePath, Maybe FilePath, Maybe FilePath) -> IO () +cmdDecide (jobsDir, accepted, rejected) + -- | Set up IO to make INPUT processing smoother + = do + SystemIO.hSetBuffering stdin SystemIO.NoBuffering + SystemIO.hSetEcho stdin False + -- Apply env. variables + envJobsDir <- need "JOBS_DIR" + envAccept <- need "JOBS_ACCEPT" + envReject <- need "JOBS_REJECT" + let jobsDir' = + fromMaybe defaultLocation (jobsDir <|> (fromText <$> envJobsDir)) + let accepted' = + fromMaybe defaultAcceptLocation (accepted <|> (fromText <$> envAccept)) + let rejected' = + fromMaybe defaultRejectLocation (rejected <|> (fromText <$> envReject)) + -- Create directories if not there + mktree accepted' + mktree rejected' + files <- listFiles jobsDir' + sequence_ $ + foreach files $ \file -> do + jobAd <- JobAd <$> Turtle.readTextFile file + decision <- decide jobAd + case decision of + Accept -> mv file (accepted' </> (filename file)) + Reject -> mv file (rejected' </> (filename file)) + where + listFiles dir = Turtle.sort $ Turtle.find (ends ".md") dir + +cmdPullhackerNewsJobs :: (Maybe FilePath, Int) -> IO () +cmdPullhackerNewsJobs (jobsDir, itemID) = do + manager' <- newManager tlsManagerSettings + let clientEnv = mkClientEnv manager' hackerNewsURL + -- Apply env. variables + envJobsDir <- need "JOBS_DIR" + let jobsDir' = + fromMaybe defaultLocation ((fromText <$> envJobsDir) <> jobsDir) + -- Check against already seen files + files <- Turtle.sort $ Turtle.findtree (Turtle.ends ".md") "." + prevPulled <- for files (\file -> JobAd <$> Turtle.readTextFile file) + -- Create directories if not there + mktree jobsDir' + flip runClientM clientEnv $ do + jobAds <- (fromMaybe [] . kids) <$> getItem itemID + for_ jobAds $ \itemID -> + (do item <- getItem itemID + jobAd <- liftIO $ fromItem item + unless (elem jobAd prevPulled) $ + liftIO $ + writeTextFile (jobsDir' <> jobAdFilename jobAd) (jobAdText jobAd)) `catchError` + print + return () + +main :: IO () +main = do + x <- Turtle.options "Pull and browse Jobs posted on Hacker News" parser + case x of + PullHackerNewsJobPosts arg -> cmdPullhackerNewsJobs arg + Decide arg -> cmdDecide arg |