aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md30
-rwxr-xr-xwhoishiring.hs245
2 files changed, 275 insertions, 0 deletions
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..adce52d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,30 @@
+Who is Hiring?
+--------------
+
+Search and browse job posts from Hacker News 'Who is Hiring?' threads as
+markdown files -offline-.
+
+'whoishiring.hs' downloads all root item posts on a Hacker News thread as
+markdown files, allowing you to use -existing- filesystem tools to find the job
+of your dreams. It also offers a Tinder-like option for browsing the markdown
+files via the 'decide' subcommand.
+```
+#./whoishiring.hs --help
+Pull and browse Jobs posted on Hacker News
+
+Usage: whoishiring.hs (pull | decide)
+
+Available options:
+ -h,--help Show this help text
+
+Available commands:
+ pull download Hacker News job postings
+ decide browse through Jobs in current directory
+```
+
+Requirements
+============
+
+ If stack is installed, it will pull all required dependencies.
+
+ - [stack](https://docs.haskellstack.org/en/stable/README/)
diff --git a/whoishiring.hs b/whoishiring.hs
new file mode 100755
index 0000000..8cc0070
--- /dev/null
+++ b/whoishiring.hs
@@ -0,0 +1,245 @@
+#!/usr/bin/env stack
+{- stack
+ --resolver lts-13.15
+ runghc
+ --package aeson
+ --package aeson-pretty
+ --package pandoc
+ --package protolude
+ --package servant-client
+ --package turtle
+-}
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE DuplicateRecordFields #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE KindSignatures #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE TypeOperators #-}
+
+import Protolude hiding (FilePath, die, (<.>), (<>))
+import Turtle hiding (Text, stdin, text)
+import qualified Turtle as Turtle
+
+import Data.Aeson
+import Data.Aeson.Encode.Pretty (encodePretty)
+import Network.HTTP.Client hiding (Proxy)
+import Network.HTTP.Client.TLS
+import Servant.API
+import Servant.Client
+
+import qualified Data.Char as Char
+import qualified Data.Text as Text
+import qualified Text.Pandoc as Pandoc
+
+import qualified System.IO as SystemIO
+import qualified System.IO.Error as SystemIO (tryIOError)
+
+newtype JobAd =
+ JobAd Text
+ deriving (Eq, Show)
+
+simpleHash :: Text -> Text
+simpleHash txt = show $ Text.foldl rgen 0 txt
+ where
+ coprime = 7 :: Int
+ prime = 999983 :: Int
+ rgen i c = (Char.ord c + i) * coprime `mod` prime - 1
+
+-- | FIXME: there must be a better way to do this
+jobAdText :: JobAd -> Text
+jobAdText (JobAd txt) = txt
+
+jobAdTitle :: JobAd -> Text
+jobAdTitle jobAd = atDef "" (Text.lines (jobAdText jobAd)) 1
+
+jobAdFilename :: JobAd -> FilePath
+jobAdFilename jobAd = fromText $ trimedTitle <> "_" <> hash <> ".md"
+ where
+ makeReadable c
+ | (Char.isAlphaNum c) = c
+ makeReadable c
+ | (Char.isSeparator c) = '_'
+ makeReadable c
+ | otherwise = '_'
+ trimedTitle = Text.map makeReadable $ Text.take 30 $ jobAdTitle jobAd
+ hash = simpleHash $ jobAdText jobAd
+
+--
+-- news.ycombinator.com
+--
+data Item = Item
+ { id :: Int
+ , text :: Maybe Text
+ , kids :: Maybe [Int]
+ } deriving (Show, Generic, FromJSON)
+
+title :: Item -> Text
+title item =
+ fromMaybe "[No Title]" $ headMay $ Text.lines $ fromMaybe "" $ text item
+
+url :: Item -> Text
+url item = "https://news.ycombinator.com/item?id=" <> (show $ id item)
+
+fromItem :: Item -> IO JobAd
+fromItem item = do
+ let title = url item
+ let txt = fromMaybe "" $ text item
+ body <-
+ Pandoc.runIOorExplode $
+ Pandoc.readHtml Pandoc.def txt >>= Pandoc.writeMarkdown Pandoc.def
+ return $ JobAd $ "<" <> title <> ">\n" <> body
+
+newtype JobStories =
+ JobStories [Text]
+ deriving (Show, Eq, ToJSON, FromJSON, Generic)
+
+type HackerNewsAPI
+ = "item" :> Capture "id" Int :> ".json" :> Get '[ JSON] Item :<|> "jobstories.json" :> Get '[ JSON] JobStories
+
+hackerNewsAPI :: Proxy HackerNewsAPI
+hackerNewsAPI = Proxy
+
+hackerNewsURL :: BaseUrl
+hackerNewsURL = BaseUrl Https "hacker-news.firebaseio.com" 443 "/v0"
+
+getItem :: Int -> ClientM Item
+getJobStories :: ClientM JobStories
+getItem :<|> getJobStories = client hackerNewsAPI
+
+--
+-- Decisions
+--
+data Choice
+ = Accept
+ | Reject
+
+getChar :: IO (Maybe Text)
+getChar = do
+ key <- SystemIO.tryIOError SystemIO.getChar
+ case key of
+ Left _ -> return Nothing
+ Right k -> return $ Just (toS [k])
+
+decide :: JobAd -> IO Choice
+decide jobAd = do
+ shells "clear" mempty
+ putStrLn $ jobAdText jobAd
+ putStrLn $ promptMsg
+ cmd <- (fromMaybe "x") <$> getChar
+ case (parse cmd) of
+ Just choice -> return choice
+ Nothing -> decide jobAd
+ where
+ promptMsg = "--------\nReject (k), Accept (l), or Exit (Ctrl+c)?" :: Text
+ parse "l" = Just Accept
+ parse "k" = Just Reject
+ parse _ = Nothing
+
+--
+-- Main
+--
+data Command
+ = PullHackerNewsJobPosts (Maybe FilePath, Int)
+ | Decide (Maybe FilePath, Maybe FilePath, Maybe FilePath)
+
+defaultLocation :: FilePath
+defaultLocation = "./news.ycombinator.com"
+
+defaultAcceptLocation :: FilePath
+defaultAcceptLocation = "./accepted"
+
+defaultRejectLocation :: FilePath
+defaultRejectLocation = "./rejected"
+
+parser :: Turtle.Parser Command
+parser =
+ fmap
+ PullHackerNewsJobPosts
+ (Turtle.subcommand
+ "pull"
+ "download Hacker News job postings"
+ ((,) <$>
+ (optional $ optPath "dir" 'd' "location to save Jobs. env: JOBS_DIR") <*>
+ (argInt
+ "Item ID"
+ "Who is Hiring item ID i.e. https://news.ycombinator.com/item?id=[Item ID]"))) <|>
+ fmap
+ Decide
+ (Turtle.subcommand
+ "decide"
+ "browse through Jobs in current directory"
+ ((,,) <$>
+ (optional $ optPath "dir" 'd' "location containing Jobs. env: JOBS_DIR.") <*>
+ (optional $
+ optPath
+ "accepted"
+ 'a'
+ "move rejected Jobs to this location. env: JOBS_ACCEPT.") <*>
+ (optional $
+ optPath
+ "rejected"
+ 'r'
+ "move accepted Jobs to this location. env: JOBS_REJECT.")))
+
+cmdDecide :: (Maybe FilePath, Maybe FilePath, Maybe FilePath) -> IO ()
+cmdDecide (jobsDir, accepted, rejected)
+ -- | Set up IO to make INPUT processing smoother
+ = do
+ SystemIO.hSetBuffering stdin SystemIO.NoBuffering
+ SystemIO.hSetEcho stdin False
+ -- Apply env. variables
+ envJobsDir <- need "JOBS_DIR"
+ envAccept <- need "JOBS_ACCEPT"
+ envReject <- need "JOBS_REJECT"
+ let jobsDir' =
+ fromMaybe defaultLocation (jobsDir <|> (fromText <$> envJobsDir))
+ let accepted' =
+ fromMaybe defaultAcceptLocation (accepted <|> (fromText <$> envAccept))
+ let rejected' =
+ fromMaybe defaultRejectLocation (rejected <|> (fromText <$> envReject))
+ -- Create directories if not there
+ mktree accepted'
+ mktree rejected'
+ files <- listFiles jobsDir'
+ sequence_ $
+ foreach files $ \file -> do
+ jobAd <- JobAd <$> Turtle.readTextFile file
+ decision <- decide jobAd
+ case decision of
+ Accept -> mv file (accepted' </> (filename file))
+ Reject -> mv file (rejected' </> (filename file))
+ where
+ listFiles dir = Turtle.sort $ Turtle.find (ends ".md") dir
+
+cmdPullhackerNewsJobs :: (Maybe FilePath, Int) -> IO ()
+cmdPullhackerNewsJobs (jobsDir, itemID) = do
+ manager' <- newManager tlsManagerSettings
+ let clientEnv = mkClientEnv manager' hackerNewsURL
+ -- Apply env. variables
+ envJobsDir <- need "JOBS_DIR"
+ let jobsDir' =
+ fromMaybe defaultLocation ((fromText <$> envJobsDir) <> jobsDir)
+ -- Check against already seen files
+ files <- Turtle.sort $ Turtle.findtree (Turtle.ends ".md") "."
+ prevPulled <- for files (\file -> JobAd <$> Turtle.readTextFile file)
+ -- Create directories if not there
+ mktree jobsDir'
+ flip runClientM clientEnv $ do
+ jobAds <- (fromMaybe [] . kids) <$> getItem itemID
+ for_ jobAds $ \itemID ->
+ (do item <- getItem itemID
+ jobAd <- liftIO $ fromItem item
+ unless (elem jobAd prevPulled) $
+ liftIO $
+ writeTextFile (jobsDir' <> jobAdFilename jobAd) (jobAdText jobAd)) `catchError`
+ print
+ return ()
+
+main :: IO ()
+main = do
+ x <- Turtle.options "Pull and browse Jobs posted on Hacker News" parser
+ case x of
+ PullHackerNewsJobPosts arg -> cmdPullhackerNewsJobs arg
+ Decide arg -> cmdDecide arg