aboutsummaryrefslogtreecommitdiff
path: root/whoishiring.hs
blob: 8cc00708c20d457366efa37471ae6a2ac2b79e62 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
#!/usr/bin/env stack
{- stack
  --resolver lts-13.15
  runghc
  --package aeson
  --package aeson-pretty
  --package pandoc
  --package protolude
  --package servant-client
  --package turtle
-}
{-# LANGUAGE DeriveAnyClass        #-}
{-# LANGUAGE DeriveGeneric         #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleContexts      #-}
{-# LANGUAGE KindSignatures        #-}
{-# LANGUAGE NoImplicitPrelude     #-}
{-# LANGUAGE OverloadedStrings     #-}
{-# LANGUAGE TypeOperators         #-}

import           Protolude                hiding (FilePath, die, (<.>), (<>))
import           Turtle                   hiding (Text, stdin, text)
import qualified Turtle                   as Turtle

import           Data.Aeson
import           Data.Aeson.Encode.Pretty (encodePretty)
import           Network.HTTP.Client      hiding (Proxy)
import           Network.HTTP.Client.TLS
import           Servant.API
import           Servant.Client

import qualified Data.Char                as Char
import qualified Data.Text                as Text
import qualified Text.Pandoc              as Pandoc

import qualified System.IO                as SystemIO
import qualified System.IO.Error          as SystemIO (tryIOError)

newtype JobAd =
  JobAd Text
  deriving (Eq, Show)

simpleHash :: Text -> Text
simpleHash txt = show $ Text.foldl rgen 0 txt
  where
    coprime = 7 :: Int
    prime = 999983 :: Int
    rgen i c = (Char.ord c + i) * coprime `mod` prime - 1

-- | FIXME: there must be a better way to do this
jobAdText :: JobAd -> Text
jobAdText (JobAd txt) = txt

jobAdTitle :: JobAd -> Text
jobAdTitle jobAd = atDef "" (Text.lines (jobAdText jobAd)) 1

jobAdFilename :: JobAd -> FilePath
jobAdFilename jobAd = fromText $ trimedTitle <> "_" <> hash <> ".md"
  where
    makeReadable c
      | (Char.isAlphaNum c) = c
    makeReadable c
      | (Char.isSeparator c) = '_'
    makeReadable c
      | otherwise = '_'
    trimedTitle = Text.map makeReadable $ Text.take 30 $ jobAdTitle jobAd
    hash = simpleHash $ jobAdText jobAd

--
-- news.ycombinator.com
--
data Item = Item
  { id   :: Int
  , text :: Maybe Text
  , kids :: Maybe [Int]
  } deriving (Show, Generic, FromJSON)

title :: Item -> Text
title item =
  fromMaybe "[No Title]" $ headMay $ Text.lines $ fromMaybe "" $ text item

url :: Item -> Text
url item = "https://news.ycombinator.com/item?id=" <> (show $ id item)

fromItem :: Item -> IO JobAd
fromItem item = do
  let title = url item
  let txt = fromMaybe "" $ text item
  body <-
    Pandoc.runIOorExplode $
    Pandoc.readHtml Pandoc.def txt >>= Pandoc.writeMarkdown Pandoc.def
  return $ JobAd $ "<" <> title <> ">\n" <> body

newtype JobStories =
  JobStories [Text]
  deriving (Show, Eq, ToJSON, FromJSON, Generic)

type HackerNewsAPI
   = "item" :> Capture "id" Int :> ".json" :> Get '[ JSON] Item :<|> "jobstories.json" :> Get '[ JSON] JobStories

hackerNewsAPI :: Proxy HackerNewsAPI
hackerNewsAPI = Proxy

hackerNewsURL :: BaseUrl
hackerNewsURL = BaseUrl Https "hacker-news.firebaseio.com" 443 "/v0"

getItem :: Int -> ClientM Item
getJobStories :: ClientM JobStories
getItem :<|> getJobStories = client hackerNewsAPI

--
-- Decisions
--
data Choice
  = Accept
  | Reject

getChar :: IO (Maybe Text)
getChar = do
  key <- SystemIO.tryIOError SystemIO.getChar
  case key of
    Left _  -> return Nothing
    Right k -> return $ Just (toS [k])

decide :: JobAd -> IO Choice
decide jobAd = do
  shells "clear" mempty
  putStrLn $ jobAdText jobAd
  putStrLn $ promptMsg
  cmd <- (fromMaybe "x") <$> getChar
  case (parse cmd) of
    Just choice -> return choice
    Nothing     -> decide jobAd
  where
    promptMsg = "--------\nReject (k), Accept (l), or Exit (Ctrl+c)?" :: Text
    parse "l" = Just Accept
    parse "k" = Just Reject
    parse _   = Nothing

--
-- Main
--
data Command
  = PullHackerNewsJobPosts (Maybe FilePath, Int)
  | Decide (Maybe FilePath, Maybe FilePath, Maybe FilePath)

defaultLocation :: FilePath
defaultLocation = "./news.ycombinator.com"

defaultAcceptLocation :: FilePath
defaultAcceptLocation = "./accepted"

defaultRejectLocation :: FilePath
defaultRejectLocation = "./rejected"

parser :: Turtle.Parser Command
parser =
  fmap
    PullHackerNewsJobPosts
    (Turtle.subcommand
       "pull"
       "download Hacker News job postings"
       ((,) <$>
        (optional $ optPath "dir" 'd' "location to save Jobs. env: JOBS_DIR") <*>
        (argInt
           "Item ID"
           "Who is Hiring item ID i.e. https://news.ycombinator.com/item?id=[Item ID]"))) <|>
  fmap
    Decide
    (Turtle.subcommand
       "decide"
       "browse through Jobs in current directory"
       ((,,) <$>
        (optional $ optPath "dir" 'd' "location containing Jobs. env: JOBS_DIR.") <*>
        (optional $
         optPath
           "accepted"
           'a'
           "move rejected Jobs to this location. env: JOBS_ACCEPT.") <*>
        (optional $
         optPath
           "rejected"
           'r'
           "move accepted Jobs to this location. env: JOBS_REJECT.")))

cmdDecide :: (Maybe FilePath, Maybe FilePath, Maybe FilePath) -> IO ()
cmdDecide (jobsDir, accepted, rejected)
    -- | Set up IO to make INPUT processing smoother
 = do
  SystemIO.hSetBuffering stdin SystemIO.NoBuffering
  SystemIO.hSetEcho stdin False
    -- Apply env. variables
  envJobsDir <- need "JOBS_DIR"
  envAccept <- need "JOBS_ACCEPT"
  envReject <- need "JOBS_REJECT"
  let jobsDir' =
        fromMaybe defaultLocation (jobsDir <|> (fromText <$> envJobsDir))
  let accepted' =
        fromMaybe defaultAcceptLocation (accepted <|> (fromText <$> envAccept))
  let rejected' =
        fromMaybe defaultRejectLocation (rejected <|> (fromText <$> envReject))
    -- Create directories if not there
  mktree accepted'
  mktree rejected'
  files <- listFiles jobsDir'
  sequence_ $
    foreach files $ \file -> do
      jobAd <- JobAd <$> Turtle.readTextFile file
      decision <- decide jobAd
      case decision of
        Accept -> mv file (accepted' </> (filename file))
        Reject -> mv file (rejected' </> (filename file))
  where
    listFiles dir = Turtle.sort $ Turtle.find (ends ".md") dir

cmdPullhackerNewsJobs :: (Maybe FilePath, Int) -> IO ()
cmdPullhackerNewsJobs (jobsDir, itemID) = do
  manager' <- newManager tlsManagerSettings
  let clientEnv = mkClientEnv manager' hackerNewsURL
  -- Apply env. variables
  envJobsDir <- need "JOBS_DIR"
  let jobsDir' =
        fromMaybe defaultLocation ((fromText <$> envJobsDir) <> jobsDir)
  -- Check against already seen files
  files <- Turtle.sort $ Turtle.findtree (Turtle.ends ".md") "."
  prevPulled <- for files (\file -> JobAd <$> Turtle.readTextFile file)
  -- Create directories if not there
  mktree jobsDir'
  flip runClientM clientEnv $ do
    jobAds <- (fromMaybe [] . kids) <$> getItem itemID
    for_ jobAds $ \itemID ->
      (do item <- getItem itemID
          jobAd <- liftIO $ fromItem item
          unless (elem jobAd prevPulled) $
            liftIO $
            writeTextFile (jobsDir' <> jobAdFilename jobAd) (jobAdText jobAd)) `catchError`
      print
  return ()

main :: IO ()
main = do
  x <- Turtle.options "Pull and browse Jobs posted on Hacker News" parser
  case x of
    PullHackerNewsJobPosts arg -> cmdPullhackerNewsJobs arg
    Decide arg                 -> cmdDecide arg