1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
|
#!/usr/bin/env stack
{- stack
--resolver lts-13.15
runghc
--package aeson
--package aeson-pretty
--package pandoc
--package protolude
--package servant-client
--package turtle
-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeOperators #-}
import Protolude hiding (FilePath, die, (<.>), (<>))
import Turtle hiding (Text, stdin, text)
import qualified Turtle as Turtle
import Data.Aeson
import Data.Aeson.Encode.Pretty (encodePretty)
import Network.HTTP.Client hiding (Proxy)
import Network.HTTP.Client.TLS
import Servant.API
import Servant.Client
import qualified Data.Char as Char
import qualified Data.Text as Text
import qualified Text.Pandoc as Pandoc
import qualified System.IO as SystemIO
import qualified System.IO.Error as SystemIO (tryIOError)
newtype JobAd =
JobAd Text
deriving (Eq, Show)
simpleHash :: Text -> Text
simpleHash txt = show $ Text.foldl rgen 0 txt
where
coprime = 7 :: Int
prime = 999983 :: Int
rgen i c = (Char.ord c + i) * coprime `mod` prime - 1
-- | FIXME: there must be a better way to do this
jobAdText :: JobAd -> Text
jobAdText (JobAd txt) = txt
jobAdTitle :: JobAd -> Text
jobAdTitle jobAd = atDef "" (Text.lines (jobAdText jobAd)) 1
jobAdFilename :: JobAd -> FilePath
jobAdFilename jobAd = fromText $ trimedTitle <> "_" <> hash <> ".md"
where
makeReadable c
| (Char.isAlphaNum c) = c
makeReadable c
| (Char.isSeparator c) = '_'
makeReadable c
| otherwise = '_'
trimedTitle = Text.map makeReadable $ Text.take 30 $ jobAdTitle jobAd
hash = simpleHash $ jobAdText jobAd
--
-- news.ycombinator.com
--
data Item = Item
{ id :: Int
, text :: Maybe Text
, kids :: Maybe [Int]
} deriving (Show, Generic, FromJSON)
title :: Item -> Text
title item =
fromMaybe "[No Title]" $ headMay $ Text.lines $ fromMaybe "" $ text item
url :: Item -> Text
url item = "https://news.ycombinator.com/item?id=" <> (show $ id item)
fromItem :: Item -> IO JobAd
fromItem item = do
let title = url item
let txt = fromMaybe "" $ text item
body <-
Pandoc.runIOorExplode $
Pandoc.readHtml Pandoc.def txt >>= Pandoc.writeMarkdown Pandoc.def
return $ JobAd $ "<" <> title <> ">\n" <> body
newtype JobStories =
JobStories [Text]
deriving (Show, Eq, ToJSON, FromJSON, Generic)
type HackerNewsAPI
= "item" :> Capture "id" Int :> ".json" :> Get '[ JSON] Item :<|> "jobstories.json" :> Get '[ JSON] JobStories
hackerNewsAPI :: Proxy HackerNewsAPI
hackerNewsAPI = Proxy
hackerNewsURL :: BaseUrl
hackerNewsURL = BaseUrl Https "hacker-news.firebaseio.com" 443 "/v0"
getItem :: Int -> ClientM Item
getJobStories :: ClientM JobStories
getItem :<|> getJobStories = client hackerNewsAPI
--
-- Decisions
--
data Choice
= Accept
| Reject
getChar :: IO (Maybe Text)
getChar = do
key <- SystemIO.tryIOError SystemIO.getChar
case key of
Left _ -> return Nothing
Right k -> return $ Just (toS [k])
decide :: JobAd -> IO Choice
decide jobAd = do
shells "clear" mempty
putStrLn $ jobAdText jobAd
putStrLn $ promptMsg
cmd <- (fromMaybe "x") <$> getChar
case (parse cmd) of
Just choice -> return choice
Nothing -> decide jobAd
where
promptMsg = "--------\nReject (k), Accept (l), or Exit (Ctrl+c)?" :: Text
parse "l" = Just Accept
parse "k" = Just Reject
parse _ = Nothing
--
-- Main
--
data Command
= PullHackerNewsJobPosts (Maybe FilePath, Int)
| Decide (Maybe FilePath, Maybe FilePath, Maybe FilePath)
defaultLocation :: FilePath
defaultLocation = "./news.ycombinator.com"
defaultAcceptLocation :: FilePath
defaultAcceptLocation = "./accepted"
defaultRejectLocation :: FilePath
defaultRejectLocation = "./rejected"
parser :: Turtle.Parser Command
parser =
fmap
PullHackerNewsJobPosts
(Turtle.subcommand
"pull"
"download Hacker News job postings"
((,) <$>
(optional $ optPath "dir" 'd' "location to save Jobs. env: JOBS_DIR") <*>
(argInt
"Item ID"
"Who is Hiring item ID i.e. https://news.ycombinator.com/item?id=[Item ID]"))) <|>
fmap
Decide
(Turtle.subcommand
"decide"
"browse through Jobs in current directory"
((,,) <$>
(optional $ optPath "dir" 'd' "location containing Jobs. env: JOBS_DIR.") <*>
(optional $
optPath
"accepted"
'a'
"move rejected Jobs to this location. env: JOBS_ACCEPT.") <*>
(optional $
optPath
"rejected"
'r'
"move accepted Jobs to this location. env: JOBS_REJECT.")))
cmdDecide :: (Maybe FilePath, Maybe FilePath, Maybe FilePath) -> IO ()
cmdDecide (jobsDir, accepted, rejected)
-- | Set up IO to make INPUT processing smoother
= do
SystemIO.hSetBuffering stdin SystemIO.NoBuffering
SystemIO.hSetEcho stdin False
-- Apply env. variables
envJobsDir <- need "JOBS_DIR"
envAccept <- need "JOBS_ACCEPT"
envReject <- need "JOBS_REJECT"
let jobsDir' =
fromMaybe defaultLocation (jobsDir <|> (fromText <$> envJobsDir))
let accepted' =
fromMaybe defaultAcceptLocation (accepted <|> (fromText <$> envAccept))
let rejected' =
fromMaybe defaultRejectLocation (rejected <|> (fromText <$> envReject))
-- Create directories if not there
mktree accepted'
mktree rejected'
files <- listFiles jobsDir'
sequence_ $
foreach files $ \file -> do
jobAd <- JobAd <$> Turtle.readTextFile file
decision <- decide jobAd
case decision of
Accept -> mv file (accepted' </> (filename file))
Reject -> mv file (rejected' </> (filename file))
where
listFiles dir = Turtle.sort $ Turtle.find (ends ".md") dir
cmdPullhackerNewsJobs :: (Maybe FilePath, Int) -> IO ()
cmdPullhackerNewsJobs (jobsDir, itemID) = do
manager' <- newManager tlsManagerSettings
let clientEnv = mkClientEnv manager' hackerNewsURL
-- Apply env. variables
envJobsDir <- need "JOBS_DIR"
let jobsDir' =
fromMaybe defaultLocation ((fromText <$> envJobsDir) <> jobsDir)
-- Check against already seen files
files <- Turtle.sort $ Turtle.findtree (Turtle.ends ".md") "."
prevPulled <- for files (\file -> JobAd <$> Turtle.readTextFile file)
-- Create directories if not there
mktree jobsDir'
flip runClientM clientEnv $ do
jobAds <- (fromMaybe [] . kids) <$> getItem itemID
for_ jobAds $ \itemID ->
(do item <- getItem itemID
jobAd <- liftIO $ fromItem item
unless (elem jobAd prevPulled) $
liftIO $
writeTextFile (jobsDir' <> jobAdFilename jobAd) (jobAdText jobAd)) `catchError`
print
return ()
main :: IO ()
main = do
x <- Turtle.options "Pull and browse Jobs posted on Hacker News" parser
case x of
PullHackerNewsJobPosts arg -> cmdPullhackerNewsJobs arg
Decide arg -> cmdDecide arg
|