Extracting titles and links from an Atom feed

The code I used to extract the titles and links for the previous post:

#! /usr/bin/runhaskell

import System.Environment
import Text.HTML.TagSoup
import Text.Printf

-- | Entry point. Expects exactly one command-line argument, the path to
-- the Atom feed file; the parsed entries are handed to 'writeList'.
--
-- Any other number of arguments raises an 'IOError' carrying a usage
-- message instead of an opaque pattern-match failure.
main :: IO ()
main = do
    args <- getArgs
    case args of
        [fn] -> readAtomFile fn >>= writeList
        _    -> do
            prog <- getProgName
            ioError (userError ("usage: " ++ prog ++ " ATOM-FILE"))

-- | Read the file at the given path, run it through TagSoup's 'parseTags',
-- and produce one (title, link) pair per chunk returned by 'getAllEntries'.
readAtomFile :: FilePath -> IO [(String, String)]
readAtomFile fn = fmap entriesOf (readFile fn)
    where
        -- Raw file contents -> tags -> per-entry chunks -> (title, link).
        entriesOf = map parseEntry . getAllEntries . parseTags

-- | Split the tag stream into chunks, one per @entry@ element. Each chunk
-- begins at an opening @entry@ tag and runs until the next opening
-- @entry@ tag (or the end of the input); tags before the first entry are
-- discarded.
getAllEntries :: [Tag String] -> [[Tag String]]
getAllEntries tags = partitions startsEntry tags
    where
        startsEntry tag = tag ~== TagOpen "entry" []

-- | Text content of the first element named @n@ in the tag list.
--
-- All text between the opening tag and the next closing tag of the same
-- name is concatenated, so text that follows nested markup is kept.
-- Returns the empty string when no such element exists or when it has no
-- text (e.g. @\<title/\>@), rather than failing on '!!' or 'fromTagText'.
getElemText :: String -> [Tag String] -> String
getElemText n tags =
    case dropWhile (~/= TagOpen n []) tags of
        -- Skip the opening tag itself; collect text up to the closing tag.
        (_ : rest) -> innerText (takeWhile (~/= TagClose n) rest)
        []         -> ""

-- | 'getEntryTitle' yields the text of the entry's first @title@ element.
--
-- 'getEntryLink' yields the @href@ attribute of the entry's first @link@
-- tag. It returns the empty string when the entry has no @link@ tag at
-- all, which is the same value 'fromAttrib' gives for a @link@ tag that
-- lacks an @href@ attribute.
getEntryTitle, getEntryLink :: [Tag String] -> String
getEntryTitle = getElemText "title"
getEntryLink e =
    case dropWhile (~/= TagOpen "link" []) e of
        (linkTag : _) -> fromAttrib "href" linkTag
        []            -> ""

-- | Reduce the tags of a single entry to its (title, link) pair.
parseEntry :: [Tag String] -> (String, String)
parseEntry e = (getEntryTitle e, getEntryLink e)

-- | Print every (title, link) pair as a list item on its own line on
-- standard output, followed by one extra trailing newline.
writeList :: [(String, String)] -> IO ()
writeList posts = putStrLn (concatMap renderLine posts)
    where
        -- Each item is terminated by a newline; 'putStrLn' then appends
        -- the final one.
        renderLine post = writeListItem post ++ "\n"

-- | Format a (title, link) pair as a Markdown list item:
-- @- [title](link)@. Neither component is escaped.
writeListItem :: (String, String) -> String
writeListItem (t, l) = concat ["- [", t, "](", l, ")"]

It can also be found as a snippet here.

Leave a comment