Under the F# Weekly news preparation lies a simple F# script.
This script uses Twitterizer2 – one of the simplest Twitter client libraries for .NET. The source code is available on GitHub, and binaries are available through NuGet.
Script logic is relatively simple. First of all, collect a list of queries for Twitter.
let tweets = ["#fsharp";"#fsharpx";"@dsyme";"#websharper";"#fsharpweekly"]
Then make a call to the Twitter Search API for each query, concatenate the results for the last week and sort all tweets by creation date.
|> List.map (getTweets (DateTime.Now - TimeSpan.FromDays(7.0)))
|> List.concat
|> List.sortBy (fun t -> t.CreatedDate)
Then keep only ‘en’ (English) tweets and filter out retweets and tweets without links, leaving only the first occurrence of each unique link.
|> List.filter (fun t -> t.Language = "en")
|> filterUniqLinks
Also, in the source code below you can find a console printing method for verifying the results and an HTML printing method for further manual review of the results.
Feel free to use it in your own social research.
#r "Twitterizer2.dll"
open Twitterizer
open Twitterizer.Entities
open System
open System.Net
open System.Text.RegularExpressions;
/// Downloads the search results for `query` created since `sinceDate`.
/// Result pages are requested one by one, starting at page 1, until a request
/// does not succeed or a page comes back empty. A failed request ends paging
/// silently: whatever was collected up to that point is returned.
let getTweets (sinceDate:DateTime) query =
    let pageSize = 100
    // Tail-recursive paging loop; `acc` holds the pages fetched so far with the
    // most recently fetched page in front.
    let rec collect pageNum acc =
        let options = SearchOptions(NumberPerPage = pageSize, SinceDate = sinceDate, PageNumber = pageNum)
        // Paging starts at 1, so page N is reported as items (N-1)*size .. N*size
        // (assumes the search API treats page 1 as the first page).
        printfn "Loading %d-%d" ((pageNum-1)*pageSize) (pageNum*pageSize)
        let result = TwitterSearch.Search(query, options)
        if result.Result <> RequestResult.Success || result.ResponseObject.Count = 0
        then acc
        else collect (pageNum+1) (List.ofSeq result.ResponseObject @ acc)
    // Reversing yields each page reversed, with pages in request order.
    collect 1 [] |> List.rev
/// Case-insensitive pattern that finds http:// and https:// links in tweet text.
/// The "s?" after "http" makes the secure scheme match as well; with a plain
/// "http://" prefix, tweets whose only links are https links are treated as
/// having no links at all by the code that relies on this pattern.
let urlRegexp = Regex("https?://([\\w+?\\.\\w+])+([a-zA-Z0-9\\~\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)_\\-\\=\\+\\\\\\/\\?\\.\\:\\;\\'\\,]*)?", RegexOptions.IgnoreCase)
/// Keeps, in their original order, only the tweets whose text contains at least
/// one link that did not appear in any earlier kept tweet. Tweets without links
/// are dropped.
let filterUniqLinks (tweets: TwitterSearchResult list) =
    // Links already claimed by a tweet that was kept.
    let seenLinks = new System.Collections.Generic.HashSet<string>()
    // Records every link found in `text` and reports whether any of them was new.
    // HashSet.Add returns false for a value that is already present, so a
    // repeated link never counts as new.
    let claimsNewLink (text: string) =
        let found = urlRegexp.Matches(text)
        let mutable anyNew = false
        for i in 0 .. found.Count - 1 do
            if seenLinks.Add(found.[i].Value) then anyNew <- true
        anyNew
    tweets |> List.filter (fun tweet -> claimsNewLink tweet.Text)
/// Prints one console line per tweet: the author's screen name (padded to
/// 15 characters), the short creation date and the tweet text.
let printTweets (tweets: TwitterSearchResult list) =
    for tweet in tweets do
        let day = tweet.CreatedDate.ToShortDateString()
        printfn "%15s : %s : %s" tweet.FromUserScreenName day tweet.Text
// Collect last week's tweets for every query, oldest first, keep the English
// ones and drop those that bring no new link.
let tweets =
    let weekAgo = DateTime.Now - TimeSpan.FromDays(7.0)
    ["#fsharp";"#fsharpx";"@dsyme";"#websharper";"#fsharpweekly"]
    |> List.collect (getTweets weekAgo)
    |> List.sortBy (fun tweet -> tweet.CreatedDate)
    |> List.filter (fun tweet -> tweet.Language = "en")
    |> filterUniqLinks

// Console output for a quick check of the result.
printfn "Tweets count : %d" (List.length tweets)
tweets |> printTweets
/// Renders the tweets as a simple HTML review page and writes it to `filename`.
/// Every tweet becomes its own table containing a countdown number (the first
/// tweet gets the highest), a "Remove" link that deletes the table through the
/// page's `remove` script, the author's avatar linking to the Twitter profile,
/// the tweet text with its links turned into anchors, and the creation time.
let printTweetsInHtml filename (tweets: TwitterSearchResult list) =
    // Tweet content comes from outside, so it is HTML-encoded before being
    // placed into markup.
    let encode (s: string) = System.Net.WebUtility.HtmlEncode(s)
    // Rebuilds the text piece by piece using match positions. Replacing URLs by
    // value breaks when a URL repeats or is a prefix of another one, because a
    // later replacement then rewrites anchors that were already inserted.
    let formatTweet (text: string) =
        let matches = urlRegexp.Matches(text)
        let html = System.Text.StringBuilder()
        let mutable pos = 0
        for i in 0 .. matches.Count - 1 do
            let m = matches.[i]
            // Plain text between the previous link and this one.
            html.Append(encode (text.Substring(pos, m.Index - pos))) |> ignore
            let url = encode m.Value
            html.Append(sprintf "<a href=\"%s\" target=\"_blank\">%s</a>" url url) |> ignore
            pos <- m.Index + m.Length
        // Remaining text after the last link (the whole text when there is none).
        html.Append(encode (text.Substring(pos))).ToString()
    // Computed once; asking the list for its length per row is linear each time.
    let total = List.length tweets
    let rows =
        tweets
        |> List.mapi (fun i t ->
            let rowId = total - i
            sprintf "<table id=\"%d\"><tr><td rowspan=\"2\" width=\"30\">%d</td><td rowspan=\"2\" width=\"80\"><a href=\"javascript:remove('%d')\">Remove</a></td><td rowspan=\"2\"><a href=\"https://twitter.com/%s\" target=\"_blank\"><img src=\"%s\"/></a></td><td><b>%s</b></td></tr><tr><td>Created : %s <br></td></tr></table>"
                rowId rowId rowId (encode t.FromUserScreenName) (encode t.ProfileImageLocation) (formatTweet t.Text) (t.CreatedDate.ToString()))
        // Each row is preceded by a single space, as before.
        |> List.map (fun row -> " " + row)
        |> String.concat ""
    let html = sprintf "<html><head><script>function remove(id){return (elem=document.getElementById(id)).parentNode.removeChild(elem);}</script></head><body>%s</body></html>" rows
    System.IO.File.WriteAllText(filename, html)
// Save the HTML review page for the collected tweets.
tweets |> printTweetsInHtml "d:\\tweets.html"