This vignette provides a quick tour of the R package rtweet: Collecting Twitter Data.
Search for up to 18,000 (non-retweeted) tweets containing the rstats hashtag.
## search for 18000 tweets using the rstats hashtag (retweets excluded)
rt <- search_tweets(
  "#rstats", n = 18000, include_rts = FALSE
)

## preview tweets data
rt

## preview users data
users_data(rt)

## plot time series (if ggplot2 is installed)
ts_plot(rt)

Quickly visualize frequency of tweets over time using ts_plot().
## plot time series of tweets, aggregated in three-hour intervals
ts_plot(rt, "3 hours") +
  ggplot2::theme_minimal() +
  ggplot2::theme(plot.title = ggplot2::element_text(face = "bold")) +
  ggplot2::labs(
    x = NULL, y = NULL,
    title = "Frequency of #rstats Twitter statuses from past 9 days",
    subtitle = "Twitter status (tweet) counts aggregated using three-hour intervals",
    caption = "\nSource: Data collected from Twitter's REST API via rtweet"
  )

Twitter rate limits cap the number of search results returned to 18,000 every 15 minutes. To request more than that, simply set retryonratelimit = TRUE and rtweet will wait for rate limit resets for you.
## search for 250,000 tweets containing the word data;
## retryonratelimit = TRUE lets the request continue past the rate limit
rt <- search_tweets(
  "data", n = 250000, retryonratelimit = TRUE
)

Search by geo-location—for example, find 10,000 tweets in the English language sent from the United States.
## search for 10,000 tweets sent from the US
rt <- search_tweets(
  "lang:en", geocode = lookup_coords("usa"), n = 10000
)

## create lat/lng variables using all available tweet and profile geo-location data
rt <- lat_lng(rt)

## plot state boundaries
## (remove the plot margins, remembering the previous graphics settings)
old_par <- par(mar = c(0, 0, 0, 0))
maps::map("state", lwd = 0.25)

## plot lat and lng points onto state map
with(rt, points(lng, lat, pch = 20, cex = 0.75, col = rgb(0, 0.3, 0.7, 0.75)))

## restore the graphics settings so later plots get their usual margins
par(old_par)

Randomly sample (approximately 1%) from the live stream of all tweets.
Stream all geo-enabled tweets from London for 60 seconds.
## stream tweets from london for 60 seconds
rt <- stream_tweets(lookup_coords("london, uk"), timeout = 60)

Stream all tweets mentioning realDonaldTrump or Trump for a week.
Retrieve a list of all the accounts a user follows.
Retrieve a list of the accounts following a user.
## get user IDs of accounts following CNN
cnn_flw <- get_followers("cnn", n = 75000)

## lookup data on those accounts
cnn_flw_data <- lookup_users(cnn_flw$user_id)

Or if you really want ALL of their followers:
Get the most recent 3,200 tweets from cnn, BBCWorld, and foxnews.
## get up to 3,200 tweets from the timelines of cnn, BBCWorld, and foxnews
tmls <- get_timelines(c("cnn", "BBCWorld", "foxnews"), n = 3200)

## plot the frequency of tweets for each user over time
## (tweets created after 2017-10-29, grouped by screen name, counted by day)
tmls %>%
  dplyr::filter(created_at > "2017-10-29") %>%
  dplyr::group_by(screen_name) %>%
  ts_plot("days", trim = 1L) +
  ggplot2::geom_point() +
  ggplot2::theme_minimal() +
  ggplot2::theme(
    legend.title = ggplot2::element_blank(),
    legend.position = "bottom",
    plot.title = ggplot2::element_text(face = "bold")
  ) +
  ggplot2::labs(
    x = NULL, y = NULL,
    title = "Frequency of Twitter statuses posted by news organization",
    subtitle = "Twitter status (tweet) counts aggregated by day from October/November 2017",
    caption = "\nSource: Data collected from Twitter's REST API via rtweet"
  )

Get the 3,000 most recently favorited statuses by JK Rowling.
Search for 1,000 users with the rstats hashtag in their profile bios.
## accounts to look up, given here by screen name (user IDs are accepted too)
users <- c(
  "KimKardashian", "justinbieber", "taylorswift13", "espn",
  "JoelEmbiid", "cstonehoops", "KUHoops", "upshotnyt",
  "fivethirtyeight", "hadleywickham", "cnn", "foxnews",
  "msnbc", "maddow", "seanhannity", "potus",
  "epa", "hillaryclinton", "realdonaldtrump", "natesilver538",
  "ezraklein", "annecoulter"
)

## fetch the users data for every account listed above
famous_tweeters <- lookup_users(users)

## print a preview of the users data
famous_tweeters

## pull the most recent tweets data out of the users data
tweets_data(famous_tweeters)