Data Science Tutorials - Getting to know gt

What is gt?

The grammar of tables package makes it easy to create pretty output tables. Similar to ggplot, it allows you to construct a table using various elements, giving significant control over what the final outcome looks like.

This image shows the parts of a gt table.

There are a variety of different functions that allow you to tailor your table to your specific needs. Those functions can be found in the Package index.

Data

To better demonstrate the abilities of gt, we’re going to use the Spotify API to pull data from my current favorite artist. This process requires you to set up a Spotify app, which is more work than we need to do right now, so I’ve shared the data with you. For instructions on how to set up your own Spotify app, see the Spotify for Developers documentation.

library(httr)
library(yaml)
# Load the Spotify API credentials (client_id and client_secret) from a local
# YAML file.
# TODO: add a note that readers need to put their own account info in
# config.yml.
config <- yaml::yaml.load_file(input = "config.yml")

# Request an access token using the client-credentials grant
get_token <- POST(
  url = "https://accounts.spotify.com/api/token",
  add_headers("Content-Type" = "application/x-www-form-urlencoded"),
  body = list(
    grant_type = "client_credentials",
    client_id = config$client_id,
    client_secret = config$client_secret
  ),
  encode = "form"
)
# Stop here with the HTTP status if the request was rejected, rather than
# carrying a NULL token into every later request.
stop_for_status(get_token, task = "get a Spotify access token")

# The parsed response is a list; the token itself is its access_token element
access_token1 <- content(get_token, as = "parsed")
access_token <- access_token1$access_token

# chappell_roan_id <- "7GlBOeep6PqTfFi59PTUUN"

# response <- GET(
#   url = paste0("https://api.spotify.com/v1/artists/", chappell_roan_id),
#   add_headers(Authorization = paste("Bearer", access_token))
# )

# content <- content(response, "parsed")


#url <- "https://example.com/v1/refresh"
#headers <- c("Content-Type" = "application/x-www-form-urlencoded")
#body <- list(refresh_token = access_token)

#response <- POST(url, 
                 # add_headers(headers), 
                 # body = body, 
                 # encode = "form")

# Print the response status code
#print(status_code(response))

# Print the response content
#print(content(response, "text"))

# my_top_5 <- GET(
#   url = paste0("https://api.spotify.com/v1/me/top/tracks"),
#   add_headers(Authorization = paste0("Bearer ", access_token))
# )
# 
# top5 <- content(my_top_5, "parsed")
# 
# top5

# Get Spotify's playlist called "Today's Top Hits".
library(plyr)
playlist_id <- "37i9dQZF1DXcBWIGoYBM5M"
response <- GET(
  url = paste0("https://api.spotify.com/v1/playlists/", playlist_id),
  add_headers(Authorization = paste("Bearer", access_token))
)
# Surface HTTP errors (expired token, wrong playlist id) immediately instead of
# failing later on a missing field.
stop_for_status(response, task = "fetch the playlist")

playlist_content <- content(response, as = "parsed")
# playlist_data <- plyr::ldply(playlist_content)
playlist_items <- playlist_content$tracks$items

# Build one row per playlist entry: its position in the playlist, the first
# listed artist, and the track name. vapply() yields plain character vectors
# and also copes with an empty playlist, where a 1:length() loop would fail.
all_data <- data.frame(
  rank = seq_along(playlist_items),
  artist = vapply(
    playlist_items,
    function(item) item$track$artists[[1]]$name,
    character(1)
  ),
  track = vapply(
    playlist_items,
    function(item) item$track$name,
    character(1)
  )
)

Display a simple gt table

# Show every track in the playlist as a basic gt table with a title
library(gt)
tab_header(gt(all_data), title = "Spotify Top 50")

rank	artist	track
Spotify Top 50
1	Sabrina Carpenter	Please Please Please
2	KAROL G	Si Antes Te Hubiera Conocido
3	Chappell Roan	Good Luck, Babe!
4	Shaboozey	A Bar Song (Tipsy)
5	Billie Eilish	BIRDS OF A FEATHER
6	Kendrick Lamar	Not Like Us
7	Tommy Richman	MILLION DOLLAR BABY
8	Hozier	Too Sweet
9	Post Malone	I Had Some Help (Feat. Morgan Wallen)
10	Benson Boone	Beautiful Things
11	Sabrina Carpenter	Espresso
12	Myles Smith	Stargazing
13	Artemas	i like the way you kiss me
14	Billie Eilish	LUNCH
15	Djo	End of Beginning
16	Quavo	Tough
17	Ariana Grande	we can't be friends (wait for your love)
18	Teddy Swims	Lose Control
19	Dasha	Austin
20	Taylor Swift	I Can Do It With a Broken Heart
21	Tinashe	Nasty
22	Eminem	Houdini
23	Mark Ambor	Belong Together
24	Benson Boone	Slow It Down
25	The Kid LAROI	GIRLS
26	Chappell Roan	HOT TO GO!
27	Tate McRae	greedy
28	Adam Port	Move
29	SZA	Saturn
30	Taylor Swift	Fortnight (feat. Post Malone)
31	Zach Bryan	28
32	David Guetta	I Don't Wanna Wait
33	Ariana Grande	the boy is mine
34	Gracie Abrams	Close To You
35	Morgan Wallen	Lies Lies Lies
36	Noah Kahan	Stick Season
37	Jimin	Smeraldo Garden Marching Band (feat. Loco)
38	CYRIL	Stumblin' In
39	Charli xcx	360
40	The Weeknd	One Of The Girls (with JENNIE, Lily Rose Depp)
41	LISA	Rockstar
42	Central Cee	BAND4BAND (feat. Lil Baby)
43	Michael Marcagi	Scared To Start
44	FloyyMenor	Gata Only
45	Mustard	Parking Lot
46	Coldplay	feelslikeimfallinginlove
47	Tyla	Water
48	Dua Lipa	Illusion
49	Rvssian	Santa
50	ILLIT	Magnetic

Check to see if there are any repeat artists in the Top 50 and list their songs.

library(dplyr)
# Data cleaning/manipulation and gt() styling can share a single pipeline.
unique_duplicates <- all_data |>
  # Keep only the rows of artists that appear more than once. The grouping is
  # left in place so the table lists each artist's songs under a group heading.
  dplyr::group_by(artist) |>
  dplyr::filter(dplyr::n() > 1) |>
  gt::gt() |>
  gt::tab_header(
    title = "Artists with multiple Top 50 Songs",
    subtitle = gt::html(
      "Link to info",
      "<a href = 'https://gt.rstudio.com/reference/data_color.html'>data_color</a>"
    )
  ) |>
  # Shade the rank and track cells of every song ranked 10 or better
  gt::data_color(
    columns = rank,
    rows = rank <= 10,
    target_columns = c(rank, track),
    palette = c("#5bb450"),
    method = "numeric"
  ) |>
  # Explain what the shading means
  gt::tab_footnote(footnote = "Green indicates songs in the Top 10")

unique_duplicates

rank	track
Artists with multiple Top 50 Songs
Link to info data_color
Sabrina Carpenter
1	Please Please Please
11	Espresso
Chappell Roan
3	Good Luck, Babe!
26	HOT TO GO!
Billie Eilish
5	BIRDS OF A FEATHER
14	LUNCH
Benson Boone
10	Beautiful Things
24	Slow It Down
Ariana Grande
17	we can't be friends (wait for your love)
33	the boy is mine
Taylor Swift
20	I Can Do It With a Broken Heart
30	Fortnight (feat. Post Malone)
Green indicates songs in the Top 10

Note

I had issues with rows being automatically striped every other row. The default for gt is for alternating stripes to be disabled (though it can be enabled with opt_row_striping(row_striping = TRUE)). This turned out to be a Quarto issue, so I had to put this section in my header:

format:
  html:
    html-table-processing: none

See the documentation here: Disabling Quarto Table Processing

Get the Top 10 songs for the top artist in the Spotify Top 50

# Get more data about the top artist
library(httr)
library(yaml)
library(gt)
library(dplyr)
# Re-read the credentials and request a fresh access token
config <- yaml::yaml.load_file(input = "config.yml")
get_token <- POST(
  url = "https://accounts.spotify.com/api/token",
  add_headers("Content-Type" = "application/x-www-form-urlencoded"),
  body = list(
    grant_type = "client_credentials",
    client_id = config$client_id,
    client_secret = config$client_secret
  ),
  encode = "form"
)
# Fail with the HTTP status if the token request was rejected; otherwise
# access_token would be NULL and the next request would fail less clearly.
stop_for_status(get_token, task = "get a Spotify access token")

access_token1 <- content(get_token, as = "parsed")
access_token <- access_token1$access_token

# The top artist is the artist of the first row of the playlist table
top_artist <- all_data$artist[1]

# Spotify ID of that same artist, read from the first playlist entry
id <- playlist_content$tracks$items[[1]]$track$artists[[1]]$id

top1_top <- GET(
  url = paste0("https://api.spotify.com/v1/artists/", id, "/top-tracks"),
  add_headers(Authorization = paste("Bearer", access_token))
)
# Stop with the HTTP status on failure instead of building an empty table
stop_for_status(top1_top, task = "fetch the artist's top tracks")

top1_top_songs <- content(top1_top, as = "parsed")

# Collect the track names in one step. vapply() returns a character vector and
# handles an empty result, where a 1:length() loop would fail.
top_track_df <- data.frame(
  V1 = vapply(
    top1_top_songs$tracks,
    function(track) track$name,
    character(1)
  )
)

# Add the rank and artist columns, order them for display, and render the table
top_track_df |>
  dplyr::mutate(rank = dplyr::row_number(), Artist = top_artist) |>
  dplyr::select(rank, Artist, "Track Name" = V1) |>
  gt() |>
  tab_header(
    title = paste0(top_artist, " Top ", nrow(top_track_df), " Songs"),
    subtitle = "Per Spotify"
  )

rank	Artist	Track Name
Sabrina Carpenter Top 10 Songs
Per Spotify
1	Sabrina Carpenter	Espresso
2	Sabrina Carpenter	Please Please Please
3	Sabrina Carpenter	Feather
4	Sabrina Carpenter	Nonsense
5	Sabrina Carpenter	because i liked a boy
6	Sabrina Carpenter	Looking at Me
7	Sabrina Carpenter	On My Way
8	Sabrina Carpenter	Read your Mind
9	Sabrina Carpenter	emails i can’t send
10	Sabrina Carpenter	You Need Me Now? (feat. Sabrina Carpenter)