Load the data and analyze it

library(generalconference)
#> Loading required package: dplyr
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
#> Loading required package: furrr
#> Loading required package: future
#> Loading required package: glue
#> 
#> Attaching package: 'glue'
#> The following object is masked from 'package:dplyr':
#> 
#>     collapse
#> Loading required package: purrr
#> Loading required package: stringr
#> Loading required package: readr
#> Loading required package: rvest
#> 
#> Attaching package: 'rvest'
#> The following object is masked from 'package:readr':
#> 
#>     guess_encoding
#> Loading required package: tictoc
#> Loading required package: tidyr
#> Loading required package: xml2
library(dplyr)
# Bring the bundled `genconf` dataset into the workspace and preview its
# first rows.
data(genconf)
genconf %>%
  head()
#> # A tibble: 6 × 4
#>    year month date       sessions        
#>   <dbl> <dbl> <date>     <list>          
#> 1  2021     4 2021-04-01 <tibble [5 × 4]>
#> 2  2020    10 2020-10-01 <tibble [5 × 4]>
#> 3  2020     4 2020-04-01 <tibble [5 × 4]>
#> 4  2019    10 2019-10-01 <tibble [5 × 4]>
#> 5  2019     4 2019-04-01 <tibble [5 × 4]>
#> 6  2018    10 2018-10-01 <tibble [5 × 4]>
# Short alias for the dataset; the queries below all start from `df`.
df <- genconf

How many conferences have there been since 1971?

# One row per conference, so the row tally is the number of conferences.
tally(df)
#> # A tibble: 1 × 1
#>       n
#>   <int>
#> 1   101

How many sessions have there been since 1971?

# Expand the nested `sessions` list-column to one row per session, then
# tally the rows.
tally(unnest(df, cols = sessions))
#> # A tibble: 1 × 1
#>       n
#>   <int>
#> 1   613

How many talks have there been since 1971?

# Expand `sessions`, then the `talks` column nested inside each session,
# to get one row per talk; tally the rows.
tally(unnest(unnest(df, cols = sessions), cols = talks))
#> # A tibble: 1 × 1
#>       n
#>   <int>
#> 1  3883

Explore the quality of the data

# Flatten both nesting levels (sessions, then talks) into a single table
# and keep it for the data-quality checks that follow.
df_talks <- unnest(unnest(df, cols = sessions), cols = talks)
# Preview the first rows of the flattened table.
head(df_talks)
#> # A tibble: 6 × 14
#>    year month date       session_name   session_id session_url      talk_urls   
#>   <dbl> <dbl> <date>     <chr>               <int> <chr>            <chr>       
#> 1  2021     4 2021-04-01 Saturday Morn…          1 /study/general-… /study/gene…
#> 2  2021     4 2021-04-01 Saturday Morn…          1 /study/general-… /study/gene…
#> 3  2021     4 2021-04-01 Saturday Morn…          1 /study/general-… /study/gene…
#> 4  2021     4 2021-04-01 Saturday Morn…          1 /study/general-… /study/gene…
#> 5  2021     4 2021-04-01 Saturday Morn…          1 /study/general-… /study/gene…
#> 6  2021     4 2021-04-01 Saturday Morn…          1 /study/general-… /study/gene…
#> # … with 7 more variables: talk_session_id <int>, url <chr>, title1 <chr>,
#> #   author1 <chr>, author2 <chr>, kicker1 <chr>, paragraphs <list>

Only one talk is missing a title, and that entry is a video rather than a regular talk.

# URLs of the talks whose `title1` is missing.
df_talks$url[is.na(df_talks$title1)]
#> [1] "https://www.churchofjesuschrist.org/study/general-conference/2020/10/33video"