Skip to content

Guidance on how to pull data within specific geographies #4

@francisbarton

Description

@francisbarton

I'm not clear how to restrict the stat-xplore data download to specific areas (in particular, here, all LSOAs within a specific Local Authority).

I'm not clear how to use the recodes feature, basically!

Here's a reprex of my rather rudimentary code so far.

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(magrittr)
library(tibble)
library(purrr)
#> 
#> Attaching package: 'purrr'
#> The following object is masked from 'package:magrittr':
#> 
#>     set_names
library(dwpstat)

lad19cd <- "E07000170" # Ashfield

lsoa_ids <- c("E01027925", "E01027926", "E01027927", "E01027928", "E01027929",
  "E01027930", "E01027931", "E01027932", "E01027933", "E01027934",
  "E01027935", "E01027936", "E01027937", "E01027938", "E01027939",
  "E01027940", "E01027941", "E01027942", "E01027943", "E01027944",
  "E01027945", "E01027946", "E01027947", "E01027948", "E01027949",
  "E01027950", "E01027951", "E01027952", "E01027953", "E01027954",
  "E01027955", "E01027956", "E01027957", "E01027958", "E01027959",
  "E01027960", "E01027961", "E01027962", "E01027963", "E01027964",
  "E01027965", "E01027966", "E01027967", "E01027968", "E01027969",
  "E01027970", "E01027971", "E01027972", "E01027973", "E01027974",
  "E01027975", "E01027976", "E01027977", "E01027978", "E01027979",
  "E01027980", "E01027981", "E01027982", "E01027983", "E01027984",
  "E01027985", "E01027986", "E01027987", "E01027988", "E01027989",
  "E01027990", "E01027991", "E01027992", "E01027993", "E01027994",
  "E01027995", "E01027996", "E01027997", "E01027998")

uc_database_id <- dwp_schema(
  dwp_schema() %>%
    filter(label == "Universal Credit") %>%
    pull(id)) %>%
  filter(label == "Households on Universal Credit") %>%
  pull(id)
uc_database_id
#> [1] "str:database:UC_Households"

uc_measures_id <- dwp_schema(
  dwp_schema(
    dwp_schema() %>%
      filter(label == "Universal Credit") %>%
      pull(id)) %>%
    filter(label == "Households on Universal Credit") %>%
    pull(id)) %>%
  filter(label == "Households on Universal Credit") %>%
  pull(id)
uc_measures_id
#> [1] "str:count:UC_Households:V_F_UC_HOUSEHOLDS"

uc_month_id <- dwp_schema(
  dwp_schema(
    dwp_schema() %>%
      filter(label == "Universal Credit") %>%
      pull(id)) %>%
    filter(label == "Households on Universal Credit") %>%
    pull(id)) %>%
  filter(label == "Month") %>%
  pull(id)
uc_month_id
#> [1] "str:field:UC_Households:F_UC_DATE:DATE_NAME"

uc_geog_la_id <- dwp_schema(
  dwp_schema(
    dwp_schema(
      dwp_schema(
        dwp_schema() %>%
          filter(label == "Universal Credit") %>%
          pull(id)) %>%
        filter(label == "Households on Universal Credit") %>%
        pull(id)) %>%
      filter(label == "Geography (residence-based)") %>%
      pull(id)) %>%
    filter(label == "National - Regional - LA - OAs") %>%
    pull(id)) %>%
  filter(label == "Local Authority") %>%
  pull(id) %>%
  paste0(., ":", lad19cd)
uc_geog_la_id
#> [1] "str:valueset:UC_Households:V_F_UC_HOUSEHOLDS:COA_CODE:V_C_MASTERGEOG11_LA_TO_REGION:E07000170"

uc_geog_lsoa_id <- dwp_schema(
  dwp_schema(
    dwp_schema(
      dwp_schema(
        dwp_schema() %>%
          filter(label == "Universal Credit") %>%
          pull(id)) %>%
        filter(label == "Households on Universal Credit") %>%
        pull(id)) %>%
      filter(label == "Geography (residence-based)") %>%
      pull(id)) %>%
    filter(label == "National - Regional - LA - OAs") %>%
    pull(id)) %>%
  filter(label == "Lower Layer Super Output Areas") %>%
  pull(id)
uc_geog_lsoa_id
#> [1] "str:valueset:UC_Households:V_F_UC_HOUSEHOLDS:COA_CODE:V_C_MASTERGEOG11_LSOA_TO_MSOA"

uc_geographies_data <- dwp_schema(
  dwp_schema(
    dwp_schema(
      dwp_schema(
        dwp_schema(
          dwp_schema() %>%
            filter(label == "Universal Credit") %>%
            pull(id)) %>%
          filter(label == "Households on Universal Credit") %>%
          pull(id)) %>%
        filter(label == "Geography (residence-based)") %>%
        pull(id)) %>%
      filter(label == "National - Regional - LA - OAs") %>%
      pull(id)) %>%
    filter(label == "Lower Layer Super Output Areas") %>%
    pull(id))
head(uc_geographies_data)
#> # A tibble: 6 x 4
#>   id                            label       location                       type 
#>   <chr>                         <chr>       <chr>                          <chr>
#> 1 str:value:UC_Households:V_F_~ Wakefield ~ https://stat-xplore.dwp.gov.u~ VALUE
#> 2 str:value:UC_Households:V_F_~ Dunnikier ~ https://stat-xplore.dwp.gov.u~ VALUE
#> 3 str:value:UC_Households:V_F_~ Kingston u~ https://stat-xplore.dwp.gov.u~ VALUE
#> 4 str:value:UC_Households:V_F_~ Barnsley 0~ https://stat-xplore.dwp.gov.u~ VALUE
#> 5 str:value:UC_Households:V_F_~ Dunoon - 05 https://stat-xplore.dwp.gov.u~ VALUE
#> 6 str:value:UC_Households:V_F_~ Torbay 017B https://stat-xplore.dwp.gov.u~ VALUE

.

# this gets total national numbers of claimants
# how do I add in a geography filter?

uc_data_pull <- dwp_get_data(
  database = uc_database_id,
  measures = uc_measures_id,
  column = uc_month_id)

uc_data_tibble <- tibble(
  month = uc_data_pull %>%
    pluck("fields", "items", 1, "labels"),
  claimants = uc_data_pull %>%
    pluck("cubes", "str:count:UC_Households:V_F_UC_HOUSEHOLDS", "values"))
head(uc_data_tibble)
#> # A tibble: 6 x 2
#>   month     claimants
#>   <list>        <dbl>
#> 1 <chr [1]>     98139
#> 2 <chr [1]>    111297
#> 3 <chr [1]>    124950
#> 4 <chr [1]>    141142
#> 5 <chr [1]>    155999
#> 6 <chr [1]>    175187

Created on 2020-02-11 by the reprex package (v0.3.0)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions