# link to the API (base URL of the haul table; query strings are appended to it)
api_link_haul <- 'https://apps-st.fisheries.noaa.gov/ods/foss/afsc_groundfish_survey_haul/'
Access via API and R
An application programming interface (API) is a way for two or more computer programs to communicate with each other. More information about how to amend API links can be found here. Useful introductions to using APIs in R
can be found here.
There are three tables the user can pull from the API. Learn more about them on the FOSS data description page. Here, you can see them in their raw JSON format:
- haul: https://apps-st.fisheries.noaa.gov/ods/foss/afsc_groundfish_survey_haul/
- catch: https://apps-st.fisheries.noaa.gov/ods/foss/afsc_groundfish_survey_catch/
- species: https://apps-st.fisheries.noaa.gov/ods/foss/afsc_groundfish_survey_species/
Here are some examples of how to use the data with R:
12.1 Ex. Load all rows of the catch, haul, and species data tables
Note that without specifying a limit, a basic query to the API will only return 25 entries.
12.1.1 Load haul data
12.1.1.1 Load first 25 rows of data
## query the API link with no parameters (returns the default first page)
res <- httr::GET(url = api_link_haul)
# res ## Test connection
## convert from JSON format; the rows live in the `items` element
dat <- jsonlite::fromJSON(base::rawToChar(res$content))$items
# Find how many rows and columns are in the data pull
print(paste0("rows: ", nrow(dat), "; cols: ", ncol(dat)))
[1] "rows: 25; cols: 28"
12.1.1.2 Load all data:
Since the maximum number of rows a user can pull in a single query is 10,000, the user needs to cycle through by offsetting
to the next 10,000 rows (as is shown here).
## Pull the whole haul table, 10,000 rows per request, until a request
## comes back without any rows.
dat <- data.frame()
for (i in seq(0, 500000, 10000)){
  ## find how many iterations it takes to cycle through the data
  print(i)
  ## query the API link
  res <- httr::GET(url = paste0(api_link_haul, "?offset=",i,"&limit=10000"))
  ## convert from JSON format
  data <- jsonlite::fromJSON(base::rawToChar(res$content))
  ## if there are no data, stop the loop
  if (is.null(nrow(data$items))) {
    break
  }
  ## bind sub-pull to dat data.frame
  dat <- dplyr::bind_rows(
    dat,
    data$items %>%
      dplyr::select(-links)) # necessary for API accounting, but not part of the dataset
}
[1] 0
[1] 10000
[1] 20000
[1] 30000
[1] 40000
Explore the data contents:
# Find how many rows and columns are in the data pull
print(paste0("rows: ", nrow(dat), "; cols: ", ncol(dat)))
[1] "rows: 33991; cols: 27"
# learn about the structure of the data
summary(dat)
year srvy survey survey_name
Min. :1982 Length:33991 Length:33991 Length:33991
1st Qu.:1997 Class :character Class :character Class :character
Median :2006 Mode :character Mode :character Mode :character
Mean :2005
3rd Qu.:2015
Max. :2024
survey_definition_id cruise cruisejoin hauljoin
Min. : 47.00 Min. :198201 Min. : -770 Min. : -23911
1st Qu.: 47.00 1st Qu.:199701 1st Qu.: -697 1st Qu.: -14104
Median : 78.00 Median :200601 Median : -616 Median : -4314
Mean : 74.55 Mean :200543 Mean : 294616 Mean : 289722
3rd Qu.: 98.00 3rd Qu.:201501 3rd Qu.: 837799 3rd Qu.: 816124
Max. :143.00 Max. :202401 Max. :1225395 Max. :1225635
haul stratum station vessel_id
Min. : 1.0 Min. : 10.0 Length:33991 Min. : 1.0
1st Qu.: 56.0 1st Qu.: 31.0 Class :character 1st Qu.: 88.0
Median :112.0 Median : 50.0 Mode :character Median : 94.0
Mean :117.3 Mean :130.9 Mean :107.8
3rd Qu.:170.0 3rd Qu.:142.0 3rd Qu.:147.0
Max. :355.0 Max. :794.0 Max. :178.0
vessel_name date_time latitude_dd_start longitude_dd_start
Length:33991 Length:33991 Min. :51.19 Min. :-180.0
Class :character Class :character 1st Qu.:55.00 1st Qu.:-170.7
Mode :character Mode :character Median :57.16 Median :-165.3
Mean :56.86 Mean :-139.6
3rd Qu.:58.97 3rd Qu.:-154.4
Max. :65.34 Max. : 180.0
latitude_dd_end longitude_dd_end bottom_temperature_c surface_temperature_c
Min. :51.19 Min. :-180.0 Min. :-2.100 Min. :-1.100
1st Qu.:55.00 1st Qu.:-170.7 1st Qu.: 2.700 1st Qu.: 5.800
Median :57.16 Median :-165.3 Median : 4.100 Median : 7.400
Mean :56.86 Mean :-139.6 Mean : 3.829 Mean : 7.794
3rd Qu.:58.96 3rd Qu.:-154.4 3rd Qu.: 5.200 3rd Qu.: 9.300
Max. :65.35 Max. : 180.0 Max. :15.300 Max. :18.100
NA's :4 NA's :4 NA's :1601 NA's :852
depth_m distance_fished_km duration_hr net_width_m
Min. : 9.0 Min. :0.135 Min. :0.0250 Min. : 7.51
1st Qu.: 68.0 1st Qu.:1.497 1st Qu.:0.2710 1st Qu.:15.58
Median : 102.0 Median :2.528 Median :0.4900 Median :16.40
Mean : 137.8 Mean :2.206 Mean :0.4006 Mean :16.42
3rd Qu.: 156.0 3rd Qu.:2.833 3rd Qu.:0.5090 3rd Qu.:17.21
Max. :1200.0 Max. :4.334 Max. :0.9800 Max. :23.82
net_height_m area_swept_km2 performance
Min. : 0.000 Min. :0.002314 Min. :0.0000
1st Qu.: 2.383 1st Qu.:0.024261 1st Qu.:0.0000
Median : 5.865 Median :0.039562 Median :0.0000
Mean : 4.822 Mean :0.036378 Mean :0.2779
3rd Qu.: 6.788 3rd Qu.:0.047281 3rd Qu.:0.0000
Max. :11.038 Max. :0.077795 Max. :7.0000
NA's :3269
# Print the first few lines of the data
# (big.mark = "" keeps identifier-like columns free of thousands separators)
dat %>%
  head(3) %>%
  flextable::flextable() %>%
  flextable::colformat_num(
    x = .,
    j = c("year", "cruise", "cruisejoin"),
    big.mark = "") %>%
  flextable::theme_zebra()
year | srvy | survey | survey_name | survey_definition_id | cruise | cruisejoin | hauljoin | haul | stratum | station | vessel_id | vessel_name | date_time | latitude_dd_start | longitude_dd_start | latitude_dd_end | longitude_dd_end | bottom_temperature_c | surface_temperature_c | depth_m | distance_fished_km | duration_hr | net_width_m | net_height_m | area_swept_km2 | performance |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2004 | EBS | eastern Bering Sea | Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey | 98 | 200401 | 1195616 | 1,195,895 | 110 | 42 | HG2019 | 89 | ALDEBARAN | 2004-06-30T16:19:47Z | 57.17397 | -169.3162 | 57.14825 | -169.3204 | 3.8 | 9.1 | 72 | 2.869 | 0.51 | 17.532 | 2.219 | 0.050299 | 0 |
2004 | EBS | eastern Bering Sea | Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey | 98 | 200401 | 1195616 | 1,195,896 | 111 | 42 | G-21 | 89 | ALDEBARAN | 2004-07-02T08:09:22Z | 56.99756 | -170.1878 | 56.99808 | -170.1400 | 4.7 | 7.9 | 70 | 2.912 | 0.51 | 17.500 | 2.290 | 0.050960 | 0 |
2004 | EBS | eastern Bering Sea | Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey | 98 | 200401 | 1195616 | 1,195,897 | 113 | 42 | HG2120 | 89 | ALDEBARAN | 2004-07-02T13:36:10Z | 57.15147 | -169.9100 | 57.17434 | -169.8874 | 5.0 | 7.7 | 50 | 2.891 | 0.51 | 16.999 | 2.366 | 0.049144 | 0 |
# save outputs for later comparison (`dat` is overwritten by later examples)
dat_haul_api <- dat
12.1.2 Load catch data
# link to the API (base URL of the catch table; query strings are appended to it)
api_link_catch <- 'https://apps-st.fisheries.noaa.gov/ods/foss/afsc_groundfish_survey_catch/'
12.1.2.1 Load first 25 rows of data
## query the API link with no parameters (returns the default first page)
res <- httr::GET(url = api_link_catch)
# res ## Test connection
## convert from JSON format; the rows live in the `items` element
dat <- jsonlite::fromJSON(base::rawToChar(res$content))$items
# Find how many rows and columns are in the data pull
print(paste0("rows: ", nrow(dat), "; cols: ", ncol(dat)))
[1] "rows: 25; cols: 8"
12.1.2.2 Load all data
Since the maximum number of rows a user can pull in a single query is 10,000, the user needs to cycle through by offsetting
to the next 10,000 rows (as is shown here).
## Pull the whole catch table, 10,000 rows per request, until a request
## comes back without any rows.
dat <- data.frame()
# for (i in seq(0, 100000, 10000)){
for (i in seq(0, 1000000, 10000)){
  ## find how many iterations it takes to cycle through the data
  # print(i)
  ## query the API link
  res <- httr::GET(url = paste0(api_link_catch, "?offset=",i,"&limit=10000"))
  ## convert from JSON format
  data <- jsonlite::fromJSON(base::rawToChar(res$content))
  ## if there are no data, stop the loop
  if (is.null(nrow(data$items))) {
    break
  }
  ## bind sub-pull to dat data.frame
  dat <- dplyr::bind_rows(
    dat,
    data$items %>%
      dplyr::select(-links)) # necessary for API accounting, but not part of the dataset
}
Explore the data contents:
# Find how many rows and columns are in the data pull
print(paste0("rows: ", nrow(dat), "; cols: ", ncol(dat)))
[1] "rows: 891144; cols: 7"
# learn about the structure of the data
summary(dat)
hauljoin species_code cpue_kgkm2 cpue_nokm2
Min. : -23911 Min. : 1 Min. : 0 Min. : 13
1st Qu.: -14439 1st Qu.:20510 1st Qu.: 6 1st Qu.: 58
Median : -5267 Median :40500 Median : 49 Median : 214
Mean : 280338 Mean :45195 Mean : 1250 Mean : 4605
3rd Qu.: 802426 3rd Qu.:71800 3rd Qu.: 372 3rd Qu.: 1137
Max. :1225635 Max. :99999 Max. :3226235 Max. :21780780
NA's :87811
count weight_kg taxon_confidence
Min. : 1.0 Min. : 0.001 Length:891144
1st Qu.: 2.0 1st Qu.: 0.199 Class :character
Median : 8.0 Median : 1.814 Mode :character
Mean : 180.5 Mean : 41.720
3rd Qu.: 43.0 3rd Qu.: 13.780
Max. :867119.0 Max. :18187.700
NA's :87811
# Print the first few lines of the data
# (big.mark = "" keeps species_code free of thousands separators)
dat %>%
  head(3) %>%
  flextable::flextable() %>%
  flextable::colformat_num(
    x = .,
    j = c("species_code"),
    big.mark = "") %>%
  flextable::theme_zebra()
hauljoin | species_code | cpue_kgkm2 | cpue_nokm2 | count | weight_kg | taxon_confidence |
---|---|---|---|---|---|---|
-7,235 | 20510 | 1,904.036558 | 594.6202 | 19 | 60.840 | High |
-7,235 | 20622 | 0.688508 | 62.5916 | 2 | 0.022 | High |
-7,235 | 21230 | 769.876715 | 219.0706 | 7 | 24.600 | High |
# save outputs for later comparison (`dat` is overwritten by later examples)
dat_catch_api <- dat
12.1.3 Load species data
Since there are fewer than 10,000 rows of species data (and the maximum number of rows a user can pull from this API is 10,000 rows in a query), we can simply call ?offset=0&limit=10000
in our query call.
# link to the API (base URL of the species table)
api_link_species <- 'https://apps-st.fisheries.noaa.gov/ods/foss/afsc_groundfish_survey_species/'
## one request is enough here: the table fits under the 10,000-row limit
res <- httr::GET(url = paste0(api_link_species, "?offset=0&limit=10000"))
## convert from JSON format
data <- jsonlite::fromJSON(base::rawToChar(res$content))
dat <- data$items %>%
  dplyr::select(-links) # necessary for API accounting, but not part of the dataset
Explore the data contents:
# Find how many rows and columns are in the data pull
print(paste0("rows: ", nrow(dat), "; cols: ", ncol(dat)))
[1] "rows: 1014; cols: 6"
# learn about the structure of the data
summary(dat)
species_code scientific_name common_name id_rank
Min. : 1 Length:1014 Length:1014 Length:1014
1st Qu.:22177 Class :character Class :character Class :character
Median :66868 Mode :character Mode :character Mode :character
Mean :50653
3rd Qu.:75077
Max. :99999
worms itis
Min. : 51 Min. : 46861
1st Qu.: 127206 1st Qu.: 97781
Median : 254573 Median : 162045
Mean : 293224 Mean : 217907
3rd Qu.: 342060 3rd Qu.: 167487
Max. :1699296 Max. :1206057
NA's :82 NA's :132
# Print the first few lines of the data
# (big.mark = "" keeps code/ID columns free of thousands separators)
dat %>%
  head(3) %>%
  flextable::flextable() %>%
  flextable::colformat_num(
    x = .,
    j = c("species_code", "worms", "itis"),
    big.mark = "") %>%
  flextable::theme_zebra()
species_code | scientific_name | common_name | id_rank | worms | itis |
---|---|---|---|---|---|
1 | fish egg unid. | ||||
2 | fish larvae unid. | ||||
3 | fish unid. |
# save outputs for later comparison (`dat` is overwritten by later examples)
dat_species_api <- dat
12.2 Ex. Create zero-filled data using data loaded in last example
It is important to create and have access to zero-filled (presence and absence) data so you can do simple analyses and plot data.
First prepare a table with all combinations of what species should be listed for what hauls/surveys. For zero-filled data, all species caught in a survey need to have zero or non-zero row entries for every haul in that survey.
## Build every species x haul combination within each survey: each species
## ever caught in a survey is paired with each haul of that same survey.
comb <- dplyr::full_join(
  # find all species that have been caught, by survey
  x = dplyr::left_join(dat_catch_api, dat_haul_api, by = "hauljoin") %>%
    dplyr::select(survey_definition_id, species_code) %>%
    dplyr::distinct(),
  # find all haul events (hauljoins), by survey
  y = dat_haul_api %>%
    dplyr::select(survey_definition_id, hauljoin) %>%
    dplyr::distinct(),
  # each survey id matches many rows on both sides, so declare it explicitly
  relationship = "many-to-many",
  by = "survey_definition_id"
) %>%
  dplyr::select(-survey_definition_id) # now, redundant
Explore the data contents:
print(paste0("rows: ", nrow(comb), "; cols: ", ncol(comb)))
[1] "rows: 21733474; cols: 2"
# Print the first few lines of the combination table
comb %>% head(3) %>%
  flextable::flextable() %>%
  flextable::colformat_num(
    x = .,
    j = c("species_code", "hauljoin"),
    big.mark = "") %>%
  flextable::theme_zebra()
species_code | hauljoin |
---|---|
20510 | 1225491 |
20510 | 1225492 |
20510 | 1225493 |
Now, using that table of combinations (here, called comb
), join data to make a full zero-filled CPUE dataset. When all of the data have been joined together, the result should have the same number of rows as comb
.
## Attach species, haul, and catch columns to every species x haul
## combination; combinations with no catch record get NA catch values,
## which are then replaced with 0.
dat <- comb %>%
  # add species data
  dplyr::left_join(dat_species_api) %>% # , "species_code"
  # add haul data
  dplyr::left_join(dat_haul_api) %>% # , c("hauljoin")
  # add catch data
  dplyr::left_join(dat_catch_api) %>% # , c("species_code", "hauljoin")
  # modify/clean up zero-filled rows
  dplyr::mutate(
    cpue_kgkm2 = ifelse(is.na(cpue_kgkm2), 0, cpue_kgkm2),
    cpue_nokm2 = ifelse(is.na(cpue_nokm2), 0, cpue_nokm2),
    count = ifelse(is.na(count), 0, count),
    weight_kg = ifelse(is.na(weight_kg), 0, weight_kg))
TRUE Joining with `by = join_by(species_code)`
TRUE Joining with `by = join_by(hauljoin)`
TRUE Joining with `by = join_by(species_code, hauljoin)`
Explore the data contents:
# Find how many rows and columns are in the data pull
print(paste0("rows: ", nrow(dat), "; cols: ", ncol(dat)))
[1] "rows: 21733474; cols: 38"
# learn about the structure of the data
summary(dat)
species_code hauljoin scientific_name common_name
Min. : 1 Min. : -23911 Length:21733474 Length:21733474
1st Qu.:21810 1st Qu.: -14004 Class :character Class :character
Median :66839 Median : -4364 Mode :character Mode :character
Mean :50538 Mean : 298229
3rd Qu.:74986 3rd Qu.: 821816
Max. :99999 Max. :1225635
id_rank worms itis year
Length:21733474 Min. : 51 Min. : 46861 Min. :1982
Class :character 1st Qu.: 126824 1st Qu.: 97160 1st Qu.:1997
Mode :character Median : 254510 Median : 160846 Median :2006
Mean : 269709 Mean : 203649 Mean :2006
3rd Qu.: 292719 3rd Qu.: 167456 3rd Qu.:2015
Max. :1699296 Max. :1206057 Max. :2024
NA's :1584968 NA's :2475974
srvy survey survey_name survey_definition_id
Length:21733474 Length:21733474 Length:21733474 Min. : 47.00
Class :character Class :character Class :character 1st Qu.: 47.00
Mode :character Mode :character Mode :character Median : 52.00
Mean : 68.95
3rd Qu.: 98.00
Max. :143.00
cruise cruisejoin haul stratum
Min. :198201 Min. : -770 Min. : 1.0 Min. : 10.0
1st Qu.:199701 1st Qu.: -697 1st Qu.: 59.0 1st Qu.: 31.0
Median :200601 Median : -616 Median :117.0 Median : 61.0
Mean :200556 Mean : 303023 Mean :122.7 Mean :142.1
3rd Qu.:201501 3rd Qu.: 837800 3rd Qu.:177.0 3rd Qu.:212.0
Max. :202401 Max. :1225395 Max. :355.0 Max. :794.0
station vessel_id vessel_name date_time
Length:21733474 Min. : 1 Length:21733474 Length:21733474
Class :character 1st Qu.: 88 Class :character Class :character
Mode :character Median : 94 Mode :character Mode :character
Mean :110
3rd Qu.:147
Max. :178
latitude_dd_start longitude_dd_start latitude_dd_end longitude_dd_end
Min. :51.19 Min. :-180.0 Min. :51.19 Min. :-180.0
1st Qu.:54.68 1st Qu.:-169.9 1st Qu.:54.68 1st Qu.:-169.9
Median :56.98 Median :-163.4 Median :56.98 Median :-163.4
Mean :56.61 Mean :-136.6 Mean :56.61 Mean :-136.6
3rd Qu.:58.67 3rd Qu.:-152.1 3rd Qu.:58.67 3rd Qu.:-152.1
Max. :65.34 Max. : 180.0 Max. :65.35 Max. : 180.0
NA's :2268 NA's :2268
bottom_temperature_c surface_temperature_c depth_m distance_fished_km
Min. :-2.1 Min. :-1.1 Min. : 9 Min. :0.135
1st Qu.: 3.1 1st Qu.: 5.9 1st Qu.: 71 1st Qu.:1.481
Median : 4.3 Median : 7.6 Median : 109 Median :1.677
Mean : 4.1 Mean : 8.0 Mean : 142 Mean :2.096
3rd Qu.: 5.4 3rd Qu.: 9.7 3rd Qu.: 167 3rd Qu.:2.800
Max. :15.3 Max. :18.1 Max. :1200 Max. :4.334
NA's :1106136 NA's :598744
duration_hr net_width_m net_height_m area_swept_km2
Min. :0.0250 Min. : 7.51 Min. : 0.0 Min. :0.002314
1st Qu.:0.2690 1st Qu.:15.54 1st Qu.: 2.6 1st Qu.:0.023802
Median :0.3050 Median :16.32 Median : 6.2 Median :0.027834
Mean :0.3798 Mean :16.36 Mean : 5.2 Mean :0.034427
3rd Qu.:0.5000 3rd Qu.:17.12 3rd Qu.: 6.9 3rd Qu.:0.046295
Max. :0.9800 Max. :23.82 Max. :11.0 Max. :0.077795
NA's :1736222
performance cpue_kgkm2 cpue_nokm2 count
Min. :0.0000 Min. : 0 Min. : 0 Min. : 0.0
1st Qu.:0.0000 1st Qu.: 0 1st Qu.: 0 1st Qu.: 0.0
Median :0.0000 Median : 0 Median : 0 Median : 0.0
Mean :0.2925 Mean : 51 Mean : 170 Mean : 6.7
3rd Qu.:0.0000 3rd Qu.: 0 3rd Qu.: 0 3rd Qu.: 0.0
Max. :7.0000 Max. :3226235 Max. :21780780 Max. :867119.0
weight_kg taxon_confidence
Min. : 0.000 Length:21733474
1st Qu.: 0.000 Class :character
Median : 0.000 Mode :character
Mean : 1.711
3rd Qu.: 0.000
Max. :18187.700
# Print the first few lines of the data
# (big.mark = "" keeps code/ID/year columns free of thousands separators)
dat %>%
  head(3) %>%
  flextable::flextable() %>%
  flextable::colformat_num(
    x = .,
    j = c("species_code", "worms", "itis", "hauljoin", "year", "cruise", "cruisejoin"),
    big.mark = "") %>%
  flextable::theme_zebra()
species_code | hauljoin | scientific_name | common_name | id_rank | worms | itis | year | srvy | survey | survey_name | survey_definition_id | cruise | cruisejoin | haul | stratum | station | vessel_id | vessel_name | date_time | latitude_dd_start | longitude_dd_start | latitude_dd_end | longitude_dd_end | bottom_temperature_c | surface_temperature_c | depth_m | distance_fished_km | duration_hr | net_width_m | net_height_m | area_swept_km2 | performance | cpue_kgkm2 | cpue_nokm2 | count | weight_kg | taxon_confidence |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
20510 | 1225491 | Anoplopoma fimbria | sablefish | species | 159463 | 167123 | 2004 | BSS | Bering Sea Slope | Eastern Bering Sea Slope Bottom Trawl Survey | 78 | 200401 | 1225395 | 225 | 12 | 76 | 134 | NORTHWEST EXPLORER | 2004-08-05T18:18:27Z | 54.57648 | -166.5989 | 54.57169 | -166.6362 | 3.9 | 9.3 | 413 | 2.470 | 0.53 | 15.576 | 6.953 | 0.038473 | 0 | 520.3687 | 233.93199 | 9 | 20.02 | High |
20510 | 1225492 | Anoplopoma fimbria | sablefish | species | 159463 | 167123 | 2004 | BSS | Bering Sea Slope | Eastern Bering Sea Slope Bottom Trawl Survey | 78 | 200401 | 1225395 | 192 | 12 | 89 | 134 | NORTHWEST EXPLORER | 2004-07-29T18:02:55Z | 55.12762 | -167.8108 | 55.10951 | -167.7877 | 3.8 | 9.7 | 430 | 2.494 | 0.52 | 15.778 | 6.768 | 0.039350 | 0 | 199.7442 | 101.65099 | 4 | 7.86 | High |
20510 | 1225493 | Anoplopoma fimbria | sablefish | species | 159463 | 167123 | 2004 | BSS | Bering Sea Slope | Eastern Bering Sea Slope Bottom Trawl Survey | 78 | 200401 | 1225395 | 193 | 15 | 149 | 134 | NORTHWEST EXPLORER | 2004-07-30T08:04:59Z | 54.94091 | -167.7837 | 54.92692 | -167.7685 | 2.8 | 8.6 | 1,016 | 1.827 | 0.38 | 16.519 | 6.204 | 0.030180 | 0 | 440.6861 | 99.40288 | 3 | 13.30 | High |
# save outputs for later comparison (`dat` is overwritten by later examples)
dat_zerofill_api <- dat
12.3 Ex. Visualize zero-filled data for 2023 eastern Bering Sea walleye pollock in CPUE data in distribution map
Using the zero-filled data from the previous example, we can make a few plots!
Here is some example data from 2019 through 2023 (year %in% 2019:2023
) for eastern and northern Bering Sea (srvy %in% c("EBS", "NBS")
) walleye pollock (species_code == 21740
).
## Subset the zero-filled data to 2019-2023 EBS/NBS walleye pollock and keep
## only the columns needed for mapping.
dat <- dat_zerofill_api %>%
  dplyr::filter(year %in% 2019:2023 &
                  srvy %in% c("EBS", "NBS") &
                  species_code == 21740) %>%
  dplyr::select(year, common_name, longitude_dd_start, latitude_dd_start, cpue_kgkm2)

# Find how many rows and columns are in the data pull
print(paste0("rows: ", nrow(dat), "; cols: ", ncol(dat)))
[1] "rows: 2052; cols: 5"
# # learn about the structure of the data
# summary(dat)
# Print the first few lines of the data
# (big.mark = "" keeps year free of thousands separators)
dat %>%
  head(3) %>%
  flextable::flextable() %>%
  flextable::colformat_num(
    x = .,
    j = c("year"),
    big.mark = "") %>%
  flextable::theme_zebra()
year | common_name | longitude_dd_start | latitude_dd_start | cpue_kgkm2 |
---|---|---|---|---|
2023 | walleye pollock | -168.2743 | 63.69779 | 2,970.544 |
2023 | walleye pollock | -168.2171 | 63.02298 | 1,291.441 |
2023 | walleye pollock | -168.8329 | 62.88096 | 4,573.617 |
12.3.1 Plot locations on map
library(ggplot2)
## Point map faceted by year: hauls with catch are sized by weight CPUE;
## hauls with zero catch are overlaid as red triangles.
ggplot2::ggplot(data = dat %>% dplyr::filter(cpue_kgkm2 != 0),
                mapping = aes(x = longitude_dd_start,
                              y = latitude_dd_start,
                              size = cpue_kgkm2)) +
  ggplot2::geom_point(alpha = .75) +
  # zero-catch stations, drawn from the complementary subset of `dat`
  ggplot2::geom_point(data = dat %>% dplyr::filter(cpue_kgkm2 == 0),
                      color = "red",
                      shape = 17,
                      alpha = .75,
                      size = 3) +
  ggplot2::xlab("Longitude *W") +
  ggplot2::ylab("Latitude *N") +
  ggplot2::ggtitle(label = "CPUE (kg/km^2) of walleye pollock (Weight CPUE; kg/km2)",
                   subtitle = "Eastern Bering Sea bottom trawl survey") +
  ggplot2::scale_size_continuous(name = "Weight (kg)") +
  ggplot2::facet_wrap(facets = vars(year)) +
  ggplot2::theme_bw()
12.3.2 Plot inverse-distance weighted plot of CPUE
This map is constructed using akgfmaps
. To make IDW plots, you must have data from all stations surveyed, even if no fish of interest were found there.
These plots are similar to those published in the annual Bering Sea data reports.
# devtools::install_github("afsc-gap-products/akgfmaps", build_vignettes = TRUE)
library(akgfmaps)
## Interpolate CPUE by year. Columns are renamed to the upper-case names
## passed to make_idw_stack(); note CPUE_KGHA is filled with the kg/km2
## values from `dat`.
idw <- akgfmaps::make_idw_stack(
  x = dat %>%
    dplyr::select(COMMON_NAME = common_name,
                  CPUE_KGHA = cpue_kgkm2,
                  LATITUDE = latitude_dd_start,
                  LONGITUDE = longitude_dd_start,
                  year),
  grouping.vars = "year",
  region = "bs.all", # Predefined EBS area
  set.breaks = "jenks", # Gets Jenks breaks from classInt::classIntervals()
  in.crs = "+proj=longlat", # Set input coordinate reference system
  out.crs = "EPSG:3338", # Set output coordinate reference system
  extrapolation.grid.type = "sf")
[inverse distance weighted interpolation]
[inverse distance weighted interpolation]
[inverse distance weighted interpolation]
[inverse distance weighted interpolation]
[inverse distance weighted interpolation]
[inverse distance weighted interpolation]
[inverse distance weighted interpolation]
[inverse distance weighted interpolation]
## Base map layers for the same region and output CRS used for the IDW stack
shps <- akgfmaps::get_base_layers(
  select.region = "bs.all",
  include.corners = TRUE,
  set.crs = "EPSG:3338")
# set.breaks <- akgfmaps::eval_plot_breaks(CPUE = dat$cpue_kgkm2, n.breaks = 5)
# set.breaks <- as.vector(unlist(set.breaks[set.breaks$style == "pretty", -1]))
# fixed break points; length(set.breaks) - 1 sets the number of palette colors
set.breaks <- c(0, 50000, 100000, 150000, 200000, 250000)
## Assemble the faceted IDW map: land, interpolated CPUE surface, survey
## area outlines, axes/bounds, then legend and theme settings.
figure_print <- ggplot() +
  # add map of alaska
  ggplot2::geom_sf(data = shps$akland,
                   color = NA,
                   fill = "grey50") +
  # add IDW plots
  geom_sf(data = idw$extrapolation.stack,
          mapping = aes(fill = var1.pred),
          na.rm = FALSE,
          show.legend = TRUE,
          color = NA) +
  # first color is for the lowest class; the rest come from the mako palette
  ggplot2::scale_fill_manual(
    name = "walleye pollock\nCPUE (kg/km2)",
    values = c("gray90",
               viridis::viridis(
                 option = "mako",
                 direction = -1,
                 n = length(set.breaks)-1,
                 begin = 0.20,
                 end = 0.80)),
    na.translate = FALSE, # Don't use NA
    drop = FALSE) +
  # separate plots by year
  ggplot2::facet_wrap(facets = vars(year), nrow = 2) +
  # add survey area
  ggplot2::geom_sf(
    data = shps$survey.area,
    mapping = aes(color = SURVEY,
                  geometry = geometry),
    fill = "transparent",
    linewidth = 1,
    show.legend = FALSE) +
  ggplot2::scale_color_manual(
    name = " ",
    values = c("grey30", "grey50"),
    breaks = shps$survey.area$SURVEY,
    labels = shps$survey.area$SRVY) +
  # lat/lon axis and map bounds
  ggplot2::scale_x_continuous(name = "Longitude °W",
                              breaks = seq(-180, -150, 5)) +
  ggplot2::scale_y_continuous(name = "Latitude °N",
                              breaks = seq(50, 65, 5)) + # seq(52, 62, 2)
  # crop the view to the bounding box of the survey area
  ggplot2::coord_sf(xlim = sf::st_bbox(shps$survey.area)[c(1,3)],
                    ylim = sf::st_bbox(shps$survey.area)[c(2,4)]) +
  # add theme aesthetics
  ggplot2::guides(
    fill = guide_legend(
      order = 1,
      title.position = "top",
      label.position = "bottom",
      title.hjust = 0.5,
      override.aes = list(color = NA),
      nrow = 1),
    color = "none") +
  ggplot2::theme(
    panel.background = element_rect(fill = "white", colour = NA),
    panel.border = element_rect(fill = NA, colour = "grey20"),
    strip.background = element_blank(),
    strip.text = element_text(size = 10, face = "bold"),
    legend.text = element_text(size = 9),
    legend.background = element_rect(colour = "transparent",
                                     fill = "transparent"),
    legend.key = element_rect(colour = "transparent",
                              fill = "transparent"),
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.box.spacing = unit(0, "pt"), # reduce space between legend & plot
    legend.margin=margin(0, 0, 0, 0) )

# display the assembled figure
figure_print
12.4 Ex. Show catch data for 2023 eastern Bering Sea Walleye Pollock (one species in one survey region in one year)
Data downloads and joins for just one species, survey, and year are much faster and easier to do.
First, because year
is identified in the haul table, we need to identify all of the hauls (or more specifically, hauljoin
codes) that were completed in the eastern Bering Sea ("srvy":"EBS"
) in 2023 ("year":2023
).
Note: Check how many rows and columns are in the data pull. The eastern Bering Sea survey (before 2024) has 376 stations in it, and pollock are often found throughout the region, so this should have a similar number of rows.
## query the API link, filtering server-side to 2023 EBS hauls via `q`
res <- httr::GET(url = paste0(api_link_haul, '?limit=10000&q={"year":2023,"srvy":"EBS"}'))

## convert from JSON format
data <- jsonlite::fromJSON(base::rawToChar(res$content))
dat <- data$items %>%
  dplyr::select(-links) # necessary for API accounting, but not part of the dataset

## show summary of data to make sure it is subset correctly
## (srvy as a factor so summary() lists the survey levels and counts)
summary(dat %>% dplyr::mutate(srvy = as.factor(srvy)))
year srvy survey survey_name
Min. :2023 EBS:376 Length:376 Length:376
1st Qu.:2023 Class :character Class :character
Median :2023 Mode :character Mode :character
Mean :2023
3rd Qu.:2023
Max. :2023
survey_definition_id cruise cruisejoin hauljoin
Min. :98 Min. :202301 Min. :-760.0 Min. :-23019
1st Qu.:98 1st Qu.:202301 1st Qu.:-760.0 1st Qu.:-22776
Median :98 Median :202301 Median :-759.0 Median :-22539
Mean :98 Mean :202301 Mean :-759.5 Mean :-22552
3rd Qu.:98 3rd Qu.:202301 3rd Qu.:-759.0 3rd Qu.:-22333
Max. :98 Max. :202301 Max. :-759.0 Max. :-22110
haul stratum station vessel_id
Min. : 7.00 Min. :10.00 Length:376 Min. :134.0
1st Qu.: 65.75 1st Qu.:31.00 Class :character 1st Qu.:134.0
Median :114.00 Median :41.00 Mode :character Median :162.0
Mean :114.16 Mean :39.22 Mean :148.3
3rd Qu.:161.25 3rd Qu.:50.00 3rd Qu.:162.0
Max. :224.00 Max. :90.00 Max. :162.0
vessel_name date_time latitude_dd_start longitude_dd_start
Length:376 Length:376 Min. :54.66 Min. :-178.2
Class :character Class :character 1st Qu.:57.00 1st Qu.:-172.7
Mode :character Mode :character Median :58.02 Median :-168.9
Mean :58.26 Mean :-168.8
3rd Qu.:59.50 3rd Qu.:-165.2
Max. :62.01 Max. :-158.3
latitude_dd_end longitude_dd_end bottom_temperature_c surface_temperature_c
Min. :54.68 Min. :-178.2 Min. :-1.600 Min. : 1.700
1st Qu.:57.01 1st Qu.:-172.7 1st Qu.: 1.200 1st Qu.: 4.200
Median :58.02 Median :-168.9 Median : 2.700 Median : 6.550
Mean :58.26 Mean :-168.8 Mean : 2.249 Mean : 6.386
3rd Qu.:59.50 3rd Qu.:-165.2 3rd Qu.: 3.500 3rd Qu.: 8.525
Max. :62.01 Max. :-158.3 Max. : 5.400 Max. :11.000
depth_m distance_fished_km duration_hr net_width_m
Min. : 20.00 Min. :1.065 Min. :0.1890 Min. :12.90
1st Qu.: 54.75 1st Qu.:2.805 1st Qu.:0.5100 1st Qu.:16.66
Median : 74.00 Median :2.889 Median :0.5180 Median :17.27
Mean : 80.75 Mean :2.854 Mean :0.5129 Mean :17.15
3rd Qu.:105.00 3rd Qu.:2.945 3rd Qu.:0.5260 3rd Qu.:17.83
Max. :171.00 Max. :3.849 Max. :0.6560 Max. :20.29
net_height_m area_swept_km2 performance
Min. :1.300 Min. :0.02017 Min. :0.0000
1st Qu.:1.875 1st Qu.:0.04725 1st Qu.:0.0000
Median :2.064 Median :0.04944 Median :0.0000
Mean :2.107 Mean :0.04892 Mean :0.1075
3rd Qu.:2.343 3rd Qu.:0.05134 3rd Qu.:0.0000
Max. :3.196 Max. :0.06369 Max. :6.2200
## Find how many rows and columns are in the data pull.
print(paste0("rows: ", nrow(dat), "; cols: ", ncol(dat)))
[1] "rows: 376; cols: 27"
# save outputs for later comparison (`dat` is overwritten by later examples)
dat_haul_ex <- dat
# Print the first few lines of the data
# (big.mark = "" keeps year/ID columns free of thousands separators)
dat_haul_ex %>%
  head(3) %>%
  flextable::flextable() %>%
  flextable::colformat_num(
    x = .,
    j = c("year", "hauljoin", "cruise"),
    big.mark = "") %>%
  flextable::theme_zebra()
year | srvy | survey | survey_name | survey_definition_id | cruise | cruisejoin | hauljoin | haul | stratum | station | vessel_id | vessel_name | date_time | latitude_dd_start | longitude_dd_start | latitude_dd_end | longitude_dd_end | bottom_temperature_c | surface_temperature_c | depth_m | distance_fished_km | duration_hr | net_width_m | net_height_m | area_swept_km2 | performance |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2023 | EBS | eastern Bering Sea | Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey | 98 | 202301 | -760 | -22472 | 94 | 50 | C-01 | 134 | NORTHWEST EXPLORER | 2023-06-21T15:22:41Z | 55.66353 | -167.5917 | 55.68996 | -167.5982 | 4.4 | 7.8 | 135 | 2.966 | 0.512 | 18.647 | 1.587 | 0.055307 | 0 |
2023 | EBS | eastern Bering Sea | Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey | 98 | 202301 | -760 | -22510 | 95 | 50 | C-18 | 134 | NORTHWEST EXPLORER | 2023-06-22T07:11:43Z | 55.66357 | -168.2098 | 55.68946 | -168.2130 | 4.4 | 8.0 | 136 | 2.884 | 0.519 | 17.259 | 1.931 | 0.049775 | 0 |
2023 | EBS | eastern Bering Sea | Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey | 98 | 202301 | -760 | -22511 | 96 | 50 | D-18 | 134 | NORTHWEST EXPLORER | 2023-06-22T10:18:09Z | 55.99245 | -168.2189 | 56.01884 | -168.2278 | 4.3 | 7.6 | 151 | 2.986 | 0.528 | 17.520 | 2.042 | 0.052315 | 0 |
12.4.1 Identify species_code
for walleye pollock
In the catch data, we itemize species catches by species_code
. To find out which species_code
to use, you can check variations on the following code. Note that here the word pollock
is case sensitive. All species common_name
entries are lower case except for proper nouns (e.g., “Pacific”). The notation for finding a string is to use %
around the phrase. Since %
is a reserved character in a URL, you have to replace %
with %25
. Similarly, %20
needs to be used in place of a space (e.g., between “walleye” and “pollock”: "walleye%20pollock"}'
).
## query the API link. Use:
## (three alternative spellings of the query; each call overwrites `res`,
## so only the last response is used below)
res <- httr::GET(url = paste0(api_link_species, '?q={%22common_name%22:%22walleye%20pollock%22}'))
# OR
res <- httr::GET(url = paste0(api_link_species, '?q={"common_name":{"$like":"%25pollock%25"}}'))
# OR
res <- httr::GET(url = paste0(api_link_species, '?q={"common_name":"walleye%20pollock"}'))

## convert from JSON format
data <- jsonlite::fromJSON(base::rawToChar(res$content))

# save outputs for later comparison
dat_species_ex <- data$items %>%
  dplyr::select(-links) # necessary for API accounting, but not part of the dataset
# Print the first few lines of the data
# (big.mark = "" keeps species_code free of thousands separators)
dat_species_ex %>%
  head(3) %>%
  flextable::flextable() %>%
  flextable::colformat_num(
    x = .,
    j = c("species_code"),
    big.mark = "") %>%
  flextable::theme_zebra()
species_code | scientific_name | common_name | id_rank | worms | itis |
---|---|---|---|---|---|
21740 | Gadus chalcogrammus | walleye pollock | species | 300,735 | 934,083 |
12.4.2 Then, apply the hauljoins
and species_code
to catch query
We’ll use the data from the haul and species table we collected before to select 2023 eastern Bering Sea walleye pollock catch data.
## query the API link
# data for all walleye pollock caught in all 2023 eastern Bering Sea survey hauls
dat <- data.frame()
# there must be a better way to select multiple values for one parameter,
# but barring that, we will loop through each hauljoin and collect the data of interest
# (seq_len() so that an empty dat_haul_ex yields zero iterations, unlike 1:0)
for (i in seq_len(nrow(dat_haul_ex))) {
  res <- httr::GET(url = paste0(
    api_link_catch, '?q={"species_code":21740,"hauljoin":', dat_haul_ex$hauljoin[i],'}'))
  ## convert from JSON format
  data <- jsonlite::fromJSON(base::rawToChar(res$content))
  ## only bind when this haul actually returned catch rows
  if (length(data$items) != 0) {
    dat <- dplyr::bind_rows(
      dat,
      data$items %>%
        dplyr::select(-links)) # necessary for API accounting, but not part of the dataset
  }
}
Explore data:
# Find how many rows and columns are in the data pull
print(paste0("rows: ", nrow(dat), "; cols: ", ncol(dat)))
[1] "rows: 374; cols: 7"
# learn about the structure of the data
summary(dat)
hauljoin species_code cpue_kgkm2 cpue_nokm2
Min. :-23019 Min. :21740 Min. : 10.34 Min. : 18.26
1st Qu.:-22777 1st Qu.:21740 1st Qu.: 1454.44 1st Qu.: 2281.20
Median :-22540 Median :21740 Median : 3286.76 Median : 5863.07
Mean :-22553 Mean :21740 Mean : 6364.85 Mean : 11540.65
3rd Qu.:-22324 3rd Qu.:21740 3rd Qu.: 6956.25 3rd Qu.: 12456.99
Max. :-22110 Max. :21740 Max. :148679.68 Max. :202321.08
count weight_kg taxon_confidence
Min. : 1.0 Min. : 0.492 Length:374
1st Qu.: 113.2 1st Qu.: 71.560 Class :character
Median : 284.0 Median : 162.310 Mode :character
Mean : 572.8 Mean : 315.419
3rd Qu.: 616.5 3rd Qu.: 350.399
Max. :9997.0 Max. :7346.495
# Print the first few lines of the data
dat %>%
  head(3) %>%
  flextable::flextable() %>%
  # identifier columns are shown without a thousands separator
  flextable::colformat_num(
    j = c("hauljoin", "species_code"),
    big.mark = "") %>%
  flextable::theme_zebra()
hauljoin | species_code | cpue_kgkm2 | cpue_nokm2 | count | weight_kg | taxon_confidence |
---|---|---|---|---|---|---|
-22472 | 21740 | 52.6154 | 72.32357 | 4 | 2.91 | High |
-22510 | 21740 | 351.5824 | 361.62764 | 18 | 17.50 | High |
-22511 | 21740 | 3,110.7879 | 3,784.78562 | 198 | 162.74 | High |
# save outputs for later comparison
dat_catch_ex <- dat
For reference and to help break down the above query, see these other query examples:
# data for haul -22775 (i.e., one specific haul)
# offset is written out as 0 so the example does not depend on a loop index
# left over from earlier code
res <- httr::GET(url = paste0(
  api_link_catch,
  '?offset=0&limit=10000&q={"hauljoin":-22775}'))

# data for all walleye pollock (i.e., one species) caught in all years and surveys
# this returns the first page only (at most 10,000 rows per query); to page
# through everything, loop and raise offset in steps of 10000
res <- httr::GET(url = paste0(
  api_link_catch,
  '?offset=0&limit=10000&q={"species_code":21740}'))
12.4.3 Create zero-filled data for 2023 eastern Bering Sea walleye pollock and plot
It is important to create and have access to zero-filled (presence and absence) data so you can do simple analyses and plot the data.
# Combine haul, catch, and species data. Every haul is kept, so hauls with no
# walleye pollock catch record appear as rows with missing catch values.
dat <- dplyr::full_join(
  dat_haul_ex,
  dat_catch_ex,
  by = "hauljoin") %>% # explicit key: the only column shared by haul and catch
  dplyr::full_join(
    dat_species_ex,
    by = "species_code") %>%
  # modify zero-filled rows: a haul without a catch record means nothing was
  # caught, so the missing catch metrics become 0.
  # Note: species_code and the species columns remain NA on those rows.
  dplyr::mutate(
    cpue_kgkm2 = ifelse(is.na(cpue_kgkm2), 0, cpue_kgkm2),
    cpue_nokm2 = ifelse(is.na(cpue_nokm2), 0, cpue_nokm2),
    count = ifelse(is.na(count), 0, count),
    weight_kg = ifelse(is.na(weight_kg), 0, weight_kg))
Explore data
# Report the number of rows and columns in the zero-filled data
print(sprintf("rows: %d; cols: %d", nrow(dat), ncol(dat)))
[1] "rows: 376; cols: 38"
# learn about the structure of the data
summary(dat)
year srvy survey survey_name
Min. :2023 Length:376 Length:376 Length:376
1st Qu.:2023 Class :character Class :character Class :character
Median :2023 Mode :character Mode :character Mode :character
Mean :2023
3rd Qu.:2023
Max. :2023
survey_definition_id cruise cruisejoin hauljoin
Min. :98 Min. :202301 Min. :-760.0 Min. :-23019
1st Qu.:98 1st Qu.:202301 1st Qu.:-760.0 1st Qu.:-22776
Median :98 Median :202301 Median :-759.0 Median :-22539
Mean :98 Mean :202301 Mean :-759.5 Mean :-22552
3rd Qu.:98 3rd Qu.:202301 3rd Qu.:-759.0 3rd Qu.:-22333
Max. :98 Max. :202301 Max. :-759.0 Max. :-22110
haul stratum station vessel_id
Min. : 7.00 Min. :10.00 Length:376 Min. :134.0
1st Qu.: 65.75 1st Qu.:31.00 Class :character 1st Qu.:134.0
Median :114.00 Median :41.00 Mode :character Median :162.0
Mean :114.16 Mean :39.22 Mean :148.3
3rd Qu.:161.25 3rd Qu.:50.00 3rd Qu.:162.0
Max. :224.00 Max. :90.00 Max. :162.0
vessel_name date_time latitude_dd_start longitude_dd_start
Length:376 Length:376 Min. :54.66 Min. :-178.2
Class :character Class :character 1st Qu.:57.00 1st Qu.:-172.7
Mode :character Mode :character Median :58.02 Median :-168.9
Mean :58.26 Mean :-168.8
3rd Qu.:59.50 3rd Qu.:-165.2
Max. :62.01 Max. :-158.3
latitude_dd_end longitude_dd_end bottom_temperature_c surface_temperature_c
Min. :54.68 Min. :-178.2 Min. :-1.600 Min. : 1.700
1st Qu.:57.01 1st Qu.:-172.7 1st Qu.: 1.200 1st Qu.: 4.200
Median :58.02 Median :-168.9 Median : 2.700 Median : 6.550
Mean :58.26 Mean :-168.8 Mean : 2.249 Mean : 6.386
3rd Qu.:59.50 3rd Qu.:-165.2 3rd Qu.: 3.500 3rd Qu.: 8.525
Max. :62.01 Max. :-158.3 Max. : 5.400 Max. :11.000
depth_m distance_fished_km duration_hr net_width_m
Min. : 20.00 Min. :1.065 Min. :0.1890 Min. :12.90
1st Qu.: 54.75 1st Qu.:2.805 1st Qu.:0.5100 1st Qu.:16.66
Median : 74.00 Median :2.889 Median :0.5180 Median :17.27
Mean : 80.75 Mean :2.854 Mean :0.5129 Mean :17.15
3rd Qu.:105.00 3rd Qu.:2.945 3rd Qu.:0.5260 3rd Qu.:17.83
Max. :171.00 Max. :3.849 Max. :0.6560 Max. :20.29
net_height_m area_swept_km2 performance species_code
Min. :1.300 Min. :0.02017 Min. :0.0000 Min. :21740
1st Qu.:1.875 1st Qu.:0.04725 1st Qu.:0.0000 1st Qu.:21740
Median :2.064 Median :0.04944 Median :0.0000 Median :21740
Mean :2.107 Mean :0.04892 Mean :0.1075 Mean :21740
3rd Qu.:2.343 3rd Qu.:0.05134 3rd Qu.:0.0000 3rd Qu.:21740
Max. :3.196 Max. :0.06369 Max. :6.2200 Max. :21740
NA's :2
cpue_kgkm2 cpue_nokm2 count weight_kg
Min. : 0 Min. : 0 Min. : 0.0 Min. : 0.00
1st Qu.: 1431 1st Qu.: 2268 1st Qu.: 112.0 1st Qu.: 70.64
Median : 3273 Median : 5842 Median : 280.0 Median : 161.44
Mean : 6331 Mean : 11479 Mean : 569.8 Mean : 313.74
3rd Qu.: 6946 3rd Qu.: 12345 3rd Qu.: 611.5 3rd Qu.: 349.81
Max. :148680 Max. :202321 Max. :9997.0 Max. :7346.49
taxon_confidence scientific_name common_name id_rank
Length:376 Length:376 Length:376 Length:376
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
worms itis
Min. :300735 Min. :934083
1st Qu.:300735 1st Qu.:934083
Median :300735 Median :934083
Mean :300735 Mean :934083
3rd Qu.:300735 3rd Qu.:934083
Max. :300735 Max. :934083
NA's :2 NA's :2
# Print the first few lines of the data
dat %>%
  head(3) %>%
  flextable::flextable() %>%
  # year and identifier columns are shown without a thousands separator
  flextable::colformat_num(
    j = c("year", "cruise", "cruisejoin", "species_code"),
    big.mark = "") %>%
  flextable::theme_zebra()
year | srvy | survey | survey_name | survey_definition_id | cruise | cruisejoin | hauljoin | haul | stratum | station | vessel_id | vessel_name | date_time | latitude_dd_start | longitude_dd_start | latitude_dd_end | longitude_dd_end | bottom_temperature_c | surface_temperature_c | depth_m | distance_fished_km | duration_hr | net_width_m | net_height_m | area_swept_km2 | performance | species_code | cpue_kgkm2 | cpue_nokm2 | count | weight_kg | taxon_confidence | scientific_name | common_name | id_rank | worms | itis |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2023 | EBS | eastern Bering Sea | Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey | 98 | 202301 | -760 | -22,472 | 94 | 50 | C-01 | 134 | NORTHWEST EXPLORER | 2023-06-21T15:22:41Z | 55.66353 | -167.5917 | 55.68996 | -167.5982 | 4.4 | 7.8 | 135 | 2.966 | 0.512 | 18.647 | 1.587 | 0.055307 | 0 | 21740 | 52.6154 | 72.32357 | 4 | 2.91 | High | Gadus chalcogrammus | walleye pollock | species | 300,735 | 934,083 |
2023 | EBS | eastern Bering Sea | Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey | 98 | 202301 | -760 | -22,510 | 95 | 50 | C-18 | 134 | NORTHWEST EXPLORER | 2023-06-22T07:11:43Z | 55.66357 | -168.2098 | 55.68946 | -168.2130 | 4.4 | 8.0 | 136 | 2.884 | 0.519 | 17.259 | 1.931 | 0.049775 | 0 | 21740 | 351.5824 | 361.62764 | 18 | 17.50 | High | Gadus chalcogrammus | walleye pollock | species | 300,735 | 934,083 |
2023 | EBS | eastern Bering Sea | Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey | 98 | 202301 | -760 | -22,511 | 96 | 50 | D-18 | 134 | NORTHWEST EXPLORER | 2023-06-22T10:18:09Z | 55.99245 | -168.2189 | 56.01884 | -168.2278 | 4.3 | 7.6 | 151 | 2.986 | 0.528 | 17.520 | 2.042 | 0.052315 | 0 | 21740 | 3,110.7879 | 3,784.78562 | 198 | 162.74 | High | Gadus chalcogrammus | walleye pollock | species | 300,735 | 934,083 |
12.4.4 Visualize CPUE data in distribution map
Using the zero-filled data from the previous example, we can make a few plots!
12.5 Plot locations
library(ggplot2)

# Bubble map of haul start positions:
# - hauls with catch: point size scaled by weight CPUE (kg/km2)
# - hauls with zero catch: fixed-size red triangles
ggplot2::ggplot(
  data = dat %>% dplyr::filter(cpue_kgkm2 != 0),
  mapping = ggplot2::aes(
    x = longitude_dd_start,
    y = latitude_dd_start,
    size = cpue_kgkm2)) +
  ggplot2::geom_point(alpha = .75) +
  ggplot2::geom_point(
    data = dat %>% dplyr::filter(cpue_kgkm2 == 0),
    color = "red",
    shape = 17,
    alpha = .75,
    size = 3) +
  ggplot2::xlab("Longitude \u00B0W") +
  ggplot2::ylab("Latitude \u00B0N") +
  ggplot2::ggtitle(
    label = "Catches of walleye pollock (Weight CPUE; kg/km2)",
    subtitle = "2023 eastern Bering Sea bottom trawl survey") +
  # size is mapped to cpue_kgkm2, so the legend is labelled as CPUE, not raw weight
  ggplot2::scale_size_continuous(name = "Weight CPUE (kg/km2)") +
  ggplot2::theme_bw()
12.5.1 Plot inverse-distance weighted modeled product of locations
This map is constructed using akgfmaps
# devtools::install_github("afsc-gap-products/akgfmaps", build_vignettes = TRUE)
library(akgfmaps)

# Build an inverse-distance-weighted CPUE surface from the zero-filled data
figure0 <- akgfmaps::make_idw_map(
  CPUE_KGHA = dat$cpue_kgkm2, # calculates the same, regardless of units.
  LATITUDE = dat$latitude_dd_start,
  LONGITUDE = dat$longitude_dd_start,
  region = "bs.south", # Predefined EBS area
  set.breaks = "jenks", # Gets Jenks breaks from classInt::classIntervals()
  in.crs = "+proj=longlat", # Set input coordinate reference system
  out.crs = "EPSG:3338", # Set output coordinate reference system
  extrapolation.grid.type = "sf")
[inverse distance weighted interpolation]
[inverse distance weighted interpolation]
# Draw the interpolated map and relabel the fill legend
figure0$plot + # 20x20km grid
  ggplot2::guides(
    fill = ggplot2::guide_legend(title = "walleye pollock\nCPUE (kg/km2)"))
12.6 Other query examples
12.6.1 Ex. Combination of year, srvy, stratum
Show haul data where year is less than 1989, srvy = “EBS”, and stratum is not equal to 81.
# Haul data where year < 1989, srvy is "EBS", and stratum is not equal to 81.
# The filter passed in q= must be a single balanced JSON object.
res <- httr::GET(
  url = paste0(
    api_link_haul,
    '?limit=10000&q={"year":{"$lt":1989},"stratum":{"$ne":"81"},"srvy":"EBS"}'))
## convert from JSON format
data <- jsonlite::fromJSON(base::rawToChar(res$content))
dat <- data$items %>%
  dplyr::select(-links) # necessary for API accounting, but not part of the dataset
Explore data: