library(gapindex)
<- gapindex::get_connected() channel
Access via Oracle and R (AFSC Staff only)
If the user has access to the AFSC Oracle
database, the user can use SQL developer
to view and pull the FOSS public data directly from the GAP_PRODUCTS
Oracle
schema.
14.0.1 Connect to Oracle from R
Many users will want to access the data from Oracle
using R
. The user will need to install the RODBC
R
package and ask OFIS (IT) connect R
to Oracle
. Then, use the following code in R
to establish a connection from R
to Oracle
:
Here, the user can write in their username and password directly into the RODBC
connect function. Never save usernames or passwords in scripts that may be intentionally or unintentionally shared with others. If no username and password is entered in the function, pop-ups will appear on the screen asking for the username and password.
14.0.2 Ex. Wholesale download data and join data in R
<- c(
locations "GAP_PRODUCTS.FOSS_CATCH",
"GAP_PRODUCTS.FOSS_HAUL",
"GAP_PRODUCTS.FOSS_SPECIES"
)
print(Sys.Date())
<- c() # log if any tables are unable to download
error_loading for (i in 1:length(locations)){
print(locations[i])
<- RODBC::sqlQuery(channel, paste0("SELECT * FROM ", locations[i], "; "))
a if (is.null(nrow(a))) { # if an error in downloading has occurred
<- c(error_loading, locations[i])
error_loading else { # if no error in downloading has occurred
} write.csv(x = a,
# change file name to be more computer file storage friendly
::here(paste0(tolower(gsub(
herepattern = '.',
replacement = "_",
x = locations[i],
fixed = TRUE)),
".csv")))
}
} error_loading
Join downloaded files into presence-only table
# Load data
library(dplyr)
library(here)
library(readr)
<- readr::read_csv(file = here::here("data/gap_products_foss_catch.csv"))[,-1] # remove "row number" column
catch <- readr::read_csv(file = here::here("data/gap_products_foss_haul.csv"))[,-1] # remove "row number" column
haul <- readr::read_csv(file = here::here("data/gap_products_foss_species.csv"))[,-1] # remove "row number" column
species
<-
dat # join haul and catch data to unique species by survey table
::left_join(haul, catch) %>%
dplyr# join species data to unique species by survey table
::left_join(species) %>%
dplyr# modify zero-filled rows
::mutate(
dplyrCPUE_KGKM2 = ifelse(is.null(CPUE_KGKM2), 0, CPUE_KGKM2), # just in case
CPUE_KGHA = CPUE_KGKM2/100, # Hectares
CPUE_NOKM2 = ifelse(is.null(CPUE_NOKM2), 0, CPUE_NOKM2), # just in case
CPUE_NOHA = CPUE_NOKM2/100, # Hectares
COUNT = ifelse(is.null(COUNT), 0, COUNT),
WEIGHT_KG = ifelse(is.null(WEIGHT_KG), 0, WEIGHT_KG) )
Join downloaded files into zero-filled table
# Load data
library(dplyr)
library(here)
library(readr)
<- readr::read_csv(file = here::here("data/gap_products_foss_catch.csv"))[,-1] # remove "row number" column
catch <- readr::read_csv(file = here::here("data/gap_products_foss_haul.csv"))[,-1] # remove "row number" column
haul <- readr::read_csv(file = here::here("data/gap_products_foss_species.csv"))[,-1] # remove "row number" column
species
# come up with full combination of what species should be listed for what hauls/surveys
# for zero-filled data, all species caught in a survey need to have zero or non-zero row entries for a haul
<- dplyr::full_join(
comb x = dplyr::left_join(catch, haul, by = "HAULJOIN") %>%
::select(SURVEY_DEFINITION_ID, SPECIES_CODE) %>%
dplyr::distinct(),
dplyry = haul %>%
::select(SURVEY_DEFINITION_ID, HAULJOIN) %>%
dplyr::distinct(),
dplyrby = "SURVEY_DEFINITION_ID",
relationship = "many-to-many"
)
# Join data to make a full zero-filled CPUE dataset
<- comb %>%
dat # add species data to unique species by survey table
::left_join(species, "SPECIES_CODE") %>%
dplyr# add catch data
::full_join(catch, c("SPECIES_CODE", "HAULJOIN")) %>%
dplyr# add haul data
::full_join(haul) %>% # , c("SURVEY_DEFINITION_ID", "HAULJOIN")
dplyr# modify zero-filled rows
::mutate(
dplyrCPUE_KGKM2 = ifelse(is.null(CPUE_KGKM2), 0, CPUE_KGKM2),
CPUE_KGHA = CPUE_KGKM2/100, # Hectares
CPUE_NOKM2 = ifelse(is.null(CPUE_NOKM2), 0, CPUE_NOKM2),
CPUE_NOHA = CPUE_NOKM2/100, # Hectares
COUNT = ifelse(is.null(COUNT), 0, COUNT),
WEIGHT_KG = ifelse(is.null(WEIGHT_KG), 0, WEIGHT_KG) )
14.0.3 Ex. Join data using Oracle
To join these tables in Oracle, you may use a variant of the following code:
SELECT
YEAR,
hh.
hh.SRVY,
hh.SURVEY,
hh.SURVEY_DEFINITION_ID,
hh.SURVEY_NAME,
hh.CRUISE,
hh.CRUISEJOIN,
hh.HAUL,
hh.HAULJOIN,
hh.STRATUM,
hh.STATION,
hh.VESSEL_ID,
hh.VESSEL_NAME,
hh.DATE_TIME,
hh.LATITUDE_DD_START,
hh.LONGITUDE_DD_START,
hh.LATITUDE_DD_END,
hh.LONGITUDE_DD_END,
hh.BOTTOM_TEMPERATURE_C,
hh.SURFACE_TEMPERATURE_C,
hh.DEPTH_M,
cc.SPECIES_CODE,
ss.ITIS,
ss.WORMS,
ss.COMMON_NAME,
ss.SCIENTIFIC_NAME,
ss.ID_RANK,CASE WHEN cc.CPUE_KGKM2 IS NULL THEN 0 ELSE cc.CPUE_KGKM2 END AS CPUE_KGKM2,
CASE WHEN cc.CPUE_NOKM2 IS NULL THEN 0 ELSE cc.CPUE_NOKM2 END AS CPUE_NOKM2,
CASE WHEN cc.COUNT IS NULL THEN 0 ELSE cc.COUNT END AS COUNT,
CASE WHEN cc.WEIGHT_KG IS NULL THEN 0 ELSE cc.WEIGHT_KG END AS WEIGHT_KG,
CASE WHEN cc.TAXON_CONFIDENCE IS NULL THEN NULL ELSE cc.TAXON_CONFIDENCE END AS TAXON_CONFIDENCE,
hh.AREA_SWEPT_KM2,
hh.DISTANCE_FISHED_KM,
hh.DURATION_HR,
hh.NET_WIDTH_M,
hh.NET_HEIGHT_M,
hh.PERFORMANCE FROM GAP_PRODUCTS.FOSS_SURVEY_SPECIES sv
FULL OUTER JOIN GAP_PRODUCTS.FOSS_SPECIES ss
ON sv.SPECIES_CODE = ss.SPECIES_CODE
FULL OUTER JOIN GAP_PRODUCTS.FOSS_HAUL hh
ON sv.SURVEY_DEFINITION_ID = hh.SURVEY_DEFINITION_ID
FULL OUTER JOIN GAP_PRODUCTS.FOSS_CATCH cc
ON sv.SPECIES_CODE = cc.SPECIES_CODE
AND hh.HAULJOIN = cc.HAULJOIN
14.0.4 Ex. Subset data
Here, we are pulling EBS Pacific cod from 2010 - 2021:
# Pull data
<- RODBC::sqlQuery(
data channel = channel,
query =
"SELECT * FROM GAP_PRODUCTS.FOSS_CATCH cc
JOIN GAP_PRODUCTS.FOSS_HAUL hh
ON cc.HAULJOIN = hh.HAULJOIN
WHERE SRVY = 'EBS'
AND SPECIES_CODE = 21720 -- 'Pacific cod'
AND YEAR >= 2010
AND YEAR < 2021")
::flextable(data[1:3,]) %>%
flextable::theme_zebra() flextable
HAULJOIN | SPECIES_CODE | CPUE_KGKM2 | CPUE_NOKM2 | COUNT | WEIGHT_KG | TAXON_CONFIDENCE | YEAR | SRVY | SURVEY | SURVEY_DEFINITION_ID | SURVEY_NAME | CRUISE | CRUISEJOIN | HAULJOIN.1 | HAUL | STRATUM | STATION | VESSEL_ID | VESSEL_NAME | DATE_TIME | LATITUDE_DD_START | LONGITUDE_DD_START | LATITUDE_DD_END | LONGITUDE_DD_END | BOTTOM_TEMPERATURE_C | SURFACE_TEMPERATURE_C | DEPTH_M | DISTANCE_FISHED_KM | DURATION_HR | NET_WIDTH_M | NET_HEIGHT_M | AREA_SWEPT_KM2 | PERFORMANCE |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
-19,461 | 21,720 | 646.8800 | 200.3345 | 10 | 32.29 | High | 2,019 | EBS | eastern Bering Sea | 98 | Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey | 201,901 | -726 | -19,461 | 197 | 41 | R-27 | 94 | VESTERAALEN | 2019-07-26 13:05:25 | 60.65452 | -174.8251 | 60.68077 | -174.8282 | 2.4 | 10.3 | 97 | 2.922 | 0.533 | 17.083 | 2.424 | 0.04991653 | 0 |
-19,446 | 21,720 | 1,212.7733 | 164.7230 | 8 | 58.90 | High | 2,019 | EBS | eastern Bering Sea | 98 | Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey | 201,901 | -727 | -19,446 | 168 | 61 | Q-28 | 162 | ALASKA KNIGHT | 2019-07-20 07:45:12 | 60.34982 | -175.3932 | 60.32608 | -175.3835 | 2.6 | 10.1 | 111 | 2.692 | 0.506 | 18.041 | 2.460 | 0.04856637 | 0 |
-19,422 | 21,720 | 313.8482 | 108.5980 | 6 | 17.34 | High | 2,019 | EBS | eastern Bering Sea | 98 | Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey | 201,901 | -726 | -19,422 | 180 | 61 | L-29 | 94 | VESTERAALEN | 2019-07-22 17:45:32 | 58.67524 | -175.5278 | 58.67691 | -175.5795 | 3.7 | 10.3 | 135 | 3.014 | 0.541 | 18.331 | 1.751 | 0.05524963 | 0 |
14.0.5 Ex. Find all species found in the eastern Bering Sea (EBS) survey in 2023
# Pull data
<- RODBC::sqlQuery(
data channel = channel,
query =
"SELECT DISTINCT
ss.COMMON_NAME,
ss.SCIENTIFIC_NAME,
ss.ID_RANK,
ss.WORMS
FROM GAP_PRODUCTS.FOSS_CATCH cc -- get species codes
LEFT JOIN GAP_PRODUCTS.FOSS_SPECIES ss -- get species info
ON cc.SPECIES_CODE = ss.SPECIES_CODE
LEFT JOIN GAP_PRODUCTS.FOSS_HAUL hh -- filter by year and survey
ON cc.HAULJOIN = hh.HAULJOIN
WHERE hh.YEAR = 2023
AND hh.SURVEY_DEFINITION_ID = 98 -- EBS survey
ORDER BY COMMON_NAME")
::flextable(data[1:3,]) %>%
flextable# flextable::fit_to_width(max_width = 6) %>%
::theme_zebra() flextable
COMMON_NAME | SCIENTIFIC_NAME | ID_RANK | WORMS |
---|---|---|---|
Alaska great-tellin | Megangulus luteus | species | 423,511 |
Alaska plaice | Pleuronectes quadrituberculatus | species | 254,564 |
Alaska skate | Arctoraja parmifera | species | 1,577,324 |