library(gapindex)
channel <- gapindex::get_connected()Access via Oracle and R (AFSC Staff only)
If the user has access to the AFSC Oracle database, the user can use SQL developer to view and pull the FOSS public data directly from the GAP_PRODUCTS Oracle schema.
14.0.1 Connect to Oracle from R
Many users will want to access the data from Oracle using R. The user will need to install the RODBC R package and ask OFIS (IT) connect R to Oracle. Then, use the following code in R to establish a connection from R to Oracle:
Here, the user can write in their username and password directly into the RODBC connect function. Never save usernames or passwords in scripts that may be intentionally or unintentionally shared with others. If no username and password is entered in the function, pop-ups will appear on the screen asking for the username and password.
14.0.2 Ex. Wholesale download data and join data in R
locations <- c(
"GAP_PRODUCTS.FOSS_CATCH",
"GAP_PRODUCTS.FOSS_HAUL",
"GAP_PRODUCTS.FOSS_SPECIES"
)
print(Sys.Date())
error_loading <- c() # log if any tables are unable to download
for (i in 1:length(locations)){
print(locations[i])
a <- RODBC::sqlQuery(channel, paste0("SELECT * FROM ", locations[i], "; "))
if (is.null(nrow(a))) { # if an error in downloading has occurred
error_loading <- c(error_loading, locations[i])
} else { # if no error in downloading has occurred
write.csv(x = a,
# change file name to be more computer file storage friendly
here::here(paste0(tolower(gsub(
pattern = '.',
replacement = "_",
x = locations[i],
fixed = TRUE)),
".csv")))
}
}
error_loadingJoin downloaded files into presence-only table
# Load data
library(dplyr)
library(here)
library(readr)
catch <- readr::read_csv(file = here::here("data/gap_products_foss_catch.csv"))[,-1] # remove "row number" column
haul <- readr::read_csv(file = here::here("data/gap_products_foss_haul.csv"))[,-1] # remove "row number" column
species <- readr::read_csv(file = here::here("data/gap_products_foss_species.csv"))[,-1] # remove "row number" column
dat <-
# join haul and catch data to unique species by survey table
dplyr::left_join(haul, catch) |>
# join species data to unique species by survey table
dplyr::left_join(species) |>
# modify zero-filled rows
dplyr::mutate(
CPUE_KGKM2 = ifelse(is.null(CPUE_KGKM2), 0, CPUE_KGKM2), # just in case
CPUE_KGHA = CPUE_KGKM2/100, # Hectares
CPUE_NOKM2 = ifelse(is.null(CPUE_NOKM2), 0, CPUE_NOKM2), # just in case
CPUE_NOHA = CPUE_NOKM2/100, # Hectares
COUNT = ifelse(is.null(COUNT), 0, COUNT),
WEIGHT_KG = ifelse(is.null(WEIGHT_KG), 0, WEIGHT_KG) )Join downloaded files into zero-filled table
# Load data
library(dplyr)
library(here)
library(readr)
catch <- readr::read_csv(file = here::here("data/gap_products_foss_catch.csv"))[,-1] # remove "row number" column
haul <- readr::read_csv(file = here::here("data/gap_products_foss_haul.csv"))[,-1] # remove "row number" column
species <- readr::read_csv(file = here::here("data/gap_products_foss_species.csv"))[,-1] # remove "row number" column
# come up with full combination of what species should be listed for what hauls/surveys
# for zero-filled data, all species caught in a survey need to have zero or non-zero row entries for a haul
comb <- dplyr::full_join(
x = dplyr::left_join(catch, haul, by = "HAULJOIN") |>
dplyr::select(SURVEY_DEFINITION_ID, SPECIES_CODE) |>
dplyr::distinct(),
y = haul |>
dplyr::select(SURVEY_DEFINITION_ID, HAULJOIN) |>
dplyr::distinct(),
by = "SURVEY_DEFINITION_ID",
relationship = "many-to-many"
)
# Join data to make a full zero-filled CPUE dataset
dat <- comb |>
# add species data to unique species by survey table
dplyr::left_join(species, "SPECIES_CODE") |>
# add catch data
dplyr::full_join(catch, c("SPECIES_CODE", "HAULJOIN")) |>
# add haul data
dplyr::full_join(haul) |> # , c("SURVEY_DEFINITION_ID", "HAULJOIN")
# modify zero-filled rows
dplyr::mutate(
CPUE_KGKM2 = ifelse(is.null(CPUE_KGKM2), 0, CPUE_KGKM2),
CPUE_KGHA = CPUE_KGKM2/100, # Hectares
CPUE_NOKM2 = ifelse(is.null(CPUE_NOKM2), 0, CPUE_NOKM2),
CPUE_NOHA = CPUE_NOKM2/100, # Hectares
COUNT = ifelse(is.null(COUNT), 0, COUNT),
WEIGHT_KG = ifelse(is.null(WEIGHT_KG), 0, WEIGHT_KG) ) 14.0.3 Ex. Join data using Oracle
To join these tables in Oracle, you may use a variant of the following code:
SELECT
hh.YEAR,
hh.SRVY,
hh.SURVEY,
hh.SURVEY_DEFINITION_ID,
hh.SURVEY_NAME,
hh.CRUISE,
hh.CRUISEJOIN,
hh.HAUL,
hh.HAULJOIN,
hh.STRATUM,
hh.STATION,
hh.VESSEL_ID,
hh.VESSEL_NAME,
hh.DATE_TIME,
hh.LATITUDE_DD_START,
hh.LONGITUDE_DD_START,
hh.LATITUDE_DD_END,
hh.LONGITUDE_DD_END,
hh.BOTTOM_TEMPERATURE_C,
hh.SURFACE_TEMPERATURE_C,
hh.DEPTH_M,
cc.SPECIES_CODE,
ss.ITIS,
ss.WORMS,
ss.COMMON_NAME,
ss.SCIENTIFIC_NAME,
ss.ID_RANK,
CASE WHEN cc.CPUE_KGKM2 IS NULL THEN 0 ELSE cc.CPUE_KGKM2 END AS CPUE_KGKM2,
CASE WHEN cc.CPUE_NOKM2 IS NULL THEN 0 ELSE cc.CPUE_NOKM2 END AS CPUE_NOKM2,
CASE WHEN cc.COUNT IS NULL THEN 0 ELSE cc.COUNT END AS COUNT,
CASE WHEN cc.WEIGHT_KG IS NULL THEN 0 ELSE cc.WEIGHT_KG END AS WEIGHT_KG,
CASE WHEN cc.TAXON_CONFIDENCE IS NULL THEN NULL ELSE cc.TAXON_CONFIDENCE END AS TAXON_CONFIDENCE,
hh.AREA_SWEPT_KM2,
hh.DISTANCE_FISHED_KM,
hh.DURATION_HR,
hh.NET_WIDTH_M,
hh.NET_HEIGHT_M,
hh.PERFORMANCE
FROM GAP_PRODUCTS.FOSS_SURVEY_SPECIES sv
FULL OUTER JOIN GAP_PRODUCTS.FOSS_SPECIES ss
ON sv.SPECIES_CODE = ss.SPECIES_CODE
FULL OUTER JOIN GAP_PRODUCTS.FOSS_HAUL hh
ON sv.SURVEY_DEFINITION_ID = hh.SURVEY_DEFINITION_ID
FULL OUTER JOIN GAP_PRODUCTS.FOSS_CATCH cc
ON sv.SPECIES_CODE = cc.SPECIES_CODE
AND hh.HAULJOIN = cc.HAULJOIN14.0.4 Ex. Subset data
Here, we are pulling EBS Pacific cod from 2010 - 2021:
# Pull data
data <- RODBC::sqlQuery(
channel = channel,
query =
"SELECT * FROM GAP_PRODUCTS.FOSS_CATCH cc
JOIN GAP_PRODUCTS.FOSS_HAUL hh
ON cc.HAULJOIN = hh.HAULJOIN
WHERE SRVY = 'EBS'
AND SPECIES_CODE = 21720 -- 'Pacific cod'
AND YEAR >= 2010
AND YEAR < 2021")
flextable::flextable(data[1:3,]) |>
flextable::theme_zebra() HAULJOIN | SPECIES_CODE | CPUE_KGKM2 | CPUE_NOKM2 | COUNT | WEIGHT_KG | TAXON_CONFIDENCE | YEAR | SRVY | SURVEY | SURVEY_DEFINITION_ID | SURVEY_NAME | CRUISE | CRUISEJOIN | HAULJOIN.1 | HAUL | STRATUM | STATION | VESSEL_ID | VESSEL_NAME | DATE_TIME | LATITUDE_DD_START | LONGITUDE_DD_START | LATITUDE_DD_END | LONGITUDE_DD_END | BOTTOM_TEMPERATURE_C | SURFACE_TEMPERATURE_C | DEPTH_M | DISTANCE_FISHED_KM | DURATION_HR | NET_WIDTH_M | NET_HEIGHT_M | AREA_SWEPT_KM2 | PERFORMANCE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
-6,361 | 21,720 | 1,733.7417 | 2,910.6057 | 107 | 63.736 | High | 2,010 | EBS | eastern Bering Sea | 98 | Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey | 201,001 | -658 | -6,361 | 2 | 10 | H-16 | 89 | ALDEBARAN | 2010-06-07 08:17:51 | 57.32308 | -158.4166 | 57.34365 | -158.3987 | 2.9 | 4.4 | 32 | 2.532 | 0.458 | 14.519 | 2.407 | 0.03676211 | 1.12 |
-6,362 | 21,720 | 4,048.8473 | 1,182.5116 | 49 | 167.773 | High | 2,010 | EBS | eastern Bering Sea | 98 | Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey | 201,001 | -658 | -6,362 | 3 | 10 | I-16 | 89 | ALDEBARAN | 2010-06-07 10:49:21 | 57.65588 | -158.3651 | 57.68152 | -158.3637 | 2.4 | 2.9 | 37 | 2.854 | 0.523 | 14.519 | 2.117 | 0.04143723 | 0.00 |
-6,363 | 21,720 | 964.1577 | 288.0806 | 12 | 40.162 | High | 2,010 | EBS | eastern Bering Sea | 98 | Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey | 201,001 | -658 | -6,363 | 4 | 10 | J-16 | 89 | ALDEBARAN | 2010-06-07 13:32:46 | 57.98402 | -158.3283 | 58.00947 | -158.3353 | 1.9 | 2.2 | 35 | 2.869 | 0.521 | 14.519 | 1.933 | 0.04165501 | 0.00 |
14.0.5 Ex. Find all species found in the eastern Bering Sea (EBS) survey in 2023
# Pull data
data <- RODBC::sqlQuery(
channel = channel,
query =
"SELECT DISTINCT
ss.COMMON_NAME,
ss.SCIENTIFIC_NAME,
ss.ID_RANK,
ss.WORMS
FROM GAP_PRODUCTS.FOSS_CATCH cc -- get species codes
LEFT JOIN GAP_PRODUCTS.FOSS_SPECIES ss -- get species info
ON cc.SPECIES_CODE = ss.SPECIES_CODE
LEFT JOIN GAP_PRODUCTS.FOSS_HAUL hh -- filter by year and survey
ON cc.HAULJOIN = hh.HAULJOIN
WHERE hh.YEAR = 2023
AND hh.SURVEY_DEFINITION_ID = 98 -- EBS survey
ORDER BY COMMON_NAME")
flextable::flextable(data[1:3,]) |>
# flextable::fit_to_width(max_width = 6) |>
flextable::theme_zebra() COMMON_NAME | SCIENTIFIC_NAME | ID_RANK | WORMS |
|---|---|---|---|
Alaska great-tellin | Megangulus luteus | species | 423,511 |
Alaska plaice | Pleuronectes quadrituberculatus | species | 254,564 |
Alaska skate | Arctoraja parmifera | species | 1,577,324 |