Access via Oracle and R (AFSC Staff only)

If the user has access to the AFSC Oracle database, the user can use SQL developer to view and pull the FOSS public data directly from the GAP_PRODUCTS Oracle schema.

14.0.1 Connect to Oracle from R

Many users will want to access the data from Oracle using R. The user will need to install the RODBC R package and ask OFIS (IT) connect R to Oracle. Then, use the following code in R to establish a connection from R to Oracle:

Here, the user can write in their username and password directly into the RODBC connect function. Never save usernames or passwords in scripts that may be intentionally or unintentionally shared with others. If no username and password is entered in the function, pop-ups will appear on the screen asking for the username and password.

library(gapindex)
channel <- gapindex::get_connected()

14.0.2 Ex. Wholesale download data and join data in R

locations <- c(
  "GAP_PRODUCTS.FOSS_CATCH",
  "GAP_PRODUCTS.FOSS_HAUL",
  "GAP_PRODUCTS.FOSS_SPECIES"
)

print(Sys.Date())

error_loading <- c() # log if any tables are unable to download 
for (i in 1:length(locations)){
  print(locations[i])
  a <- RODBC::sqlQuery(channel, paste0("SELECT * FROM ", locations[i], "; "))
  if (is.null(nrow(a))) { # if an error in downloading has occurred
    error_loading <- c(error_loading, locations[i])
  } else { # if no error in downloading has occurred
    write.csv(x = a, 
              # change file name to be more computer file storage friendly
              here::here(paste0(tolower(gsub(
                pattern = '.', 
                replacement = "_", 
                x = locations[i], 
                fixed = TRUE)),
                ".csv")))
  }
}
error_loading

Join downloaded files into presence-only table

# Load data
library(dplyr)
library(here)
library(readr)
catch <- readr::read_csv(file = here::here("data/gap_products_foss_catch.csv"))[,-1] # remove "row number" column
haul <- readr::read_csv(file = here::here("data/gap_products_foss_haul.csv"))[,-1] # remove "row number" column
species <- readr::read_csv(file = here::here("data/gap_products_foss_species.csv"))[,-1] # remove "row number" column

dat <- 
  # join haul and catch data to unique species by survey table
  dplyr::left_join(haul, catch) %>% 
  # join species data to unique species by survey table
  dplyr::left_join(species) %>% 
  # modify zero-filled rows
  dplyr::mutate(
    CPUE_KGKM2 = ifelse(is.null(CPUE_KGKM2), 0, CPUE_KGKM2), # just in case
    CPUE_KGHA = CPUE_KGKM2/100, # Hectares
    CPUE_NOKM2 = ifelse(is.null(CPUE_NOKM2), 0, CPUE_NOKM2), # just in case
    CPUE_NOHA = CPUE_NOKM2/100, # Hectares
    COUNT = ifelse(is.null(COUNT), 0, COUNT),
    WEIGHT_KG = ifelse(is.null(WEIGHT_KG), 0, WEIGHT_KG) )

Join downloaded files into zero-filled table

# Load data
library(dplyr)
library(here)
library(readr)
catch <- readr::read_csv(file = here::here("data/gap_products_foss_catch.csv"))[,-1] # remove "row number" column
haul <- readr::read_csv(file = here::here("data/gap_products_foss_haul.csv"))[,-1] # remove "row number" column
species <- readr::read_csv(file = here::here("data/gap_products_foss_species.csv"))[,-1] # remove "row number" column

# come up with full combination of what species should be listed for what hauls/surveys
# for zero-filled data, all species caught in a survey need to have zero or non-zero row entries for a haul
comb <- dplyr::full_join(
  x = dplyr::left_join(catch, haul, by = "HAULJOIN") %>%
    dplyr::select(SURVEY_DEFINITION_ID, SPECIES_CODE) %>%
    dplyr::distinct(),
  y = haul %>%
    dplyr::select(SURVEY_DEFINITION_ID, HAULJOIN) %>%
    dplyr::distinct(), 
  by = "SURVEY_DEFINITION_ID", 
  relationship = "many-to-many"
)

# Join data to make a full zero-filled CPUE dataset
dat <- comb %>% 
  # add species data to unique species by survey table
  dplyr::left_join(species, "SPECIES_CODE") %>% 
  # add catch data
  dplyr::full_join(catch, c("SPECIES_CODE", "HAULJOIN")) %>% 
  # add haul data
  dplyr::full_join(haul) %>% # , c("SURVEY_DEFINITION_ID", "HAULJOIN")
  # modify zero-filled rows
  dplyr::mutate(
    CPUE_KGKM2 = ifelse(is.null(CPUE_KGKM2), 0, CPUE_KGKM2),
    CPUE_KGHA = CPUE_KGKM2/100, # Hectares
    CPUE_NOKM2 = ifelse(is.null(CPUE_NOKM2), 0, CPUE_NOKM2),
    CPUE_NOHA = CPUE_NOKM2/100, # Hectares
    COUNT = ifelse(is.null(COUNT), 0, COUNT),
    WEIGHT_KG = ifelse(is.null(WEIGHT_KG), 0, WEIGHT_KG) )

14.0.3 Ex. Join data using Oracle

To join these tables in Oracle, you may use a variant of the following code:


SELECT 
hh.YEAR,
hh.SRVY,                 
hh.SURVEY,
hh.SURVEY_DEFINITION_ID,
hh.SURVEY_NAME,
hh.CRUISE,
hh.CRUISEJOIN,           
hh.HAUL,
hh.HAULJOIN,
hh.STRATUM,
hh.STATION,
hh.VESSEL_ID,
hh.VESSEL_NAME,          
hh.DATE_TIME,
hh.LATITUDE_DD_START, 
hh.LONGITUDE_DD_START, 
hh.LATITUDE_DD_END,
hh.LONGITUDE_DD_END, 
hh.BOTTOM_TEMPERATURE_C,
hh.SURFACE_TEMPERATURE_C,
hh.DEPTH_M,
cc.SPECIES_CODE,
ss.ITIS,
ss.WORMS,
ss.COMMON_NAME,     
ss.SCIENTIFIC_NAME,
ss.ID_RANK,
CASE WHEN cc.CPUE_KGKM2 IS NULL THEN 0 ELSE cc.CPUE_KGKM2 END AS CPUE_KGKM2,
CASE WHEN cc.CPUE_NOKM2 IS NULL THEN 0 ELSE cc.CPUE_NOKM2 END AS CPUE_NOKM2,
CASE WHEN cc.COUNT IS NULL THEN 0 ELSE cc.COUNT END AS COUNT,
CASE WHEN cc.WEIGHT_KG IS NULL THEN 0 ELSE cc.WEIGHT_KG END AS WEIGHT_KG,
CASE WHEN cc.TAXON_CONFIDENCE IS NULL THEN NULL ELSE cc.TAXON_CONFIDENCE END AS TAXON_CONFIDENCE,
hh.AREA_SWEPT_KM2,       
hh.DISTANCE_FISHED_KM,
hh.DURATION_HR,          
hh.NET_WIDTH_M,
hh.NET_HEIGHT_M,
hh.PERFORMANCE 
FROM GAP_PRODUCTS.FOSS_SURVEY_SPECIES sv
FULL OUTER JOIN GAP_PRODUCTS.FOSS_SPECIES ss
ON sv.SPECIES_CODE = ss.SPECIES_CODE
FULL OUTER JOIN GAP_PRODUCTS.FOSS_HAUL hh
ON sv.SURVEY_DEFINITION_ID = hh.SURVEY_DEFINITION_ID
FULL OUTER JOIN GAP_PRODUCTS.FOSS_CATCH cc
ON sv.SPECIES_CODE = cc.SPECIES_CODE
AND hh.HAULJOIN = cc.HAULJOIN

14.0.4 Ex. Subset data

Here, we are pulling EBS Pacific cod from 2010 - 2021:

# Pull data
data <- RODBC::sqlQuery(
channel = channel, 
query = 
"SELECT * FROM GAP_PRODUCTS.FOSS_CATCH cc
JOIN GAP_PRODUCTS.FOSS_HAUL hh
ON cc.HAULJOIN = hh.HAULJOIN
WHERE SRVY = 'EBS' 
AND SPECIES_CODE = 21720 -- 'Pacific cod' 
AND YEAR >= 2010 
AND YEAR < 2021")

flextable::flextable(data[1:3,]) %>% 
  flextable::theme_zebra()

HAULJOIN	SPECIES_CODE	CPUE_KGKM2	CPUE_NOKM2	COUNT	WEIGHT_KG	TAXON_CONFIDENCE	YEAR	SRVY	SURVEY	SURVEY_DEFINITION_ID	SURVEY_NAME	CRUISE	CRUISEJOIN	HAULJOIN.1	HAUL	STRATUM	STATION	VESSEL_ID	VESSEL_NAME	DATE_TIME	LATITUDE_DD_START	LONGITUDE_DD_START	LATITUDE_DD_END	LONGITUDE_DD_END	BOTTOM_TEMPERATURE_C	SURFACE_TEMPERATURE_C	DEPTH_M	DISTANCE_FISHED_KM	DURATION_HR	NET_WIDTH_M	NET_HEIGHT_M	AREA_SWEPT_KM2
-19,461	21,720	646.8800	200.3345	10	32.29	High	2,019	EBS	eastern Bering Sea	98	Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey	201,901	-726	-19,461	197	41	R-27	94	VESTERAALEN	2019-07-26 13:05:25	60.65452	-174.8251	60.68077	-174.8282	2.4	10.3	97	2.922	0.533	17.083	2.424	0.04991653
-19,446	21,720	1,212.7733	164.7230	8	58.90	High	2,019	EBS	eastern Bering Sea	98	Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey	201,901	-727	-19,446	168	61	Q-28	162	ALASKA KNIGHT	2019-07-20 07:45:12	60.34982	-175.3932	60.32608	-175.3835	2.6	10.1	111	2.692	0.506	18.041	2.460	0.04856637
-19,422	21,720	313.8482	108.5980	6	17.34	High	2,019	EBS	eastern Bering Sea	98	Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey	201,901	-726	-19,422	180	61	L-29	94	VESTERAALEN	2019-07-22 17:45:32	58.67524	-175.5278	58.67691	-175.5795	3.7	10.3	135	3.014	0.541	18.331	1.751	0.05524963

14.0.5 Ex. Find all species found in the eastern Bering Sea (EBS) survey in 2023

# Pull data
data <- RODBC::sqlQuery(
channel = channel, 
query = 
"SELECT DISTINCT 
ss.COMMON_NAME,
ss.SCIENTIFIC_NAME, 
ss.ID_RANK, 
ss.WORMS
FROM GAP_PRODUCTS.FOSS_CATCH cc -- get species codes
LEFT JOIN GAP_PRODUCTS.FOSS_SPECIES ss -- get species info
ON cc.SPECIES_CODE = ss.SPECIES_CODE
LEFT JOIN GAP_PRODUCTS.FOSS_HAUL hh -- filter by year and survey
ON cc.HAULJOIN = hh.HAULJOIN
WHERE hh.YEAR = 2023
AND hh.SURVEY_DEFINITION_ID = 98 -- EBS survey
ORDER BY COMMON_NAME")

flextable::flextable(data[1:3,]) %>% 
  # flextable::fit_to_width(max_width = 6) %>% 
  flextable::theme_zebra()

COMMON_NAME	SCIENTIFIC_NAME	ID_RANK	WORMS
Alaska great-tellin	Megangulus luteus	species	423,511
Alaska plaice	Pleuronectes quadrituberculatus	species	254,564
Alaska skate	Arctoraja parmifera	species	1,577,324