Access via Oracle and R (AFSC only)

If the user has access to the AFSC Oracle database, the user can use SQL developer to view and pull the FOSS public data directly from the GAP_PRODUCTS Oracle schema.

13.0.1 Connect to Oracle from R

Many users will want to access the data from Oracle using R. The user will need to install the RODBC R package and ask OFIS (IT) connect R to Oracle. Then, use the following code in R to establish a connection from R to Oracle:

Here, the user can write in their username and password directly into the RODBC connect function. Never save usernames or passwords in scripts that may be intentionally or unintentionally shared with others. If no username and password is entered in the function, pop-ups will appear on the screen asking for the username and password.

library(gapindex)
channel <- gapindex::get_connected()

13.0.2 Ex. Wholesale download data and join data in R

locations <- c(
  "GAP_PRODUCTS.FOSS_CATCH",
  "GAP_PRODUCTS.FOSS_HAUL",
  "GAP_PRODUCTS.FOSS_SPECIES"
)

print(Sys.Date())

error_loading <- c() # log if any tables are unable to download 
for (i in 1:length(locations)){
  print(locations[i])
  a <- RODBC::sqlQuery(channel, paste0("SELECT * FROM ", locations[i], "; "))
  if (is.null(nrow(a))) { # if an error in downloading has occurred
    error_loading <- c(error_loading, locations[i])
  } else { # if no error in downloading has occurred
    write.csv(x = a, 
              # change file name to be more computer file storage friendly
              here::here(paste0(tolower(gsub(
                pattern = '.', 
                replacement = "_", 
                x = locations[i], 
                fixed = TRUE)),
                ".csv")))
  }
}
error_loading

Join downloaded files into presence-only table

# Load data
library(dplyr)
library(here)
library(readr)
catch <- readr::read_csv(file = here::here("data/gap_products_foss_catch.csv"))[,-1] # remove "row number" column
haul <- readr::read_csv(file = here::here("data/gap_products_foss_haul.csv"))[,-1] # remove "row number" column
species <- readr::read_csv(file = here::here("data/gap_products_foss_species.csv"))[,-1] # remove "row number" column

dat <- 
  # join haul and catch data to unique species by survey table
  dplyr::left_join(haul, catch) %>% 
  # join species data to unique species by survey table
  dplyr::left_join(species) %>% 
  # modify zero-filled rows
  dplyr::mutate(
    CPUE_KGKM2 = ifelse(is.null(CPUE_KGKM2), 0, CPUE_KGKM2), # just in case
    CPUE_KGHA = CPUE_KGKM2/100, # Hectares
    CPUE_NOKM2 = ifelse(is.null(CPUE_NOKM2), 0, CPUE_NOKM2), # just in case
    CPUE_NOHA = CPUE_NOKM2/100, # Hectares
    COUNT = ifelse(is.null(COUNT), 0, COUNT),
    WEIGHT_KG = ifelse(is.null(WEIGHT_KG), 0, WEIGHT_KG) )

Join downloaded files into zero-filled table

# Load data
library(dplyr)
library(here)
library(readr)
catch <- readr::read_csv(file = here::here("data/gap_products_foss_catch.csv"))[,-1] # remove "row number" column
haul <- readr::read_csv(file = here::here("data/gap_products_foss_haul.csv"))[,-1] # remove "row number" column
species <- readr::read_csv(file = here::here("data/gap_products_foss_species.csv"))[,-1] # remove "row number" column

# come up with full combination of what species should be listed for what hauls/surveys
# for zero-filled data, all species caught in a survey need to have zero or non-zero row entries for a haul
comb <- dplyr::full_join(
  x = dplyr::left_join(catch, haul, by = "HAULJOIN") %>%
    dplyr::select(SURVEY_DEFINITION_ID, SPECIES_CODE) %>%
    dplyr::distinct(),
  y = haul %>%
    dplyr::select(SURVEY_DEFINITION_ID, HAULJOIN) %>%
    dplyr::distinct(), 
  by = "SURVEY_DEFINITION_ID", 
  relationship = "many-to-many"
)

# Join data to make a full zero-filled CPUE dataset
dat <- comb %>% 
  # add species data to unique species by survey table
  dplyr::left_join(species, "SPECIES_CODE") %>% 
  # add catch data
  dplyr::full_join(catch, c("SPECIES_CODE", "HAULJOIN")) %>% 
  # add haul data
  dplyr::full_join(haul) %>% # , c("SURVEY_DEFINITION_ID", "HAULJOIN")
  # modify zero-filled rows
  dplyr::mutate(
    CPUE_KGKM2 = ifelse(is.null(CPUE_KGKM2), 0, CPUE_KGKM2),
    CPUE_KGHA = CPUE_KGKM2/100, # Hectares
    CPUE_NOKM2 = ifelse(is.null(CPUE_NOKM2), 0, CPUE_NOKM2),
    CPUE_NOHA = CPUE_NOKM2/100, # Hectares
    COUNT = ifelse(is.null(COUNT), 0, COUNT),
    WEIGHT_KG = ifelse(is.null(WEIGHT_KG), 0, WEIGHT_KG) ) 

13.0.3 Ex. Join data using Oracle

To join these tables in Oracle, you may use a variant of the following code:


SELECT 
hh.YEAR,
hh.SRVY,                 
hh.SURVEY,
hh.SURVEY_DEFINITION_ID,
hh.SURVEY_NAME,
hh.CRUISE,
hh.CRUISEJOIN,           
hh.HAUL,
hh.HAULJOIN,
hh.STRATUM,
hh.STATION,
hh.VESSEL_ID,
hh.VESSEL_NAME,          
hh.DATE_TIME,
hh.LATITUDE_DD_START, 
hh.LONGITUDE_DD_START, 
hh.LATITUDE_DD_END,
hh.LONGITUDE_DD_END, 
hh.BOTTOM_TEMPERATURE_C,
hh.SURFACE_TEMPERATURE_C,
hh.DEPTH_M,
cc.SPECIES_CODE,
ss.ITIS,
ss.WORMS,
ss.COMMON_NAME,     
ss.SCIENTIFIC_NAME,
ss.ID_RANK,
CASE WHEN cc.CPUE_KGKM2 IS NULL THEN 0 ELSE cc.CPUE_KGKM2 END AS CPUE_KGKM2,
CASE WHEN cc.CPUE_NOKM2 IS NULL THEN 0 ELSE cc.CPUE_NOKM2 END AS CPUE_NOKM2,
CASE WHEN cc.COUNT IS NULL THEN 0 ELSE cc.COUNT END AS COUNT,
CASE WHEN cc.WEIGHT_KG IS NULL THEN 0 ELSE cc.WEIGHT_KG END AS WEIGHT_KG,
CASE WHEN cc.TAXON_CONFIDENCE IS NULL THEN NULL ELSE cc.TAXON_CONFIDENCE END AS TAXON_CONFIDENCE,
hh.AREA_SWEPT_KM2,       
hh.DISTANCE_FISHED_KM,
hh.DURATION_HR,          
hh.NET_WIDTH_M,
hh.NET_HEIGHT_M,
hh.PERFORMANCE 
FROM GAP_PRODUCTS.FOSS_SURVEY_SPECIES sv
FULL OUTER JOIN GAP_PRODUCTS.FOSS_SPECIES ss
ON sv.SPECIES_CODE = ss.SPECIES_CODE
FULL OUTER JOIN GAP_PRODUCTS.FOSS_HAUL hh
ON sv.SURVEY_DEFINITION_ID = hh.SURVEY_DEFINITION_ID
FULL OUTER JOIN GAP_PRODUCTS.FOSS_CATCH cc
ON sv.SPECIES_CODE = cc.SPECIES_CODE
AND hh.HAULJOIN = cc.HAULJOIN

13.0.4 Ex. Subset data

Here, we are pulling EBS Pacific cod from 2010 - 2021:

# Pull data
data <- RODBC::sqlQuery(
channel = channel, 
query = 
"SELECT * FROM GAP_PRODUCTS.FOSS_CATCH cc
JOIN GAP_PRODUCTS.FOSS_HAUL hh
ON cc.HAULJOIN = hh.HAULJOIN
WHERE SRVY = 'EBS' 
AND SPECIES_CODE = 21720 -- 'Pacific cod' 
AND YEAR >= 2010 
AND YEAR < 2021")

flextable::flextable(data[1:3,]) %>% 
  flextable::theme_zebra() 

HAULJOIN

SPECIES_CODE

CPUE_KGKM2

CPUE_NOKM2

COUNT

WEIGHT_KG

TAXON_CONFIDENCE

YEAR

SRVY

SURVEY

SURVEY_DEFINITION_ID

SURVEY_NAME

CRUISE

CRUISEJOIN

HAULJOIN.1

HAUL

STRATUM

STATION

VESSEL_ID

VESSEL_NAME

DATE_TIME

LATITUDE_DD_START

LONGITUDE_DD_START

LATITUDE_DD_END

LONGITUDE_DD_END

BOTTOM_TEMPERATURE_C

SURFACE_TEMPERATURE_C

DEPTH_M

DISTANCE_FISHED_KM

DURATION_HR

NET_WIDTH_M

NET_HEIGHT_M

AREA_SWEPT_KM2

PERFORMANCE

-19,288

21,720

449.8301

1,876.1759

83

19.90

1

2,019

EBS

eastern Bering Sea

98

Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey

201,901

-727

-19,288

96

20

O-18

162

ALASKA KNIGHT

2019-06-29 06:54:00

59.68079

-168.6144

59.65546

-168.6178

5.1

7.8

39

2.821

0.505

15.682

2.227

0.044239

0

-19,252

21,720

413.4828

248.0897

12

20.00

1

2,019

EBS

eastern Bering Sea

98

Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey

201,901

-727

-19,252

76

31

G-03

162

ALASKA KNIGHT

2019-06-24 15:52:02

57.01591

-166.4752

56.99137

-166.4601

4.1

9.7

74

2.880

0.528

16.795

2.126

0.048370

0

-18,731

21,720

946.3481

2,592.1327

118

43.08

1

2,019

EBS

eastern Bering Sea

98

Eastern Bering Sea Crab/Groundfish Bottom Trawl Survey

201,901

-726

-18,731

11

31

I-13

94

VESTERAALEN

2019-06-04 13:15:57

57.69052

-160.2580

57.66518

-160.2640

5.5

7.5

54

2.840

0.520

16.029

2.200

0.045522

0

13.0.5 Ex. Find all species found in the eastern Bering Sea (EBS) survey in 2023

# Pull data
data <- RODBC::sqlQuery(
channel = channel, 
query = 
"SELECT DISTINCT 
ss.COMMON_NAME,
ss.SCIENTIFIC_NAME, 
ss.ID_RANK, 
ss.WORMS
FROM GAP_PRODUCTS.FOSS_CATCH cc -- get species codes
LEFT JOIN GAP_PRODUCTS.FOSS_SPECIES ss -- get species info
ON cc.SPECIES_CODE = ss.SPECIES_CODE
LEFT JOIN GAP_PRODUCTS.FOSS_HAUL hh -- filter by year and survey
ON cc.HAULJOIN = hh.HAULJOIN
WHERE hh.YEAR = 2023
AND hh.SURVEY_DEFINITION_ID = 98 -- EBS survey
ORDER BY COMMON_NAME")

flextable::flextable(data[1:3,]) %>% 
  # flextable::fit_to_width(max_width = 6) %>% 
  flextable::theme_zebra() 

COMMON_NAME

SCIENTIFIC_NAME

ID_RANK

WORMS

Alaska great-tellin

Megangulus luteus

species

423,511

Alaska plaice

Pleuronectes quadrituberculatus

species

254,564

Alaska razor

Siliqua alta

species

413,689