library(gargle)
library(googledrive)
# googledrive::drive_auth(path="/etc/sa_key.json") # connect to default account
# Connect to google drive using your (probably NOAA) email
# Ask for the Google account to authenticate with; the answer is passed as
# the `email` argument of credentials_user_oauth2() below.
gdrive_email <- rstudioapi::showPrompt(
  title = "Email",
  message = "Email for Google Drive",
  default = ""
)

# Authenticate googledrive with a user OAuth2 token limited to the Drive scope.
drive_auth(
  token = credentials_user_oauth2(
    scopes = "https://www.googleapis.com/auth/drive",
    email = gdrive_email
  )
)

drive_user() # check user account
GCP Instructions
Introduction
Before beginning this process, please check with Sophia Wassermann, Emily Markowitz, or OFIS to confirm that a computing instance and an Rstudio server have been set up for your username.
Resources:
- Here are some helpful slides
- R Script for testing GCP (also included below)
Initial Setup
Connecting to your node
Because usage of GCP is metered, you will need to start and, importantly, stop your node every time you want to use it.
- Navigate to the AFSC dev server on GCP.
The first time you navigate to Google Cloud and connect to your node, there will be many authorization windows. Please accept them. Make sure you are connecting via your NOAA account.
You should be in the ‘Instances’ pane, with a list of nodes associated with people’s names. Click on the check box in the line associated with your username.
Press START / RESUME in the blue menu bar above the list of nodes. After a moment, the status icon for the node will be a green circle with a checkmark instead of a gray circle with a square.
In a text editor, copy in the following connection code, substituting `[YOUR-NAME]-sdm-node` with the server (node) name that has been configured for you. Do not include the brackets around your name (it should be the same as the name of your node in GCP).
gcloud compute ssh --ssh-flag="-4 -L 8787:localhost:8787" [YOUR-NAME]-sdm-node --project=ggn-nmfs-afscdsm-dev-1 --zone=us-east4-c --tunnel-through-iap
e.g.,
gcloud compute ssh --ssh-flag="-4 -L 8787:localhost:8787" sophia-wassermann-sdm-node --project=ggn-nmfs-afscdsm-dev-1 --zone=us-east4-c --tunnel-through-iap
We recommend saving this connection string to a text file on your local computer. You will need to use it every time you connect to your instance.
- You will use the ‘google cloud shell’ for the following steps. This may load automatically as a black box across the bottom of the window (as similarly seen in the above screenshots), or you may need to click on the ‘Activate Cloud Shell’ button in the menu bar at the top right. It is the square with symbols inside. You may also need to authorize the cloud shell by pressing ‘Authorize’.
- Copy the connection code from Step 5 into the cloud shell.
- The first time you connect, you will need to create an ssh key pair. Follow the prompts. You do not need to create a passphrase (can press ‘enter/return’ without entering anything).
- When you are done with your session, make sure to turn off the instance by pressing ‘STOP’ in the blue menu bar. The connection to the command line and Rstudio will be terminated and the status icon will return to the gray square. This is very important for keeping operating costs reasonable.
Connecting to Rstudio Server
These instances have been built with a container image on top of rocker:rstudio that comes preconfigured with packages to run tinyVAST and sdmTMB workloads and to manage data ingress and egress through Oracle and Google Drive. This means that all further setup and operations are conducted from inside an Rstudio Server. Connecting is very easy, as the basic requirements and connections have already been set up by OFIS.
To connect:
Once you have connected to your instance through step 8 above, open a new tab in your browser and navigate to https://ssh.cloud.google.com/devshell/proxy?authuser=0&port=8787 . You can bookmark this page for future reference.
Log in using the username and password ‘rstudio’.
You should now be able to use Rstudio as you would on your local machine.
Configuring SSH for Github
Configuring an SSH key for your instance is required to push to github. It is possible to clone repositories using https, but you will not be able to push any changes. You will need to do the following in the Terminal within Rstudio Server.
- Switch to the Terminal tab at the bottom of the Rstudio Server window. Generate a new SSH key and add it to the ssh-agent, following the instructions for Linux on github. You do not need to specify a ‘file in which to save the key’ or a passphrase. Paste in the following text, replacing the email address with your Github email address:
ssh-keygen -t ed25519 -C "your_email@example.com"
[Enter]
[Enter]
eval "$(ssh-agent -s)"
ssh-add ~/.ssh/id_ed25519
Once you have created the key and added it to the ssh-agent, follow the github instructions for adding a new SSH key to your github account, again following the Linux instructions.
When you’re prompted to “Enter a file in which to save the key”, press Enter to accept the default file location. You also do not need to enter a passphrase (can press ‘Enter’ to not use one, and then press ‘Enter’ again to confirm).
[Enter]
[Enter]
- In the RStudio Terminal, copy the SSH public key to your clipboard with the following. Select and copy what is printed.
cat ~/.ssh/id_ed25519.pub
- On GitHub, click on your profile photo at the upper-right corner of the page and click ‘Settings’ in the menu.
- In the ‘Access’ section of the sidebar, click on ‘SSH and GPG keys’.
- Click New SSH key or Add SSH key. Make sure to use an informative title, such as `gcp-rstudio-20250507`. In the ‘Key’ field, paste your public key. Click ‘Add SSH Key’.
- Back in the Rstudio Server terminal, clone the github repository using SSH, using the command `git clone`. Make sure you are in the directory where you want the repo to be cloned; it will default to your ‘home’ directory, which will be fine for most circumstances. If you have created a folder within which you would like the repo to live, you can navigate inside of it with the command `cd`, followed by the directory name, in the Rstudio Server terminal. The address for a repository can be copied from its github page if you click the green `<> Code` button and select SSH.
e.g., git clone git@github.com:afsc-gap-products/model-based-indices.git
- To connect the git integration in Rstudio with your cloned repository, you need to create an Rstudio Project associated with it. Click on the File menu in Rstudio Server -> ‘New Project’ -> ‘Existing Directory’ -> browse to the cloned repo.
Configuring Google Drive
Each instance is set up with a connection to a Google Drive associated with a unique email account. To connect the instance to your google account, you will need to run the Google Drive authentication code (the snippet beginning with `library(gargle)` at the top of this document) to authenticate your credentials. The code will prompt you to provide your email address for Google Drive (which is likely your NOAA email).
Testing
Below are some code snippets for testing the basic function of the instance and its installed packages.
Oracle connection
You will need an oracle account that has access to the AFSC schemas to use the following code as-written. To streamline the process, you can save a file to your instance that contains your username and password. I created an R script in my home directory on the instance (`~/oracle_credentials.R`, the path the connection code below checks for) with the content:
# Contents of ~/oracle_credentials.R. The variable names must match the ones
# the connection code reads: `oracle_user` and `oracle_pw`.
oracle_user <- "USERNAME"
oracle_pw <- "password"
If you prefer to type your username and password in when accessing Oracle, the following code will prompt you for the information when needed. OFIS has configured two methods for connecting to Oracle. Both are presented below, but only one is necessary to connect.
# Load Oracle credentials from a saved script when it exists; otherwise ask
# for them interactively. Either path must define `oracle_user` and
# `oracle_pw`, which are used to build the connection string below.
if (file.exists("~/oracle_credentials.R")) {
  source("~/oracle_credentials.R")
} else {
  oracle_user <- rstudioapi::showPrompt(
    title = "Username",
    message = "Oracle Username",
    default = ""
  )
  # askForPassword() masks the typed value instead of echoing it on screen.
  oracle_pw <- rstudioapi::askForPassword(prompt = "Oracle Password")
}

# Both connection methods take the same ODBC connection string, so build it once.
oracle_conn_string <- paste0(
  "Driver=/opt/oracle/instantclient_12_2/libsqora.so.12.1;DBQ=raja.afsc.noaa.gov:1521/afsc;UID=",
  oracle_user, ";PWD=", oracle_pw
)

# Two different options for connecting to Oracle (only one is necessary)
# Option 1: RODBC
channel <- RODBC::odbcDriverConnect(
  connection = oracle_conn_string,
  rows_at_time = 1
)

# Option 2: DBI + odbc
con <- DBI::dbConnect(
  odbc::odbc(),
  .connection_string = oracle_conn_string
)
sdmTMB
If you will be working with sdmTMB, run the following test from their website.
library(sdmTMB)
# Build a mesh for the `pcod` example data from its X/Y columns.
mesh <- make_mesh(pcod, xy_cols = c("X", "Y"), cutoff = 10)

# Fit a spatial model of density as a smooth function of depth.
fit <- sdmTMB(
  density ~ s(depth),
  data = pcod,
  mesh = mesh,
  family = tweedie(link = "log"),
  spatial = "on"
)

fit # if this prints, everything should be working!
tinyVAST
This test takes a few minutes to run. It is based on the age composition expansion vignette.
library(tinyVAST)
library(fmesher)
library(sf)
# Load and prepare the Bering Sea pollock age data for the tinyVAST test.
#
# Keeps records with Year >= 2021, relabels Age as a factor ("Age_<n>"),
# adds a Year-by-Age interaction column, and projects the Lon/Lat
# coordinates (EPSG:4326) to UTM zone 2 in kilometres.
#
# Returns a plain data frame (geometry dropped) with the projected
# coordinates appended as columns by st_coordinates().
format_data <- function() {
  # Pull & format data
  data(bering_sea_pollock_ages)
  Data <- subset(bering_sea_pollock_ages, Year >= 2021)
  Data$Age <- factor(paste0("Age_", Data$Age))
  Data$Year_Age <- interaction(Data$Year, Data$Age)

  # Project to UTM
  Data <- st_as_sf(
    Data,
    coords = c("Lon", "Lat"),
    crs = st_crs(4326)
  )
  Data <- st_transform(Data, crs = st_crs("+proj=utm +zone=2 +units=km"))
  Data <- cbind(st_drop_geometry(Data), st_coordinates(Data))

  Data
}

Data <- format_data()
# Set up tinyVAST settings
# Empty specification passed as the `sem` argument of tinyVAST().
sem <- ""

# Specification passed as the `dsem` argument: one lag-1 path from each
# age class (Age_1 .. Age_15) to itself.
dsem <- "
  Age_1 -> Age_1, 1, lag1
  Age_2 -> Age_2, 1, lag1
  Age_3 -> Age_3, 1, lag1
  Age_4 -> Age_4, 1, lag1
  Age_5 -> Age_5, 1, lag1
  Age_6 -> Age_6, 1, lag1
  Age_7 -> Age_7, 1, lag1
  Age_8 -> Age_8, 1, lag1
  Age_9 -> Age_9, 1, lag1
  Age_10 -> Age_10, 1, lag1
  Age_11 -> Age_11, 1, lag1
  Age_12 -> Age_12, 1, lag1
  Age_13 -> Age_13, 1, lag1
  Age_14 -> Age_14, 1, lag1
  Age_15 -> Age_15, 1, lag1
"
# Mesh built from the projected X/Y coordinates in `Data`.
mesh <- fm_mesh_2d(
  loc = Data[, c("X", "Y")],
  cutoff = 50
)

# Fitting controls passed to tinyVAST() via `control`.
control <- tinyVASTcontrol(
  getsd = FALSE,
  profile = c("alpha_j"),
  trace = 0
)

# One tweedie() family per age class; the names match the "Age_<n>" labels
# used in the Age column and in the `dsem` specification.
family <- list(
  Age_1 = tweedie(),
  Age_2 = tweedie(),
  Age_3 = tweedie(),
  Age_4 = tweedie(),
  Age_5 = tweedie(),
  Age_6 = tweedie(),
  Age_7 = tweedie(),
  Age_8 = tweedie(),
  Age_9 = tweedie(),
  Age_10 = tweedie(),
  Age_11 = tweedie(),
  Age_12 = tweedie(),
  Age_13 = tweedie(),
  Age_14 = tweedie(),
  Age_15 = tweedie()
)
# Fit tinyVAST model
# Uses the objects defined earlier in this test: Data, sem, dsem, family,
# mesh and control.
myfit <- tinyVAST(
  data = Data,
  formula = Abundance_per_hectare ~ 0 + Year_Age,
  sem = sem,
  dsem = dsem,
  family = family,
  space_column = c("X", "Y"),
  variable_column = "Age",
  time_column = "Year",
  distribution_column = "Age",
  spatial_graph = mesh,
  control = control
)

myfit # if this prints, everything is groovy!