Libraries
library(ggplot2)
library(tidyverse)
library(here)
library(dplyr)
library(colorout)
library(extrafont)
library(scales)
library(stringr)
library(ggtext)
library(rnaturalearth)
library(rnaturalearthdata)
library(ggrepel)
library(Cairo)
library(gstudio)
library(ggmap)
Clear memory and environment
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 4099084 219.0 7028130 375.4 NA 5989809 319.9
## Vcells 5983467 45.7 12255594 93.6 32768 10146326 77.5
Load the data
chip <- readRDS(here("output","maps","sampling_loc.rds"))
wgs<-read.table(here("output","maps","wgs.txt"), header = T, sep = "\t")
Check wgs
## City Years label color gb
## 1 Limbe, Cameroon 2018 LIMw #cc41c0 WGS
## 2 Le Tampon, La Reunion 2017 TAMw #cc41c0 WGS
## 3 Puerto Iguazu-Misiones, Argentina 2018 PUEw #cc41c0 WGS
## 4 Maceio, Brazil 2018 MACw #cc41c0 WGS
## 5 Cali, Colombia 2013 CALw #cc41c0 WGS
## 6 Jardin Panteon, Mexico 2017 JARw #cc41c0 WGS
Get coordinates
# Rename
wgs2 <- wgs2 |>
dplyr::rename(
Longitude = lon,
Latitude = lat,
Abbreviation = label
) |>
dplyr::select(
Abbreviation, Longitude, Latitude
) |>
mutate(tech = "WGS")
head(wgs2)
## Abbreviation Longitude Latitude tech
## 1 LIMw 9.195443 4.022466 WGS
## 2 TAMw 55.515318 -21.279044 WGS
## 3 PUEw -54.578599 -25.597164 WGS
## 4 MACw -35.735217 -9.666042 WGS
## 5 CALw -76.531985 3.451647 WGS
## 6 JARw -101.216730 21.468961 WGS
Check chip
## # A tibble: 6 × 6
## Pop_City Country Latitude Longitude Region Abbreviation
## <chr> <chr> <dbl> <dbl> <chr> <chr>
## 1 Gelephu Bhutan 26.9 90.5 Asia GEL
## 2 Phnom Penh Cambodia 11.6 105. Asia CAM
## 3 Hainan China 19.2 110. Asia HAI
## 4 Yunnan China 24.5 101. Asia YUN
## 5 Hunan China 27.6 112. Asia HUN
## 6 Bengaluru India 13.0 77.6 Asia BEN
We have some extra localities. Let’s get the populations we have from a fam file
fam_file <- here(
"output", "populations", "snps_sets", "r2_0.1.fam"
)
# Read the .fam file
fam_data <- read.table(fam_file,
header = FALSE,
col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))
fam_data <- fam_data |>
distinct(FamilyID, .keep_all = TRUE)
# View the first few rows
head(fam_data)
## FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1 OKI 1001 0 0 2 -9
## 2 HAI 1053 0 0 0 -9
## 3 YUN 1089 0 0 1 -9
## 4 KAT 1103 0 0 2 -9
## 5 TAI 1161 0 0 2 -9
## 6 HUN 1169 0 0 0 -9
Now merge with our chip data
# Join with sampling_loc to get sampling localities
chip2 <- chip |>
inner_join(fam_data, by = c("Abbreviation" = "FamilyID"))
chip2 <- chip2 |>
distinct(Abbreviation, .keep_all = TRUE) |>
dplyr::select(
Abbreviation, Longitude, Latitude
)
chip2 <- chip2 |>
dplyr::select(
Abbreviation, Longitude, Latitude
) |>
mutate(tech = "CHIP")
head(chip2)
## # A tibble: 6 × 4
## Abbreviation Longitude Latitude tech
## <chr> <dbl> <dbl> <chr>
## 1 GEL 90.5 26.9 CHIP
## 2 CAM 105. 11.6 CHIP
## 3 HAI 110. 19.2 CHIP
## 4 YUN 101. 24.5 CHIP
## 5 HUN 112. 27.6 CHIP
## 6 BEN 77.6 13.0 CHIP
Bind objects
data <- rbind(wgs2, chip2)
# Remove the "w"
data$Abbreviation <- gsub("w$", "", data$Abbreviation)
head(data)
## Abbreviation Longitude Latitude tech
## 1 LIM 9.195443 4.022466 WGS
## 2 TAM 55.515318 -21.279044 WGS
## 3 PUE -54.578599 -25.597164 WGS
## 4 MAC -35.735217 -9.666042 WGS
## 5 CAL -76.531985 3.451647 WGS
## 6 JAR -101.216730 21.468961 WGS
# Identify localities with both "CHIP" and "WGS".
both_techs_localities <- data |>
group_by(Abbreviation) |>
summarise(both = n_distinct(tech) > 1) |>
filter(both) |>
pull(Abbreviation)
# Add a new column to your data
data$both_techs <- ifelse(data$Abbreviation %in% both_techs_localities, "Both", as.character(data$tech))
world <- ne_countries(scale = "medium", returnclass = "sf")
source(
here(
"scripts", "analysis", "my_theme2.R"
)
)
# Plot
ggplot() +
geom_sf(data = world, fill = "#e2f5eb", color = "gray") +
geom_point(
data = data,
aes(x = Longitude, y = Latitude, color = both_techs),
size = 0.3,
show.legend = FALSE
) +
geom_text_repel(
data = data,
aes(
x = Longitude,
y = Latitude,
label = Abbreviation,
color = both_techs # Use the both_techs column directly for label colors
),
size = 2,
box.padding = unit(0.01, "lines"),
max.overlaps = 50,
show.legend = FALSE
) +
scale_color_manual(values = c("Both" = "blue", "CHIP" = "red", "WGS" = "black")) +
labs(color = "Genotyped with:") +
coord_sf(xlim = c(-130, 180), ylim = c(-60, 80)) +
my_theme() +
theme(legend.position = "top",
axis.text.y = element_text(margin = margin(
t = 0,
r = 10,
b = 0,
l = 0
)))
ggsave(
here("output", "maps", "samples.pdf"),
width = 8,
height = 5,
units = "in",
device = cairo_pdf
)
Zoom
# Plot
ggplot() +
geom_sf(data = world, fill = "#e2f5eb", color = "gray") +
geom_point(
data = data,
aes(x = Longitude, y = Latitude, color = both_techs),
size = 0.3,
show.legend = FALSE
) +
geom_text_repel(
data = data,
aes(
x = Longitude,
y = Latitude,
label = Abbreviation,
color = both_techs # Use the both_techs column directly for label colors
),
size = 2,
box.padding = unit(0.01, "lines"),
max.overlaps = 50,
show.legend = FALSE
) +
scale_color_manual(values = c("Both" = "blue", "CHIP" = "red", "WGS" = "black")) +
labs(color = "Genotyped with:") +
coord_sf(xlim = c(60, 160), ylim = c(-10, 60)) +
my_theme() +
theme(legend.position = "top",
axis.text.y = element_text(margin = margin(
t = 0,
r = 10,
b = 0,
l = 0
)))
ggsave(
here("output", "maps", "samples_zoom.pdf"),
width = 8,
height = 5,
units = "in",
device = cairo_pdf
)
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
# Filter countries for each region
east_asia_countries <- c("China", "Mongolia", "South Korea", "North Korea", "Japan", "Taiwan")
south_asia_countries <- c("India", "Pakistan", "Bangladesh", "Nepal", "Bhutan", "Sri Lanka", "Maldives")
southeast_asia_countries <- c("Myanmar", "Thailand", "Cambodia", "Laos", "Vietnam", "Malaysia", "Singapore", "Indonesia", "Brunei", "Philippines", "East Timor")
east_asia <- world[world$name %in% east_asia_countries, ]
south_asia <- world[world$name %in% south_asia_countries, ]
southeast_asia <- world[world$name %in% southeast_asia_countries, ]
# Your ggplot code
ggplot() +
geom_sf(data = world,
fill = "#e2f5eb",
color = "gray") +
geom_sf(data = east_asia, aes(fill = "East Asia")) +
geom_sf(data = south_asia, aes(fill = "South Asia")) +
geom_sf(data = southeast_asia, aes(fill = "Southeast Asia")) +
geom_point(
data = data,
aes(x = Longitude, y = Latitude, color = both_techs),
size = 0.3,
show.legend = FALSE
) +
geom_text_repel(
data = data,
aes(
x = Longitude,
y = Latitude,
label = Abbreviation,
color = both_techs
),
size = 2,
box.padding = unit(0.01, "lines"),
max.overlaps = 50,
show.legend = FALSE
) +
scale_fill_manual(values = c(
"East Asia" = "#faaea1",
"South Asia" = "#95f5a8",
"Southeast Asia" = "#b2c7ff"
)) +
scale_color_manual(values = c(
"Both" = "blue",
"CHIP" = "red",
"WGS" = "black"
)) +
labs(fill = "Regions", color = "Genotyped with:") +
coord_sf(xlim = c(60, 160), ylim = c(-10, 60)) +
my_theme() +
theme(legend.position = "top",
axis.text.y = element_text(margin = margin(
t = 0,
r = 10,
b = 0,
l = 0
)))