##LEA

Libraries

library(LEA)
library(vcfR)
library(RColorBrewer)
library(ggplot2)
library(adegenet)
library(ape)
library(tidyverse)
library(here)
library(dplyr)
library(ggplot2)
library(colorout)
library(extrafont)
library(scales)
library(stringr)
library(ggtext)

1 Neutral SNP set

Check the data

ls output/populations/snps_sets/*.vcf
## output/populations/snps_sets/neutral.vcf
## output/populations/snps_sets/r2_0.01.vcf
## output/populations/snps_sets/r2_0.1.vcf

Import the data

genotype <- here(
    "output", "populations", "snps_sets", "neutral.vcf"
  )

d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 9047
##   column count: 246
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 9047
##   Character matrix gt cols: 246
##   skip: 0
##   nrows: 9047
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant: 9047
## All variants processed

Population and individuals information

inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## BEN CAM CHA GEL HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KLP KUN LAM MAT OKI 
##  12  12  12   2  12   4   7  12  11   4   2   6  12  11  10   4   4   9  12  11 
## QNC SON SSK SUF SUU TAI UTS YUN 
##  12   3  12   6   6   8  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert format

vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   237
##  - number of detected loci:      9047
## 
## For SNP info, please check /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/neutral.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/neutral.removed file, for more informations.
## [1] "/Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/neutral.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   237
##  - number of detected loci:      9047
## 
## For SNP info, please check /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/neutral.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/neutral.removed file, for more informations.
## 
## 
##  - number of detected individuals:   237
##  - number of detected loci:      9047
## [1] "/Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/neutral.lfmm"

PCA

setwd(
  here(
    "output", "populations"
  )
)
nPC <- length(inds)
pc <- pca(gsub(".vcf", ".lfmm", genotype), K = nPC)
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
## 
##         -n (number of individuals)          237
##         -L (number of loci)                 9047
##         -K (number of principal components) 237
##         -x (genotype file)                  /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/neutral.lfmm
##         -a (eigenvalue file)                /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/neutral.pca/neutral.eigenvalues
##         -e (eigenvector file)               /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/neutral.pca/neutral.eigenvectors
##         -d (standard deviation file)        /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/neutral.pca/neutral.sdev
##         -p (projection file)                /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/neutral.pca/neutral.projections
##         -c data centered
show(pc)
## * pca class *
## 
## project directory:               /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/ 
## pca result directory:            neutral.pca/ 
## input file:                      neutral.lfmm 
## eigenvalue file:                 neutral.eigenvalues 
## eigenvector file:                neutral.eigenvectors 
## standard deviation file:         neutral.sdev 
## projection file:                 neutral.projections 
## pcaProject file:                   neutral.pcaProject 
## number of individuals:           237 
## number of loci:                  9047 
## number of principal components:  237 
## centered:                        TRUE 
## scaled:                          FALSE

Test

# PC significant test: tracy-widom test
tw <- tracy.widom(pc)
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
## 
##         -n (number of eigenvalues)          237
##         -i (input file)                     /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/neutral.pca/neutral.eigenvalues
##         -o (output file)                    /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/neutral.pca/neutral.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "pink", cex = .8)

Get values

# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)

Check R symbols for plot

#to see all shapes -> plot shapes - para escolher os simbolos
N = 100; M = 1000
good.shapes = c(1:25,33:127)
foo = data.frame( x = rnorm(M), y = rnorm(M), s = factor( sample(1:N, M, replace = TRUE) ) )
ggplot(aes(x,y,shape=s ), data=foo ) +
  scale_shape_manual(values=good.shapes[1:N]) +
  geom_point()

Sample data

sampling_loc <- readRDS(here("output", "local_adaptation", "sampling_loc.rds"))
head(sampling_loc)
## # A tibble: 6 × 6
##   Pop_City   Country  Latitude Longitude Region Abbreviation
##   <chr>      <chr>       <dbl>     <dbl> <chr>  <chr>       
## 1 Gelephu    Bhutan       26.9      90.5 Asia   GEL         
## 2 Phnom Penh Cambodia     11.6     105.  Asia   CAM         
## 3 Hainan     China        19.2     110.  Asia   HAI         
## 4 Yunnan     China        24.5     101.  Asia   YUN         
## 5 Hunan      China        27.6     112.  Asia   HUN         
## 6 Bengaluru  India        13.0      77.6 Asia   BEN

More sample data

# import sample attributes
samples2 <- read.delim(
  here(
    "output", "populations", "Population_meta_data.txt"
  ),
  head = TRUE
)

samples2<- samples2 |>
  dplyr::select(
    region, pop
  )

# check head of the file
head(samples2)
##      region pop
## 1 East Asia HAI
## 2 East Asia HUN
## 3 East Asia YUN
## 4 East Asia AIZ
## 5 East Asia HIR
## 6 East Asia JAT

Merge with sampling_loc

merged_loc <- merge(samples2, sampling_loc, by.x = "pop", by.y = "Abbreviation")
head(merged_loc)
##   pop          region           Pop_City    Country Latitude Longitude Region
## 1 AIZ       East Asia Aizuwakamatsu City      Japan  37.4924  139.9936   Asia
## 2 ALV Southern Europe              Vlore    Albania  40.4660   19.4897 Europe
## 3 ANT     East Africa       Antananarivo Madagascar -18.8792   47.5079 Africa
## 4 AWK     West Africa               Awka    Nigeria   6.2105    7.0741 Africa
## 5 BAR Southern Europe          Barcelona      Spain  41.3851    2.1734 Europe
## 6 BEN      South Asia          Bengaluru      India  12.9716   77.5946   Asia

Check pops

head(pc.coord$Population)
## [1] OKI OKI OKI OKI OKI OKI
## 28 Levels: BEN CAM CHA GEL HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT ... YUN

Check how many sampling localities

length(unique(pc.coord$Population))
## [1] 28

Merge

merged_data <- merge(pc.coord, merged_loc, by.x = "Population", by.y = "pop")
head(merged_data)
##   Population     PC1       PC2       PC3     PC4       PC5       PC6     PC7
## 1        BEN 4.37630  -8.49291  1.301750 6.04456 -1.398500 -1.424350 5.29755
## 2        BEN 3.15445  -9.60122  2.426950 7.23139  1.127770 -1.431420 6.35691
## 3        BEN 4.20180  -9.55543  0.140743 6.79438 -2.902730 -0.371062 4.43490
## 4        BEN 3.29325 -10.46840  1.592560 6.21134 -1.546210 -0.116333 3.62283
## 5        BEN 3.33558  -8.23233 -0.329635 4.82641 -0.877709 -2.462100 2.93744
## 6        BEN 3.69959  -8.73129  0.975078 5.71000 -2.170690  0.171228 3.55868
##         PC8        PC9       PC10      PC11      PC12    PC13    PC14      PC15
## 1  0.418258  0.8581700 -2.1140200  0.403932  0.755038 0.86709 3.54468  3.348870
## 2 -3.638890  0.5569150 -0.9746360 -2.131210  0.992506 6.25126 7.04006 -1.224810
## 3 -0.787768  0.2004860 -1.5559300 -2.297540  7.197840 3.58952 5.03420  0.879158
## 4 -3.574550  1.8949100 -0.5245740  2.094650 11.070300 6.66769 6.39389  2.888340
## 5 -2.000580 -0.0592385 -0.0956112  0.296329  1.715170 5.23563 2.63582  0.211726
## 6 -4.236730 -0.0254789 -0.3885710  1.635510 11.269000 6.06673 8.25045  2.263050
##       PC16      PC17      PC18      PC19      PC20     PC21      PC22
## 1 2.266500 0.4937050  0.936941 -0.249966  -7.59223 1.590570  2.913630
## 2 1.731590 2.6181600 -7.437780 10.546400 -12.74120 6.118050  0.850224
## 3 0.795079 0.0348851 -4.137870  2.455610  -7.02787 3.010400 -3.516080
## 4 0.664427 5.0985300 -5.152310 12.289400 -20.29400 1.713420  9.149620
## 5 1.961610 4.5925100 -2.153170  4.446200  -8.42919 0.903396  0.300183
## 6 0.038567 4.3013300 -7.864620 12.546400 -22.87370 2.329010  8.302890
##         PC23      PC24      PC25      PC26      PC27       PC28      PC29
## 1  -2.998450  2.226510 -4.134560  -1.80144 -0.682614 -1.8120500  0.731278
## 2   0.979196  0.102092 -0.678146   4.26657  7.878370 -2.3262700 -7.237450
## 3  -4.430550  0.298540  3.734540   0.30848  1.207510  2.1870000  0.262863
## 4 -15.527900 -0.514269  4.225960 -17.10980 -7.352570 -4.5877700  0.348908
## 5  -4.832100  2.157480  5.352590   1.55464  0.577411  0.0279119 -3.147570
## 6 -14.815700 -2.194700  4.168020 -17.19570 -7.083170 -6.8810000  4.511120
##        PC30     PC31      PC32      PC33      PC34      PC35      PC36
## 1 -2.718000 -2.86451 -0.557517   7.45755  3.331760  1.412550  1.447440
## 2  5.550120 -7.53496  7.681780  17.92080  8.704390 -9.891390 -3.175230
## 3  4.645830  1.28521  1.378480   6.68237  4.274220  1.311760 -0.675795
## 4  3.227000  6.83836 -2.767060 -15.89760 -5.527430 10.973500  4.639960
## 5 -0.767541 -9.46591 -0.998709   5.62576  0.394857 -0.338668  0.437628
## 6  5.456020  8.13206 -1.328480 -15.65260 -7.883590 10.257700  4.246680
##        PC37     PC38     PC39       PC40      PC41     PC42      PC43     PC44
## 1 -1.236060 -2.31369 -3.46827  -1.430540   5.85927 -2.42156 -4.953200  2.75709
## 2 -8.413230 -5.59285 -4.71282   0.628422 -15.58340  4.15695 -0.587489  8.73378
## 3 -2.557960 -6.53092 -2.69246 -10.106200   5.90567 -2.38381  0.028337 -4.92311
## 4  0.730155 10.96570  1.93659   3.565280  -7.02917  2.31145 -2.310510  1.13887
## 5  2.939750 -6.74712  3.81541   2.156330   8.49412  8.11400 -1.749020 -1.10889
## 6 -0.345879 11.74940  2.83661   2.554210  -8.56975  2.42033 -1.982800  2.38218
##       PC45     PC46      PC47      PC48      PC49      PC50     PC51      PC52
## 1 -1.06260 -5.88535 -3.866250 -2.792090 -8.805790 -4.783580  1.08519  7.710800
## 2  3.76776  6.12698 -2.891470  2.324780  5.137910  3.954260 -2.66744 -8.189800
## 3 -0.10932  1.33166  0.845514 -3.313070 -1.167760  2.764450  7.05471  4.309220
## 4  1.55809  4.23506  3.402920 -0.676161 -0.472500  1.142220 -0.58797  0.186841
## 5  3.20889  4.02518 -6.965760  0.533054  1.458720  0.164662 -3.88749  7.116020
## 6 -1.00794  5.14968  1.810030  0.667920  0.937871 -1.685820 -2.25735 -0.154678
##        PC53      PC54        PC55      PC56      PC57     PC58      PC59
## 1 -0.825919 -5.842780 -12.4542000 -1.379070  9.428110 -4.98541 -2.977210
## 2  0.792067  5.050380   4.1644800  2.846330  2.439710  1.61895  6.536610
## 3 -2.735420 -3.350100   0.0153237 -4.463040 -2.905220 -3.10869 -3.404570
## 4 -2.016290 -1.134030  -0.7440420  0.502967 -0.996968  1.70095 -2.322190
## 5  4.282800 -2.208570   1.3199900 -4.372890  0.633312 -4.16939  0.428124
## 6 -1.836650 -0.887757  -0.7771640 -0.290632 -0.811333  1.47468 -1.495380
##       PC60      PC61      PC62     PC63      PC64      PC65      PC66     PC67
## 1 -7.59668 -2.505030 -7.129600 -1.56993 -0.983789  5.331920 -3.549690  7.29029
## 2 -2.43516 -0.163389  5.786350  2.92768  0.838100 -2.358180 -2.791920 -2.61581
## 3  5.16332 -2.677590  1.301450 -3.67472 -0.256345  0.808496  0.868312  9.84112
## 4  2.12447  3.243990  0.954926 -4.47302 -0.668457  1.344160 -0.309001 -1.17989
## 5 10.45420 -9.894070 -0.727833  3.05093 -4.569950 -1.131220 -1.682510  5.88015
## 6  1.11254  5.368370  0.786815 -3.19110 -3.224250 -0.881736 -0.898017 -3.04252
##        PC68     PC69      PC70      PC71      PC72      PC73      PC74
## 1  1.124780 -3.04304 -0.234426  1.646690 -2.872680  2.761790  5.002480
## 2 -0.465154  3.01060  3.060190 -1.783330  2.225480 -6.059950 -4.589220
## 3  0.671785  1.42035 12.658400  3.626730 -3.174860  7.520300 -0.389783
## 4  1.048060  3.01960 -1.655890  3.534140 -0.828278 -1.640350 -0.609058
## 5  4.998470 -1.87002 -7.451220  7.730290 -4.879950  2.028180 -4.918560
## 6  0.375144  2.88701 -0.855020  0.761011  0.842539 -0.925931 -0.383770
##        PC75     PC76        PC77     PC78       PC79      PC80      PC81
## 1  5.460280  4.41496   6.9259400 -4.45891  -6.071340  5.431830  0.882699
## 2 -4.246580  1.54427   2.8377300 -5.39952   0.655056 -0.442470 -1.681380
## 3 -3.389440  3.33648   5.5459500 -7.53511   4.280020 -8.871840  7.424280
## 4  0.925562  2.14868  -0.0094871  2.45783   2.199380  1.214230 -1.066170
## 5 -2.999470 -3.42392 -10.4686000 -0.73857 -10.394700  8.560560  2.210170
## 6 -0.142440  1.32795   0.2351690  2.73865   0.805267  0.153639  0.360289
##        PC82     PC83     PC84      PC85      PC86       PC87      PC88
## 1  1.021380 -2.88876 -3.11124  1.971260  3.475180  0.3988560   1.13637
## 2 -3.314440  2.74458  2.18467  1.536420  0.891887 -3.7217400   2.13420
## 3 10.731200 -2.60091  7.30123  2.209020 -6.939950  6.2931500   3.64790
## 4 -1.143990 -3.40581 -2.36869  2.748030  1.066290  0.0595348   1.64486
## 5  3.085720 -5.75520 -8.40916 -0.488389  3.851140  4.3076000 -13.20160
## 6 -0.535352 -1.79455 -1.04637  0.867909 -0.266426 -1.9729300   1.66331
##        PC89      PC90      PC91      PC92      PC93     PC94      PC95
## 1 -1.571710 -5.388820 4.7262800  5.137020  7.640050 -2.06336 -0.191524
## 2  2.424040 -0.953752 0.2649880  2.039840  1.330480 -1.00312 -1.199620
## 3  3.857170 -2.921400 6.2494500  4.129870  0.741559 10.39450  1.825400
## 4  1.382640  0.995312 0.5869140 -2.227570 -2.757520 -3.24638 -0.377087
## 5 -3.369500  4.369990 0.3340530  6.429080  1.014070  4.52698  8.847760
## 6  0.696171  0.129954 0.0834456 -0.434483 -3.524420 -3.79555 -1.159570
##        PC96      PC97      PC98      PC99     PC100     PC101     PC102
## 1 -4.636210  5.873940 -1.334680 -3.990850 12.906400 -4.069180  7.124970
## 2  0.301139 -1.702710 -0.997261 -1.553300 -2.919190 -0.314934  0.181045
## 3 -1.220320 -8.583630 -4.371850 -3.065850 -4.302100  3.126220  3.575590
## 4  0.977184  1.879750  5.586060  0.778924 -1.670870 -2.374000 -2.487640
## 5 -5.412630  9.907240 -7.147550 -6.627350 -1.562170 -1.800620  3.978030
## 6  3.462910  0.688653  2.356510 -0.421214 -0.930388 -0.864959 -0.176942
##       PC103     PC104     PC105     PC106    PC107     PC108     PC109
## 1  -2.52448  1.769310 -5.905280  2.043350 -3.46493 -4.884270  2.082660
## 2  -1.01521  0.802170 -4.038370 -0.691994 -3.18058  0.695449 -1.668670
## 3  -3.84102  4.273330  1.874910  6.397430 -1.00814 -2.561450 -3.359470
## 4  -1.24520 -0.986551  0.404237  1.802320 -1.64126 -0.505373 -0.632051
## 5 -11.44770  3.894910  4.285670 -4.306270  5.21526  2.866630 -1.370020
## 6  -2.92768 -2.886310 -0.855235  0.660800 -2.03423 -1.666820  1.303810
##       PC110     PC111     PC112      PC113     PC114    PC115     PC116
## 1  3.251700  0.338407  6.548960  4.6250300 -5.594340  8.40533 -4.887490
## 2  1.961740  3.591030  0.608238 -1.9825200  1.661540  2.03002  1.777030
## 3 -0.240104 -2.357040 -9.981750  0.0767461  6.103760 -8.92658  0.846325
## 4 -0.683654  0.191901  2.053650 -1.7896500 -0.657608  2.87772  0.964671
## 5 -8.932880 -1.527300 -3.830750 -7.2255600 -8.429070 -1.30245  5.321390
## 6 -1.184050 -1.377610  0.207012 -0.4026520  1.804830 -1.24878  0.836848
##       PC117    PC118       PC119     PC120    PC121     PC122     PC123
## 1  9.666250  6.39068 -13.5397000 -4.198320 -6.30006 -4.811880  6.954070
## 2 -4.201100 -2.18900  -1.9603600  0.059874 -2.42651  5.856340 -4.217080
## 3  2.552130 -2.75252   1.8022600  9.058140 -5.03625 -3.374810  2.033850
## 4 -0.319576  1.64446  -0.0114097 -2.022920  2.34888  0.964435  3.192180
## 5  0.223514  7.80360  -1.8279400 -3.190910  4.40286 -3.280570  0.711008
## 6 -2.683800  1.39470   0.9004290 -0.485800 -1.17913  1.472110 -0.393499
##      PC124     PC125    PC126      PC127     PC128     PC129     PC130
## 1  2.12088 -6.939880  2.38261 -1.6656400 -6.475860 -1.800990  0.238012
## 2 -4.26757 -0.803577 -1.26519 -1.1318200  2.143390  0.289818  1.958530
## 3 -2.39697  0.593307  2.47723  2.3339100 -4.959710  3.057330  4.905610
## 4  2.15831  1.771880  1.96474 -2.1006100 -0.833041  0.938598  0.868147
## 5 -6.99154 -5.445290 -3.81242 -0.5342810 11.830600  0.768970 -5.542930
## 6  1.89504 -0.679298  1.34266  0.0967444 -0.379996 -0.449067 -0.534850
##       PC131      PC132     PC133     PC134     PC135      PC136     PC137
## 1 -0.148264 -11.439700 -2.947260  2.011090  4.997150  0.4860850 -4.783020
## 2  1.527930  -1.803360  0.693448  1.843930  2.773060  1.0511100 -2.351140
## 3 -2.033170  -0.217957 -8.583290 -3.653360 -7.247880  3.5454400 11.555300
## 4  1.055280  -0.405838 -1.933700 -1.355030 -0.325966  0.4413490  0.298962
## 5 -5.951020  10.692600  8.273010  5.195540  4.621700 -2.6897000  4.778050
## 6  1.545990  -0.501658  0.891743  0.833018 -2.789080  0.0555673 -1.301680
##       PC138     PC139     PC140     PC141      PC142      PC143     PC144
## 1 10.436500  8.040830  4.323820 -7.177330 -0.0146389 -0.6870660  2.803040
## 2  4.033410 -1.897770 -1.211650  0.982932 -1.2433500  2.2583300  1.612660
## 3 -3.181190  4.988420  2.975060  4.269470 -5.1622300  2.9066400 -1.809840
## 4  0.973862  0.248761 -2.323410 -0.190366 -0.3450590  0.8013970  2.449470
## 5 -4.132900 -5.254870  1.059320  1.254850  0.6880940  6.1524300  4.388500
## 6  3.772230 -1.032290 -0.508323  0.538316  1.2491900 -0.0413437  0.812397
##       PC145     PC146    PC147     PC148    PC149    PC150     PC151     PC152
## 1 -5.285530  8.675170 -3.69611  5.140990  4.87472 -2.67586 -3.017610 -1.106320
## 2  1.279830 -3.976750 -3.57416 -2.694600  2.49021  2.61082  0.838082 -0.815594
## 3  8.587160  2.856470 -3.51397  4.983220 -9.98956  3.35624  7.278330 -5.329680
## 4  0.740513  1.079820 -1.48380 -3.035540 -1.06675  2.23907 -0.522184 -0.209564
## 5 -4.080490 -2.292080  1.32743  0.826520 -1.82006 -4.40903 -4.750930 -3.324150
## 6  3.209690 -0.109013 -2.23390  0.260593 -1.38172 -2.61961 -1.120970  0.851559
##       PC153      PC154     PC155     PC156      PC157     PC158     PC159
## 1  4.930400  0.9879250 -1.750390 -7.283130 -5.3564700 -1.875460  6.546210
## 2  1.566620  2.3889900 -0.319553 -0.635176 -3.5783600 -0.032506  1.750800
## 3 -8.841080 -6.2074900  5.923220 -2.534390 -0.7496690  5.712290 -3.515100
## 4 -0.844072  0.0929918 -1.310740 -1.576310 -0.7022990  2.340250 -1.849170
## 5 -0.622044 -0.9798520 -2.222610  3.016310  0.3428920  5.421320  0.612915
## 6 -1.363410 -0.0195265  0.545937  0.891863  0.0501511  1.064020  1.835060
##     PC160     PC161     PC162    PC163     PC164     PC165    PC166     PC167
## 1 2.63061  2.531090 -7.767730 -1.54041 -4.430150 -4.271720 5.356400 -3.691840
## 2 1.74809  2.319120  0.425632  2.14025 -0.459572  0.041447 5.135550  1.174940
## 3 1.12718 -1.213960 -2.621000  4.43000  0.977359 -7.778930 4.142670 -8.046040
## 4 0.44376 -1.283590 -0.447030 -2.63067  1.714770 -1.081840 1.852630 -0.575097
## 5 4.81705 -5.108310  5.620360  1.71833  0.528482 -2.115070 2.621040 -1.563990
## 6 1.05485  0.124103  1.565870  1.14986 -0.120608 -0.779284 0.700009 -2.109950
##       PC168        PC169     PC170     PC171    PC172     PC173     PC174
## 1  5.884280  3.152060000 -1.758250  1.045200 2.439780  4.193200  2.214430
## 2 -1.284680  1.636430000 -1.251850 -0.611125 1.300510  0.354554  2.162220
## 3  3.040530 -6.350620000  3.454840 -0.079931 4.774280 -1.077050  0.635806
## 4  2.111120  1.188960000 -0.343492  0.269182 0.810886  0.185686  1.493480
## 5  0.559651 -0.000679444  0.905616 -3.559750 2.514020 -1.499410 -3.877260
## 6 -2.748230 -1.559660000 -1.083480 -2.361670 1.764400  1.806320 -3.237580
##       PC175     PC176      PC177     PC178       PC179     PC180      PC181
## 1 -0.776159 -1.371750 -0.0899256  2.569650 -2.56167000  2.200100  0.8432220
## 2  3.451480  0.788434  0.5683430 -0.273572 -0.88225700  0.703638 -3.2212300
## 3 -5.333300  1.238090 -0.0138343  1.434050 -0.42772900 -0.709691  3.6378600
## 4  1.464560  0.633538 -0.0833481  0.255600  0.63180100 -2.580900 -0.0601542
## 5 -2.312190  1.179940  1.9470400 -1.719170 -0.00525973 -0.587567 -1.0730100
## 6 -2.785130  0.949186  2.0900500 -0.808033 -1.07204000  1.774770 -2.7350800
##       PC182     PC183     PC184     PC185     PC186     PC187    PC188
## 1  0.743361  -1.90278  3.486440  1.295510  0.370433 -3.154840  1.41991
## 2  0.603326 -11.44990 16.409800 -1.930980 -1.191680  0.756095 -2.47657
## 3  1.000880  -1.02343  0.663309  2.191220 -0.121366 -0.873492 -1.22118
## 4 -1.657400   3.14178 -3.701460  3.815470  0.467723 -3.738570  0.11743
## 5  1.018240   5.73003  1.739920 -0.657979 -0.491018  3.691810 -1.77375
## 6 -0.445713  -1.42469  3.289180 -5.392580  1.532540  2.534330 -1.68645
##       PC189     PC190     PC191     PC192     PC193     PC194     PC195
## 1 -2.437740 -2.483790  0.472897 -1.913160  1.565010  1.176480  0.135920
## 2 13.131800 10.667700 13.481100 -1.313830  5.221990 -2.424860  1.387600
## 3  1.047980  0.246695 -1.641280 -0.789780  1.442320 -0.963197  0.168085
## 4 -0.823372 -0.823006 -0.545444 -2.945140 -1.556810 -3.733990 -1.118920
## 5  2.573690 -0.560743 -1.359560  0.528937 -0.230594  1.176170  0.777487
## 6  1.174370 -1.213170  1.671460  3.180860  0.495079  3.867240  0.479138
##       PC196     PC197     PC198     PC199     PC200     PC201     PC202
## 1  1.276340 -0.217320  0.491549 -1.212930  0.790819  0.272161 -1.606890
## 2 -0.348992 -1.370850 -1.816120 -8.088100 -1.100810 -3.679010  0.288224
## 3 -2.648420 -2.327710  2.386340  0.716205 -1.089580 -0.660717 -0.122745
## 4 -5.251300 -0.296413 -0.887046 -0.221650  1.101760  1.803400  2.141860
## 5 -0.891218  1.220190  1.148470  0.581395 -0.788039 -0.358581  1.508290
## 6  3.146090 -0.279995  0.576187  0.716393 -2.182150  0.346678 -1.547790
##        PC203      PC204      PC205      PC206      PC207    PC208       PC209
## 1  1.8281700 -0.0125929  0.0134631  -0.325229  -1.118270 -1.73842 -0.36448400
## 2 -1.5495500 -1.5634200 -0.0573725  -4.392160   0.103819 -0.41825 -3.37734000
## 3  0.0971598  0.5458590  0.5330740   0.633660  -0.563500  1.01478 -0.14952500
## 4 -6.1468900 -4.0877300 -0.4566410 -24.460600  11.093400  5.52783 -1.08098000
## 5 -0.5364600 -1.7823200  1.2405900  -0.910865  -0.943015 -0.86736 -1.15204000
## 6  6.3199800  4.9156100  0.1397560  22.944700 -11.639800 -5.81086  0.00637711
##        PC210     PC211     PC212      PC213     PC214     PC215      PC216
## 1 -0.1063190  1.215150  0.802243 -0.4509100  1.028240  1.173710 -0.3100460
## 2 -0.3431220 -0.224690 -0.281572  0.3739310 -0.411959 -0.143375  0.0388579
## 3  0.1013250 -1.745660 -1.588710 -0.0596969 -1.013200 -0.827744  0.2829530
## 4  7.5309900 -5.949620 -2.401050 -1.5621600 -1.357780  0.779242  1.0228200
## 5  0.0297901  0.621533  1.266560 -0.1096040  0.167501  1.312220  0.6323010
## 6 -5.8788000  5.541060  2.364220  1.6548700  1.433450 -1.005440 -0.2665910
##       PC217      PC218      PC219     PC220     PC221     PC222      PC223
## 1  0.406126  0.1515290  0.5604360  0.392286 -0.464393  0.307427  1.1979000
## 2 -1.477540 -0.9672210  0.0433564 -0.293628  0.316365  0.806927  0.4071760
## 3 -0.399529 -0.4893580  0.3635990  0.213455 -0.173931 -1.016860 -0.0901845
## 4 -0.430235 -0.3691010 -0.5494290  1.106470 -1.285610 -0.444356 -0.9959270
## 5 -0.590986  0.0668085 -0.5283220  0.336070  0.567322  0.542618 -0.2716210
## 6  0.288843  0.3270920  0.9601420 -1.550190  1.306380  0.488043  0.3256280
##       PC224      PC225     PC226      PC227      PC228     PC229     PC230
## 1 -0.198742 -0.3012860 -0.041302 -0.5263640 -0.0152281  0.389568  0.120556
## 2 -1.135510  0.2268410 -0.148378 -1.0113900  1.2128700  0.111132 -0.352921
## 3  0.749927 -0.5074970  0.356613 -0.1782180  0.0927267  0.372520 -0.628034
## 4 -0.715390  1.4408500 -0.590676 -0.3904220  0.0832206  0.925409 -1.008750
## 5  0.132177  0.0912397 -0.222446  0.0761237  0.1063310 -0.268953  0.237252
## 6  1.074680 -1.3663600  0.357651  0.7025460 -1.2537500 -0.710826  0.863416
##        PC231       PC232      PC233      PC234       PC235       PC236 PC237
## 1  0.2117910  0.00350157  0.0616939 -0.0100055  0.14216400 -0.22124700     0
## 2 -0.0604577  0.37558000 -0.2863980 -0.0372642 -0.07085410  0.00400021     0
## 3  0.4932220 -0.48504600  0.1623480 -0.0521154  0.00928758  0.18965900     0
## 4  0.0690208 -0.34392600  0.1940470  0.5010270  0.51785900 -0.00289706     0
## 5  0.5046400  0.20789500  0.1978250  0.0218614 -0.15939900 -0.01038810     0
## 6 -0.2211650  0.04149430  0.1325040 -0.3977150 -0.30353500 -0.03311780     0
##   Individual     region  Pop_City Country Latitude Longitude Region
## 1          a South Asia Bengaluru   India  12.9716   77.5946   Asia
## 2        257 South Asia Bengaluru   India  12.9716   77.5946   Asia
## 3        261 South Asia Bengaluru   India  12.9716   77.5946   Asia
## 4        263 South Asia Bengaluru   India  12.9716   77.5946   Asia
## 5        264 South Asia Bengaluru   India  12.9716   77.5946   Asia
## 6        262 South Asia Bengaluru   India  12.9716   77.5946   Asia

Create PCA plot

# source the plotting function
source(
  here(
    "scripts", "analysis", "my_theme2.R"
  )
)

distinct_palette <- c(
  "#E69F00",
  "#009E73",
  "#0072B2",
  "#CC79A7",
  "#800000",
  "#808080",
  "#CCEBC5",
  "#FFB5B8",
  "#99CCFF",
  "#8E8BFF",
  "#F781BF",
  "#FFFF33",
  "#A65628",
  "#984EA3",
  "#D2D2D2",
  "#4DAF4A",
  "#DEB887",
  "#7FFFD4",
  "#D2691E",
  "#DC143C",
  "#8B008B"
)


# make plot by continent and range
ggplot(merged_data, aes(PC1, PC2)) +
  geom_point(aes(colour = Country, shape = Country), size = 1) +
  xlab(paste0("PC1 (", perc[1], " Variance)")) +
  ylab(paste0("PC2 (", perc[2], " Variance)")) +
  labs(
    caption = "PCA with 9,047 intergenic SNPs of 237 mosquitoes from 28 localities in Asia."
  ) +
  guides(
    color = guide_legend(title = "Country", ncol = 3),
    shape = guide_legend(title = "Country", ncol = 3),
    fill = guide_legend(title = "Region", ncol = 1)
  ) +
  stat_ellipse(aes(fill = region, group = region), geom = "polygon", alpha = 0.2, level = 0.8) +
  scale_color_manual(values = distinct_palette) +
  scale_shape_manual(values=good.shapes[c(1:25, 58:67)]) +
  my_theme() +
  theme(
    plot.caption = element_text(face = "italic"),
    legend.position = "top",
    legend.justification = "top",
    legend.box.just = "center",
    legend.box.background = element_blank(),
    plot.margin = margin(5.5, 25, 5.5, 5.5, "points"),
    legend.margin = margin(10,10,10,10)
  )

# #   ____________________________________________________________________________
# #   save the pca plot                                                       ####
ggsave(
  here(
    "output", "populations", "figures", "PCA_lea_pc1_pc2_neutral.pdf"
  ),
  width  = 8,
  height = 6,
  units  = "in"
)

PC1 and PC3

# source the plotting function
source(
  here(
    "scripts", "analysis", "my_theme2.R"
  )
)

distinct_palette <- c(
  "#E69F00",
  "#009E73",
  "#0072B2",
  "#CC79A7",
  "#800000",
  "#808080",
  "#CCEBC5",
  "#FFB5B8",
  "#99CCFF",
  "#8E8BFF",
  "#F781BF",
  "#FFFF33",
  "#A65628",
  "#984EA3",
  "#D2D2D2",
  "#4DAF4A",
  "#DEB887",
  "#7FFFD4",
  "#D2691E",
  "#DC143C",
  "#8B008B" 
)


# make plot by continent and range
ggplot(merged_data, aes(PC1, PC2)) +
  geom_point(aes(colour = Country, shape = Country), size = 1) +
  xlab(paste0("PC1 (", perc[1], " Variance)")) +
  ylab(paste0("PC3 (", perc[3], " Variance)")) +
  labs(
    caption = "PCA with 9,047 intergenic SNPs of 237 mosquitoes from 28 localities in Asia."
  ) +
  guides(
    color = guide_legend(title = "Country", ncol = 3),
    shape = guide_legend(title = "Country", ncol = 3),
    fill = guide_legend(title = "Region", ncol = 1)
  ) +
  stat_ellipse(aes(fill = region, group = region), geom = "polygon", alpha = 0.2, level = 0.8) +
  scale_color_manual(values = distinct_palette) +
  scale_shape_manual(values=good.shapes[c(1:25, 58:67)]) +
  my_theme() +
  theme(
    plot.caption = element_text(face = "italic"),
    legend.position = "top",
    legend.justification = "top",
    legend.box.just = "center",
    legend.box.background = element_blank(),
    plot.margin = margin(5.5, 25, 5.5, 5.5, "points"),
    legend.margin = margin(10,10,10,10)
  )

# #   ____________________________________________________________________________
# #   save the pca plot                                                       ####
ggsave(
  here(
    "output", "populations", "figures", "PCA_lea_pc1_pc3_neutral.pdf"
  ),
  width  = 8,
  height = 6,
  units  = "in"
)

Run LEA

# set output dir
setwd(
  here(
    "output", "populations"
  )
)
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
  genotype,
  K = 1:15,
  project = "new",
  repetitions = 5,
  CPU = 4,
  entropy = TRUE
)
project = load.snmfProject("output/populations/snps_sets/neutral.snmfProject")

Cross entropy

# Open a new pdf file
pdf(here("output","populations","figures","lea_cross_entropy_neutral.pdf"), width = 6, height = 4)

# Create your plot
plot(project, col = "pink", pch = 19, cex = 1.2)

# Close the pdf file
dev.off()
## quartz_off_screen 
##                 2
plot(project, col = "pink", pch = 19, cex = 1.2)

1.1 k7

Default plot

# select the best run for K = 4 clusters
best = which.min(cross.entropy(project, K = 7))
# best is run 3
barchart(project, K = 7, run = best,
        border = NA, space = 0,
        col = distinct_palette,
        xlab = "Individuals",
        ylab = "Ancestry proportions",
        main = "Ancestry matrix") -> bp
axis(1, at = 1:length(bp$order),
     labels = bp$order, las=1,
     cex.axis = .4)

Mean admixture by country using ggplot

distinct_palette <-
  c(
    "#AE9393",
    "#FFB347",
    "#FFFF99",
    "#D0F0C0",
    "#AEC6CF",
    "#008080",
    "#F49AC2",
    "#1E90FF",
    "#75FAFF",
    "#77DD77",
    "#B22222",
    "#B8B800",
    "#B20CD9",
    "#FFD1DC",
    "#779ECB",
    "#FF8C1A"
  )

sampling_loc <- readRDS(here("output", "local_adaptation", "sampling_loc.rds"))


# Extract ancestry coefficients
Q_values <- as.data.frame(Q(project, K = 7, run = best))

# Create a named vector to map countries to regions
country_to_region <- c(
  "Bhutan" = "South Asia",
  "Cambodia" = "Southeast Asia",
  "China" = "East Asia",
  "India" = "South Asia",
  "Indonesia" = "Southeast Asia",
  "Japan" = "East Asia",
  "Malaysia" = "Southeast Asia",
  "Maldives" = "South Asia",
  "Nepal" = "South Asia",
  "Sri Lanka" = "South Asia",
  "Taiwan" = "East Asia",
  "Thailand" = "Southeast Asia",
  "Vietnam" = "Southeast Asia",
  "Madagascar" = "Africa",
  "Nigeria" = "Africa",
  "Albania" = "Europe",
  "Russia" = "Europe",
  "Spain" = "Europe",
  "USA" = "North America",
  "Argentina" = "South America",
  "Trinidad and Tobago" = "North America"
)

# Add the region to the data frame
sampling_loc$Region2 <- country_to_region[sampling_loc$Country]


# Add individual IDs and pops ids
Q_values$ind <- inds
Q_values$pop <- pops

# Melt the data frame for plotting
Q_melted <- melt(Q_values, id.vars = c("ind", "pop"))

# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country
Q_ordered <- Q_joined |>
  arrange(Region, Region2, Country) |>
  mutate(Region_Country = factor(Region_Country, levels = unique(Region_Country)))

# Group by Country and calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(Region_Country, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <- data.frame(Region_Country = unique(Q_grouped$Region_Country))

# Add the order of each country to ensure correct placement of borders
borders$order <- 1:nrow(borders) + 0.5  # Shift borders to the right edge of the bars


# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(Region_Country) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)

# source the plotting function
source(
  here(
    "scripts", "analysis", "my_theme2.R"
  )
)

# Generate all potential variable names
all_variables <- paste0("V", 1:16)

# Create a data frame that pairs each potential variable with a color
color_mapping <- data.frame(variable = all_variables,
                            color = distinct_palette[1:length(all_variables)])

# Merge this data frame with Q_grouped_filtered to create the new color column
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create bar chart
ggplot(Q_grouped_filtered, aes(x = Region_Country, y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_segment(data = borders, aes(x = order, xend = order, y = 0, yend = 1, fill = NULL), linetype = "solid", color = "#2C444A") +  # Add borders
  my_theme() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "none") +  # Hide legend
  xlab("") +  # Suppress x-axis label
  ylab("Ancestry proportions") +
  ggtitle("Ancestry matrix") +
  labs(caption = "Each bar represents the average ancestry proportions for individuals in a given country for k=7.") +
  # scale_fill_manual(values = color) +
  scale_x_discrete(labels = function(x) gsub(".*_", "", x))  # Remove Region prefix from labels


#   ____________________________________________________________________________
#   save the pca plot                                                       ####
# ggsave(
#   here(
#     "output", "populations", "figures", "LEA_k=7_neutral.pdf"
#   ),
#   width  = 12,
#   height = 6,
#   units  = "in",
#   device = cairo_pdf
# )

Using ggplot2 for individual admixtures

# Extract ancestry coefficients
leak7 <- read_delim(
  here("output", "populations", "snps_sets", "neutral.snmf", "K7", "run3","neutral_r3.7.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
# unseen_pckmeans.7.Q
# pckmeans.7.Q
head(leak7)
## # A tibble: 6 × 7
##      X1       X2       X3       X4       X5       X6       X7
##   <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1 0.481 0.0708   0.222    0.00569  0.0413   0.0686   0.110   
## 2 0.241 0.0661   0.271    0.0172   0.0506   0.162    0.192   
## 3 0.730 0.0596   0.129    0.00957  0.000100 0.000100 0.0716  
## 4 0.999 0.000100 0.000100 0.000100 0.000100 0.000100 0.000100
## 5 0.999 0.000100 0.000100 0.000100 0.000100 0.000100 0.000100
## 6 0.721 0.0666   0.110    0.0293   0.000100 0.000100 0.0729

The fam file

fam_file <- here(
  "output", "populations", "snps_sets", "neutral.fam"
)

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create ID column

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to matrix

leak7 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak7)

head(leak7)
##    ind pop       X1         X2         X3         X4          X5          X6
## 1 1001 OKI 0.481261 0.07082050 0.22190700 0.00569292 0.041312000 0.068599300
## 2 1002 OKI 0.240552 0.06607170 0.27105700 0.01724490 0.050601700 0.162441000
## 3 1003 OKI 0.729612 0.05958920 0.12941900 0.00956504 0.000099982 0.000099982
## 4 1004 OKI 0.999400 0.00009995 0.00009995 0.00009995 0.000099950 0.000099950
## 5 1005 OKI 0.999400 0.00009995 0.00009995 0.00009995 0.000099950 0.000099950
## 6 1006 OKI 0.721117 0.06660400 0.10982200 0.02930780 0.000099982 0.000099982
##           X7
## 1 0.11040800
## 2 0.19203200
## 3 0.07161450
## 4 0.00009995
## 5 0.00009995
## 6 0.07294920

Rename the columns

# Rename the columns starting from the third one
leak7 <- leak7 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak7)
##    ind pop       v1         v2         v3         v4          v5          v6
## 1 1001 OKI 0.481261 0.07082050 0.22190700 0.00569292 0.041312000 0.068599300
## 2 1002 OKI 0.240552 0.06607170 0.27105700 0.01724490 0.050601700 0.162441000
## 3 1003 OKI 0.729612 0.05958920 0.12941900 0.00956504 0.000099982 0.000099982
## 4 1004 OKI 0.999400 0.00009995 0.00009995 0.00009995 0.000099950 0.000099950
## 5 1005 OKI 0.999400 0.00009995 0.00009995 0.00009995 0.000099950 0.000099950
## 6 1006 OKI 0.721117 0.06660400 0.10982200 0.02930780 0.000099982 0.000099982
##           v7
## 1 0.11040800
## 2 0.19203200
## 3 0.07161450
## 4 0.00009995
## 5 0.00009995
## 6 0.07294920

Import sample locations

sampling_loc <- readRDS(here("output", "local_adaptation", "sampling_loc.rds"))
head(sampling_loc)
## # A tibble: 6 × 6
##   Pop_City   Country  Latitude Longitude Region Abbreviation
##   <chr>      <chr>       <dbl>     <dbl> <chr>  <chr>       
## 1 Gelephu    Bhutan       26.9      90.5 Asia   GEL         
## 2 Phnom Penh Cambodia     11.6     105.  Asia   CAM         
## 3 Hainan     China        19.2     110.  Asia   HAI         
## 4 Yunnan     China        24.5     101.  Asia   YUN         
## 5 Hunan      China        27.6     112.  Asia   HUN         
## 6 Bengaluru  India        13.0      77.6 Asia   BEN
source(
  here(
    "scripts", "analysis", "my_theme3.R"
  )
)

# Create a named vector to map countries to regions
country_to_region <- c(
  "Bhutan" = "South Asia",
  "Cambodia" = "Southeast Asia",
  "China" = "East Asia",
  "India" = "South Asia",
  "Indonesia" = "Southeast Asia",
  "Japan" = "East Asia",
  "Malaysia" = "Southeast Asia",
  "Maldives" = "South Asia",
  "Nepal" = "South Asia",
  "Sri Lanka" = "South Asia",
  "Taiwan" = "East Asia",
  "Thailand" = "Southeast Asia",
  "Vietnam" = "Southeast Asia"
)

# Add the region to the data frame
sampling_loc$Region2 <- country_to_region[sampling_loc$Country]

# Melt the data frame for plotting
Q_melted <- leak7 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(Region, Region2, Country, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Region_Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Region_Country = unique(Q_ordered$Region_Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Region_Country, function(rc)
    max(which(Q_ordered$Region_Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 
color_palette <-
  c(
    "v1" = "#FF0000",
    "v2" = "#0000FF",
    "v3" = "#FF00FF",
    "v4" = "#D0F0C0",
    "v5" = "#00FF00",
    "v6" = "#FFFF00",
    "v7" = "#AEC6CF"
  )


# Generate all potential variable names
all_variables <- paste0("v", 1:7)

# Map each variable to a name
color_mapping <- data.frame(variable = all_variables,
                            color = names(color_palette))

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=7.\n LEA inference for k7 with intergenic SNPs.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("output", "populations", "figures", "lea_k=7_neutral.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

1.2 k5

We can plot k=5 to compare to other algorithms.

Find the best run

best = which.min(cross.entropy(project, K = 5))
best
## [1] 3

Run 3 for k=5

# Extract ancestry coefficients
leak5 <- read_delim(
  here("output", "populations", "snps_sets", "neutral.snmf", "K5", "run3","neutral_r3.5.Q"),
  delim = " ",
  col_names = FALSE,
  show_col_types = FALSE
) 
# unseen_pckmeans.7.Q
# pckmeans.7.Q
head(leak5)
## # A tibble: 6 × 5
##      X1     X2     X3     X4     X5
##   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 0.486 0.156  0.0171 0.0872 0.255 
## 2 0.413 0.221  0.0428 0.0816 0.241 
## 3 0.609 0.0970 0.0268 0.102  0.165 
## 4 0.711 0.123  0.0448 0.0523 0.0687
## 5 0.769 0.128  0.0445 0.0295 0.0289
## 6 0.595 0.117  0.0503 0.0863 0.151

The fam file

fam_file <- here(
  "output", "populations", "snps_sets", "neutral.fam"
)

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create ID column

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"

# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to matrix

leak5 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak5)

head(leak5)
##    ind pop       X1        X2        X3        X4        X5
## 1 1001 OKI 0.485589 0.1555300 0.0171214 0.0872414 0.2545190
## 2 1002 OKI 0.413323 0.2213730 0.0427845 0.0816402 0.2408790
## 3 1003 OKI 0.609096 0.0969784 0.0268309 0.1019500 0.1651450
## 4 1004 OKI 0.711456 0.1227820 0.0447598 0.0522809 0.0687215
## 5 1005 OKI 0.769209 0.1279830 0.0444730 0.0294702 0.0288648
## 6 1006 OKI 0.595327 0.1167970 0.0502820 0.0862996 0.1512940

Rename the columns

# Rename the columns starting from the third one
leak5 <- leak5 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak5)
##    ind pop       v1        v2        v3        v4        v5
## 1 1001 OKI 0.485589 0.1555300 0.0171214 0.0872414 0.2545190
## 2 1002 OKI 0.413323 0.2213730 0.0427845 0.0816402 0.2408790
## 3 1003 OKI 0.609096 0.0969784 0.0268309 0.1019500 0.1651450
## 4 1004 OKI 0.711456 0.1227820 0.0447598 0.0522809 0.0687215
## 5 1005 OKI 0.769209 0.1279830 0.0444730 0.0294702 0.0288648
## 6 1006 OKI 0.595327 0.1167970 0.0502820 0.0862996 0.1512940

Import sample locations

sampling_loc <- readRDS(here("output", "local_adaptation", "sampling_loc.rds"))
head(sampling_loc)
## # A tibble: 6 × 6
##   Pop_City   Country  Latitude Longitude Region Abbreviation
##   <chr>      <chr>       <dbl>     <dbl> <chr>  <chr>       
## 1 Gelephu    Bhutan       26.9      90.5 Asia   GEL         
## 2 Phnom Penh Cambodia     11.6     105.  Asia   CAM         
## 3 Hainan     China        19.2     110.  Asia   HAI         
## 4 Yunnan     China        24.5     101.  Asia   YUN         
## 5 Hunan      China        27.6     112.  Asia   HUN         
## 6 Bengaluru  India        13.0      77.6 Asia   BEN

Structure plot

source(
  here(
    "scripts", "analysis", "my_theme3.R"
  )
)

# Create a named vector to map countries to regions
country_to_region <- c(
  "Bhutan" = "South Asia",
  "Cambodia" = "Southeast Asia",
  "China" = "East Asia",
  "India" = "South Asia",
  "Indonesia" = "Southeast Asia",
  "Japan" = "East Asia",
  "Malaysia" = "Southeast Asia",
  "Maldives" = "South Asia",
  "Nepal" = "South Asia",
  "Sri Lanka" = "South Asia",
  "Taiwan" = "East Asia",
  "Thailand" = "Southeast Asia",
  "Vietnam" = "Southeast Asia"
)

# Add the region to the data frame
sampling_loc$Region2 <- country_to_region[sampling_loc$Country]

# Melt the data frame for plotting
Q_melted <- leak5 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(Region, Region2, Country, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Region_Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Region_Country = unique(Q_ordered$Region_Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Region_Country, function(rc)
    max(which(Q_ordered$Region_Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 
color_palette <-
  c(
    "v1" = "#FF0000",
    "v2" = "#FFFF00",
    "v3" = "#FF00FF",
    "v4" = "#0000FF",
    "v5" = "#00FF00"
  )

# Generate all potential variable names
all_variables <- paste0("v", 1:5)

# Map each variable to a name
color_mapping <- data.frame(variable = all_variables,
                            color = names(color_palette))

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=5.\n LEA inference for k1:15 with intergenic SNPs") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("output", "populations", "figures", "lea_k=5_neutral.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

2 SNP set with prunning using r2 0.01

Import the data

genotype <- here(
    "output", "populations", "snps_sets", "r2_0.01.vcf"
  )

d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 20931
##   column count: 246
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 20931
##   Character matrix gt cols: 246
##   skip: 0
##   nrows: 20931
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant: 20931
## All variants processed

Samples ids

#create vectors of individual names and population attribution
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## BEN CAM CHA GEL HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KLP KUN LAM MAT OKI 
##  12  12  12   2  12   4   7  12  11   4   2   6  12  11  10   4   4   9  12  11 
## QNC SON SSK SUF SUU TAI UTS YUN 
##  12   3  12   6   6   8  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))

Convert

#  convert the vcf file, which should be located in the working directory, into a geno file in the working directory
vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   237
##  - number of detected loci:      20931
## 
## For SNP info, please check /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.01.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.01.removed file, for more informations.
## [1] "/Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.01.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   237
##  - number of detected loci:      20931
## 
## For SNP info, please check /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.01.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.01.removed file, for more informations.
## 
## 
##  - number of detected individuals:   237
##  - number of detected loci:      20931
## [1] "/Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.01.lfmm"

PCA

### PCA #####
# set output dir
setwd(
  here(
    "output", "populations"
  )
)
nPC <- length(inds)
pc <- pca(gsub(".vcf", ".lfmm", genotype), K = nPC)
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
## 
##         -n (number of individuals)          237
##         -L (number of loci)                 20931
##         -K (number of principal components) 237
##         -x (genotype file)                  /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.01.lfmm
##         -a (eigenvalue file)                /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.01.pca/r2_0.01.eigenvalues
##         -e (eigenvector file)               /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.01.pca/r2_0.01.eigenvectors
##         -d (standard deviation file)        /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.01.pca/r2_0.01.sdev
##         -p (projection file)                /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.01.pca/r2_0.01.projections
##         -c data centered
show(pc)
## * pca class *
## 
## project directory:               /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/ 
## pca result directory:            r2_0.01.pca/ 
## input file:                      r2_0.01.lfmm 
## eigenvalue file:                 r2_0.01.eigenvalues 
## eigenvector file:                r2_0.01.eigenvectors 
## standard deviation file:         r2_0.01.sdev 
## projection file:                 r2_0.01.projections 
## pcaProject file:                   r2_0.01.pcaProject 
## number of individuals:           237 
## number of loci:                  20931 
## number of principal components:  237 
## centered:                        TRUE 
## scaled:                          FALSE

Test

# PC significant test: tracy-widom test
tw <- tracy.widom(pc)
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
## 
##         -n (number of eigenvalues)          237
##         -i (input file)                     /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.01.pca/r2_0.01.eigenvalues
##         -o (output file)                    /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.01.pca/r2_0.01.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "pink", cex = .8)

Values

# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)

Shapes

#to see all shapes -> plot shapes - para escolher os simbolos
N = 100; M = 1000
good.shapes = c(1:25,33:127)
foo = data.frame( x = rnorm(M), y = rnorm(M), s = factor( sample(1:N, M, replace = TRUE) ) )
# ggplot(aes(x,y,shape=s ), data=foo ) +
#   scale_shape_manual(values=good.shapes[1:N]) +
#   geom_point()
sampling_loc <- readRDS(here("output", "local_adaptation", "sampling_loc.rds"))
head(sampling_loc)
## # A tibble: 6 × 6
##   Pop_City   Country  Latitude Longitude Region Abbreviation
##   <chr>      <chr>       <dbl>     <dbl> <chr>  <chr>       
## 1 Gelephu    Bhutan       26.9      90.5 Asia   GEL         
## 2 Phnom Penh Cambodia     11.6     105.  Asia   CAM         
## 3 Hainan     China        19.2     110.  Asia   HAI         
## 4 Yunnan     China        24.5     101.  Asia   YUN         
## 5 Hunan      China        27.6     112.  Asia   HUN         
## 6 Bengaluru  India        13.0      77.6 Asia   BEN
# import sample attributes
samples2 <- read.delim(
  here(
    "output", "populations", "Population_meta_data.txt"
  ),
  head = TRUE
)

samples2<- samples2 |>
  dplyr::select(
    region, pop
  )

# check head of the file
head(samples2)
##      region pop
## 1 East Asia HAI
## 2 East Asia HUN
## 3 East Asia YUN
## 4 East Asia AIZ
## 5 East Asia HIR
## 6 East Asia JAT

Merge with sampling_loc

merged_loc <- merge(samples2, sampling_loc, by.x = "pop", by.y = "Abbreviation")
head(merged_loc)
##   pop          region           Pop_City    Country Latitude Longitude Region
## 1 AIZ       East Asia Aizuwakamatsu City      Japan  37.4924  139.9936   Asia
## 2 ALV Southern Europe              Vlore    Albania  40.4660   19.4897 Europe
## 3 ANT     East Africa       Antananarivo Madagascar -18.8792   47.5079 Africa
## 4 AWK     West Africa               Awka    Nigeria   6.2105    7.0741 Africa
## 5 BAR Southern Europe          Barcelona      Spain  41.3851    2.1734 Europe
## 6 BEN      South Asia          Bengaluru      India  12.9716   77.5946   Asia
head(pc.coord$Population)
## [1] OKI OKI OKI OKI OKI OKI
## 28 Levels: BEN CAM CHA GEL HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT ... YUN

Check how many sampling localities

length(unique(pc.coord$Population))
## [1] 28

Merge

merged_data <- merge(pc.coord, merged_loc, by.x = "Population", by.y = "pop")
head(merged_data)
##   Population       PC1        PC2         PC3        PC4         PC5
## 1        BEN -17.90190 -13.002500  -7.2670900 -11.918100   1.0964700
## 2        BEN -17.93450 -14.717300  -6.3516000 -10.708700  -0.0449077
## 3        BEN -18.22870 -14.240900  -4.4810400 -10.268100   2.9889300
## 4        BEN -19.67310 -14.695200  -6.1351400 -12.530900   0.6038590
## 5        BEN -19.01670 -14.440200  -6.0434600 -10.188900   0.4559690
## 6        BEN -19.43160 -15.309400  -6.3463000 -11.194800   1.8665300
##           PC6         PC7         PC8         PC9         PC10        PC11
## 1  11.3607000   3.6325200   0.5615690   0.7443210   3.56790000  -1.2186300
## 2  11.7923000   6.1032400   0.9970240   1.2984900   3.62965000   1.3514300
## 3  12.0954000   3.8806900   0.4328050   4.1457800   0.44179200  -1.1241700
## 4  12.6324000   4.7203700   3.0257500  -0.4835580   3.13575000   1.4538800
## 5  10.3163000   4.9066400  -1.3510300   1.8632900   0.57243500   1.7469600
## 6  14.2087000   5.4421200   1.3146500   0.3335790   4.30928000   2.3163900
##          PC12        PC13         PC14        PC15        PC16        PC17
## 1   5.0798800  10.8528000  -1.62618000   1.5962100  -2.3794600   7.7308200
## 2   6.8340400  17.3541000  -3.41404000   3.9840700  -8.7519700  11.1157000
## 3   6.0387700   8.8210700   1.56500000   1.7374900  -6.1850600   4.3241800
## 4   9.5661100  23.8264000  -2.17051000   1.3617000 -19.8542000  21.7801000
## 5   4.9803800  11.0009000  -1.08150000   2.2197600  -9.7946900   9.1312500
## 6  10.4980000  23.8454000  -2.72740000  -0.9202820 -19.4283000  23.0033000
##          PC18        PC19         PC20         PC21        PC22        PC23
## 1   8.6297600  -7.2383600  -1.72834000  -2.29163000  -1.5999600  -0.9595880
## 2  12.3018000 -16.5695000  -4.05063000   4.61595000   7.6653600   8.0522900
## 3   7.1417500  -4.5098100  -0.89364100  -0.18754700   2.9355700  -0.8048980
## 4  29.8949000 -25.7393000  -6.26344000 -17.30780000  -4.8246000   6.9430100
## 5  12.6089000  -9.2004900   0.16270700   1.02105000   2.7359800  -1.1633200
## 6  30.7327000 -26.2560000  -6.58537000 -18.25270000  -6.4104500   5.7672900
##          PC24        PC25        PC26        PC27        PC28        PC29
## 1   0.2011810   0.9980390   4.8263200   0.1325070   8.5312600   4.8973400
## 2  -2.6400800  -8.0084100   3.4078600   0.5875850  41.7643000  -2.9045500
## 3  -1.2316800  -2.6352300   0.6075140  -3.4183700  14.3696000   2.2585800
## 4  -1.0752500  -1.3561500  -3.7509800   7.4906100 -53.4306000  -0.8970300
## 5  -2.0157800  -3.0357300   4.9004800  -3.9299400  13.1434000   1.1177900
## 6   1.1187500  -0.6441630  -4.6809100   7.3379300 -53.3781000  -1.9565300
##          PC30        PC31        PC32       PC33         PC34        PC35
## 1  -1.0993600   1.3776500  -3.1590300   8.197340 -3.28310e+00   0.6942100
## 2   9.6170300   0.4396210 -11.5739000  -6.502880 -2.98107e+00 -12.8954000
## 3   1.9207300   0.6430770   0.6475940   6.318330 -1.45904e+00   5.7210000
## 4  -5.2056400  -0.7765100   0.1151770  -3.516580  8.54274e+00   3.5054600
## 5   4.9757300   2.8238400   3.3264400   3.782140 -2.01291e+00   2.7278200
## 6  -6.3261300  -0.3028110   0.2640330  -4.705340  8.46390e+00   2.7145000
##         PC36        PC37        PC38        PC39        PC40         PC41
## 1 -19.546100   1.6307400   7.9670900  12.1926000  -7.5127000  -6.71680000
## 2  35.259100 -12.6680000 -19.7221000  -7.4981500   4.1988300   9.48769000
## 3  -4.952760   9.1514800   3.9724600   5.3001000  -0.6516530  -8.97442000
## 4   9.030310  -5.3780200  -5.8836200  -2.0504600   2.7857100   0.99269000
## 5  -6.012780   5.1401600   5.6471700   2.4481400  -8.8134900   3.06722000
## 6  10.026200  -6.1831900  -3.4579000  -1.5059100   3.7721300   0.92876900
##           PC42         PC43         PC44        PC45         PC46        PC47
## 1   0.27363700   0.48858400   6.65987000  -2.0698800   2.19308000   3.2132200
## 2  -3.37954000   3.00714000 -11.10240000 -11.4410000  -1.34877000  -6.4843800
## 3   3.89341000   8.49724000   7.65705000  -0.6474010  -4.21483000   9.8374400
## 4   0.24127500   2.73943000  -7.27468000   1.0411900   1.94592000   0.0667949
## 5   5.79418000 -10.21430000  10.86950000   6.4989500  -8.09734000   6.3903500
## 6  -0.40454900   3.95591000  -3.62748000   1.9212600  -0.11949000  -0.0968189
##          PC48        PC49        PC50        PC51        PC52         PC53
## 1   3.4432700  -3.0286700   0.3809270   2.8016300  -0.9819770   5.59108000
## 2  -2.7165900  -0.1707690   2.1610400  -3.2642200   0.1060190  -4.97748000
## 3   1.8423600  -6.1604300   9.0706000   4.3074200  -4.0256700   2.67980000
## 4  -4.3389700   1.6031800  -0.2220430   4.7514700  -0.3281900  -0.85157900
## 5  -7.9248600   0.9392220   1.3949800  -0.2696310  -6.9665900   3.21643000
## 6  -4.0341500   1.1811200  -0.1944490   3.0525400  -2.5135200   0.03861260
##          PC54        PC55        PC56        PC57        PC58        PC59
## 1  -0.6117950  10.0651000   9.6576100   4.7893900   1.3430800  -0.2409040
## 2   4.5585500   0.8849700   3.1219600   1.8669000   2.5932400   1.0940000
## 3   3.3207900   8.3311700  -6.7989200   1.0066300  -5.1416500  -4.2421800
## 4   0.8164820   2.7990500  -0.3478110  -4.0807900  -2.2665200   1.5128700
## 5  -9.3075500  -1.3163200  -0.4588830  -5.4090400   0.2336710   7.8980000
## 6   1.8932200   1.3383700  -1.6295700  -1.9759700  -3.2515500   1.8249000
##          PC60         PC61         PC62         PC63        PC64        PC65
## 1  14.5442000   6.75771000  -4.16275000  -3.34383000  -4.0519500  -8.9364100
## 2   2.8806000   0.79244800  -6.11226000  -2.09776000   0.8313180   0.8872920
## 3   3.2729200   5.45797000   4.79916000 -11.29980000  18.3176000  -6.7797600
## 4   0.2076770   2.27738000  -0.19930500   1.08452000   0.5369900   0.3139800
## 5  -1.7100100 -20.47780000   5.00979000  -2.12069000  -8.8987000  13.4305000
## 6  -0.1712560   0.00267691   0.19289800   2.12909000  -1.0451100  -0.6928220
##          PC66        PC67        PC68        PC69        PC70         PC71
## 1  -0.3976430   4.2047500  -5.8997100  11.1053000  -8.2240800  -6.56877000
## 2   3.8146500   0.1732010   4.7255700   1.8232100  -4.8239700   1.92682000
## 3   3.9141400 -11.9011000  -7.6325900  -1.5422200   5.0635000 -19.40920000
## 4   1.6778900   1.4802400  -0.7630900  -2.3272700  -2.8085800   2.42192000
## 5  -1.3309900   8.1064900   4.8434100  -5.6680500  -3.5060700   0.11156900
## 6  -1.5918200   2.4971500  -0.2498380   1.0470100  -2.4386300   3.74709000
##           PC72        PC73        PC74        PC75        PC76        PC77
## 1  10.17420000 -11.3514000  12.1511000   7.3504900  -3.4114200  11.4614000
## 2   6.65587000  -1.7871800   0.8773190   6.6931100  -3.8885100   4.5592300
## 3  11.23100000   2.5372800   6.0350200 -16.4688000   6.0996400  -1.9770300
## 4   1.26418000   3.5485400   2.5055700  -1.3571900  -0.2802030  -2.4362500
## 5  -9.67074000  -3.5692000   0.7541080   8.9538000   9.9158600   5.7044100
## 6   1.68754000   1.4240800   1.1316700  -0.5093640   0.6560440  -2.4503900
##          PC78         PC79         PC80        PC81       PC82         PC83
## 1  -1.2538500  12.10260000  -8.48566000  -0.1041340  -3.066780  14.98330000
## 2  -3.9005900  -0.76464200  -2.58456000  -1.3075800  -0.986590   4.14678000
## 3  -5.2103500 -14.80720000   0.70741700 -12.2276000  -5.550970  -0.01035090
## 4   0.3489420   1.81816000   1.12405000   0.5692800  -1.314130   0.66222800
## 5  -5.6811100   9.44320000   3.11869000  -7.1019500   3.851270   8.07150000
## 6  -1.7167500   0.35873200   0.95966900  -1.6488600   2.060370  -0.81767900
##          PC84         PC85         PC86        PC87         PC88        PC89
## 1   2.3149900   3.89791000  12.47620000  -0.5768750 -10.94860000   8.3926800
## 2   3.5540500  -0.54512000  -0.56611100  -3.7005800  -3.49611000   5.1526000
## 3  -4.3325200  -7.16474000 -17.07000000  11.7084000 -19.53300000   3.3667300
## 4  -0.0312164   1.63946000  -2.52175000   0.6181420  -3.65607000   1.7798800
## 5  -4.5457500 -20.50650000   7.88814000   4.2336200   5.41318000   2.4999300
## 6   0.4372570   0.02794520  -1.31700000  -0.0425701  -4.42135000   2.9181500
##           PC90        PC91         PC92        PC93         PC94        PC95
## 1  5.79364e+00   8.5922300   0.14517200  -8.6384300  6.56127e+00  15.7541000
## 2 -5.08055e+00  -1.0193600   1.22284000   2.9416000  2.18153e+00   0.9014790
## 3  3.40803e+00 -12.0594000   7.34224000   5.5900500 -1.04445e+01   2.9026900
## 4 -6.27443e-01  -0.0617235  -1.85707000   2.2907300 -1.08001e+00  -1.0537700
## 5  4.08469e-01  -3.2443700  -7.11269000 -15.5868000  1.34640e+01  -6.6347000
## 6  1.27792e-01   0.2317580  -3.22602000   4.5194700 -6.87758e-01   1.0373500
##           PC96        PC97        PC98        PC99        PC100        PC101
## 1  -2.17798000   9.6451900  -2.9397800   3.7278100  15.45490000  -1.93906000
## 2  -1.52268000   1.4386800   4.4276200   5.1092400  -1.73627000   0.58899600
## 3   7.82575000  -6.6286800  -4.8165300  -3.7488100   8.43801000  -5.59910000
## 4  -3.78208000  -1.4896300   1.2138600  -0.2799580   3.42677000   1.98463000
## 5  -4.68281000  19.6298000  -2.5686300 -10.2652000   4.34823000  -5.26535000
## 6  -3.15895000  -0.9490340  -1.2296200  -1.3242200   0.81849900  -0.37831200
##         PC102       PC103       PC104       PC105        PC106      PC107
## 1 -11.0084000   2.6904800   7.3263300 -11.4519000 -7.58281e+00  -5.480940
## 2   0.8405100   1.7065200  -3.1779200   3.3342600  4.04050e+00  -1.574890
## 3 -10.2820000  -2.5442500   0.4606190   7.9754500  1.51699e+01   4.276550
## 4  -1.4471700  -0.3787470  -1.4323000   0.6866820 -6.73745e-01   0.172906
## 5   7.8624800 -25.7274000   2.5803600  -5.8653900  5.34167e+00  12.105200
## 6  -0.7867700  -3.2531500  -1.1306500  -1.3517000  9.83633e-01   1.013040
##          PC108        PC109        PC110        PC111       PC112       PC113
## 1   5.54389000   2.90407000 -1.04296e+00  -9.08223000   0.9924240  -5.4654500
## 2   3.21862000  -2.68425000  9.06628e-01  -0.06374850  -4.1879300   9.1203200
## 3  -7.25316000 -16.91270000 -2.83988e+00  -0.20587000   2.3416300   9.2018600
## 4  -1.58892000  -1.86825000  1.11053e-01  -1.35199000   0.6076020   0.7638030
## 5   5.53872000   7.16990000  2.32264e+00   6.74998000   5.7820500 -10.2362000
## 6  -1.70615000  -1.50396000  9.25306e-01   1.41573000   0.7468450   0.9168010
##         PC114        PC115       PC116       PC117       PC118        PC119
## 1  16.0245000  2.00140e-01   2.2682500  30.9071000 -14.0176000  14.49490000
## 2  -2.9589500  1.11736e+00   1.0987300   0.2075210   2.7354000   3.58570000
## 3  -1.7522100  3.60816e+00  -5.5661300   0.3417380  -2.0877900 -11.84230000
## 4  -3.0624800  2.11320e-01  -1.8161900   1.2055500   0.0216941  -1.15648000
## 5   3.0396500 -5.68673e+00   3.0634800 -21.3337000  14.7475000   1.74105000
## 6  -4.4599900  3.44836e-01   0.6655690  -0.3938190   0.8635360   0.09947570
##         PC120       PC121       PC122       PC123       PC124       PC125
## 1  11.5170000   3.7715500  -5.3076400  14.1398000   0.3237060  14.3916000
## 2  -1.2288700  -3.3643500   3.1099900   1.3271100   2.0985700   1.7774900
## 3 -26.4610000  -0.1960150   2.3575800 -11.9602000  -8.8435700  -2.2920000
## 4   2.3529400  -2.3316600  -0.5208220  -0.1174560   0.3076740   0.5477080
## 5  -8.1251300   3.7056300   4.9470600   8.1311400  -2.4845400  -7.0165700
## 6  -0.6484200  -1.3230400   1.4381300   0.7894440  -0.3390400   1.5554200
##          PC126       PC127       PC128        PC129       PC130        PC131
## 1 -13.37340000  10.5410000  -8.9874000   0.56791200  -2.5380600  -3.79983000
## 2  -3.80600000   1.8182000  -3.6902000  -0.95580000  -0.0548548  -0.13123200
## 3  -1.64595000  -1.3426600  -6.9094600   6.19054000  -5.6844800   2.30288000
## 4   2.04812000  -0.6915380   1.5297000  -1.88508000   0.4136230   0.06409940
## 5   4.79757000  -0.4246760 -23.9412000   4.02440000  -3.8891400  -0.12459900
## 6   0.48874100   0.3708300   0.5948270  -2.95115000   0.2731490  -2.79498000
##         PC132       PC133       PC134        PC135       PC136        PC137
## 1  -6.5030200  -2.6551700  -3.0083000  1.71730e+00 -15.7980000  -3.30561000
## 2   3.3670800  -1.2812100  -3.9863700 -3.59722e+00   3.6396300  -0.29263400
## 3  -3.8112700  -6.5208300  -2.2786300  5.91427e+00 -10.5297000   5.75885000
## 4   1.9161800   0.3483600  -1.6408100  6.24563e-01   1.9145900  -3.35105000
## 5  -9.9697800   9.6634700  16.4622000 -7.43107e+00 -13.1349000  -3.12213000
## 6   0.4723780  -1.7892500  -0.7034340  9.17912e-02   0.6926220  -0.43424800
##         PC138       PC139       PC140        PC141       PC142       PC143
## 1  -3.4260900  -6.2798900 -18.2417000  18.43020000   8.3856500   8.3382400
## 2   2.0531300  -1.9245000   5.3901200   3.73856000   0.8868380  -0.2223570
## 3  -2.3776000 -15.5123000  -3.4239300  -5.39212000   9.8656200 -14.1335000
## 4   1.9526400  -0.1817410  -1.1137700  -0.06987900  -1.2474000   1.1433900
## 5  -9.4054400  -0.4743670   6.8887700  -6.01800000  -8.0882000  -2.1503700
## 6   0.3406310   1.8611000   1.3291800  -1.37300000  -0.0787373  -0.5188240
##         PC144        PC145       PC146        PC147       PC148       PC149
## 1   5.5408400  -3.53930000   7.4150600 -3.42937e+00 -14.0967000  -0.2905990
## 2  -1.4229900  -1.28928000  -0.1623980 -1.42335e+00  -0.9474570  -0.4728770
## 3 -19.5236000  -5.99970000  -2.0135600  3.70539e+00 -12.8249000  -4.6990300
## 4  -3.7148500   2.61356000  -0.0552736  1.90159e-01  -1.5400000  -0.5712190
## 5  12.6267000   5.53412000  -3.4007200  1.10638e+01   2.7785500  -6.2069600
## 6  -2.4687700   1.81950000   0.6855870  9.36018e-01  -1.4877200  -0.9258230
##         PC150       PC151       PC152       PC153        PC154       PC155
## 1  -3.6434800  -2.5165500   8.6529600   6.8365100   3.97002000  -2.4990600
## 2   3.2271500  -0.6025260   0.2413790  -2.1465900  -0.17821300  -2.1719900
## 3  -4.8782900  -3.0064800  11.3089000 -18.0655000   3.58072000   3.4149300
## 4   2.5511700  -0.7835840  -1.8468500   2.1208400   0.86718000   1.3280200
## 5   5.5223400 -15.3104000  12.5198000   0.2354350   4.09006000  -4.6936100
## 6   1.3914800  -1.6093300   0.0225369  -0.2142720   0.16645600  -0.8581280
##         PC156       PC157        PC158       PC159        PC160       PC161
## 1   3.4061500   5.6563400 -14.58930000  -2.2798100   2.55155000  -4.1559500
## 2   3.1256100   5.4330800   1.86930000  -1.3366600   2.41087000   0.5058550
## 3  -7.5334500 -11.3179000   8.14216000   8.1905900  -3.37938000  -3.3158400
## 4  -0.4268840   0.1669370   1.33259000   0.1056740   1.84925000  -0.7278290
## 5   5.9555400  -0.2620110  15.39410000  -2.8111400   2.83182000  -7.7175000
## 6   2.6887400   1.7441300  -0.42150600   0.0936997   1.10580000   0.8256850
##          PC162        PC163       PC164        PC165        PC166       PC167
## 1  -3.51141000   6.82734000  -3.0286500  -6.18625000  -1.88673000   3.0251000
## 2   1.09072000   2.31460000   0.2269610   3.22892000  -0.24556700  -2.2675200
## 3  -3.55780000  -6.88406000  -4.2120400  -7.34124000   1.14787000   9.3826900
## 4  -2.60971000  -0.09920390  -1.2460100   1.19306000   0.28925800  -0.8751480
## 5   8.38805000  -2.39690000  -4.3334900  -5.83379000   4.37561000   4.9103300
## 6   3.39013000  -1.42401000   2.5435600  -1.37272000  -0.44609500  -0.5880850
##          PC168       PC169       PC170       PC171       PC172       PC173
## 1 -10.22260000   0.0996273   0.7942900  -2.4458000   1.2247300   1.5337200
## 2   1.92965000  -5.1401600   4.8208800  -0.3862410  -0.9916400   8.7602700
## 3  -6.04788000  -2.7664900   0.7856120  -2.4417500   4.5045000  -2.2243100
## 4  -1.16024000   2.2261500   0.9775040  -1.6839200   1.5442200   1.6330500
## 5  -1.68581000  -0.3800320   1.9521600  -0.0222831  -1.6690500  -3.2809600
## 6  -0.67821100  -0.4590530  -3.1786200  -0.6266000  -1.8906000  -1.6988800
##         PC174       PC175        PC176       PC177        PC178        PC179
## 1  -5.7806100   0.7983410   0.00877461   1.8108500   3.85221000   1.66574000
## 2   4.7133100  -9.0851500  -5.14256000   1.9136200 -10.10670000  21.35360000
## 3   5.5615300  -0.0637149   5.04407000   3.1295600  -1.06508000  -0.58003900
## 4   1.5073600  -1.2129000  -0.41484000   1.9049900   0.55288900  -2.34546000
## 5   1.8357800   1.3384100  -0.30506900   2.0971600  -0.37432500  -1.72155000
## 6  -0.8807790  -0.6410000  -1.13724000  -1.7205800  -0.35331700   1.65000000
##         PC180        PC181       PC182       PC183       PC184       PC185
## 1  -1.1177500   1.68266000   0.1625360  -0.6873690  -0.1991950  -2.1421400
## 2  -0.3818160 -21.89650000 -30.0915000  -2.8223600 -20.5764000 -16.0535000
## 3   0.4946190   1.76407000   5.7782900  -0.2852550  -2.4422900  -1.4630100
## 4  -1.1191600   2.44442000   2.4140900   0.5945130   2.3770900   0.0210807
## 5   0.9931850  -0.00366152   1.1296100  -0.7329420   1.3697700  -0.8306430
## 6   2.1824700  -0.86152400  -2.9240000  -1.7888400  -2.5250800  -0.3417910
##          PC186       PC187        PC188       PC189        PC190       PC191
## 1  -0.58769500   0.5035180   0.27533300   0.6882360   0.75877800  -2.0961300
## 2  -4.91425000  -3.2009500  10.19500000  -1.4866600  10.16390000   3.5570200
## 3  -3.75322000   0.5434730   2.09033000  -0.5993340   0.34070400   1.0701100
## 4  -1.32761000   1.9706000   0.80817500  -0.7095880   0.81718400   2.2280800
## 5   0.72703800   1.1247100   2.51569000   2.2309500  -0.56147900   0.9927260
## 6   0.87559200  -1.0830600  -3.87144000   1.2436500   0.29100400  -1.3358200
##         PC192        PC193        PC194        PC195        PC196        PC197
## 1  -0.0238817  -0.38964400  -0.49024500   0.61366800  9.26965e-01   0.78885700
## 2   4.2008800  -3.81343000   0.87417200   2.64868000 -6.84628e+00   0.86509400
## 3  -1.0110900   0.92613700  -1.26355000   1.40622000 -1.29062e+00   0.24960800
## 4  -1.9902700  -3.14291000   0.62826800  -0.91398500 -1.03266e+00  -2.28230000
## 5  -2.4911500  -0.11211700   1.64198000   2.27782000 -7.43295e-02   0.79224700
## 6   1.8620200   2.24947000  -1.46699000   0.42436500  9.79570e-01   1.80249000
##          PC198        PC199        PC200       PC201        PC202        PC203
## 1  -0.27983500  1.36257e+00   0.88625200   0.3675210  -0.02983140  -0.74587700
## 2   1.74120000 -7.01877e-01  -1.48442000  -3.9125000  -0.33859300  -1.14249000
## 3   0.00583607  8.62176e-01  -2.03765000  -0.0407867  -1.18543000  -0.95655900
## 4  -1.14747000  3.38411e-01   0.00306267  -3.9818500  -0.34079700  -0.96115700
## 5  -0.87461500 -8.37404e-01   0.67335100  -0.4522920  -0.09074620   0.59809600
## 6   1.80796000 -2.26957e-01   0.57860000   3.9909200  -0.33454100   1.16419000
##         PC204        PC205       PC206       PC207        PC208       PC209
## 1  -1.3294200   0.94916000   0.0299917  -0.4342240  -0.42708400   0.6010560
## 2   0.4091430   0.45188200   1.8599100   0.0502472   0.66675500   0.1485050
## 3   0.5120000   1.21180000  -0.5175010  -0.9309760  -0.78552000   0.3290820
## 4  11.0084000  13.71620000  34.0681000  11.6234000  28.36580000   7.2110000
## 5  -0.3647070   2.75853000   1.4452400  -0.8239570   1.61747000   0.8434370
## 6 -10.5223000 -13.46310000 -33.7128000 -10.8713000 -28.31130000  -6.9543700
##          PC210        PC211        PC212        PC213        PC214       PC215
## 1  1.16840e+00   1.66659000 -7.50885e-01  -0.39230300  -0.15339200   0.0859478
## 2 -2.17571e+00  -0.74063500 -2.77119e-02  -0.09033910   1.16402000  -0.4931860
## 3 -4.75291e-01  -0.64450000  9.10209e-01  -0.24600000  -0.66132600  -0.2668270
## 4 -8.49697e+00   1.72123000  1.52825e+00  -6.57878000  -4.05028000   0.8376290
## 5 -2.94385e-01  -0.72402900 -2.28893e+00   0.15191700  -0.75391700  -0.2944950
## 6  8.41046e+00  -2.62148000 -2.72686e+00   6.21112000   4.14061000  -2.1600800
##          PC216        PC217       PC218        PC219        PC220        PC221
## 1  -0.25909800  -0.75609300  -0.6732210   0.35182400  -0.39490900  -0.73818500
## 2  -0.63981300  -0.40078500   0.4307900   1.70623000   1.31312000  -0.12043900
## 3   0.62269100  -0.03747600   0.1398690  -0.03076260   0.36565100   0.50693400
## 4   3.40488000  -0.86605800   0.9283760  -2.32685000   0.29134700  -2.06623000
## 5   0.62887400  -1.06951000  -0.5426910  -0.22257500   0.37298500  -0.16970000
## 6  -2.96618000   0.31438700  -1.0794000   1.12584000  -0.51228200   1.47176000
##          PC222        PC223        PC224        PC225        PC226        PC227
## 1   0.49605300   0.31054800   0.21872000   0.55312500   0.41663800  -0.03724110
## 2  -0.83249600  -0.02255420  -0.28774000  -0.05444470   0.23562300   0.66069000
## 3  -0.03452660   0.19564500   1.29158000  -0.11675400  -0.37066100   0.34605900
## 4  -1.38360000   0.29058500   0.96982300   0.61571200   0.22156800  -0.16733400
## 5  -0.00491723  -0.19795800   0.18460600   0.18539800  -0.12778200  -0.43623000
## 6   2.10633000  -0.04304780  -1.57466000  -1.22561000   0.16357800   0.16727400
##          PC228        PC229        PC230        PC231        PC232        PC233
## 1  -0.57371700  7.87250e-02 -3.38842e-01  -0.03936270 -2.10701e-01   0.16224100
## 2   0.76586500  4.29712e-01  6.41845e-01   0.13673200  2.28730e-01   0.12963500
## 3  -0.02058750  5.59669e-01 -2.72200e-01  -0.17554100 -2.98125e-02  -0.02614160
## 4   0.97662700 -3.41863e-01  7.25576e-01   0.07196790 -2.81714e-01  -0.06426680
## 5   0.25871900 -7.09960e-01 -8.71551e-02  -0.52434000  1.39599e-01   0.08441400
## 6  -1.11175000  8.19412e-01 -8.58836e-01   0.98445500  5.90318e-01  -0.20273500
##          PC234        PC235        PC236 PC237 Individual     region  Pop_City
## 1  -0.07677560  9.53793e-02 -2.30538e-02   NAN          a South Asia Bengaluru
## 2  -0.05388450  2.44945e-01  6.48216e-02   NAN        257 South Asia Bengaluru
## 3  -0.08380570 -7.71649e-02  1.85948e-02   NAN        261 South Asia Bengaluru
## 4   0.20849300  1.91257e-01  8.65440e-02   NAN        263 South Asia Bengaluru
## 5  -0.33321400  2.16569e-01  1.62813e-01   NAN        264 South Asia Bengaluru
## 6   0.10208400 -3.33634e-01 -2.70744e-02   NAN        262 South Asia Bengaluru
##   Country Latitude Longitude Region
## 1   India  12.9716   77.5946   Asia
## 2   India  12.9716   77.5946   Asia
## 3   India  12.9716   77.5946   Asia
## 4   India  12.9716   77.5946   Asia
## 5   India  12.9716   77.5946   Asia
## 6   India  12.9716   77.5946   Asia

Plot

# source the plotting function
source(
  here(
    "scripts", "analysis", "my_theme2.R"
  )
)

distinct_palette <- c(
  "#E69F00",
  "#009E73",
  "#0072B2",
  "#CC79A7",
  "#800000",
  "#808080",
  "#CCEBC5",
  "#FFB5B8",
  "#99CCFF",
  "#8E8BFF",
  "#F781BF",
  "#FFFF33",
  "#A65628",
  "#984EA3",
  "#D2D2D2",
  "#4DAF4A",
  "#DEB887",
  "#7FFFD4",
  "#D2691E",
  "#DC143C",
  "#8B008B"
)
merged_data$PC1 <- as.numeric(as.character(merged_data$PC1))
merged_data$PC2 <- as.numeric(as.character(merged_data$PC2))
merged_data$PC3 <- as.numeric(as.character(merged_data$PC3))

# make plot by continent and range
ggplot(merged_data, aes(PC1, PC2)) +
  geom_point(aes(colour = Country, shape = Country), size = 1) +
  xlab(paste0("PC1 (", perc[1], " Variance)")) +
  ylab(paste0("PC2 (", perc[2], " Variance)")) +
  labs(
    caption = "PCA with 20,931 SNPs of 237 mosquitoes from 28 localities in Asia."
  ) +
  guides(
    color = guide_legend(title = "Country", ncol = 3),
    shape = guide_legend(title = "Country", ncol = 3),
    fill = guide_legend(title = "Region", ncol = 1)
  ) +
  stat_ellipse(aes(fill = region, group = region), geom = "polygon", alpha = 0.2, level = 0.8) +
  scale_color_manual(values = distinct_palette) +
  scale_shape_manual(values=good.shapes[c(1:25, 58:67)]) +
  my_theme() +
  theme(
    plot.caption = element_text(face = "italic"),
    legend.position = "top",
    legend.justification = "top",
    legend.box.just = "center",
    legend.box.background = element_blank(),
    plot.margin = margin(5.5, 25, 5.5, 5.5, "points"),
    legend.margin = margin(10,10,10,10)
  )

# #   ____________________________________________________________________________
# #   save the pca plot                                                       ####
ggsave(
  here(
    "output", "populations", "figures", "PCA_lea_pc1_pc2_r2_0.01.pdf"
  ),
  width  = 8,
  height = 6,
  units  = "in"
)

PC1 an PC3

# source the plotting function
source(
  here(
    "scripts", "analysis", "my_theme2.R"
  )
)

distinct_palette <- c(
  "#E69F00",
  "#009E73",
  "#0072B2",
  "#CC79A7",
  "#800000",
  "#808080",
  "#CCEBC5",
  "#FFB5B8",
  "#99CCFF",
  "#8E8BFF",
  "#F781BF",
  "#FFFF33",
  "#A65628",
  "#984EA3",
  "#D2D2D2",
  "#4DAF4A",
  "#DEB887",
  "#7FFFD4",
  "#D2691E",
  "#DC143C",
  "#8B008B" 
)


# make plot by continent and range
ggplot(merged_data, aes(PC1, PC2)) +
  geom_point(aes(colour = Country, shape = Country), size = 1) +
  xlab(paste0("PC1 (", perc[1], " Variance)")) +
  ylab(paste0("PC3 (", perc[3], " Variance)")) +
  labs(
    caption = "PCA with 20,931 SNPs of 237 mosquitoes from 28 localities in Asia."
  ) +
  guides(
    color = guide_legend(title = "Country", ncol = 3),
    shape = guide_legend(title = "Country", ncol = 3),
    fill = guide_legend(title = "Region", ncol = 1)
  ) +
  stat_ellipse(aes(fill = region, group = region), geom = "polygon", alpha = 0.2, level = 0.8) +
  scale_color_manual(values = distinct_palette) +
  scale_shape_manual(values=good.shapes[c(1:25, 58:67)]) +
  my_theme() +
  theme(
    plot.caption = element_text(face = "italic"),
    legend.position = "top",
    legend.justification = "top",
    legend.box.just = "center",
    legend.box.background = element_blank(),
    plot.margin = margin(5.5, 25, 5.5, 5.5, "points"),
    legend.margin = margin(10,10,10,10)
  )

# #   ____________________________________________________________________________
# #   save the pca plot                                                       ####
ggsave(
  here(
    "output", "populations", "figures", "PCA_lea_pc1_pc3_r2_0.01.pdf"
  ),
  width  = 8,
  height = 6,
  units  = "in"
)
# set output dir
setwd(
  here(
    "output", "populations"
  )
)
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 4 is the number of CPU used (hidden input) project = NULL
project = snmf(
  genotype,
  K = 1:15,
  project = "new",
  repetitions = 5,
  CPU = 4,
  entropy = TRUE
)

Load project

# To load the project, use:
project = load.snmfProject("output/populations/snps_sets/r2_0.01.snmfProject")

# To remove the project, use:
 # remove.snmfProject("snps_sets/r2_0.01.snmfProject")

Cross entropy

# Open a new pdf file
pdf(here("output","populations","figures","lea_cross_entropy_r2_0.01.pdf"), width = 6, height = 4)

# Create your plot
plot(project, col = "pink", pch = 19, cex = 1.2)

# Close the pdf file
dev.off()
plot(project, col = "pink", pch = 19, cex = 1.2)

We will not plot k=8 but only the k=5 to compare to admixture

2.1 k5

We can plot k=5 to compare to other algorithms

Find the best run

best8 = which.min(cross.entropy(project, K = 8)) # 5
best5 = which.min(cross.entropy(project, K = 5)) # 2
best8
## [1] 5
best5
## [1] 2

Run 3 for k=5

# Extract ancestry coefficients
leak5 <- read_delim(
  here("output", "populations", "snps_sets", "r2_0.01.snmf", "K5", "run2","r2_0.01_r2.5.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
# unseen_pckmeans.7.Q
# pckmeans.7.Q
head(leak5)
## # A tibble: 6 × 5
##         X1      X2     X3       X4    X5
##      <dbl>   <dbl>  <dbl>    <dbl> <dbl>
## 1 0.0574   0.0290  0.240  0.0219   0.651
## 2 0.0936   0.0212  0.239  0.0460   0.600
## 3 0.0114   0.00895 0.156  0.0202   0.804
## 4 0.000100 0.0106  0.117  0.000920 0.872
## 5 0.000100 0.0174  0.0831 0.000100 0.899
## 6 0.000100 0.0305  0.154  0.0105   0.805

The fam file

fam_file <- here(
  "output", "populations", "snps_sets", "r2_0.01.fam"
)

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create ID column

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"


# Merge columns "FamilyID" and "IndividualID" with an underscore
# fam_data$ind <- paste(fam_data$FamilyID, fam_data$IndividualID, sep = "_")


# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to matrix

leak5 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak5)

head(leak5)
##    ind pop          X1         X2       X3          X4       X5
## 1 1001 OKI 0.057351200 0.02903100 0.240333 0.021857400 0.651427
## 2 1002 OKI 0.093595600 0.02123130 0.239314 0.045972700 0.599886
## 3 1003 OKI 0.011381600 0.00894785 0.155884 0.020222400 0.803564
## 4 1004 OKI 0.000099990 0.01060030 0.116732 0.000919858 0.871648
## 5 1005 OKI 0.000099982 0.01739940 0.083069 0.000099982 0.899332
## 6 1006 OKI 0.000099990 0.03047430 0.153502 0.010503400 0.805420

Rename the columns

# Rename the columns starting from the third one
leak5 <- leak5 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak5)
##    ind pop          v1         v2       v3          v4       v5
## 1 1001 OKI 0.057351200 0.02903100 0.240333 0.021857400 0.651427
## 2 1002 OKI 0.093595600 0.02123130 0.239314 0.045972700 0.599886
## 3 1003 OKI 0.011381600 0.00894785 0.155884 0.020222400 0.803564
## 4 1004 OKI 0.000099990 0.01060030 0.116732 0.000919858 0.871648
## 5 1005 OKI 0.000099982 0.01739940 0.083069 0.000099982 0.899332
## 6 1006 OKI 0.000099990 0.03047430 0.153502 0.010503400 0.805420

Import sample locations

sampling_loc <- readRDS(here("output", "local_adaptation", "sampling_loc.rds"))
head(sampling_loc)
## # A tibble: 6 × 6
##   Pop_City   Country  Latitude Longitude Region Abbreviation
##   <chr>      <chr>       <dbl>     <dbl> <chr>  <chr>       
## 1 Gelephu    Bhutan       26.9      90.5 Asia   GEL         
## 2 Phnom Penh Cambodia     11.6     105.  Asia   CAM         
## 3 Hainan     China        19.2     110.  Asia   HAI         
## 4 Yunnan     China        24.5     101.  Asia   YUN         
## 5 Hunan      China        27.6     112.  Asia   HUN         
## 6 Bengaluru  India        13.0      77.6 Asia   BEN
source(
  here(
    "scripts", "analysis", "my_theme3.R"
  )
)

# Create a named vector to map countries to regions
country_to_region <- c(
  "Bhutan" = "South Asia",
  "Cambodia" = "Southeast Asia",
  "China" = "East Asia",
  "India" = "South Asia",
  "Indonesia" = "Southeast Asia",
  "Japan" = "East Asia",
  "Malaysia" = "Southeast Asia",
  "Maldives" = "South Asia",
  "Nepal" = "South Asia",
  "Sri Lanka" = "South Asia",
  "Taiwan" = "East Asia",
  "Thailand" = "Southeast Asia",
  "Vietnam" = "Southeast Asia"
)

# Add the region to the data frame
sampling_loc$Region2 <- country_to_region[sampling_loc$Country]

# Melt the data frame for plotting
Q_melted <- leak5 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(Region, Region2, Country, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Region_Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Region_Country = unique(Q_ordered$Region_Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Region_Country, function(rc)
    max(which(Q_ordered$Region_Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 
color_palette <-
  c(
    "v1" = "#FFFF00",
    "v2" = "#FF00FF",
    "v3" = "#00FF00",
    "v4" = "#0000FF",
    "v5" = "#FF0000"
  )


# Generate all potential variable names
all_variables <- paste0("v", 1:5)

# Map each variable to a name
color_mapping <- data.frame(variable = all_variables,
                            color = names(color_palette))

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=5.\n LEA inference for k1:15 with SNPs pruned with r2 0.01.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("output", "populations", "figures", "lea_k=5_r2_0.01.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)

3 SNP set with prunning using r2 0.1

Import the data

genotype <- here(
    "output", "populations", "snps_sets", "r2_0.1.vcf"
  )

d <- read.vcfR(
  genotype
) 
## Scanning file to determine attributes.
## File attributes:
##   meta lines: 8
##   header_line: 9
##   variant count: 57780
##   column count: 246
## 
Meta line 8 read in.
## All meta lines processed.
## gt matrix initialized.
## Character matrix gt created.
##   Character matrix gt rows: 57780
##   Character matrix gt cols: 246
##   skip: 0
##   nrows: 57780
##   row_num: 0
## 
Processed variant 1000
Processed variant 2000
Processed variant 3000
Processed variant 4000
Processed variant 5000
Processed variant 6000
Processed variant 7000
Processed variant 8000
Processed variant 9000
Processed variant 10000
Processed variant 11000
Processed variant 12000
Processed variant 13000
Processed variant 14000
Processed variant 15000
Processed variant 16000
Processed variant 17000
Processed variant 18000
Processed variant 19000
Processed variant 20000
Processed variant 21000
Processed variant 22000
Processed variant 23000
Processed variant 24000
Processed variant 25000
Processed variant 26000
Processed variant 27000
Processed variant 28000
Processed variant 29000
Processed variant 30000
Processed variant 31000
Processed variant 32000
Processed variant 33000
Processed variant 34000
Processed variant 35000
Processed variant 36000
Processed variant 37000
Processed variant 38000
Processed variant 39000
Processed variant 40000
Processed variant 41000
Processed variant 42000
Processed variant 43000
Processed variant 44000
Processed variant 45000
Processed variant 46000
Processed variant 47000
Processed variant 48000
Processed variant 49000
Processed variant 50000
Processed variant 51000
Processed variant 52000
Processed variant 53000
Processed variant 54000
Processed variant 55000
Processed variant 56000
Processed variant 57000
Processed variant: 57780
## All variants processed
#create vectors of individual names and population attribution
inds_full <- attr(d@gt,"dimnames")[[2]]
inds_full <- inds_full[-1]
a <- strsplit(inds_full, '_')
pops <- unname(sapply(a, FUN = function(x) return(as.character(x[1])))) 
table(pops)
## pops
## BEN CAM CHA GEL HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT KLP KUN LAM MAT OKI 
##  12  12  12   2  12   4   7  12  11   4   2   6  12  11  10   4   4   9  12  11 
## QNC SON SSK SUF SUU TAI UTS YUN 
##  12   3  12   6   6   8  12   9
pops <- factor(pops)
inds <- unname(sapply(a, FUN = function(x) return(as.character(x[2]))))
#convert the vcf file, which should be located in the working directory, into a geno file in the working directory
vcf2geno(genotype, gsub(".vcf", ".geno", genotype))
## 
##  - number of detected individuals:   237
##  - number of detected loci:      57780
## 
## For SNP info, please check /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.1.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.1.removed file, for more informations.
## [1] "/Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.1.geno"
vcf2lfmm(genotype, gsub(".vcf", ".lfmm", genotype))
## 
##  - number of detected individuals:   237
##  - number of detected loci:      57780
## 
## For SNP info, please check /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.1.vcfsnp.
## 
## 0 line(s) were removed because these are not SNPs.
## Please, check /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.1.removed file, for more informations.
## 
## 
##  - number of detected individuals:   237
##  - number of detected loci:      57780
## [1] "/Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.1.lfmm"

PCA

### PCA #####
# set output dir
setwd(
  here(
    "output", "populations"
  )
)
nPC <- length(inds)
pc <- pca(gsub(".vcf", ".lfmm", genotype), K = nPC)
## [1] "******************************"
## [1] " Principal Component Analysis "
## [1] "******************************"
## summary of the options:
## 
##         -n (number of individuals)          237
##         -L (number of loci)                 57780
##         -K (number of principal components) 237
##         -x (genotype file)                  /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.1.lfmm
##         -a (eigenvalue file)                /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.1.pca/r2_0.1.eigenvalues
##         -e (eigenvector file)               /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.1.pca/r2_0.1.eigenvectors
##         -d (standard deviation file)        /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.1.pca/r2_0.1.sdev
##         -p (projection file)                /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.1.pca/r2_0.1.projections
##         -c data centered
show(pc)
## * pca class *
## 
## project directory:               /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/ 
## pca result directory:            r2_0.1.pca/ 
## input file:                      r2_0.1.lfmm 
## eigenvalue file:                 r2_0.1.eigenvalues 
## eigenvector file:                r2_0.1.eigenvectors 
## standard deviation file:         r2_0.1.sdev 
## projection file:                 r2_0.1.projections 
## pcaProject file:                   r2_0.1.pcaProject 
## number of individuals:           237 
## number of loci:                  57780 
## number of principal components:  237 
## centered:                        TRUE 
## scaled:                          FALSE

Test

# PC significant test: tracy-widom test
tw <- tracy.widom(pc)
## [1] "*******************"
## [1] " Tracy-Widom tests "
## [1] "*******************"
## summary of the options:
## 
##         -n (number of eigenvalues)          237
##         -i (input file)                     /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.1.pca/r2_0.1.eigenvalues
##         -o (output file)                    /Users/lucianocosme/Library/CloudStorage/Dropbox/Albopictus/manuscript_chip/data/no_autogenous/albo_chip/output/populations/snps_sets/r2_0.1.pca/r2_0.1.tracywidom
# tw$pvalues
# plot the percentage of variance explained by each component
plot(tw$percentage, pch = 19, col = "pink", cex = .8)

Values

# plot preparation
pc.coord <- as.data.frame(pc$projections)
colnames(pc.coord) <- paste0("PC", 1:nPC)
pc.coord$Individual <- inds
pc.coord$Population <- pops
# perc1 <- paste0(round(tw$percentage, digits = 3) * 100, "%")
perc <- paste0(round(pc$eigenvalues/sum(pc$eigenvalues), digits = 3) * 100, "%")
nb.cols <- 40
mycolors <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)

Symbols

#to see all shapes -> plot shapes - para escolher os simbolos
N = 100; M = 1000
good.shapes = c(1:25,33:127)
foo = data.frame( x = rnorm(M), y = rnorm(M), s = factor( sample(1:N, M, replace = TRUE) ) )
# ggplot(aes(x,y,shape=s ), data=foo ) +
#   scale_shape_manual(values=good.shapes[1:N]) +
#   geom_point()

Meta data

sampling_loc <- readRDS(here("output", "local_adaptation", "sampling_loc.rds"))
head(sampling_loc)
## # A tibble: 6 × 6
##   Pop_City   Country  Latitude Longitude Region Abbreviation
##   <chr>      <chr>       <dbl>     <dbl> <chr>  <chr>       
## 1 Gelephu    Bhutan       26.9      90.5 Asia   GEL         
## 2 Phnom Penh Cambodia     11.6     105.  Asia   CAM         
## 3 Hainan     China        19.2     110.  Asia   HAI         
## 4 Yunnan     China        24.5     101.  Asia   YUN         
## 5 Hunan      China        27.6     112.  Asia   HUN         
## 6 Bengaluru  India        13.0      77.6 Asia   BEN

More meta data

# import sample attributes
samples2 <- read.delim(
  here(
    "output", "populations", "Population_meta_data.txt"
  ),
  head = TRUE
)

samples2<- samples2 |>
  dplyr::select(
    region, pop
  )

# check head of the file
head(samples2)
##      region pop
## 1 East Asia HAI
## 2 East Asia HUN
## 3 East Asia YUN
## 4 East Asia AIZ
## 5 East Asia HIR
## 6 East Asia JAT

Merge with sampling_loc

merged_loc <- merge(samples2, sampling_loc, by.x = "pop", by.y = "Abbreviation")
head(merged_loc)
##   pop          region           Pop_City    Country Latitude Longitude Region
## 1 AIZ       East Asia Aizuwakamatsu City      Japan  37.4924  139.9936   Asia
## 2 ALV Southern Europe              Vlore    Albania  40.4660   19.4897 Europe
## 3 ANT     East Africa       Antananarivo Madagascar -18.8792   47.5079 Africa
## 4 AWK     West Africa               Awka    Nigeria   6.2105    7.0741 Africa
## 5 BAR Southern Europe          Barcelona      Spain  41.3851    2.1734 Europe
## 6 BEN      South Asia          Bengaluru      India  12.9716   77.5946   Asia

Check pops

head(pc.coord$Population)
## [1] OKI OKI OKI OKI OKI OKI
## 28 Levels: BEN CAM CHA GEL HAI HAN HOC HUN INJ INW JAF KAC KAG KAN KAT ... YUN

Check how many sampling localities

length(unique(pc.coord$Population))
## [1] 28

Merge

merged_data <- merge(pc.coord, merged_loc, by.x = "Population", by.y = "pop")
head(merged_data)
##   Population       PC1        PC2         PC3         PC4          PC5
## 1        BEN -21.12510 -26.216700   8.4870200 -17.6548000  -2.71430000
## 2        BEN -20.99630 -27.566100   9.5577600 -18.2048000  -1.75778000
## 3        BEN -21.89770 -26.721400   8.5344700 -17.3222000  -3.71905000
## 4        BEN -22.71840 -29.186500   9.6005500 -19.6170000  -2.94831000
## 5        BEN -22.30890 -26.759800   7.6865200 -16.9286000  -0.77658300
## 6        BEN -22.42740 -29.049900   8.8043200 -18.9412000  -3.03286000
##            PC6         PC7         PC8          PC9        PC10         PC11
## 1   8.29477000  15.3405000  -0.2912250  -0.68459200  -0.3446610  -0.26423300
## 2   6.13050000  17.4734000  -0.9379350  -2.89122000  -0.4220160   1.65387000
## 3   6.14484000  16.3245000  -2.3025600   0.00718294  -3.2412500  -1.57941000
## 4   7.98096000  18.2228000  -1.6091300  -0.46856300   1.4654400   1.39153000
## 5   6.74858000  14.1221000  -1.9330800  -1.17370000  -2.0345600   1.61394000
## 6   7.86098000  20.2294000  -1.5138600  -0.04767160   0.2625480   2.38508000
##           PC12        PC13       PC14        PC15        PC16         PC17
## 1  15.28600000   0.2208010  -5.662690  -4.8364100  -3.2559600   8.52019000
## 2  25.48600000   0.8603530 -10.570600  -8.2537900  -8.4960800  16.38500000
## 3  17.21570000  -3.0425900  -4.455310  -3.9310800  -3.4042500   9.31844000
## 4  34.69670000  -1.7214400 -15.253700  -8.4099900 -12.8067000  26.61120000
## 5  17.70870000  -1.4226600  -6.968290  -5.9499100  -5.4339300  12.97110000
## 6  35.84950000  -1.8396600 -16.279800  -8.6482000 -10.5491000  27.06640000
##           PC18        PC19        PC20        PC21        PC22        PC23
## 1  -7.45547000 -12.9980000 -13.1721000  -2.7336000   5.5838900   0.2235280
## 2 -11.72490000 -22.1228000 -26.2750000   2.3413900  -0.9125150  -1.6070700
## 3  -5.58948000 -14.7010000 -10.4966000   2.7146200   5.8382500  -2.4851100
## 4 -24.83950000 -45.5056000 -40.5812000  -2.1702300  26.0265000  12.2099000
## 5  -9.64556000 -16.4420000 -16.4344000   1.7525900   3.8517700   1.0994000
## 6 -25.10490000 -46.8974000 -42.7536000  -3.0108400  26.4644000  11.8691000
##          PC24        PC25        PC26         PC27         PC28         PC29
## 1  -0.0977660   0.3519740   0.3603840  -1.29177000   2.52289000 -12.38320000
## 2  -6.4712800  -2.6704100   2.9100200  -2.31946000  13.14690000 -63.11520000
## 3  -2.3157800   1.0943200  -3.2507600  -0.68217800   4.88222000 -16.34230000
## 4   3.4801200  -1.9031300  10.7983000  15.84350000 -18.35750000  80.11600000
## 5  -1.1579000   0.6650960  -1.4165100  -1.71280000   2.89425000 -19.94890000
## 6   4.2589900  -3.7204100  11.0701000  16.35230000 -18.82040000  79.70180000
##           PC30        PC31        PC32        PC33        PC34        PC35
## 1   2.23894000  -4.8685000  -8.4121700  -2.9099100  -1.0632500   1.5417900
## 2  -1.48853000 -14.3530000 -11.5816000 -18.3954000  14.1691000   1.4895900
## 3  -2.75103000  -7.8402500  -7.1135300   1.0838200  -5.1373900   3.2087600
## 4   2.43782000  15.5151000  12.3956000  11.7816000  -3.6741600  -2.5404900
## 5  -3.92099000  -3.5588200  -4.4419800   0.3473710  -4.0952200   7.2271100
## 6   3.02210000  15.6442000  14.6329000  11.7776000  -2.0662700  -2.1780200
##           PC36         PC37        PC38        PC39         PC40        PC41
## 1  -6.02427000  -2.18806000  17.0480000 -11.7336000   1.64507000 -11.9023000
## 2   2.23907000  22.85220000 -54.2922000  33.3448000   7.78461000  30.2898000
## 3  -4.07089000  -4.33324000  14.3811000 -16.9921000  -8.89495000  -7.1035100
## 4  -0.13431900   8.78196000 -12.1345000  16.7969000  -1.83524000   8.6346400
## 5   1.37942000  -5.03079000   8.7141000 -18.6334000   1.41477000 -11.8812000
## 6  -1.16036000   8.89992000 -13.0480000  14.9684000   0.29230200   8.5124000
##           PC42        PC43        PC44        PC45        PC46         PC47
## 1  -1.84303000 -10.7653000  -0.6590260   0.2295630  -3.9737800  -1.48550000
## 2   4.82095000   4.3053900  10.4447000   7.0499000   3.3149700   4.67117000
## 3  -8.91135000  -7.9826900  -3.6077400  -6.9097100  -9.6182200  -6.19984000
## 4   5.16088000   0.3050340   6.5686600   2.2856900  -0.4083280   5.16049000
## 5   0.65145100   0.8891180  -9.8058600  -5.4994900   6.2744200   3.50539000
## 6   4.04394000   1.1642700   4.5592000   0.8154110   0.5976930   3.92659000
##          PC48       PC49        PC50         PC51        PC52        PC53
## 1   0.7704700   4.271930  -2.5711300  -4.23315000  17.8502000  -2.6153400
## 2  -6.3118600 -12.516600  -6.8601300  -2.76627000  -3.6622300   5.0692700
## 3  14.8582000  -9.614380   9.2264300 -14.45710000  -8.1461300   2.5570400
## 4  -0.7178890  -3.005620  -2.0844100  -1.80783000  -0.8996430  -4.4966200
## 5   4.8818200  10.317700  13.7773000  -2.66042000   8.4127100  -9.4883600
## 6  -0.2814910  -3.157850  -0.4583980  -1.90958000   0.8868460  -2.7746300
##          PC54        PC55        PC56         PC57         PC58        PC59
## 1 -17.3511000   7.5609200  -8.5034300   9.98055000   0.17205400  -5.0724600
## 2  13.4889000   2.6367000   8.5229200  -5.80035000  -1.16575000  -1.4937300
## 3 -21.9908000   1.2428000 -15.3251000  -3.54401000 -11.59130000   6.5279400
## 4   3.2565100  -1.1877700   2.1426200  -0.35587700   0.14634200   2.1414300
## 5  -2.7345600   0.4717490 -12.1945000  -4.05523000   3.51862000  -8.1935300
## 6   2.7136900  -0.2296190   1.6437500  -1.21208000   1.54248000   0.4463520
##          PC60        PC61        PC62         PC63         PC64         PC65
## 1   4.9486100  14.9490000  -1.5199000   7.06942000  11.65770000   0.70127900
## 2   5.5261500   3.8559400   2.3336500  -0.06036980  -2.45348000   3.19507000
## 3   2.0776200 -15.9403000   2.6667400   0.65687500  17.96120000   1.77015000
## 4   0.5873710   0.5500820   1.3890500   1.40572000  -1.61840000   3.19547000
## 5  -8.2266700   6.0298700 -19.8921000 -11.43200000   7.87088000 -22.04100000
## 6   1.2863000   1.4565100   0.5566610   0.46422100  -1.74822000   2.34437000
##           PC66        PC67         PC68        PC69         PC70        PC71
## 1   3.95704000  -7.8182000  13.63140000   5.1656200  -6.23350000   5.0587300
## 2   4.06759000  -4.1492200   2.43565000  -1.5840000  -6.93106000  -1.4605500
## 3  -4.41046000  -5.8493100   5.79765000   6.6740300  -6.07599000   9.6663100
## 4  -3.41262000  -3.3405300  -4.82349000  -1.0385000   1.02439000   0.3746210
## 5 -15.23610000   8.0330900   0.02494700   7.7890700  13.33950000  -1.2509100
## 6  -3.16960000  -2.1834500  -3.75185000  -1.3745800  -1.67620000  -0.8916860
##          PC72        PC73        PC74        PC75        PC76        PC77
## 1   5.3244100  -3.1439600   3.0601400 -12.9284000  -6.5948200   7.4553900
## 2  -4.3822100  -1.7575100  -5.7120100   4.4509400  -1.1125200   2.9793100
## 3  -3.5307500  -7.6236300 -15.7160000  17.9821000   1.5306500  16.6400000
## 4  -4.2089300  -0.7173410  -1.4582100   0.1425180   3.4637800   0.4420400
## 5  -0.6459200   4.1916900 -15.9249000  -1.1244700  -5.9023600 -11.9137000
## 6  -0.8438680   1.5446700   0.0408936   1.1384000   4.4380700   0.9679600
##          PC78        PC79        PC80        PC81        PC82        PC83
## 1  11.4180000  -4.1493100  -3.6194500  -7.5809200  -4.6613400 -29.7529000
## 2   0.3131760   1.3386600   1.4717200  -4.3463000   6.6829500  -0.0579343
## 3  -2.6814100  19.8327000   6.7309800  -5.1156500  -1.8507200   2.2913400
## 4   1.9045100   1.2774700   0.6844840   5.1533500   0.0925460   2.0507700
## 5  -3.2946200  -6.4985000   4.4015200 -14.9760000  -2.7248800   6.2544100
## 6  -0.0339263   1.5444500   1.9815200   4.1914900   0.8058800   2.7782300
##          PC84        PC85        PC86         PC87         PC88        PC89
## 1   2.8692600 -25.4671000   6.0299900 -24.25180000  -5.72269000  -0.1573810
## 2   3.4088300  -6.3397400   1.1540700  -3.63834000  -1.18003000   0.8212640
## 3  -6.9321300   4.9161500   8.9574800   0.02197780  -6.47049000  -0.7118300
## 4   3.8437200  -3.5312600   0.7478770  -0.38932500   0.37388700   0.7160080
## 5   1.0019100  20.3686000  -9.0571300  -1.74955000 -20.06520000 -13.0160000
## 6   3.5902700  -1.1994800   2.8760600   0.48492000  -1.47314000   0.7491990
##          PC90        PC91        PC92         PC93        PC94        PC95
## 1   4.9594800   1.5646300  25.2200000  10.04800000  -4.9090000  -6.6310000
## 2  -1.0611800   3.2943600  -6.5117900  -1.14887000   0.2463340   3.8931500
## 3  12.5858000   8.1533800   3.1850400  -7.91924000  37.7466000   6.3894000
## 4  -1.1176400   2.2843500  -3.0233800   2.47949000   0.8149600   1.3470900
## 5   2.9434100  -1.8421400  19.8871000 -19.37690000 -39.7354000  -8.2599300
## 6  -2.4755900  -0.3221850   1.0127300   0.58408300   1.3288200   0.9929450
##           PC96        PC97         PC98         PC99       PC100        PC101
## 1   1.06250000  -6.5147100 -3.30110e+01   2.45852000   7.3502700   5.54790000
## 2  -2.68238000   3.4172000 -3.75100e+00  -2.12078000   6.9060000   3.39591000
## 3  -1.49894000   9.7979500 -8.86757e+00  10.92240000  -4.1584500   4.64532000
## 4   0.47074300  -1.9753400  4.41828e+00  -0.08571420   3.2456600  -1.42437000
## 5  10.93620000  -2.7180600  8.47520e+00  -6.68774000   2.2402400 -13.10420000
## 6  -1.09000000   0.1293680  2.86652e+00  -0.87396900   0.3758100   1.55233000
##         PC102       PC103       PC104       PC105       PC106        PC107
## 1  17.7989000   2.9088600  -5.3209300   0.9298870  23.9096000   4.48179000
## 2  -0.8674370  -3.0078200  -1.8284700   3.0924500  -2.5779800  -3.28218000
## 3  23.7569000 -22.1894000  12.2133000 -20.1064000   7.3497200 -10.94570000
## 4  -0.9084140   1.5868700  -0.8982620  -1.1205500   0.7306400  -0.08758340
## 5  -1.6384400  17.5297000 -11.0310000  20.8293000 -20.0768000  18.16110000
## 6  -1.8220200   2.5812500  -3.0949400   0.3999790   1.2467100  -0.98033700
##         PC108        PC109       PC110       PC111       PC112       PC113
## 1 -14.8711000   7.42536000   5.2356700  15.1303000   4.7381300 -19.1832000
## 2  -0.2399960   0.33220600  -1.7960900   0.0425830  -2.4179400  -0.0886193
## 3   1.3638300   4.97378000  14.8264000 -17.6628000   3.9320800  -0.4203780
## 4  -0.6447630  -0.25084400   0.6140920   0.4052730  -2.2312800  -0.5270050
## 5 -12.5424000  -2.55056000  -8.9035400  -4.7021300   3.5133400  -4.8712800
## 6  -0.1677480   0.68691200  -0.6489940  -1.9357400   0.4753190   0.6299610
##         PC114       PC115        PC116       PC117       PC118        PC119
## 1  11.8697000 -16.5024000  -1.38657000  31.2129000  -2.5141500  1.55982e+01
## 2  -2.9395900  -2.6109800  -3.86882000  -2.6676500   4.8992000  1.55189e+00
## 3   8.5462400 -28.5863000  -6.94436000 -15.0042000  -3.9719400  3.50293e+00
## 4  -0.4712090  -1.2481700   1.43497000   1.3055000   0.2856130  9.66012e-01
## 5  17.3365000  -9.7901600  23.75140000 -21.9441000  29.8410000 -1.43594e+01
## 6   1.2300000  -2.2284100  -0.16898300  -0.6973090   3.0099000  7.22438e-03
##         PC120       PC121       PC122       PC123       PC124       PC125
## 1  -0.9381380   7.9233100  -7.7881200  11.4158000   4.5487900   1.2090600
## 2  -2.2910600   4.5672000   2.2435800   1.5907700  -0.2113100  -3.8570800
## 3 -26.1128000 -50.5424000  28.3420000 -16.2748000 -11.9804000  -9.0010900
## 4  -2.0107000   1.7252100   2.3525900   0.6548940  -0.2827160  -1.8095000
## 5  27.2832000   7.3588700   8.0147300   0.1358340   0.7245760  -5.0220800
## 6  -2.0911600   4.2355800   1.6387500   1.1441900  -2.3761100  -0.9229370
##          PC126        PC127        PC128       PC129       PC130       PC131
## 1  -5.66124000 -2.45241e+01  -0.94408000  -7.6509700  14.0592000  -6.7854100
## 2  -0.40686200 -1.02914e+00   1.29407000   1.7118800  -5.6673500   2.2435300
## 3   2.53832000  1.44299e+01  21.78290000  15.1964000 -29.2608000 -10.2528000
## 4   2.52369000  2.10891e+00  -1.08602000   1.3815300   0.0961440  -0.2154240
## 5 -20.50900000 -1.41735e+01  16.23380000  12.2509000 -38.6337000   9.2813200
## 6   1.11999000  4.80594e-01  -2.94598000   0.2351530  -1.6577600   0.6989100
##         PC132      PC133       PC134       PC135       PC136       PC137
## 1   3.3694000   2.374760  15.3837000  43.4299000  24.8567000  -4.5770600
## 2  -1.4535400  -0.369758   4.1663600   0.7569120  -1.4598400   2.8074300
## 3  -3.4540200  24.004000 -10.6336000 -16.6615000 -12.2395000 -20.4951000
## 4  -2.8547200  -0.237152  -3.6989100   1.7526100  -0.8467190  -0.9620770
## 5 -10.6143000  16.481200  -4.4333800   5.8114600 -12.2945000  -0.7365450
## 6  -1.7013100  -0.418307  -0.6489500   1.9126300  -2.0562300  -2.2321800
##          PC138       PC139        PC140       PC141       PC142       PC143
## 1  22.81840000   7.2535600 -24.79900000  -6.9311400  20.5485000 -30.0183000
## 2   2.72467000  -3.8703400  -2.12796000  -0.5092630  -4.1400400   0.3546790
## 3  -5.77881000  -9.8572000  -7.81972000  19.4081000 -10.2392000 -23.8906000
## 4  -0.63932900  -0.7312300  -0.24709300  -0.9064190  -2.5458000   0.7104400
## 5 -34.06140000  12.6227000   1.94264000   2.8129800  22.4516000   6.1937600
## 6  -1.57383000  -1.5379900   0.52125800  -0.7354850  -2.0646400  -1.0045100
##         PC144       PC145       PC146       PC147        PC148        PC149
## 1  28.3809000 -13.4795000  -0.8916330 -22.3339000 -35.47070000  2.64935e+01
## 2   1.0807600  -1.8851000   4.2552600   2.0318200   0.76320100  2.91314e+00
## 3 -14.0132000   1.2430400   4.2750000 -11.3812000  -7.56045000  8.49027e+00
## 4   0.8641170  -0.3380390   1.1405500  -1.9988700   2.28772000 -2.00982e+00
## 5  -6.2880600 -20.0530000  -8.8176100   0.4599260   9.19823000  1.56251e+01
## 6  -0.9878170   2.2425300   1.5324700  -1.0766500  -0.46756600 -9.26350e-01
##          PC150       PC151       PC152       PC153        PC154       PC155
## 1  14.76540000  -4.7179000   1.1529700 -10.0477000  -7.64873000   6.5217700
## 2  -0.96250400   1.1217800  -3.4778100   2.8493100  -4.85280000   2.7480900
## 3 -16.76270000  14.6486000   5.9606800 -13.5170000   5.54083000 -18.2164000
## 4  -0.23838300  -0.1063110   1.2425900   0.3399560  -1.90513000  -0.8189280
## 5  31.03950000  -3.1570900   4.7337200  21.8747000  15.65050000  -7.6675200
## 6  -3.43059000   0.1652030  -1.4355700   2.5930300   0.00541175  -0.2905870
##         PC156       PC157       PC158        PC159       PC160        PC161
## 1   9.1039300   5.5282200 -14.6385000   5.80290000   2.2640300  -2.07999000
## 2   0.4768470  -0.2338240  -0.7892600   0.23414500   2.2022900  -2.38962000
## 3  10.3792000   0.2274320   7.6679900  12.14660000   2.6239400   0.77791800
## 4  -1.7881600  -1.8496800  -1.2532500   1.32108000  -0.1423960  -0.12892000
## 5   9.5528900  -5.1418100  13.5768000  -3.92305000  10.2640000  -6.64926000
## 6  -0.3840940  -1.2611900  -0.2745640  -1.09414000   0.7413980  -3.67161000
##         PC162       PC163      PC164       PC165        PC166       PC167
## 1  -5.0150200   5.8248400  -3.722770  -0.4940910 -10.82150000  -0.9388300
## 2  -1.8125600   2.2831000   0.673383   2.8220100   4.50647000   2.6851500
## 3   3.9746700  -1.0814800  -5.647200  -0.9363040  -4.92348000   1.5099100
## 4  -2.7971600   0.2045050   1.025040   0.2495590   0.73708600   0.3986850
## 5   1.8564500   9.2686000  -6.952180   2.0798400   1.55571000  -0.2951240
## 6   1.5246700  -0.4823120  -0.676185  -0.0864281  -2.20714000   0.2351740
##          PC168       PC169       PC170        PC171        PC172       PC173
## 1  -0.55442200  -0.3743910   6.3452300  -7.13375000   0.43970100  -0.1253670
## 2  -2.59928000  -0.1307130   3.9138000  -1.89943000  -0.33665400   1.6222900
## 3   4.57266000   3.2703200   1.8131000   1.01558000   5.39459000  -5.5055100
## 4   0.05411580   1.1003500   0.3403380  -0.74050100   2.04745000   2.3980400
## 5  -0.99827800   6.0520200   4.8501700   0.40237900   1.92762000  -0.5799570
## 6   1.53756000   0.7213230  -0.1994150  -0.34776100   0.26710100  -0.4932810
##          PC174        PC175       PC176       PC177       PC178        PC179
## 1 -3.81927e-01  -2.11304000   2.9439400   2.4441800   1.2042500  -0.51521100
## 2 -5.40784e-01  -5.00470000   3.9466100  -8.0002700  -5.7376500  -3.98934000
## 3  5.17499e+00   1.64256000   0.1699090   1.6542200  -0.2425450   4.36133000
## 4  9.56254e-01   1.56088000   0.9974650  -3.2671800  -0.9238730  -0.50113900
## 5  3.68929e+00   2.86528000  -1.0041900  -0.8197480   1.5807100  -2.30744000
## 6 -3.76693e+00  -2.22085000  -2.9309000   4.4110100   1.4632700  -1.42867000
##          PC180        PC181       PC182        PC183        PC184       PC185
## 1   1.40223000  -0.65724700  -2.2840700  -4.18823000  -0.86105800   0.8483300
## 2  11.85860000  -7.14679000   4.6139300  24.99140000  13.90510000  53.3265000
## 3  -5.55482000  -1.98449000  -0.1362050  -3.22393000   1.89783000  -0.3886590
## 4  -2.59195000   0.47564500  -2.9618000   0.50321000   0.12141000  -3.6282800
## 5  -0.09715940   1.55499000   0.7971850  -2.64665000  -1.08346000  -1.4480800
## 6   2.96903000   0.99152700   0.2385350   0.40988500   0.56802300   2.0789500
##          PC186        PC187        PC188       PC189        PC190       PC191
## 1  -0.24489900   1.84136000 -2.80263e-01   1.8546400 -4.43113e-01   1.9208400
## 2  51.53680000 -17.32120000  5.02855e+00 -26.5415000  1.46635e+01  -2.2318900
## 3   0.44920700   0.65721700  8.46501e-01   1.5952600 -2.57029e+00   0.7854340
## 4  -1.44676000   2.56409000 -1.87040e+00   0.4244260  5.71903e-01   1.0444200
## 5  -0.56481600  -0.91672900 -2.27838e+00   0.5412340  1.78439e+00  -1.1764100
## 6  -0.31211400  -2.49429000 -9.40577e-01  -1.1041800  7.28286e-01   0.9416920
##          PC192        PC193        PC194       PC195       PC196       PC197
## 1  -0.87984600   0.50451800  -2.86036000   0.7874190   0.9720960  -0.7877650
## 2  -4.55282000   2.50351000  12.19390000  -3.3179900  -3.4685000   2.2320300
## 3   1.38094000   3.15353000  -0.76531500   0.8585230   0.7344950  -0.4920300
## 4   0.82732100   2.63986000  -1.71353000  -0.0159541   0.9678460   3.5549800
## 5   1.19707000   0.22894700  -1.24979000   0.3317780   0.1297840  -0.3758370
## 6   0.08370250  -0.85934200   1.24761000  -0.2231790   0.0428804  -1.0484100
##          PC198        PC199       PC200        PC201       PC202        PC203
## 1 -1.99684e-01  -1.11885000   0.9397460  -0.45154300  -1.3621900 -5.14614e-01
## 2 -3.49332e+00   0.73142000  -6.2420500   1.74009000  -1.5648700 -3.18236e-01
## 3 -2.44411e+00  -2.22062000   0.2857870   0.76019000  -0.5174860  2.58793e-02
## 4 -1.04568e+00  -1.31075000  -2.4926200   1.27123000  -1.0163900 -1.87660e+00
## 5  4.07572e-01   1.98122000  -0.3409920  -0.34483100   0.5092150 -8.46287e-02
## 6  8.17099e-01  -0.50323400   1.9966000  -0.97070300   1.3177200  1.33346e+00
##         PC204        PC205        PC206       PC207        PC208        PC209
## 1   0.9098230  -0.41043800  -2.00797000   1.1352300  -1.11808000  5.81459e-01
## 2   1.0411400   1.46901000  -0.23460100   1.2740300   2.81294000  1.93087e+00
## 3   0.2211800  -0.46630100   0.44417200   0.1508940   0.41571600 -1.31405e-02
## 4   0.7608390   5.58871000   9.62258000   2.0937900   9.72991000  7.71622e+01
## 5   1.4843300  -1.00699000   0.45052500   1.7733600  -1.58494000  1.68549e+00
## 6  -0.4368020  -5.70147000  -8.55191000  -2.6887200  -9.77064000 -7.68781e+01
##          PC210        PC211        PC212        PC213        PC214       PC215
## 1  -0.65222600   0.14640400  -0.22558600   0.16811200  1.18610e+00   0.5856490
## 2  -0.56022600   0.26494000   0.25803000  -1.62597000 -3.15615e-02  -1.1726300
## 3   1.32290000   0.40683800   0.08621740   0.65804400 -4.99558e-01  -0.4401960
## 4   4.45557000  -5.12672000  -7.00580000  -8.29989000 -2.84811e-01  -4.3650700
## 5  -1.10715000  -0.05929860  -1.35709000  -1.03973000 -1.02182e-01   0.2050010
## 6  -5.22369000   4.88191000   6.94937000   7.20167000 -7.07169e-02   3.0153300
##          PC216        PC217        PC218        PC219        PC220        PC221
## 1  -0.16960000   1.37399000   0.83040600 -2.48006e-01   0.47006400  1.02705e-01
## 2   0.74501200   0.63887700   0.24709000 -7.38958e-01  -0.02827680  2.72054e-01
## 3   0.35711800  -0.71013400   0.07146780  6.25675e-01  -0.30353300 -8.36693e-01
## 4   4.38679000  -1.62975000   2.37741000  1.13742e+00   1.77996000 -1.13534e+00
## 5   0.58847100   0.45009100   0.30454700  8.72864e-01   0.36281200 -1.44273e-01
## 6  -3.34138000   1.43260000  -1.57846000 -9.23672e-01  -1.83440000  8.54572e-01
##          PC222        PC223        PC224        PC225        PC226        PC227
## 1   0.13071500  -0.66060400  2.64907e-01   0.05866200   0.15490500  9.96340e-02
## 2   1.03223000  -0.67367400 -3.89672e-01   0.42516600   0.39389900  7.26387e-01
## 3  -0.24599200  -0.99939100  8.09871e-01  -0.37959700   0.27758400  2.05464e-01
## 4   1.21815000  -1.68822000  3.75583e-01   1.17819000  -1.01624000 -7.51697e-01
## 5  -0.43971500  -0.22348700 -3.23138e-01  -0.40193300  -0.13333400 -2.01302e-03
## 6  -1.65523000   1.56887000 -2.17489e-01  -0.12973700   1.10500000  2.48154e-01
##          PC228        PC229        PC230        PC231        PC232        PC233
## 1  8.94705e-01 -7.89690e-01   0.62239100   0.30400600  2.63599e-01  -0.04869920
## 2 -5.05877e-02  8.53152e-01  -0.00692285   0.03505550  4.56803e-01   0.03960260
## 3 -3.60440e-01  1.54412e-01  -0.03874390   0.23335300  2.13369e-01  -0.02447320
## 4  1.80893e-01  1.68740e-01   0.92470400  -0.67433500 -1.41894e-01  -0.17159700
## 5 -5.95439e-01 -8.82056e-02   0.59316400  -0.12300700 -2.30047e-01   0.03627420
## 6 -8.42116e-01 -3.43788e-02  -1.97393000   0.75467800 -7.31736e-01   0.08200520
##          PC234        PC235        PC236 PC237 Individual     region  Pop_City
## 1   0.07203700 -4.28714e-02   0.05709050   NAN          a South Asia Bengaluru
## 2   0.20814300  3.70834e-01  -0.16512800   NAN        257 South Asia Bengaluru
## 3  -0.24580000 -2.51930e-01   0.02853340   NAN        261 South Asia Bengaluru
## 4   0.08117620  4.23416e-02  -0.03686370   NAN        263 South Asia Bengaluru
## 5   0.06813150  1.74180e-01  -0.12070000   NAN        264 South Asia Bengaluru
## 6  -0.38657400 -2.79999e-01   0.03980820   NAN        262 South Asia Bengaluru
##   Country Latitude Longitude Region
## 1   India  12.9716   77.5946   Asia
## 2   India  12.9716   77.5946   Asia
## 3   India  12.9716   77.5946   Asia
## 4   India  12.9716   77.5946   Asia
## 5   India  12.9716   77.5946   Asia
## 6   India  12.9716   77.5946   Asia
# source the plotting function
source(
  here(
    "scripts", "analysis", "my_theme2.R"
  )
)

distinct_palette <- c(
  "#E69F00",
  "#009E73",
  "#0072B2",
  "#CC79A7",
  "#800000",
  "#808080",
  "#CCEBC5",
  "#FFB5B8",
  "#99CCFF",
  "#8E8BFF",
  "#F781BF",
  "#FFFF33",
  "#A65628",
  "#984EA3",
  "#D2D2D2",
  "#4DAF4A",
  "#DEB887",  # BurlyWood
  "#7FFFD4",  # Aquamarine
  "#D2691E",  # Chocolate
  "#DC143C",  # Crimson
  "#8B008B"   # DarkMagenta
)
merged_data$PC1 <- as.numeric(as.character(merged_data$PC1))
merged_data$PC2 <- as.numeric(as.character(merged_data$PC2))
merged_data$PC3 <- as.numeric(as.character(merged_data$PC3))

# make plot by continent and range
ggplot(merged_data, aes(PC1, PC2)) +
  geom_point(aes(colour = Country, shape = Country), size = 1) +
  xlab(paste0("PC1 (", perc[1], " Variance)")) +
  ylab(paste0("PC2 (", perc[2], " Variance)")) +
  labs(
    caption = "PCA with 57,780 SNPs of 237 mosquitoes from 28 localities in Asia."
  ) +
  guides(
    color = guide_legend(title = "Country", ncol = 3),
    shape = guide_legend(title = "Country", ncol = 3),
    fill = guide_legend(title = "Region", ncol = 1)
  ) +
  stat_ellipse(aes(fill = region, group = region), geom = "polygon", alpha = 0.2, level = 0.8) +
  scale_color_manual(values = distinct_palette) +
  scale_shape_manual(values=good.shapes[c(1:25, 58:67)]) +
  my_theme() +
  theme(
    plot.caption = element_text(face = "italic"),
    legend.position = "top",
    legend.justification = "top",
    legend.box.just = "center",
    legend.box.background = element_blank(),
    plot.margin = margin(5.5, 25, 5.5, 5.5, "points"),
    legend.margin = margin(10,10,10,10)
  )

# #   ____________________________________________________________________________
# #   save the pca plot                                                       ####
ggsave(
  here(
    "output", "populations", "figures", "PCA_lea_pc1_pc2_r2_0.1.pdf"
  ),
  width  = 8,
  height = 6,
  units  = "in"
)

PC1 an PC3

# source the plotting function
source(
  here(
    "scripts", "analysis", "my_theme2.R"
  )
)

distinct_palette <- c(
  "#E69F00",
  "#009E73",
  "#0072B2",
  "#CC79A7",
  "#800000",
  "#808080",
  "#CCEBC5",
  "#FFB5B8",
  "#99CCFF",
  "#8E8BFF",
  "#F781BF",
  "#FFFF33",
  "#A65628",
  "#984EA3",
  "#D2D2D2",
  "#4DAF4A",
  "#DEB887",  # BurlyWood
  "#7FFFD4",  # Aquamarine
  "#D2691E",  # Chocolate
  "#DC143C",  # Crimson
  "#8B008B"   # DarkMagenta
)


# make plot by continent and range
ggplot(merged_data, aes(PC1, PC2)) +
  geom_point(aes(colour = Country, shape = Country), size = 1) +
  xlab(paste0("PC1 (", perc[1], " Variance)")) +
  ylab(paste0("PC3 (", perc[3], " Variance)")) +
  labs(
    caption = "PCA with 57,780 SNPs of 237 mosquitoes from 28 localities in Asia."
  ) +
  guides(
    color = guide_legend(title = "Country", ncol = 3),
    shape = guide_legend(title = "Country", ncol = 3),
    fill = guide_legend(title = "Region", ncol = 1)
  ) +
  stat_ellipse(aes(fill = region, group = region), geom = "polygon", alpha = 0.2, level = 0.8) +
  scale_color_manual(values = distinct_palette) +
  scale_shape_manual(values=good.shapes[c(1:25, 58:67)]) +
  my_theme() +
  theme(
    plot.caption = element_text(face = "italic"),
    legend.position = "top",
    legend.justification = "top",
    legend.box.just = "center",
    legend.box.background = element_blank(),
    plot.margin = margin(5.5, 25, 5.5, 5.5, "points"),
    legend.margin = margin(10,10,10,10)
  )

# #   ____________________________________________________________________________
# #   save the pca plot                                                       ####
ggsave(
  here(
    "output", "populations", "figures", "PCA_lea_pc1_pc3_r2_0.1.pdf"
  ),
  width  = 8,
  height = 6,
  units  = "in"
)

Run LEA

# set output dir
setwd(
  here(
    "output", "populations"
  )
)
# main options
# K = number of ancestral populations
# entropy = TRUE computes the cross-entropy criterion, # CPU = 6 is the number of CPU used (hidden input) project = NULL
project = snmf(
  genotype,
  K = 1:15,
  project = "new",
  repetitions = 5,
  CPU = 4,
  entropy = TRUE
)

Load project

# To load the project, use:
project = load.snmfProject("output/populations/snps_sets/r2_0.1.snmfProject")

# To remove the project, use:
 # remove.snmfProject("snps_sets/r2_0.1.snmfProject")

Cross entropy

# Open a new pdf file
pdf(here("output","populations","figures","lea_cross_entropy_r2_0.1.pdf"), width = 6, height = 4)

# Create your plot
plot(project, col = "pink", pch = 19, cex = 1.2)

# Close the pdf file
dev.off()
plot(project, col = "pink", pch = 19, cex = 1.2)

We will not plot k=8 but only the k=5 to compare to admixture

3.1 k5

We can plot k=5 to compare to other algorithms

Find the best run

best8 = which.min(cross.entropy(project, K = 8)) # 5
best5 = which.min(cross.entropy(project, K = 5)) # 5
best8
## [1] 5
best5
## [1] 5

Run 3 for k=5

# Extract ancestry coefficients
leak5 <- read_delim(
  here("output", "populations", "snps_sets", "r2_0.1.snmf", "K5", "run5","r2_0.1_r5.5.Q"),
  delim = " ", # Specify the delimiter if different from the default (comma)
  col_names = FALSE,
  show_col_types = FALSE
) 
# unseen_pckmeans.7.Q
# pckmeans.7.Q
head(leak5)
## # A tibble: 6 × 5
##        X1     X2    X3       X4    X5
##     <dbl>  <dbl> <dbl>    <dbl> <dbl>
## 1 0.00901 0.134  0.601 0.0199   0.236
## 2 0.0206  0.155  0.568 0.0289   0.227
## 3 0.0175  0.103  0.711 0.000100 0.169
## 4 0.00882 0.0695 0.777 0.0136   0.131
## 5 0.00145 0.0634 0.808 0.0136   0.113
## 6 0.0128  0.104  0.723 0.000100 0.160

The fam file

fam_file <- here(
  "output", "populations", "snps_sets", "r2_0.1.fam"
)

# Read the .fam file
fam_data <- read.table(fam_file, 
                       header = FALSE,
                       col.names = c("FamilyID", "IndividualID", "PaternalID", "MaternalID", "Sex", "Phenotype"))

# View the first few rows
head(fam_data)
##   FamilyID IndividualID PaternalID MaternalID Sex Phenotype
## 1      OKI         1001          0          0   2        -9
## 2      OKI         1002          0          0   2        -9
## 3      OKI         1003          0          0   2        -9
## 4      OKI         1004          0          0   2        -9
## 5      OKI         1005          0          0   2        -9
## 6      OKI         1006          0          0   1        -9

Create ID column

# Change column name
colnames(fam_data)[colnames(fam_data) == "IndividualID"] <- "ind"


# Merge columns "FamilyID" and "IndividualID" with an underscore
# fam_data$ind <- paste(fam_data$FamilyID, fam_data$IndividualID, sep = "_")


# Change column name
colnames(fam_data)[colnames(fam_data) == "FamilyID"] <- "pop"

# Select ID
fam_data <- fam_data |>
  dplyr::select("ind", "pop")

# View the first few rows
head(fam_data)
##    ind pop
## 1 1001 OKI
## 2 1002 OKI
## 3 1003 OKI
## 4 1004 OKI
## 5 1005 OKI
## 6 1006 OKI

Add it to matrix

leak5 <- fam_data |>
  dplyr::select(ind, pop) |>
  bind_cols(leak5)

head(leak5)
##    ind pop         X1        X2       X3          X4       X5
## 1 1001 OKI 0.00901176 0.1344870 0.600824 0.019856000 0.235822
## 2 1002 OKI 0.02059050 0.1549340 0.568124 0.028927700 0.227423
## 3 1003 OKI 0.01745480 0.1030830 0.710789 0.000099991 0.168573
## 4 1004 OKI 0.00881626 0.0694981 0.777402 0.013613100 0.130670
## 5 1005 OKI 0.00145040 0.0633980 0.808295 0.013615700 0.113241
## 6 1006 OKI 0.01277640 0.1043520 0.723096 0.000099991 0.159675

Rename the columns

# Rename the columns starting from the third one
leak5 <- leak5 |>
  rename_with(~paste0("v", seq_along(.x)), .cols = -c(ind, pop))

# View the first few rows
head(leak5)
##    ind pop         v1        v2       v3          v4       v5
## 1 1001 OKI 0.00901176 0.1344870 0.600824 0.019856000 0.235822
## 2 1002 OKI 0.02059050 0.1549340 0.568124 0.028927700 0.227423
## 3 1003 OKI 0.01745480 0.1030830 0.710789 0.000099991 0.168573
## 4 1004 OKI 0.00881626 0.0694981 0.777402 0.013613100 0.130670
## 5 1005 OKI 0.00145040 0.0633980 0.808295 0.013615700 0.113241
## 6 1006 OKI 0.01277640 0.1043520 0.723096 0.000099991 0.159675

Import sample locations

sampling_loc <- readRDS(here("output", "local_adaptation", "sampling_loc.rds"))
head(sampling_loc)
## # A tibble: 6 × 6
##   Pop_City   Country  Latitude Longitude Region Abbreviation
##   <chr>      <chr>       <dbl>     <dbl> <chr>  <chr>       
## 1 Gelephu    Bhutan       26.9      90.5 Asia   GEL         
## 2 Phnom Penh Cambodia     11.6     105.  Asia   CAM         
## 3 Hainan     China        19.2     110.  Asia   HAI         
## 4 Yunnan     China        24.5     101.  Asia   YUN         
## 5 Hunan      China        27.6     112.  Asia   HUN         
## 6 Bengaluru  India        13.0      77.6 Asia   BEN
source(
  here(
    "scripts", "analysis", "my_theme3.R"
  )
)

# Create a named vector to map countries to regions
country_to_region <- c(
  "Bhutan" = "South Asia",
  "Cambodia" = "Southeast Asia",
  "China" = "East Asia",
  "India" = "South Asia",
  "Indonesia" = "Southeast Asia",
  "Japan" = "East Asia",
  "Malaysia" = "Southeast Asia",
  "Maldives" = "South Asia",
  "Nepal" = "South Asia",
  "Sri Lanka" = "South Asia",
  "Taiwan" = "East Asia",
  "Thailand" = "Southeast Asia",
  "Vietnam" = "Southeast Asia"
)

# Add the region to the data frame
sampling_loc$Region2 <- country_to_region[sampling_loc$Country]

# Melt the data frame for plotting
Q_melted <- leak5 |>
  pivot_longer(
    cols = -c(ind, pop),
    names_to = "variable",
    values_to = "value"
  )
# Join with sampling_loc to get sampling localities
Q_joined <- Q_melted |>
  left_join(sampling_loc, by = c("pop" = "Abbreviation"))

# Create a combined variable for Region and Country
Q_joined <- Q_joined |>
  mutate(Region_Country = interaction(Region, Country, sep = "_"))

# Order the combined variable by Region and Country, then by individual
Q_ordered <- Q_joined |>
  arrange(Region, Region2, Country, ind) |>
  mutate(ind = factor(ind, levels = unique(ind)))  # Convert ind to a factor with levels in the desired order

# Add labels: country names for the first individual in each country, NA for all other individuals
Q_ordered <- Q_ordered |>
  group_by(Region_Country) |>
  mutate(label = ifelse(row_number() == 1, as.character(Country), NA))

# Group by individual and variable, calculate mean ancestry proportions
Q_grouped <- Q_ordered |>
  group_by(ind, variable) |>
  summarise(value = mean(value), .groups = "drop")

# Create a data frame for borders
borders <-
  data.frame(Region_Country = unique(Q_ordered$Region_Country))

# Add the order of the last individual of each country to ensure correct placement of borders
borders$order <-
  sapply(borders$Region_Country, function(rc)
    max(which(Q_ordered$Region_Country == rc))) + 0.5  # Shift borders to the right edge of the bars

# Select only the first occurrence of each country in the ordered data
label_df <- Q_ordered |>
  filter(!is.na(label)) |>
  distinct(label, .keep_all = TRUE)

# Create a custom label function
label_func <- function(x) {
  labels <- rep("", length(x))
  labels[x %in% label_df$ind] <- label_df$label
  labels
}

# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) + 0)

# Calculate the position of population labels and bars
pop_labels <- Q_ordered |>
  mutate(Name = paste(pop, Pop_City, sep = " - ")) |>
  group_by(pop) |>
  slice_head(n = 1) |>
  ungroup() |>
  dplyr::select(ind, Pop_City, Country, Name) |>
  mutate(pos = as.numeric(ind))  # calculate position of population labels

pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)


# Calculate the position of lines
border_positions <- Q_ordered |>
  group_by(Country) |>
  summarise(pos = max(as.numeric(ind)) - 1)


pop_labels_bars <- pop_labels |>
  mutate(pos = as.numeric(ind)  - .5)

# Function to filter and normalize data
normalize_data <- function(df, min_value) {
  df |>
    filter(value > min_value) |>
    group_by(ind) |>
    mutate(value = value / sum(value))
}

# Use the function
Q_grouped_filtered <- normalize_data(Q_grouped, 0.1)
# 
color_palette <-
  c(
    "v1" = "#0000FF",
    "v2" = "#FFFF00",
    "v3" = "#FF0000",
    "v4" = "#FF00FF",
    "v5" = "#00FF00"
  )


# Generate all potential variable names
all_variables <- paste0("v", 1:5)

# Map each variable to a name
color_mapping <- data.frame(variable = all_variables,
                            color = names(color_palette))

# Merge with Q_grouped_filtered
Q_grouped_filtered <- merge(Q_grouped_filtered, color_mapping, by = "variable")

# Create the plot
ggplot(Q_grouped_filtered, aes(x = as.factor(ind), y = value, fill = color)) +
  geom_bar(stat = 'identity', width = 1) +
  geom_vline(
    data = pop_labels_bars,
    aes(xintercept = pos),
    color = "#2C444A",
    linewidth = .2
  ) +
  geom_text(
    data = pop_labels,
    aes(x = as.numeric(ind), y = 1, label = Name),
    vjust = 1.5,
    hjust = 0,
    size = 2,
    angle = 90,
    inherit.aes = FALSE
  ) +
  my_theme() +
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      size = 12
    ),
    legend.position = "none",
    plot.margin = unit(c(3, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Admixture matrix") +
  ylab("Ancestry proportions") +
  labs(caption = "Each bar represents the ancestry proportions for an individual for k=5.\n LEA inference for k1:15 with SNPs prunned with r2 0.1.") +
  scale_x_discrete(labels = label_func) +
  scale_fill_manual(values = color_palette) +
  expand_limits(y = c(0, 1.5))

ggsave(
  here("output", "populations", "figures", "lea_k=5_r2_0.1.pdf"),
  width  = 12,
  height = 6,
  units  = "in",
  device = cairo_pdf
)