R语言可视化(一):散点图绘制
01.散点图绘制
清除当前环境中的变量
# Drop every object that ls() reports in the current environment.
# Attached packages and options() are left untouched by this call.
rm(list = ls())
设置工作目录
# Make this folder the working directory; relative paths used afterwards
# are resolved against it.
setwd(dir = "C:/Users/Dell/Desktop/R_Plots/01scatterplot/")
读取示例数据
# Read the example table from the working directory into `data`.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, whereas TRUE and FALSE are reserved words.
data <- read.table(
  "demo_scatterplot.txt",
  header = TRUE,        # the first line of the file holds the column names
  check.names = FALSE   # keep the column names exactly as written in the file
)
# Preview the first rows
head(data)
## sampleID BRCA1 BRCA2
## 1 GTEX-1117F-2826-SM-5GZXL 0.1332195 -0.4301581
## 2 GTEX-111YS-1926-SM-5GICC 0.2645817 -0.2700257
## 3 GTEX-1122O-1226-SM-5H113 0.1354507 -0.3503731
## 4 GTEX-117XS-1926-SM-5GICO -0.1676188 -0.1320025
## 5 GTEX-117YX-1426-SM-5H12H 0.1583625 -0.5127202
## 6 GTEX-1192X-2326-SM-5987X 0.3144992 -0.3668346
# Number of rows and columns
dim(data)
## [1] 290 3
使用 R 基础绘图系统的 plot() 函数绘制散点图
# Base-graphics scatter plot of BRCA2 against BRCA1.
# Columns are resolved through the `data =` argument of each call rather than
# attach()/detach(), so nothing is placed on the search path and nothing stays
# attached if one of the steps fails part-way through.
plot(BRCA2 ~ BRCA1, data = data, col = "red", pch = 16)
# Linear fit of BRCA2 on BRCA1
lm.fit <- lm(BRCA2 ~ BRCA1, data = data)
# Show the fit summary
summary(lm.fit)
##
## Call:
## lm(formula = BRCA2 ~ BRCA1, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.73367 -0.14609 0.01372 0.15016 0.84578
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.51461 0.01708 -30.131 < 2e-16 ***
## BRCA1 0.47843 0.06708 7.133 7.99e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2301 on 288 degrees of freedom
## Multiple R-squared: 0.1501, Adjusted R-squared: 0.1472
## F-statistic: 50.87 on 1 and 288 DF, p-value: 7.987e-12
# Overlay the fitted regression line (dashed, blue)
abline(lm.fit, lty = 2, lwd = 2, col = "blue")
# Pearson correlation test between the two columns
cor_pearson <- cor.test(~ BRCA1 + BRCA2, data = data, method = "pearson")
cor_pearson
##
## Pearson's product-moment correlation
##
## data: BRCA1 and BRCA2
## t = 7.1327, df = 288, p-value = 7.987e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2850037 0.4811701
## sample estimates:
## cor
## 0.3874642
cor_coef <- cor_pearson$estimate
cor_pvalue <- cor_pearson$p.value
# Redraw the scatter plot with the correlation in the title. The p-value is
# formatted to 3 significant digits so the title stays short and readable.
plot_title <- paste0(
  "Pearson r = ", round(cor_coef, digits = 2),
  " P-value = ", format(cor_pvalue, digits = 3)
)
plot(BRCA2 ~ BRCA1, data = data, col = "red", pch = 16, main = plot_title)
# Overlay the fitted regression line
abline(lm.fit, lty = 2, lwd = 2, col = "blue")
# Annotate the plot with the fitted equation.
# a = slope, b = intercept, both rounded to 3 decimals; they are picked by
# coefficient name so the order of the coefficient vector does not matter.
a <- round(coef(lm.fit)["BRCA1"], 3)
b <- round(coef(lm.fit)["(Intercept)"], 3)
# Choose the operator from the sign of the intercept so that a negative
# intercept is written "- 0.515" instead of "+ -0.515".
eq_label <- sprintf(
  "y = %.3f * x %s %.3f",
  a, if (b < 0) "-" else "+", abs(b)
)
text(x = -0.4, y = 0.2, labels = eq_label, cex = 1.5)
ggplot2包绘制散点图
# ggplot2 supplies the plotting grammar; ggpubr supplies stat_cor()
library(ggplot2)
library(ggpubr)
## Loading required package: magrittr
# Red points with a linear-model smoother drawn in blue on a gray band
p1 <- ggplot(data, aes(x = BRCA1, y = BRCA2)) +
  geom_point(color = "red", size = 2) +
  geom_smooth(method = "lm", color = "blue", fill = "gray")
p1
# Same plot with the Pearson correlation label placed at (-0.4, 0.2)
p1 + stat_cor(method = "pearson", label.x = -0.4, label.y = 0.2)
ggpubr包绘制散点图
library(ggpubr)
# Scatter plot built with a single ggscatter() call
ggscatter(
  data,
  x = "BRCA1",
  y = "BRCA2",
  # Point appearance
  color = "red",
  size = 2,
  # Regression line with its confidence interval
  add = "reg.line",
  conf.int = TRUE,
  add.params = list(color = "blue", fill = "gray"),
  # Correlation coefficient label; see ?stat_cor for the accepted arguments
  cor.coef = TRUE,
  cor.coeff.args = list(method = "pearson")
)
# Print the R version, platform, locale and package versions of this session
# so the figures above can be reproduced; the transcript below is the output
# recorded when the tutorial was written.
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=Chinese (Simplified)_China.936
## [2] LC_CTYPE=Chinese (Simplified)_China.936
## [3] LC_MONETARY=Chinese (Simplified)_China.936
## [4] LC_NUMERIC=C
## [5] LC_TIME=Chinese (Simplified)_China.936
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggpubr_0.2.1 magrittr_1.5 ggplot2_3.2.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.1 knitr_1.23 tidyselect_0.2.5 munsell_0.5.0
## [5] colorspace_1.4-1 R6_2.4.0 rlang_0.4.0 stringr_1.4.0
## [9] dplyr_0.8.3 tools_3.6.0 grid_3.6.0 gtable_0.3.0
## [13] xfun_0.8 withr_2.1.2 htmltools_0.3.6 yaml_2.2.0
## [17] lazyeval_0.2.2 digest_0.6.20 assertthat_0.2.1 tibble_2.1.3
## [21] ggsignif_0.5.0 crayon_1.3.4 purrr_0.3.2 glue_1.3.1
## [25] evaluate_0.14 rmarkdown_1.13 labeling_0.3 stringi_1.4.3
## [29] compiler_3.6.0 pillar_1.4.2 scales_1.0.0 pkgconfig_2.0.2
END