R语言 数据属性编辑
访问数据框变量
示例:
# Build a small four-row data frame modelled on the iris measurements.
# NOTE: the column names spell "length" with a lowercase "l"
# (Sepal.length, Petal.length). Name lookups are case-sensitive, so every
# access below must use exactly that spelling.
data.iris <- data.frame(
  Sepal.length = c(5.1, 4.9, 4.7, 4.6),
  Sepal.Width = c(3.5, 3.0, 3.2, 3.1),
  Petal.length = c(1.4, 1.4, 1.3, 1.5),
  Petal.Width = rep(0.2, 4)
)
# Print data.iris
data.iris
# Output:
#   Sepal.length Sepal.Width Petal.length Petal.Width
# 1          5.1         3.5          1.4         0.2
# 2          4.9         3.0          1.4         0.2
# 3          4.7         3.2          1.3         0.2
# 4          4.6         3.1          1.5         0.2

# Column indexing
data.iris[, 1] # by position; output: [1] 5.1 4.9 4.7 4.6
data.iris$Sepal.length # by name with `$`; output: [1] 5.1 4.9 4.7 4.6
# Single-bracket indexing by name keeps the result a one-column data frame.
data.iris["Sepal.length"]
# Output:
#   Sepal.length
# 1          5.1
# 2          4.9
# 3          4.7
# 4          4.6

# Row indexing
data.iris[1, ]
# Output:
#   Sepal.length Sepal.Width Petal.length Petal.Width
# 1          5.1         3.5          1.4         0.2

# Element indexing
data.iris[1, 1] # output: [1] 5.1
data.iris$Sepal.length[1] # output: [1] 5.1

# Conditional indexing: keep the rows whose Sepal.length is below 5.
subset(data.iris, Sepal.length < 5)
# Output:
#   Sepal.length Sepal.Width Petal.length Petal.Width
# 2          4.9         3.0          1.4         0.2
# 3          4.7         3.2          1.3         0.2
# 4          4.6         3.1          1.5         0.2
创建新变量
如何创建新变量?
方法一:变量名 <- 表达式
“表达式”部分可以包含多种运算符和函数
算术运算符可用于构造公式(formula)
方法二:使用 transform() 函数,直接在原始数据框中创建新变量(见示例2)
示例1:
# Example data: two numeric columns, four rows.
mydata <- data.frame(
  x1 = c(2, 2, 6, 4),
  x2 = c(3, 4, 2, 8)
)
mydata
# Output:
#   x1 x2
# 1  2  3
# 2  2  4
# 3  6  2
# 4  4  8

# Store the row-wise sum of x1 and x2 as a new column, then show it.
mydata$sumx <- with(mydata, x1 + x2)
mydata$sumx
# Output:
# [1]  5  6  8 12
mydata
# Output:
#   x1 x2 sumx
# 1  2  3    5
# 2  2  4    6
# 3  6  2    8
# 4  4  8   12

# Store the row-wise mean of x1 and x2 as a new column, then show it.
mydata$meanx <- with(mydata, (x1 + x2) / 2)
mydata$meanx
# Output:
# [1] 2.5 3.0 4.0 6.0
mydata
# Output:
#   x1 x2 sumx meanx
# 1  2  3    5   2.5
# 2  2  4    6   3.0
# 3  6  2    8   4.0
# 4  4  8   12   6.0
示例2:
# Start again from the plain two-column data frame.
mydata <- data.frame(
  x1 = c(2, 2, 6, 4),
  x2 = c(3, 4, 2, 8)
)
mydata
# Output:
#   x1 x2
# 1  2  3
# 2  2  4
# 3  6  2
# 4  4  8

# transform() evaluates the expressions using the data frame's own columns
# and returns a data frame with the new columns added.
mydata <- transform(
  mydata,
  sumx = x1 + x2,
  meanx = (x1 + x2) / 2
)
mydata
# Output:
#   x1 x2 sumx meanx
# 1  2  3    5   2.5
# 2  2  4    6   3.0
# 3  6  2    8   4.0
# 4  4  8   12   6.0

# Assigning NULL to a column inside transform() removes that column.
mydata <- transform(mydata, x2 = NULL)
mydata
# Output:
#   x1 sumx meanx
# 1  2    5   2.5
# 2  2    6   3.0
# 3  6    8   4.0
# 4  4   12   6.0
重命名变量
示例:
# Sample score table: one row per student, with one missing value in p1.
score <- data.frame(
  student = c("A", "B", "C", "D"),
  gender = c("M", "M", "F", "F"),
  math = c(90, 70, 80, 60),
  Eng = c(88, 78, 69, 98),
  p1 = c(66, 59, NA, 88)
)
score
# Output:
#   student gender math Eng p1
# 1       A      M   90  88 66
# 2       B      M   70  78 59
# 3       C      F   80  69 NA
# 4       D      F   60  98 88

# The same columns as a plain list (one element per column).
score.list <- as.list(score)
score.list
# Output (as produced when string columns are stored as factors, the
# data.frame() default before R 4.0; from R 4.0 on, student and gender
# print as quoted character vectors without a "Levels:" line):
# $student
# [1] A B C D
# Levels: A B C D
#
# $gender
# [1] M M F F
# Levels: F M
#
# $math
# [1] 90 70 80 60
#
# $Eng
# [1] 88 78 69 98
#
# $p1
# [1] 66 59 NA 88
#
# Install the reshape package only when it is missing, so that re-running
# the script does not reinstall it every time.
if (!requireNamespace("reshape", quietly = TRUE)) {
  install.packages("reshape")
}
library(reshape)

# rename() takes a named vector of the form c(old_name = "new_name").
# The result is only printed here, not assigned, so score itself still has
# the column p1 afterwards.
rename(score, c(p1 = "Chinese"))
# Output:
#   student gender math Eng Chinese
# 1       A      M   90  88      66
# 2       B      M   70  78      59
# 3       C      F   80  69      NA
# 4       D      F   60  98      88

# The same call on the list version; again the result is only printed.
rename(score.list, c(p1 = "Chinese"))
# Output ("Levels:" lines appear only when the string columns are factors,
# the data.frame() default before R 4.0):
# $student
# [1] A B C D
# Levels: A B C D
#
# $gender
# [1] M M F F
# Levels: F M
#
# $math
# [1] 90 70 80 60
#
# $Eng
# [1] 88 78 69 98
#
# $Chinese
# [1] 66 59 NA 88
#
# Rename by position with names<-: replace the fifth name in place.
# This form works on data frames and on lists alike.
names(score)[5] <- "chinese"
score
# Output:
#   student gender math Eng chinese
# 1       A      M   90  88      66
# 2       B      M   70  78      59
# 3       C      F   80  69      NA
# 4       D      F   60  98      88

# Same positional rename on the list, then print it to show the result.
names(score.list)[5] <- "chinese"
score.list
# Output ("Levels:" lines appear only when the string columns are factors,
# the data.frame() default before R 4.0):
# $student
# [1] A B C D
# Levels: A B C D
#
# $gender
# [1] M M F F
# Levels: F M
#
# $math
# [1] 90 70 80 60
#
# $Eng
# [1] 88 78 69 98
#
# $chinese
# [1] 66 59 NA 88
#

# colnames<- renames a data frame column by position as well.
colnames(score)[5] <- "Ch"
score
# Output:
#   student gender math Eng Ch
# 1       A      M   90  88 66
# 2       B      M   70  78 59
# 3       C      F   80  69 NA
# 4       D      F   60  98 88

# Replace the default row names 1..4 with the letters a..d.
rownames(score) <- letters[1:4]
score
# Output:
#   student gender math Eng Ch
# a       A      M   90  88 66
# b       B      M   70  78 59
# c       C      F   80  69 NA
# d       D      F   60  98 88