# Switch the working directory to the folder the workbook is read from, then
# load the workbook into `leadership`.
# NOTE(review): read.xlsx() is not part of base R; presumably a package such
# as xlsx or openxlsx is attached before this runs, and the second argument
# (1) selects the first sheet -- confirm.
setwd("E:/R")
leadership <- read.xlsx("leadership.xlsx", 1)
# 4.2 Creating new variables
# Three ways to add derived columns (sumx = x1 + x2, meanx = their mean)
# to a data frame.

# (1) Direct assignment: reference every column through mydata$...
mydata <- data.frame(x1 = c(2, 2, 6, 4), x2 = c(3, 4, 2, 8))
mydata$sumx <- mydata$x1 + mydata$x2
mydata

# (2) attach()/detach(): while attached, x1 and x2 can be read by bare name,
# but new columns still have to be written through mydata$...
attach(mydata)
mydata$sumx <- x1 + x2
# The mean goes into its own column (meanx) so it does not overwrite sumx.
mydata$meanx <- (x1 + x2) / 2
detach(mydata)

# (3) transform() (preferred): builds all derived columns in one call.
mydata <- transform(mydata, sumx = x1 + x2, meanx = (x1 + x2) / 2)
leadership

# 4.3 Recoding variables
# Goal: treat the sentinel value 99 in `afe` as missing, then derive a
# three-level category column `afecat` ("elder" / "middle" / "young").

# (1) Bracket assignment on the data frame column.
leadership$afe[leadership$afe == 99] <- NA
leadership$afecat[leadership$afe > 75] <- "elder"
leadership$afecat[leadership$afe >= 55 & leadership$afe <= 75] <- "middle"
leadership$afecat[leadership$afe < 55] <- "young"
leadership

# (2) within() (recommended): columns are visible by bare name inside the
# braces, and the modified data frame is returned.
leadership <- within(leadership, {
  afecat <- NA
  afecat[afe > 75] <- "elder"
  # Must be an assignment (`<-`); a bare `<` only compares and discards.
  afecat[afe >= 55 & afe <= 75] <- "middle"
  afecat[afe < 55] <- "young"
})
# 4.4 Renaming variables

# (1) Interactive: fix() opens the data editor, where column names can be
# edited by hand. Only attempted in an interactive session.
if (interactive()) {
  fix(leadership)
}

# (2) Programmatic: assign into names(), addressing columns by position.
names(leadership)
names(leadership)[2] <- "testdata"
names(leadership)[6:10] <- c("c1", "c2", "c3", "c4", "c5")

# (3) plyr::rename(): takes a named vector of the form c(old = "new").
# Install plyr only when it is missing, instead of on every run.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library("plyr")
leadership <- rename(
  leadership,
  c(manager = "manageID", testdata = "testdata1")
)
# 4.5 Missing values
# is.na() returns a logical object of the same shape as its input, TRUE
# where the element is NA.
y <- c(1, 2, 3, NA)
is.na(y)
is.na(leadership[, 6:10])
# Notes on detecting missing values:
# (1) Missing values are considered non-comparable, even to themselves.
# (2) R does not mark infinite or impossible values as missing.
# 4.5.1 Recoding values to missing
# Reload the workbook so the recode starts from the raw data.
# NOTE(review): read.xlsx() is not base R; presumably provided by an attached
# package such as xlsx or openxlsx -- confirm.
setwd("E:/R")
leadership <- read.xlsx("leadership.xlsx", 1)
# Replace the sentinel 99 with NA. The mask is built from the same column
# that is being assigned (`afe`), matching the recode in section 4.3.
leadership$afe[leadership$afe == 99] <- NA
# 4.5.2 Excluding missing values from analyses
x <- c(1, 2, NA, 3)
# Any arithmetic that touches an NA yields NA ...
y <- x[1] + x[2] + x[3] + x[4]
# ... and so does sum() by default.
z <- sum(x)
# na.rm = TRUE drops the missing values and sums what is left.
y <- sum(x, na.rm = TRUE)
# na.omit() removes every incomplete observation, i.e. any row that
# contains at least one NA.
newdata <- na.omit(leadership)
newdata
# 4.6 Date values
# Current date as a Date object, and current date-time as a string.
Sys.Date()
date()

# Format today's date as "<month name> <day> <year>".
today <- Sys.Date()
format(today, format = "%B %d %Y")

# Subtracting two Date objects gives the interval between them.
startdate <- as.Date("2004-02-13")
enddate <- as.Date("2011-01-22")
days <- enddate - startdate
days

# difftime() computes the same kind of interval with an explicit unit.
today <- Sys.Date()
dob <- as.Date("1994-05-29")
difftime(today, dob, units = "days")
# 4.6.1 Converting dates to character variables
# `dates` has to exist before it can be converted; build a small Date
# vector to demonstrate on.
dates <- as.Date(c("2004-02-13", "2011-01-22"))
strDates <- as.character(dates)
# 4.7 Type conversion
# (no examples recorded for this section)

# 4.8 Sorting data
# order() returns the row permutation that sorts by the given keys;
# indexing the data frame with it yields the sorted copy.
# NOTE(review): `newdate` looks like a typo for `newdata`; the name is kept
# so nothing else that may read it is affected.
newdate <- leadership[order(leadership$afe), ]
# Sort by `mate`, then by `afe` within `mate`. with() resolves the column
# names inside `leadership` without attaching it to the search path.
newdate <- leadership[with(leadership, order(mate, afe)), ]
newdate
# 4.9 Merging datasets
# NOTE(review): dataframeA, dataframeB, A and B are placeholder names that
# are not created anywhere in the visible script -- substitute real objects
# before running this section.

# 4.9.1 Adding columns to a data frame
# merge() joins two data frames on a shared key column (here "ID").
total <- merge(dataframeA, dataframeB, by = "ID")
# Use cbind() for a plain horizontal concatenation.
total <- cbind(A, B)

# 4.9.2 Adding rows to a data frame
# rbind() concatenates vertically.
total <- rbind(dataframeA, dataframeB)
# The two data frames must have the same variables. If dataframeA has
# variables that dataframeB lacks, either:
# (1) delete the extra variables from dataframeA, or
# (2) create the additional variables in dataframeB and set them to NA
#     (missing).
# Vertical concatenation is typically used to add observations.
# 4.10 Subsetting datasets
# 4.10.1 Selecting (keeping) variables

# By column position.
newdata <- leadership[, 6:10]

# By an explicit vector of column names.
myvars <- c("q1", "q2", "q3", "q4", "q5")
newdata <- leadership[myvars]

# By generated names: paste0() builds "q1" ... "q4".
myvars <- paste0("q", 1:4)
newdata <- leadership[myvars]
newdata
# 4.10.2 Excluding (dropping) variables

# Option A: build a logical mask of the columns to drop and negate it.
myvars <- names(leadership) %in% c("q3", "q4")
newdata <- leadership[!myvars]

# Option B: delete the columns in place by assigning NULL. This modifies
# `leadership` itself.
leadership$q3 <- leadership$q4 <- NULL
# 4.10.3 Selecting observations

# By row position: the first three rows.
newdata <- leadership[1:3, ]
leadership

# By logical condition. Columns are referenced through leadership$... so
# nothing is left attached to the search path afterwards.
newdata <- leadership[leadership$mate == "m" & leadership$afe > 30, ]
newdata

# subset(): the row condition and the columns to keep in a single call;
# column names are resolved inside `leadership`.
newdata <- subset(
  leadership,
  afe >= 35 | afe < 24,
  select = c(q1, q2)
)
newdata