MEM专业R语言第二次作业
作业提交截至时间:10月9日(周五)晚上11:59前(邮件显示时间为准)
注意事项:
[if !supportLists](1) [endif]邮件请标注:姓名+学号+第二次作业
[if !supportLists](2) [endif]作业分为数据分析报告+R代码两个部分。分为两个文件上传提交,不需要打包提交。
[if !supportLists](3) [endif]杜绝抄袭
[if !supportLists](4) [endif]提交邮箱:chengken579315@gmail.com
前提:
本次作业使用Rstudio
加载以下包:
library(openxlsx)
library(tidyverse)
问题1:
(1)
x<-NULL
root<-function(a,b,c){
dert<-(b^2-4*a*c)
if(dert>0){
x[1]<-(-b+sqrt(dert))/(2*a)
x[2]<-(-b-sqrt(dert))/(2*a)
return(x)
}else if(dert == 0){
x[1]<-(-b)/(2*a)
x[2]<-x[1]
return(x)
}else{
x[1]<-"无实根"
x[2]<-"无实根"
return(x)
}
}
root(2,6,3)
(2)
get.prob<-function(m){
mx<-0
for(i in 1:m){
a<-runif(m,1,5)
b<-rnorm(m,3,10)
c<-rexp(m,1)
dert<-(b^2-4*a*c)
if(dert>=0){
mx=mx+1
}else{
mx=mx
}
}
return(mx)
}
get.prob(10000)
get.prob_1_2<-get.prob(10000)/10000
get.prob_1_2
随机产生10000组数据,测算的概率为:0.7733
问题2:给定数据,请完成以下任务,请给出code 和输出结果。
[if !supportLists](1) [endif]请读入数据,使用软件分别给出 price,
marketshare,和brand的缺失值数量。请按照每一个brand,将数据按照先marjetshare 后price 进行从高到低排序
mydata_21<-read.xlsx("E:/个人文件夹/2020MEM/学习/R/homework/hw2/HW2(1).xlsx",sheet=1)
mydata_21
sum(is.na(mydata_21$price))
sum(is.na(mydata_21$marketshare))
sum(is.na(mydata_21$brand))
mydata_21_1<-arrange(mydata_21,desc(brand,marketshare,price))
view(mydata_21_1)
price,
marketshare,和brand的缺失值数量为2、2、2
排序截图如下:
[if !supportLists](2) [endif]请按照brand 的种类,对price和marketshare 求均值。
mydata_21_2<-group_by(mydata_21,brand)
mean_price<-summarise(mydata_21_2,mean(price,na.rm= TRUE))
mean_marketshare<-summarise(mydata_21_2,mean(marketshare,na.rm= TRUE))
mean_price
mean_marketshare
[if !supportLists](3) [endif]请按照brand 的种类,对price和marketshare 画散点图。
ggplot(data= mydata_21_2)+
geom_point(mapping =aes(x=price,y=marketshare))+
facet_wrap(~brand,nrow = 2)
[if !supportLists](4) [endif]请按照价格的均值,产生新的变量price_new, 低于均值为“低价格”,高于均值为“高价格”。同样对市场份额也是,产生变量marketshare_new, 数值为“低市场份额”和“高市场份额”
price_1<-mean(mydata_21$price,na.rm= TRUE)
price_1
marketshare_1<-mean(mydata_21$marketshare,na.rm= TRUE)
marketshare_1
mydata_2_4<-mydata_21%>%
mutate(price_new = ifelse(price%
mutate(marketshare_new=ifelse(marketshare
mydata_2_4
[if !supportLists](5) [endif]请估计模型,marketshare为Y,price为X.
#2.5 去除na,线型拟合,求系数
mydata_2_5<-filter(mydata_21,!is.na(brand),!is.na(price),!is.na(marketshare))
mydata_2_5
mydata_2_51<-lm(marketshare~price,data= mydata_2_5)
coef(mydata_2_51)
[if !supportLists](6) [endif]请画出(5)的拟合直线。
ggplot(mydata_2_51,aes(price,marketshare))+
geom_point(size = 2)+
geom_abline(intercept = 0.03341635,slope =-0.36171159)
[if !supportLists](7) [endif]请随机产生若干直线,验证(5)的结果是最优的
#2.7
#随机产生8条线
models<-tibble(
a1=runif(8,-0.1,0.1),
a2=runif(8,-0.1,0.1)
)
ggplot(mydata_2_5,aes(price,marketshare))+
geom_abline(
aes(intercept = a1,slope = a2),
data=models,alpha=1/4
)+
geom_point()
model1<-function(a,data){
a[1]+data$price*a[2]
}
measure_distance<-function(mod,data){
diff<-data$marketshare-model1(mod,data)
sqrt(mean(diff^2))
}
mydata_2_7_dist<- function(a1,a2) {
measure_distance(c(a1,a2),mydata_2_5)
}
models_7<-models%>%
mutate(dist=purrr::map2_dbl(a1,a2,mydata_2_7_dist))
models_7
min_runif_distance<-min(models_7$dist)
coef_distance<-measure_distance(c(0.03341635,-0.36171159),mydata_2_5)
min_runif_distance>coef_distance
[if !supportLists](8) [endif]请估计模型,marketshare为Y,price和brand 为X.
mydata_2_8<-lm(marketshare~price+brand,data= mydata_2_5)
coef(mydata_2_8)
library(openxlsx)
library(tidyverse)
x<-NULL
get.root<-function(a,b,c){
dert<-(b^2-4*a*c)
if(dert>0){
x[1]<-(-b+sqrt(dert))/(2*a)
x[2]<-(-b-sqrt(dert))/(2*a)
return(x)
}else if(dert == 0){
x[1]<-(-b)/(2*a)
x[2]<-x[1]
return(x)
}else{
x[1]<-"无实根"
x[2]<-"无实根"
return(x)
}
}
get.root(2,6,3)
get.root(1,2,1)
get.root(1,2,8)
#第一大题第二小题
get.prob<-function(m){
mx<-0
for(i in 1:m){
a<-runif(m,1,5)
b<-rnorm(m,3,10)
c<-rexp(m,1)
dert<-(b^2-4*a*c)
if(dert>=0){
mx=mx+1
}else{
mx=mx
}
}
return(mx)
}
get.prob(10000)
get.prob_1_2<-get.prob(10000)/10000
get.prob_1_2
#第二大题
#1 读入数据,求缺失值,两个条件降序
mydata_21<-read.xlsx("E:/个人文件夹/2020MEM/学习/R/homework/hw2/HW2(1).xlsx",sheet=1)
mydata_21
sum(is.na(mydata_21$price))
sum(is.na(mydata_21$marketshare))
sum(is.na(mydata_21$brand))
mydata_21<-na.omit(mydata_21)
#mydata_21_1<-group_by(mydata_21,brand)
view(mydata_21_1)
mydata_21_1<-arrange(mydata_21,desc(brand,marketshare,price))
view(mydata_21_1)
#2.2求均值
mydata_21_2<-group_by(mydata_21,brand)
mean_price<-summarise(mydata_21_2,mean(price,na.rm = TRUE))
mean_marketshare<-summarise(mydata_21_2,mean(marketshare,na.rm = TRUE))
mean_price
mean_marketshare
#2.3画散点图
ggplot(data = mydata_21_2)+
geom_point(mapping = aes(x=price,y=marketshare))+
facet_wrap(~brand,nrow = 2)
#2.4
price_1<-mean(mydata_21$price,na.rm = TRUE)
price_1
marketshare_1<-mean(mydata_21$marketshare,na.rm = TRUE)
marketshare_1
mydata_2_4<-mydata_21%>%
mutate(price_new = ifelse(price<price_1,"低价格","高价格"))%>%
mutate(marketshare_new=ifelse(marketshare<marketshare_1,"低市场份额","高市场份额"))
mydata_2_4
#2.5 去除na,线型拟合,求系数
mydata_2_5<-filter(mydata_21,!is.na(brand),!is.na(price),!is.na(marketshare))
mydata_2_5
mydata_2_51<-lm(marketshare~price,data = mydata_2_5)
coef(mydata_2_51)
#2.6
ggplot(mydata_2_51,aes(price,marketshare))+
geom_point(size = 2)+
geom_abline(intercept = 0.03341635,slope = -0.36171159)
#2.7
#随机产生8条线
models<-tibble(
a1=runif(8,-0.1,0.1),
a2=runif(8,-0.1,0.1)
)
ggplot(mydata_2_5,aes(price,marketshare))+
geom_abline(
aes(intercept = a1,slope = a2),
data=models,alpha=1/4
)+
geom_point()
model1<-function(a,data){
a[1]+data$price*a[2]
}
measure_distance<-function(mod,data){
diff<-data$marketshare-model1(mod,data)
sqrt(mean(diff^2))
}
mydata_2_7_dist <- function(a1,a2) {
measure_distance(c(a1,a2),mydata_2_5)
}
models_7<-models%>%
mutate(dist=purrr::map2_dbl(a1,a2,mydata_2_7_dist))
models_7
min_runif_distance<-min(models_7$dist)
coef_distance<-measure_distance(c(0.03341635,-0.36171159),mydata_2_5)
min_runif_distance>coef_distance
#2.8数据取2.5去除na的数据
mydata_2_8<-lm(marketshare~price+brand,data = mydata_2_5)
coef(mydata_2_8)