require("readxl")
## Loading required package: readxl
## Warning: package 'readxl' was built under R version 3.5.1
require(RCurl)
## Loading required package: RCurl
## Warning: package 'RCurl' was built under R version 3.5.1
## Loading required package: bitops
# Download the HR workbook to the working directory, read it with readxl,
# and convert the result to a plain data.frame for the rest of the script.
url <- "http://mickael-clevenot.fr/wp-content/uploads/2017/09/RH_examen_blanc_sas.xlsx"
# The file is written in binary mode ("wb").
download.file(url = url, destfile = "./dataRH.xlsx", mode = "wb")
RH <- read_excel(path = "./dataRH.xlsx")
RH1 <- as.data.frame(x = RH)
# Print the first rows as a quick check of the import.
head(x = RH1)
## sexe salaire ancienneté jours d'arrêt qualification en entreprise
## 1 F 3.140278 8 13 1 1
## 2 M 3.140278 8 13 1 1
## 3 F 3.131736 2 6 1 0
## 4 F 3.138866 7 12 1 1
## 5 M 3.138866 7 12 1 1
## 6 F 3.133172 3 8 1 1
## turnover
## 1 1
## 2 1
## 3 2
## 4 1
## 5 1
## 6 1
# Print the last six rows of the imported data.
tail(x = RH1, n = 6L)
## sexe salaire ancienneté jours d'arrêt qualification en entreprise
## 45 F 3.242765 90 10 3 1
## 46 M 3.210000 240 24 3 1
## 47 M 3.220000 150 12 3 1
## 48 M 3.253758 100 10 4 1
## 49 M 3.420000 260 4 4 1
## 50 F 3.300000 260 8 4 1
## turnover
## 45 0
## 46 0
## 47 0
## 48 0
## 49 0
## 50 0
# Per-column descriptive summary of the full data set.
summary(object = RH1)
## sexe salaire ancienneté jours d'arrêt
## Length:50 Min. :3.110 Min. : 2.00 Min. : 4.00
## Class :character 1st Qu.:3.138 1st Qu.: 7.00 1st Qu.:10.25
## Mode :character Median :3.140 Median : 10.50 Median :12.00
## Mean :3.162 Mean : 64.74 Mean :13.44
## 3rd Qu.:3.162 3rd Qu.:107.50 3rd Qu.:13.75
## Max. :3.420 Max. :260.00 Max. :64.00
## qualification en entreprise turnover
## Min. :1.00 Min. :0.00 Min. :0.00
## 1st Qu.:1.00 1st Qu.:1.00 1st Qu.:0.00
## Median :1.00 Median :1.00 Median :1.00
## Mean :1.66 Mean :0.88 Mean :0.64
## 3rd Qu.:2.00 3rd Qu.:1.00 3rd Qu.:1.00
## Max. :4.00 Max. :1.00 Max. :2.00
# Box plots of seniority (ancienneté) and salary (salaire) by group.
# Seniority by sex, default orientation.
boxplot(RH1$ancienneté ~ RH1$sexe)

# Same comparison drawn horizontally. TRUE is spelled out because T is an
# ordinary variable that can be reassigned, and the stray empty argument
# of the original call has been removed.
boxplot(RH1$ancienneté ~ RH1$sexe, horizontal = TRUE)

# Salary by sex.
boxplot(RH1$salaire ~ RH1$sexe, horizontal = TRUE)

# Salary by qualification level.
boxplot(RH1$salaire ~ RH1$qualification, horizontal = TRUE)

# Keep only the rows whose sexe is "M", with all columns, and summarise them.
is_male <- RH1$sexe %in% "M"
Hom <- RH1[is_male, , drop = FALSE]
summary(Hom)
## sexe salaire ancienneté jours d'arrêt
## Length:20 Min. :3.135 Min. : 4.00 Min. : 4.00
## Class :character 1st Qu.:3.140 1st Qu.: 7.75 1st Qu.:11.00
## Mode :character Median :3.160 Median : 40.00 Median :12.00
## Mean :3.183 Mean : 71.40 Mean :15.85
## 3rd Qu.:3.205 3rd Qu.:102.50 3rd Qu.:16.25
## Max. :3.420 Max. :260.00 Max. :64.00
## qualification en entreprise turnover
## Min. :1.00 Min. :0.00 Min. :0.0
## 1st Qu.:1.00 1st Qu.:1.00 1st Qu.:0.0
## Median :1.00 Median :1.00 Median :0.5
## Mean :1.80 Mean :0.85 Mean :0.6
## 3rd Qu.:2.25 3rd Qu.:1.00 3rd Qu.:1.0
## Max. :4.00 Max. :1.00 Max. :2.0
# Keep only the rows whose sexe is "F", with all columns, and summarise them.
is_female <- RH1$sexe %in% "F"
Fem <- RH1[is_female, , drop = FALSE]
summary(Fem)
## sexe salaire ancienneté jours d'arrêt
## Length:30 Min. :3.110 Min. : 2.0 Min. : 6.00
## Class :character 1st Qu.:3.135 1st Qu.: 7.0 1st Qu.:10.00
## Mode :character Median :3.139 Median : 8.0 Median :12.00
## Mean :3.148 Mean : 60.3 Mean :11.83
## 3rd Qu.:3.140 3rd Qu.:107.5 3rd Qu.:13.00
## Max. :3.300 Max. :260.0 Max. :18.00
## qualification en entreprise turnover
## Min. :1.000 Min. :0.0 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:1.0 1st Qu.:0.0000
## Median :1.000 Median :1.0 Median :1.0000
## Mean :1.567 Mean :0.9 Mean :0.6667
## 3rd Qu.:2.000 3rd Qu.:1.0 3rd Qu.:1.0000
## Max. :4.000 Max. :1.0 Max. :2.0000
# Stack the two subsets back into one data.frame (men first, then women).
# NOTE(review): the name `all` masks base::all() for variable lookup; it is
# kept because the rest of the script refers to it.
all <- rbind(Hom, Fem)
# Histogram of salaries; 10 is passed as the `breaks` argument.
hist(all$salaire, breaks = 10)

# Histogram of qualification levels.
hist(all$qualification, col = "light blue", breaks = 4)

summary(all$qualification)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 1.00 1.00 1.66 2.00 4.00
# Simple linear regression of salary on qualification level.
lm1 <- lm(formula = salaire ~ qualification, data = all)
summary(lm1)
##
## Call:
## lm(formula = salaire ~ qualification, data = all)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.068118 -0.017394 0.006434 0.009263 0.147675
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.083911 0.010075 306.082 < 2e-16 ***
## qualification 0.047103 0.005355 8.796 1.42e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03354 on 48 degrees of freedom
## Multiple R-squared: 0.6171, Adjusted R-squared: 0.6092
## F-statistic: 77.37 on 1 and 48 DF, p-value: 1.418e-11
# Scatter plot of salary against qualification, with the fitted line from
# lm1 and text annotations giving its equation and adjusted R-squared.
# The stray trailing comma of the original plot() call (which passed an
# empty argument) has been removed.
plot(
  all$qualification, all$salaire,
  main = "Qualification salaire",
  xlab = "Qualif ", ylab = "salaire",
  pch = 8, cex = 0.3
)
abline(lm1, col = "blue")
# Equation label built from the intercept and slope, rounded to 3 decimals.
text(
  2.5, 3.4,
  paste(
    "Log Salaire =", round(lm1$coefficients[1], 3),
    "+ Qualif *", round(lm1$coefficients[2], 3)
  ),
  col = "blue", cex = 0.6
)
text(
  1.5, 3.375,
  paste("R² ajusté", round(summary(lm1)$adj.r.squared, 3)),
  col = "blue", cex = 0.6
)
grid(NULL, NULL)

# Test of the difference in mean salaries
summary(object = Fem)
## sexe salaire ancienneté jours d'arrêt
## Length:30 Min. :3.110 Min. : 2.0 Min. : 6.00
## Class :character 1st Qu.:3.135 1st Qu.: 7.0 1st Qu.:10.00
## Mode :character Median :3.139 Median : 8.0 Median :12.00
## Mean :3.148 Mean : 60.3 Mean :11.83
## 3rd Qu.:3.140 3rd Qu.:107.5 3rd Qu.:13.00
## Max. :3.300 Max. :260.0 Max. :18.00
## qualification en entreprise turnover
## Min. :1.000 Min. :0.0 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:1.0 1st Qu.:0.0000
## Median :1.000 Median :1.0 Median :1.0000
## Mean :1.567 Mean :0.9 Mean :0.6667
## 3rd Qu.:2.000 3rd Qu.:1.0 3rd Qu.:1.0000
## Max. :4.000 Max. :1.0 Max. :2.0000
# Hand computation of the pooled-variance two-sample Student statistic,
# followed by the equal-variance t.test() on the same two samples.

# Women: sample size, mean and variance of salary.
n_f <- length(Fem$salaire)
m_f <- mean(Fem$salaire)
var_f <- var(Fem$salaire)

# Men: sample size, mean and variance of salary.
n_h <- length(Hom$salaire)
m_h <- mean(Hom$salaire)
var_h <- var(Hom$salaire)

# Pooled standard deviation: the two variances weighted by their degrees
# of freedom.
sdem <- sqrt(((n_f - 1) * var_f + (n_h - 1) * var_h) / (n_f + n_h - 2))
# Standard error of the difference between the two means.
sd_D <- sdem * sqrt(1 / n_f + 1 / n_h)
# NOTE(review): this is women minus men, while the t.test() call below
# passes Hom first, so the two statistics have opposite signs - confirm
# which direction is intended.
student <- (m_f - m_h) / sd_D

test <- t.test(Hom$salaire, Fem$salaire, var.equal = TRUE)
test
##
## Two Sample t-test
##
## data: Hom$salaire and Fem$salaire
## t = 2.3253, df = 48, p-value = 0.02433
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.004667062 0.064314164
## sample estimates:
## mean of x mean of y
## 3.182798 3.148307
# Extract the t statistic stored in the test result.
test[["statistic"]]
## t
## 2.325277
# F test comparing the salary variances of the two groups.
var.test(x = Hom$salaire, y = Fem$salaire)
##
## F test to compare two variances
##
## data: Hom$salaire and Fem$salaire
## F = 3.3024, num df = 19, denom df = 29, p-value = 0.003772
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 1.480034 7.932083
## sample estimates:
## ratio of variances
## 3.302362
# Mean salary within each sexe group.
tapply(X = all$salaire, INDEX = all$sexe, FUN = mean)
## F M
## 3.148307 3.182798
# Two-sample t test of salary by sexe, using the default settings of the
# formula interface.
t.test(all$salaire ~ all$sexe)
##
## Welch Two Sample t-test
##
## data: all$salaire by all$sexe
## t = -2.0832, df = 26.732, p-value = 0.04693
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.0684780947 -0.0005031315
## sample estimates:
## mean in group F mean in group M
## 3.148307 3.182798
# Histograms of salaries for women and men on a 1 x 2 plotting layout,
# with a single overall title.
par(mfrow = c(1, 2))
hist(
  all$salaire[all$sexe == "F"],
  col = "pink", xlab = "salaires des femmes", main = "", ylab = "Fréquence"
)
hist(
  all$salaire[all$sexe == "M"],
  col = "blue", xlab = "salaires des hommes", main = "", ylab = "fréquence"
)
# Title typo fixed ("Distriubtion" -> "Distribution").
title(main = "Distribution des salaires par genre", outer = TRUE, line = -3)

# Back to a single-panel layout for later plots.
par(mfrow = c(1, 1))
# Regression of salary on qualification and sex.
lm2 <- lm(formula = salaire ~ qualification + sexe, data = all)
summary(lm2)
##
## Call:
## lm(formula = salaire ~ qualification + sexe, data = all)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.057979 -0.017803 -0.002693 0.016284 0.137328
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.077184 0.009873 311.667 < 2e-16 ***
## qualification 0.045397 0.005105 8.892 1.23e-11 ***
## sexeM 0.023898 0.009230 2.589 0.0128 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03171 on 47 degrees of freedom
## Multiple R-squared: 0.6649, Adjusted R-squared: 0.6507
## F-statistic: 46.64 on 2 and 47 DF, p-value: 6.928e-12
# Regression of salary on qualification, sex and seniority.
lm3 <- lm(
  formula = salaire ~ qualification + sexe + ancienneté,
  data = all
)
summary(lm3)
##
## Call:
## lm(formula = salaire ~ qualification + sexe + ancienneté, data = all)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.056446 -0.017204 -0.002737 0.015335 0.139357
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.072e+00 1.284e-02 239.286 < 2e-16 ***
## qualification 5.222e-02 1.176e-02 4.438 5.62e-05 ***
## sexeM 2.331e-02 9.333e-03 2.498 0.0161 *
## ancienneté -9.031e-05 1.401e-04 -0.644 0.5225
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0319 on 46 degrees of freedom
## Multiple R-squared: 0.6679, Adjusted R-squared: 0.6463
## F-statistic: 30.84 on 3 and 46 DF, p-value: 4.419e-11
# Regression of salary on qualification, sex, seniority and turnover.
lm4 <- lm(
  formula = salaire ~ qualification + sexe + ancienneté + turnover,
  data = all
)
summary(lm4)
##
## Call:
## lm(formula = salaire ~ qualification + sexe + ancienneté + turnover,
## data = all)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.050093 -0.020316 -0.002899 0.015620 0.127486
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.051e+00 1.713e-02 178.119 < 2e-16 ***
## qualification 5.425e-02 1.154e-02 4.700 2.48e-05 ***
## sexeM 2.297e-02 9.114e-03 2.520 0.0154 *
## ancienneté 6.373e-06 1.470e-04 0.043 0.9656
## turnover 1.806e-02 1.001e-02 1.804 0.0780 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03115 on 45 degrees of freedom
## Multiple R-squared: 0.6903, Adjusted R-squared: 0.6628
## F-statistic: 25.08 on 4 and 45 DF, p-value: 5.808e-11
# Regression of salary on qualification, sex, seniority, turnover and days
# of absence, then export of the coefficient table to LaTeX with xtable.
# The days-of-absence column has a non-syntactic name, so it is quoted with
# backticks and resolved through `data = all`. The original wrote
# all$`jours d'arrêt` inside the formula, which bypasses `data` (so
# predict() with newdata would ignore the new values) and puts "all$" into
# the coefficient labels. The estimates are unchanged; only the labels in
# the output below differ.
lm5 <- lm(
  salaire ~ qualification + sexe + ancienneté + turnover + `jours d'arrêt`,
  data = all
)
summary(lm5)
##
## Call:
## lm(formula = salaire ~ qualification + sexe + ancienneté + turnover +
## `jours d'arrêt`, data = all)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.049953 -0.017937 0.000338 0.016475 0.115198
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.0707650 0.0197650 155.364 < 2e-16 ***
## qualification 0.0487711 0.0116089 4.201 0.000128 ***
## sexeM 0.0277388 0.0092300 3.005 0.004370 **
## ancienneté 0.0000595 0.0001458 0.408 0.685243
## turnover 0.0152088 0.0098639 1.542 0.130267
## `jours d'arrêt` -0.0010642 0.0005686 -1.872 0.067907 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03032 on 44 degrees of freedom
## Multiple R-squared: 0.7132, Adjusted R-squared: 0.6806
## F-statistic: 21.88 on 5 and 44 DF, p-value: 6.1e-11
require(xtable)
## Loading required package: xtable
## Warning: package 'xtable' was built under R version 3.5.1
xtable(lm5)
## % latex table generated in R 3.5.0 by xtable 1.8-2 package
## % Fri Nov 16 23:48:27 2018
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrr}
## \hline
## & Estimate & Std. Error & t value & Pr($>$$|$t$|$) \\
## \hline
## (Intercept) & 3.0708 & 0.0198 & 155.36 & 0.0000 \\
## qualification & 0.0488 & 0.0116 & 4.20 & 0.0001 \\
## sexeM & 0.0277 & 0.0092 & 3.01 & 0.0044 \\
## ancienneté & 0.0001 & 0.0001 & 0.41 & 0.6852 \\
## turnover & 0.0152 & 0.0099 & 1.54 & 0.1303 \\
## `jours d'arrêt` & -0.0011 & 0.0006 & -1.87 & 0.0679 \\
## \hline
## \end{tabular}
## \end{table}