require("readxl")
## Loading required package: readxl
## Warning: package 'readxl' was built under R version 3.5.1
require(RCurl)
## Loading required package: RCurl
## Warning: package 'RCurl' was built under R version 3.5.1
## Loading required package: bitops
# Location of the Excel workbook to analyse.
url <- "http://mickael-clevenot.fr/wp-content/uploads/2017/09/RH_examen_blanc_sas.xlsx"
# Binary mode ("wb") keeps the .xlsx archive intact when it is written to disk.
download.file(url = url, destfile = "./dataRH.xlsx", mode = "wb")
# Read the workbook, then convert the result to a plain data.frame.
RH <- read_excel(path = "./dataRH.xlsx")
RH1 <- as.data.frame(x = RH)
# Preview the first six rows.
head(x = RH1, n = 6L)
##   sexe  salaire ancienneté jours d'arrêt qualification en entreprise
## 1    F 3.140278          8            13             1             1
## 2    M 3.140278          8            13             1             1
## 3    F 3.131736          2             6             1             0
## 4    F 3.138866          7            12             1             1
## 5    M 3.138866          7            12             1             1
## 6    F 3.133172          3             8             1             1
##   turnover
## 1        1
## 2        1
## 3        2
## 4        1
## 5        1
## 6        1
# Preview the last six rows.
tail(x = RH1, n = 6L)
##    sexe  salaire ancienneté jours d'arrêt qualification en entreprise
## 45    F 3.242765         90            10             3             1
## 46    M 3.210000        240            24             3             1
## 47    M 3.220000        150            12             3             1
## 48    M 3.253758        100            10             4             1
## 49    M 3.420000        260             4             4             1
## 50    F 3.300000        260             8             4             1
##    turnover
## 45        0
## 46        0
## 47        0
## 48        0
## 49        0
## 50        0
# Per-column summary of the whole data set.
summary(object = RH1)
##      sexe              salaire        ancienneté     jours d'arrêt  
##  Length:50          Min.   :3.110   Min.   :  2.00   Min.   : 4.00  
##  Class :character   1st Qu.:3.138   1st Qu.:  7.00   1st Qu.:10.25  
##  Mode  :character   Median :3.140   Median : 10.50   Median :12.00  
##                     Mean   :3.162   Mean   : 64.74   Mean   :13.44  
##                     3rd Qu.:3.162   3rd Qu.:107.50   3rd Qu.:13.75  
##                     Max.   :3.420   Max.   :260.00   Max.   :64.00  
##  qualification  en entreprise     turnover   
##  Min.   :1.00   Min.   :0.00   Min.   :0.00  
##  1st Qu.:1.00   1st Qu.:1.00   1st Qu.:0.00  
##  Median :1.00   Median :1.00   Median :1.00  
##  Mean   :1.66   Mean   :0.88   Mean   :0.64  
##  3rd Qu.:2.00   3rd Qu.:1.00   3rd Qu.:1.00  
##  Max.   :4.00   Max.   :1.00   Max.   :2.00
# `ancienneté` by `sexe`, drawn vertically and then horizontally.
boxplot(RH1$ancienneté ~ RH1$sexe)

# The stray empty argument (",,") was removed and TRUE is spelled out,
# because `T` is an ordinary variable that can be reassigned.
boxplot(RH1$ancienneté ~ RH1$sexe, horizontal = TRUE)

# `salaire` by `sexe`, then by `qualification`.
boxplot(RH1$salaire ~ RH1$sexe, horizontal = TRUE)

boxplot(RH1$salaire ~ RH1$qualification, horizontal = TRUE)

# Rows where sexe is "M". %in% never yields NA, so plain logical indexing
# selects exactly the same rows as subset().
Hom <- RH1[RH1$sexe %in% "M", , drop = FALSE]
summary(object = Hom)
##      sexe              salaire        ancienneté     jours d'arrêt  
##  Length:20          Min.   :3.135   Min.   :  4.00   Min.   : 4.00  
##  Class :character   1st Qu.:3.140   1st Qu.:  7.75   1st Qu.:11.00  
##  Mode  :character   Median :3.160   Median : 40.00   Median :12.00  
##                     Mean   :3.183   Mean   : 71.40   Mean   :15.85  
##                     3rd Qu.:3.205   3rd Qu.:102.50   3rd Qu.:16.25  
##                     Max.   :3.420   Max.   :260.00   Max.   :64.00  
##  qualification  en entreprise     turnover  
##  Min.   :1.00   Min.   :0.00   Min.   :0.0  
##  1st Qu.:1.00   1st Qu.:1.00   1st Qu.:0.0  
##  Median :1.00   Median :1.00   Median :0.5  
##  Mean   :1.80   Mean   :0.85   Mean   :0.6  
##  3rd Qu.:2.25   3rd Qu.:1.00   3rd Qu.:1.0  
##  Max.   :4.00   Max.   :1.00   Max.   :2.0
# Rows where sexe is "F". %in% never yields NA, so plain logical indexing
# selects exactly the same rows as subset().
Fem <- RH1[RH1$sexe %in% "F", , drop = FALSE]
summary(object = Fem)
##      sexe              salaire        ancienneté    jours d'arrêt  
##  Length:30          Min.   :3.110   Min.   :  2.0   Min.   : 6.00  
##  Class :character   1st Qu.:3.135   1st Qu.:  7.0   1st Qu.:10.00  
##  Mode  :character   Median :3.139   Median :  8.0   Median :12.00  
##                     Mean   :3.148   Mean   : 60.3   Mean   :11.83  
##                     3rd Qu.:3.140   3rd Qu.:107.5   3rd Qu.:13.00  
##                     Max.   :3.300   Max.   :260.0   Max.   :18.00  
##  qualification   en entreprise    turnover     
##  Min.   :1.000   Min.   :0.0   Min.   :0.0000  
##  1st Qu.:1.000   1st Qu.:1.0   1st Qu.:0.0000  
##  Median :1.000   Median :1.0   Median :1.0000  
##  Mean   :1.567   Mean   :0.9   Mean   :0.6667  
##  3rd Qu.:2.000   3rd Qu.:1.0   3rd Qu.:1.0000  
##  Max.   :4.000   Max.   :1.0   Max.   :2.0000
# Stack the two subsets again: the Hom rows first, then the Fem rows.
# NOTE: `all` is also the name of a base R function; calls to all() still
# resolve to the function, but a more specific name would be clearer.
all <- rbind(Hom, Fem)

# Histogram of salaire with about 10 cells suggested.
hist(all$salaire, breaks = 10)

# Histogram of qualification with about 4 cells suggested.
hist(all$qualification, breaks = 4, col = "light blue")

summary(object = all$qualification)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    1.00    1.00    1.66    2.00    4.00
# Simple linear regression of salaire on qualification.
lm1 <- lm(
  formula = salaire ~ qualification,
  data = all
)
summary(object = lm1)
## 
## Call:
## lm(formula = salaire ~ qualification, data = all)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.068118 -0.017394  0.006434  0.009263  0.147675 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.083911   0.010075 306.082  < 2e-16 ***
## qualification 0.047103   0.005355   8.796 1.42e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03354 on 48 degrees of freedom
## Multiple R-squared:  0.6171, Adjusted R-squared:  0.6092 
## F-statistic: 77.37 on 1 and 48 DF,  p-value: 1.418e-11
# Scatter plot of salaire against qualification with the fitted lm1 line.
# The trailing empty argument after `cex` was removed.
plot(all$qualification, all$salaire,
     main = "Qualification salaire",
     xlab = "Qualif ", ylab = "salaire", pch = 8, cex = 0.3)
abline(lm1, col = "blue")

# Annotate the plot with the fitted equation and the adjusted R-squared;
# coef() is the accessor for the coefficients of a fitted model.
text(2.5, 3.4,
     paste("Log Salaire =", round(coef(lm1)[1], 3),
           "+ Qualif *", round(coef(lm1)[2], 3)),
     col = "blue", cex = 0.6)
text(1.5, 3.375,
     paste("R² ajusté", round(summary(lm1)$adj.r.squared, 3)),
     col = "blue", cex = 0.6)
# Grid lines at the default tick positions of both axes.
grid(NULL, NULL)

# Test of the difference in mean salaries
summary(object = Fem)
##      sexe              salaire        ancienneté    jours d'arrêt  
##  Length:30          Min.   :3.110   Min.   :  2.0   Min.   : 6.00  
##  Class :character   1st Qu.:3.135   1st Qu.:  7.0   1st Qu.:10.00  
##  Mode  :character   Median :3.139   Median :  8.0   Median :12.00  
##                     Mean   :3.148   Mean   : 60.3   Mean   :11.83  
##                     3rd Qu.:3.140   3rd Qu.:107.5   3rd Qu.:13.00  
##                     Max.   :3.300   Max.   :260.0   Max.   :18.00  
##  qualification   en entreprise    turnover     
##  Min.   :1.000   Min.   :0.0   Min.   :0.0000  
##  1st Qu.:1.000   1st Qu.:1.0   1st Qu.:0.0000  
##  Median :1.000   Median :1.0   Median :1.0000  
##  Mean   :1.567   Mean   :0.9   Mean   :0.6667  
##  3rd Qu.:2.000   3rd Qu.:1.0   3rd Qu.:1.0000  
##  Max.   :4.000   Max.   :1.0   Max.   :2.0000
# Hand-computed pooled (equal-variance) two-sample Student statistic,
# cross-checked against t.test().
m_f <- mean(Fem$salaire)
m_h <- mean(Hom$salaire)
n_f <- length(Fem$salaire)
n_h <- length(Hom$salaire)
var_f <- var(Fem$salaire)
var_h <- var(Hom$salaire)
# Pooled standard deviation: the two sample variances weighted by their
# degrees of freedom.
sdem <- sqrt(((n_f - 1) * var_f + (n_h - 1) * var_h) / (n_f + n_h - 2))
# Standard error of the difference between the two means.
sd_D <- sdem * sqrt(1 / n_f + 1 / n_h)
# The difference is taken as Hom - Fem so that the sign agrees with
# t.test(Hom$salaire, Fem$salaire) below. It used to be Fem - Hom, which gave
# the opposite sign to the reported statistic.
student <- (m_h - m_f) / sd_D
test <- t.test(Hom$salaire, Fem$salaire, var.equal = TRUE)
# The manual statistic must reproduce the one computed by t.test().
stopifnot(isTRUE(all.equal(student, unname(test$statistic))))
test
## 
##  Two Sample t-test
## 
## data:  Hom$salaire and Fem$salaire
## t = 2.3253, df = 48, p-value = 0.02433
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.004667062 0.064314164
## sample estimates:
## mean of x mean of y 
##  3.182798  3.148307
# t statistic stored in the test result.
test[["statistic"]]
##        t 
## 2.325277
# F test comparing the variance of salaire between Hom and Fem.
var.test(x = Hom$salaire, y = Fem$salaire)
## 
##  F test to compare two variances
## 
## data:  Hom$salaire and Fem$salaire
## F = 3.3024, num df = 19, denom df = 29, p-value = 0.003772
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  1.480034 7.932083
## sample estimates:
## ratio of variances 
##           3.302362
# Mean of salaire within each level of sexe.
tapply(X = all$salaire, INDEX = all$sexe, FUN = mean)
##        F        M 
## 3.148307 3.182798
# Two-sample t-test of salaire by sexe through the formula interface.
# var.equal is left at its default, so the Welch (unequal-variance) test is run.
t.test(all$salaire ~ all$sexe)
## 
##  Welch Two Sample t-test
## 
## data:  all$salaire by all$sexe
## t = -2.0832, df = 26.732, p-value = 0.04693
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.0684780947 -0.0005031315
## sample estimates:
## mean in group F mean in group M 
##        3.148307        3.182798
# Side-by-side histograms of salaire for each value of sexe.
# par() returns the previous settings, which are kept so they can be restored
# afterwards instead of hard-coding a reset.
old_par <- par(mfrow = c(1, 2))
hist(all$salaire[all$sexe == "F"], col = "pink",
     xlab = "salaires des femmes", main = "", ylab = "Fréquence")
hist(all$salaire[all$sexe == "M"], col = "blue",
     xlab = "salaires des hommes", main = "", ylab = "fréquence")
# Shared title drawn inside the figure region (typo "Distriubtion" corrected).
title(main = "Distribution des salaires par genre", outer = TRUE, line = -3)

# Put back the graphical parameters that were in effect before the panel.
par(old_par)
# Regression of salaire on qualification and sexe.
lm2 <- lm(
  formula = salaire ~ qualification + sexe,
  data = all
)
summary(object = lm2)
## 
## Call:
## lm(formula = salaire ~ qualification + sexe, data = all)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.057979 -0.017803 -0.002693  0.016284  0.137328 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.077184   0.009873 311.667  < 2e-16 ***
## qualification 0.045397   0.005105   8.892 1.23e-11 ***
## sexeM         0.023898   0.009230   2.589   0.0128 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03171 on 47 degrees of freedom
## Multiple R-squared:  0.6649, Adjusted R-squared:  0.6507 
## F-statistic: 46.64 on 2 and 47 DF,  p-value: 6.928e-12
# Same model as lm2 with ancienneté added as a regressor.
lm3 <- lm(
  formula = salaire ~ qualification + sexe + ancienneté,
  data = all
)
summary(object = lm3)
## 
## Call:
## lm(formula = salaire ~ qualification + sexe + ancienneté, data = all)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.056446 -0.017204 -0.002737  0.015335  0.139357 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    3.072e+00  1.284e-02 239.286  < 2e-16 ***
## qualification  5.222e-02  1.176e-02   4.438 5.62e-05 ***
## sexeM          2.331e-02  9.333e-03   2.498   0.0161 *  
## ancienneté    -9.031e-05  1.401e-04  -0.644   0.5225    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0319 on 46 degrees of freedom
## Multiple R-squared:  0.6679, Adjusted R-squared:  0.6463 
## F-statistic: 30.84 on 3 and 46 DF,  p-value: 4.419e-11
# Same model as lm3 with turnover added as a regressor.
lm4 <- lm(
  formula = salaire ~ qualification + sexe + ancienneté + turnover,
  data = all
)
summary(object = lm4)
## 
## Call:
## lm(formula = salaire ~ qualification + sexe + ancienneté + turnover, 
##     data = all)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.050093 -0.020316 -0.002899  0.015620  0.127486 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.051e+00  1.713e-02 178.119  < 2e-16 ***
## qualification 5.425e-02  1.154e-02   4.700 2.48e-05 ***
## sexeM         2.297e-02  9.114e-03   2.520   0.0154 *  
## ancienneté    6.373e-06  1.470e-04   0.043   0.9656    
## turnover      1.806e-02  1.001e-02   1.804   0.0780 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03115 on 45 degrees of freedom
## Multiple R-squared:  0.6903, Adjusted R-squared:  0.6628 
## F-statistic: 25.08 on 4 and 45 DF,  p-value: 5.808e-11
# Same model as lm4 with `jours d'arrêt` added as a regressor.
# The column is referenced by its backtick-quoted name so that it is looked up
# through `data`. Writing all$`jours d'arrêt` inside the formula bypasses
# `data`, so predict(newdata = ...) would ignore the new values for this term.
# The estimates are unchanged; only the printed term label becomes
# `jours d'arrêt` instead of all$`jours d'arrêt`.
lm5 <- lm(
  salaire ~ qualification + sexe + ancienneté + turnover + `jours d'arrêt`,
  data = all
)
summary(lm5)
## 
## Call:
## lm(formula = salaire ~ qualification + sexe + ancienneté + turnover + 
##     all$`jours d'arrêt`, data = all)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.049953 -0.017937  0.000338  0.016475  0.115198 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          3.0707650  0.0197650 155.364  < 2e-16 ***
## qualification        0.0487711  0.0116089   4.201 0.000128 ***
## sexeM                0.0277388  0.0092300   3.005 0.004370 ** 
## ancienneté           0.0000595  0.0001458   0.408 0.685243    
## turnover             0.0152088  0.0098639   1.542 0.130267    
## all$`jours d'arrêt` -0.0010642  0.0005686  -1.872 0.067907 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03032 on 44 degrees of freedom
## Multiple R-squared:  0.7132, Adjusted R-squared:  0.6806 
## F-statistic: 21.88 on 5 and 44 DF,  p-value: 6.1e-11
require(xtable)
## Loading required package: xtable
## Warning: package 'xtable' was built under R version 3.5.1
# LaTeX table of the lm5 coefficient estimates.
xtable(x = lm5)
## % latex table generated in R 3.5.0 by xtable 1.8-2 package
## % Fri Nov 16 23:48:27 2018
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrr}
##   \hline
##  & Estimate & Std. Error & t value & Pr($>$$|$t$|$) \\ 
##   \hline
## (Intercept) & 3.0708 & 0.0198 & 155.36 & 0.0000 \\ 
##   qualification & 0.0488 & 0.0116 & 4.20 & 0.0001 \\ 
##   sexeM & 0.0277 & 0.0092 & 3.01 & 0.0044 \\ 
##   ancienneté & 0.0001 & 0.0001 & 0.41 & 0.6852 \\ 
##   turnover & 0.0152 & 0.0099 & 1.54 & 0.1303 \\ 
##   all\$`jours d'arrêt` & -0.0011 & 0.0006 & -1.87 & 0.0679 \\ 
##    \hline
## \end{tabular}
## \end{table}