# Attach the dependencies with library() so that a missing package stops the
# script right here; require() would only warn and return FALSE, and the
# failure would surface later as a "could not find function" error.
library(readxl)
## Warning: package 'readxl' was built under R version 3.5.1
library(RCurl)
## Warning: package 'RCurl' was built under R version 3.5.1
## Loading required package: bitops
# Remote location of the workbook and the local path it is saved to.
url <- "http://mickael-clevenot.fr/wp-content/uploads/2017/09/RH_examen_blanc_sas.xlsx"
dest_file <- "./dataRH.xlsx"

# mode = "wb" requests a binary transfer so the .xlsx file is not altered.
# download.file() returns 0 on success; stop early otherwise so that
# read_excel() is never run on a missing or partial file.
status <- download.file(url, destfile = dest_file, mode = "wb")
if (status != 0) {
  stop("Download of ", url, " failed with status ", status, call. = FALSE)
}

# RH holds the object returned by read_excel(); RH1 is the plain data.frame
# copy that the rest of the script works on.
RH <- read_excel(dest_file)
RH1 <- as.data.frame(RH)

# First rows of the data set.
head(RH1)
##   sexe  salaire ancienneté jours d'arrêt qualification en entreprise

## 1    F 3.140278          8            13             1             1

## 2    M 3.140278          8            13             1             1

## 3    F 3.131736          2             6             1             0

## 4    F 3.138866          7            12             1             1

## 5    M 3.138866          7            12             1             1

## 6    F 3.133172          3             8             1             1

##   turnover

## 1        1

## 2        1

## 3        2

## 4        1

## 5        1

## 6        1
# Last rows of the data set.
tail(RH1)
##    sexe  salaire ancienneté jours d'arrêt qualification en entreprise

## 45    F 3.242765         90            10             3             1

## 46    M 3.210000        240            24             3             1

## 47    M 3.220000        150            12             3             1

## 48    M 3.253758        100            10             4             1

## 49    M 3.420000        260             4             4             1

## 50    F 3.300000        260             8             4             1

##    turnover

## 45        0

## 46        0

## 47        0

## 48        0

## 49        0

## 50        0
# Per-column summary statistics of the full data set.
summary(RH1)
##      sexe              salaire        ancienneté     jours d'arrêt  

##  Length:50          Min.   :3.110   Min.   :  2.00   Min.   : 4.00  

##  Class :character   1st Qu.:3.138   1st Qu.:  7.00   1st Qu.:10.25  

##  Mode  :character   Median :3.140   Median : 10.50   Median :12.00  

##                     Mean   :3.162   Mean   : 64.74   Mean   :13.44  

##                     3rd Qu.:3.162   3rd Qu.:107.50   3rd Qu.:13.75  

##                     Max.   :3.420   Max.   :260.00   Max.   :64.00  

##  qualification  en entreprise     turnover   

##  Min.   :1.00   Min.   :0.00   Min.   :0.00  

##  1st Qu.:1.00   1st Qu.:1.00   1st Qu.:0.00  

##  Median :1.00   Median :1.00   Median :1.00  

##  Mean   :1.66   Mean   :0.88   Mean   :0.64  

##  3rd Qu.:2.00   3rd Qu.:1.00   3rd Qu.:1.00  

##  Max.   :4.00   Max.   :1.00   Max.   :2.00
# Box plots of ancienneté by sexe: vertical first, then horizontal.
# The columns are named in the formula and resolved through data = RH1;
# the stray empty argument of the original call is gone and TRUE replaces
# the reassignable shorthand T.
boxplot(ancienneté ~ sexe, data = RH1)

boxplot(ancienneté ~ sexe, data = RH1, horizontal = TRUE)

# Box plots of salaire by sexe and by qualification level.
boxplot(salaire ~ sexe, data = RH1, horizontal = TRUE)

boxplot(salaire ~ qualification, data = RH1, horizontal = TRUE)

# Rows of RH1 whose sexe is "M". %in% never yields NA, so rows with a
# missing sexe are simply left out.
is_male <- RH1$sexe %in% "M"
Hom <- RH1[is_male, , drop = FALSE]

# Summary statistics restricted to that subset.
summary(Hom)
##      sexe              salaire        ancienneté     jours d'arrêt  

##  Length:20          Min.   :3.135   Min.   :  4.00   Min.   : 4.00  

##  Class :character   1st Qu.:3.140   1st Qu.:  7.75   1st Qu.:11.00  

##  Mode  :character   Median :3.160   Median : 40.00   Median :12.00  

##                     Mean   :3.183   Mean   : 71.40   Mean   :15.85  

##                     3rd Qu.:3.205   3rd Qu.:102.50   3rd Qu.:16.25  

##                     Max.   :3.420   Max.   :260.00   Max.   :64.00  

##  qualification  en entreprise     turnover  

##  Min.   :1.00   Min.   :0.00   Min.   :0.0  

##  1st Qu.:1.00   1st Qu.:1.00   1st Qu.:0.0  

##  Median :1.00   Median :1.00   Median :0.5  

##  Mean   :1.80   Mean   :0.85   Mean   :0.6  

##  3rd Qu.:2.25   3rd Qu.:1.00   3rd Qu.:1.0  

##  Max.   :4.00   Max.   :1.00   Max.   :2.0
# Rows of RH1 whose sexe is "F". %in% never yields NA, so rows with a
# missing sexe are simply left out.
is_female <- RH1$sexe %in% "F"
Fem <- RH1[is_female, , drop = FALSE]

# Summary statistics restricted to that subset.
summary(Fem)
##      sexe              salaire        ancienneté    jours d'arrêt  

##  Length:30          Min.   :3.110   Min.   :  2.0   Min.   : 6.00  

##  Class :character   1st Qu.:3.135   1st Qu.:  7.0   1st Qu.:10.00  

##  Mode  :character   Median :3.139   Median :  8.0   Median :12.00  

##                     Mean   :3.148   Mean   : 60.3   Mean   :11.83  

##                     3rd Qu.:3.140   3rd Qu.:107.5   3rd Qu.:13.00  

##                     Max.   :3.300   Max.   :260.0   Max.   :18.00  

##  qualification   en entreprise    turnover     

##  Min.   :1.000   Min.   :0.0   Min.   :0.0000  

##  1st Qu.:1.000   1st Qu.:1.0   1st Qu.:0.0000  

##  Median :1.000   Median :1.0   Median :1.0000  

##  Mean   :1.567   Mean   :0.9   Mean   :0.6667  

##  3rd Qu.:2.000   3rd Qu.:1.0   3rd Qu.:1.0000  

##  Max.   :4.000   Max.   :1.0   Max.   :2.0000
# Stack the two subsets into one data frame (Hom rows first, then Fem rows).
# NOTE(review): the name `all` is shared with base::all(); it is kept
# because the rest of the script refers to it.
all <- rbind(Hom, Fem)

# Histogram of salaire; a single number for `breaks` is a suggested number
# of cells.
hist(all$salaire, breaks = 10)

# Histogram of the qualification levels.
hist(
  all$qualification,
  breaks = 4,
  col = "light blue"
)

summary(all$qualification)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 

##    1.00    1.00    1.00    1.66    2.00    4.00
# Simple linear regression of salaire on qualification.
lm1 <- lm(
  salaire ~ qualification,
  data = all
)

# Coefficient table and fit statistics.
summary(lm1)
## 

## Call:

## lm(formula = salaire ~ qualification, data = all)

## 

## Residuals:

##       Min        1Q    Median        3Q       Max 

## -0.068118 -0.017394  0.006434  0.009263  0.147675 

## 

## Coefficients:

##               Estimate Std. Error t value Pr(>|t|)    

## (Intercept)   3.083911   0.010075 306.082  < 2e-16 ***

## qualification 0.047103   0.005355   8.796 1.42e-11 ***

## ---

## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 

## Residual standard error: 0.03354 on 48 degrees of freedom

## Multiple R-squared:  0.6171, Adjusted R-squared:  0.6092 

## F-statistic: 77.37 on 1 and 48 DF,  p-value: 1.418e-11
# Scatter plot of salaire against qualification. The trailing empty argument
# of the original plot() call has been removed.
plot(
  all$qualification, all$salaire,
  main = "Qualification salaire",
  xlab = "Qualif ", ylab = "salaire",
  pch = 8, cex = 0.3
)

# Overlay the fitted line of lm1.
abline(lm1, col = "blue")

# Annotate the plot with the fitted equation (coefficients rounded to three
# decimals) and the adjusted R-squared of lm1.
text(
  2.5, 3.4,
  paste(
    "Log Salaire =", round(lm1$coefficients[1], 3),
    "+ Qualif *", round(lm1$coefficients[2], 3)
  ),
  col = "blue", cex = 0.6
)

text(
  1.5, 3.375,
  paste("R² ajusté", round(summary(lm1)$adj.r.squared, 3)),
  col = "blue", cex = 0.6
)

# Grid lines aligned with the axis tick marks.
grid(NULL, NULL)

# Test of the difference in mean salaries

# Summary statistics of the Fem subset, shown again ahead of the test.
summary(Fem)
##      sexe              salaire        ancienneté    jours d'arrêt  

##  Length:30          Min.   :3.110   Min.   :  2.0   Min.   : 6.00  

##  Class :character   1st Qu.:3.135   1st Qu.:  7.0   1st Qu.:10.00  

##  Mode  :character   Median :3.139   Median :  8.0   Median :12.00  

##                     Mean   :3.148   Mean   : 60.3   Mean   :11.83  

##                     3rd Qu.:3.140   3rd Qu.:107.5   3rd Qu.:13.00  

##                     Max.   :3.300   Max.   :260.0   Max.   :18.00  

##  qualification   en entreprise    turnover     

##  Min.   :1.000   Min.   :0.0   Min.   :0.0000  

##  1st Qu.:1.000   1st Qu.:1.0   1st Qu.:0.0000  

##  Median :1.000   Median :1.0   Median :1.0000  

##  Mean   :1.567   Mean   :0.9   Mean   :0.6667  

##  3rd Qu.:2.000   3rd Qu.:1.0   3rd Qu.:1.0000  

##  Max.   :4.000   Max.   :1.0   Max.   :2.0000
# Hand computation of the pooled-variance two-sample t statistic.
# Group sizes, means and variances of salaire for Fem (f) and Hom (h).
n_f <- length(Fem$salaire)
n_h <- length(Hom$salaire)

m_f <- mean(Fem$salaire)
m_h <- mean(Hom$salaire)

var_f <- var(Fem$salaire)
var_h <- var(Hom$salaire)

# Pooled standard deviation, then the standard error of the mean difference.
sdem <- sqrt((((n_f - 1) * (var_f) + (n_h - 1) * (var_h))) / (n_f + n_h - 2))
sd_D <- sdem * sqrt(1 / n_f + 1 / n_h)

# Statistic for (Fem - Hom). t.test() below is called as (Hom, Fem), so its
# t value has the opposite sign.
student <- (m_f - m_h) / sd_D

# The same test through t.test(), assuming equal variances.
test <- t.test(Hom$salaire, Fem$salaire, var.equal = TRUE)

test
## 

##  Two Sample t-test

## 

## data:  Hom$salaire and Fem$salaire

## t = 2.3253, df = 48, p-value = 0.02433

## alternative hypothesis: true difference in means is not equal to 0

## 95 percent confidence interval:

##  0.004667062 0.064314164

## sample estimates:

## mean of x mean of y 

##  3.182798  3.148307
# t statistic extracted from the stored t.test() result.
test$statistic
##        t 

## 2.325277
# F test comparing the variances of salaire in the Hom and Fem subsets.
var.test(Hom$salaire,Fem$salaire)
## 

##  F test to compare two variances

## 

## data:  Hom$salaire and Fem$salaire

## F = 3.3024, num df = 19, denom df = 29, p-value = 0.003772

## alternative hypothesis: true ratio of variances is not equal to 1

## 95 percent confidence interval:

##  1.480034 7.932083

## sample estimates:

## ratio of variances 

##           3.302362
# Mean of salaire for each value of sexe.
tapply(all$salaire, all$sexe, mean)
##        F        M 

## 3.148307 3.182798
# Two-sample t-test of salaire by sexe with t.test() defaults, i.e. the
# Welch test that does not assume equal variances.
t.test(all$salaire ~ all$sexe)
## 

##  Welch Two Sample t-test

## 

## data:  all$salaire by all$sexe

## t = -2.0832, df = 26.732, p-value = 0.04693

## alternative hypothesis: true difference in means is not equal to 0

## 95 percent confidence interval:

##  -0.0684780947 -0.0005031315

## sample estimates:

## mean in group F mean in group M 

##        3.148307        3.182798
# Two histograms side by side. par() returns the previous settings, which
# are restored afterwards instead of forcing a hard-coded layout.
old_par <- par(mfrow = c(1, 2))

# Salary distribution for sexe == "F".
hist(
  all$salaire[all$sexe == "F"],
  col = "pink", xlab = "salaires des femmes", main = "", ylab = "Fréquence"
)

# Salary distribution for sexe == "M".
hist(
  all$salaire[all$sexe == "M"],
  col = "blue", xlab = "salaires des hommes", main = "", ylab = "fréquence"
)

# Shared title drawn in the outer margin (spelling of "Distribution" fixed).
title(main = "Distribution des salaires par genre", outer = TRUE, line = -3)

par(old_par)

# Regression of salaire on qualification and sexe.
lm2 <- lm(
  salaire ~ qualification + sexe,
  data = all
)

# Coefficient table and fit statistics.
summary(lm2)
## 

## Call:

## lm(formula = salaire ~ qualification + sexe, data = all)

## 

## Residuals:

##       Min        1Q    Median        3Q       Max 

## -0.057979 -0.017803 -0.002693  0.016284  0.137328 

## 

## Coefficients:

##               Estimate Std. Error t value Pr(>|t|)    

## (Intercept)   3.077184   0.009873 311.667  < 2e-16 ***

## qualification 0.045397   0.005105   8.892 1.23e-11 ***

## sexeM         0.023898   0.009230   2.589   0.0128 *  

## ---

## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 

## Residual standard error: 0.03171 on 47 degrees of freedom

## Multiple R-squared:  0.6649, Adjusted R-squared:  0.6507 

## F-statistic: 46.64 on 2 and 47 DF,  p-value: 6.928e-12
# Same model as lm2 with ancienneté added as a regressor.
lm3 <- lm(
  salaire ~ qualification + sexe + ancienneté,
  data = all
)

# Coefficient table and fit statistics.
summary(lm3)
## 

## Call:

## lm(formula = salaire ~ qualification + sexe + ancienneté, data = all)

## 

## Residuals:

##       Min        1Q    Median        3Q       Max 

## -0.056446 -0.017204 -0.002737  0.015335  0.139357 

## 

## Coefficients:

##                 Estimate Std. Error t value Pr(>|t|)    

## (Intercept)    3.072e+00  1.284e-02 239.286  < 2e-16 ***

## qualification  5.222e-02  1.176e-02   4.438 5.62e-05 ***

## sexeM          2.331e-02  9.333e-03   2.498   0.0161 *  

## ancienneté    -9.031e-05  1.401e-04  -0.644   0.5225    

## ---

## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 

## Residual standard error: 0.0319 on 46 degrees of freedom

## Multiple R-squared:  0.6679, Adjusted R-squared:  0.6463 

## F-statistic: 30.84 on 3 and 46 DF,  p-value: 4.419e-11
# Same model as lm3 with turnover added as a regressor.
lm4 <- lm(
  salaire ~ qualification + sexe + ancienneté + turnover,
  data = all
)

# Coefficient table and fit statistics.
summary(lm4)
## 

## Call:

## lm(formula = salaire ~ qualification + sexe + ancienneté + turnover, 

##     data = all)

## 

## Residuals:

##       Min        1Q    Median        3Q       Max 

## -0.050093 -0.020316 -0.002899  0.015620  0.127486 

## 

## Coefficients:

##                Estimate Std. Error t value Pr(>|t|)    

## (Intercept)   3.051e+00  1.713e-02 178.119  < 2e-16 ***

## qualification 5.425e-02  1.154e-02   4.700 2.48e-05 ***

## sexeM         2.297e-02  9.114e-03   2.520   0.0154 *  

## ancienneté    6.373e-06  1.470e-04   0.043   0.9656    

## turnover      1.806e-02  1.001e-02   1.804   0.0780 .  

## ---

## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 

## Residual standard error: 0.03115 on 45 degrees of freedom

## Multiple R-squared:  0.6903, Adjusted R-squared:  0.6628 

## F-statistic: 25.08 on 4 and 45 DF,  p-value: 5.808e-11
# Same model as lm4 with `jours d'arrêt` added as a regressor.
# The column is referenced by its back-quoted name rather than through
# all$...: with data = all the variable is then looked up in the data frame
# like the other terms, so predict() with newdata uses the new values and
# the coefficient is labelled `jours d'arrêt`.
lm5 <- lm(
  salaire ~ qualification + sexe + ancienneté + turnover + `jours d'arrêt`,
  data = all
)

# Coefficient table and fit statistics.
summary(lm5)
## 

## Call:

## lm(formula = salaire ~ qualification + sexe + ancienneté + turnover + 

##     all$`jours d'arrêt`, data = all)

## 

## Residuals:

##       Min        1Q    Median        3Q       Max 

## -0.049953 -0.017937  0.000338  0.016475  0.115198 

## 

## Coefficients:

##                       Estimate Std. Error t value Pr(>|t|)    

## (Intercept)          3.0707650  0.0197650 155.364  < 2e-16 ***

## qualification        0.0487711  0.0116089   4.201 0.000128 ***

## sexeM                0.0277388  0.0092300   3.005 0.004370 ** 

## ancienneté           0.0000595  0.0001458   0.408 0.685243    

## turnover             0.0152088  0.0098639   1.542 0.130267    

## all$`jours d'arrêt` -0.0010642  0.0005686  -1.872 0.067907 .  

## ---

## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 

## Residual standard error: 0.03032 on 44 degrees of freedom

## Multiple R-squared:  0.7132, Adjusted R-squared:  0.6806 

## F-statistic: 21.88 on 5 and 44 DF,  p-value: 6.1e-11
# Attach xtable with library() so that a missing package fails here with a
# clear error; require() would only warn and return FALSE.
library(xtable)
## Warning: package 'xtable' was built under R version 3.5.1
# LaTeX table of the lm5 coefficient estimates.
xtable(lm5)
## % latex table generated in R 3.5.0 by xtable 1.8-2 package

## % Fri Nov 16 23:48:27 2018

## \begin{table}[ht]

## \centering

## \begin{tabular}{rrrrr}

##   \hline

##  & Estimate & Std. Error & t value & Pr($>$$|$t$|$) \\ 

##   \hline

## (Intercept) & 3.0708 & 0.0198 & 155.36 & 0.0000 \\ 

##   qualification & 0.0488 & 0.0116 & 4.20 & 0.0001 \\ 

##   sexeM & 0.0277 & 0.0092 & 3.01 & 0.0044 \\ 

##   ancienneté & 0.0001 & 0.0001 & 0.41 & 0.6852 \\ 

##   turnover & 0.0152 & 0.0099 & 1.54 & 0.1303 \\ 

##   all\$`jours d'arrêt` & -0.0011 & 0.0006 & -1.87 & 0.0679 \\ 

##    \hline

## \end{tabular}

## \end{table}