En el fichero Deportistas.csv aparecen datos sobre estatura, peso, índice de masa corporal, etc. en una muestra de deportistas profesionales australianos. En concreto aparecen las siguientes variables:
# Load the required libraries and read the data from an external file.
# install.packages("dplyr")
library(dplyr)
# install.packages("magrittr")
library(magrittr)
# install.packages("knitr")
library(knitr)
# The path is resolved relative to the current working directory.
athletes_data <- read.csv(file = "./Deportistas.csv")
Sin filtros
# Overall summary() of the four body measurements, with the columns placed in
# alphabetical order, rendered as a table.
kable(
  summary(
    athletes_data %>%
      select(Ht, Wt, BMI, Bfat) %>%
      select(order(colnames(.)))
  )
)
Bfat | BMI | Ht | Wt | |
---|---|---|---|---|
Min. : 5.630 | Min. :16.75 | Min. :148.9 | Min. : 37.80 | |
1st Qu.: 8.545 | 1st Qu.:21.08 | 1st Qu.:174.0 | 1st Qu.: 66.53 | |
Median :11.650 | Median :22.72 | Median :179.7 | Median : 74.40 | |
Mean :13.507 | Mean :22.96 | Mean :180.1 | Mean : 75.01 | |
3rd Qu.:18.080 | 3rd Qu.:24.46 | 3rd Qu.:186.2 | 3rd Qu.: 84.12 | |
Max. :35.520 | Max. :34.42 | Max. :209.4 | Max. :123.20 |
Por sexo: Masculino y Femenino
# Mean, median, minimum and maximum of each body measurement, by sex.
# NOTE(review): `labels = c("M", "F")` is applied to the sorted codes found in
# `Sex`, so this presumably relies on the male code sorting first -- confirm
# against the data file.
athletes_data %>%
  mutate(Sex = factor(Sex, labels = c("M", "F"))) %>%
  group_by(Sex) %>%
  select(Sex, Ht, Wt, BMI, Bfat) %>%
  # across() replaces the deprecated summarise_each(funs(...)); the named list
  # produces the same `<column>_<statistic>` output names.
  summarise(across(
    everything(),
    list(mean = mean, median = median, min = min, max = max)
  )) %>%
  # Grouping key first, then the statistics in alphabetical column order.
  select(Sex, order(colnames(.))) %>%
  kable()
Sex | Bfat_max | Bfat_mean | Bfat_median | Bfat_min | BMI_max | BMI_mean | BMI_median | BMI_min | Ht_max | Ht_mean | Ht_median | Ht_min | Wt_max | Wt_mean | Wt_median | Wt_min |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
M | 19.94 | 9.250882 | 8.625 | 5.63 | 34.42 | 23.90363 | 23.560 | 19.63 | 209.4 | 185.5059 | 185.55 | 165.3 | 123.2 | 82.52353 | 83.00 | 53.8 |
F | 35.52 | 17.849100 | 17.940 | 8.07 | 31.93 | 21.98920 | 21.815 | 16.75 | 195.9 | 174.5940 | 175.00 | 148.9 | 96.3 | 67.34250 | 68.05 | 37.8 |
Por deporte
# Mean, median, minimum and maximum of each body measurement, by sport.
athletes_data %>%
  group_by(Sport) %>%
  select(Sport, Ht, Wt, BMI, Bfat) %>%
  # across() replaces the deprecated summarise_each(funs(...)); the named list
  # produces the same `<column>_<statistic>` output names.
  summarise(across(
    everything(),
    list(mean = mean, median = median, min = min, max = max)
  )) %>%
  # Grouping key first, then the statistics in alphabetical column order.
  select(Sport, order(colnames(.))) %>%
  kable()
Sport | Bfat_max | Bfat_mean | Bfat_median | Bfat_min | BMI_max | BMI_mean | BMI_median | BMI_min | Ht_max | Ht_mean | Ht_median | Ht_min | Wt_max | Wt_mean | Wt_median | Wt_min |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
b_ball | 28.83 | 14.807200 | 15.070 | 7.06 | 25.93 | 22.25840 | 22.030 | 18.96 | 209.4 | 188.6600 | 188.70 | 169.1 | 113.7 | 79.77600 | 77.70 | 62.3 |
field | 24.88 | 14.880000 | 13.970 | 6.43 | 34.42 | 27.53947 | 27.390 | 20.12 | 195.4 | 180.6000 | 180.10 | 169.8 | 123.2 | 89.97105 | 87.50 | 58.0 |
gym | 13.46 | 11.317500 | 11.355 | 9.10 | 20.31 | 18.52000 | 18.360 | 17.05 | 158.9 | 153.4250 | 152.95 | 148.9 | 47.8 | 43.62500 | 44.45 | 37.8 |
netball | 35.52 | 21.609130 | 21.320 | 11.29 | 26.24 | 22.43957 | 22.630 | 18.26 | 183.3 | 176.0870 | 176.00 | 168.6 | 83.8 | 69.59348 | 68.80 | 51.9 |
row | 25.16 | 15.582432 | 16.580 | 6.96 | 26.79 | 23.49811 | 23.690 | 19.69 | 198.0 | 182.3757 | 181.80 | 156.0 | 97.0 | 78.53784 | 78.70 | 49.8 |
swim | 18.48 | 10.583636 | 9.300 | 6.16 | 26.73 | 22.93864 | 22.665 | 19.00 | 194.4 | 180.5455 | 180.75 | 165.0 | 96.9 | 75.14545 | 75.00 | 55.1 |
t_400m | 16.20 | 8.633793 | 6.990 | 5.63 | 23.12 | 20.74310 | 20.890 | 16.75 | 191.0 | 175.4517 | 176.00 | 162.0 | 77.5 | 64.04655 | 64.70 | 49.2 |
tennis | 20.86 | 12.876364 | 11.500 | 6.26 | 25.36 | 21.10545 | 21.250 | 17.06 | 190.8 | 174.1636 | 175.00 | 157.9 | 80.0 | 64.47273 | 69.70 | 45.8 |
t_sprnt | 11.64 | 8.249333 | 7.520 | 5.80 | 26.51 | 22.89800 | 23.130 | 19.54 | 189.1 | 176.3867 | 174.90 | 163.9 | 94.8 | 71.50667 | 70.80 | 57.3 |
w_polo | 19.17 | 12.245294 | 11.630 | 7.82 | 27.79 | 24.46647 | 24.280 | 21.26 | 197.5 | 188.2235 | 190.50 | 179.3 | 101.0 | 86.72941 | 87.30 | 74.4 |
Por sexo y deporte
# Mean, median, minimum and maximum of each body measurement, by sex and sport.
# NOTE(review): `labels = c("M", "F")` is applied to the sorted codes found in
# `Sex`, so this presumably relies on the male code sorting first -- confirm
# against the data file.
athletes_data %>%
  mutate(Sex = factor(Sex, labels = c("M", "F"))) %>%
  group_by(Sex, Sport) %>%
  select(Sex, Sport, Ht, Wt, BMI, Bfat) %>%
  # across() replaces the deprecated summarise_each(funs(...)); the named list
  # produces the same `<column>_<statistic>` output names. Dropping the groups
  # keeps the result from staying grouped by Sex.
  summarise(
    across(
      everything(),
      list(mean = mean, median = median, min = min, max = max)
    ),
    .groups = "drop"
  ) %>%
  # Grouping keys first, then the statistics in alphabetical column order.
  select(Sex, Sport, order(colnames(.))) %>%
  kable()
Sex | Sport | Bfat_max | Bfat_mean | Bfat_median | Bfat_min | BMI_max | BMI_mean | BMI_median | BMI_min | Ht_max | Ht_mean | Ht_median | Ht_min | Wt_max | Wt_mean | Wt_median | Wt_min |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
M | b_ball | 14.53 | 8.893333 | 8.640 | 7.06 | 25.93 | 23.17667 | 23.160 | 19.81 | 209.4 | 195.5833 | 195.25 | 186.1 | 113.7 | 88.92500 | 88.65 | 75.5 |
M | field | 19.94 | 11.907500 | 10.760 | 6.43 | 34.42 | 27.95250 | 28.680 | 22.59 | 195.4 | 185.2750 | 185.05 | 179.1 | 123.2 | 95.76250 | 96.25 | 75.2 |
M | row | 12.61 | 9.409333 | 9.360 | 6.96 | 26.79 | 24.59333 | 25.060 | 19.69 | 198.0 | 187.5333 | 188.30 | 165.3 | 97.0 | 86.80667 | 88.20 | 53.8 |
M | swim | 11.72 | 8.296154 | 8.470 | 6.16 | 26.73 | 23.66154 | 23.290 | 21.38 | 194.4 | 185.6462 | 184.00 | 172.7 | 96.9 | 81.66154 | 83.00 | 67.0 |
M | t_400m | 9.50 | 6.685556 | 6.430 | 5.63 | 23.12 | 21.21667 | 21.155 | 19.63 | 191.0 | 179.1889 | 178.55 | 169.1 | 77.5 | 68.20833 | 68.70 | 57.4 |
M | tennis | 11.50 | 9.080000 | 9.280 | 6.26 | 23.76 | 22.29500 | 22.175 | 21.07 | 190.8 | 183.9500 | 183.30 | 178.4 | 80.0 | 75.40000 | 75.25 | 71.1 |
M | t_sprnt | 9.56 | 7.287273 | 6.760 | 5.80 | 26.51 | 23.73727 | 23.580 | 21.65 | 189.1 | 178.5364 | 178.00 | 171.3 | 94.8 | 75.79091 | 72.90 | 69.1 |
M | w_polo | 19.17 | 12.245294 | 11.630 | 7.82 | 27.79 | 24.46647 | 24.280 | 21.26 | 197.5 | 188.2235 | 190.50 | 179.3 | 101.0 | 86.72941 | 87.30 | 74.4 |
F | b_ball | 28.83 | 20.266154 | 19.880 | 15.07 | 25.75 | 21.41077 | 21.200 | 18.96 | 195.9 | 182.2692 | 184.60 | 169.1 | 96.3 | 71.33077 | 69.10 | 62.3 |
F | field | 24.88 | 19.975714 | 20.100 | 11.77 | 31.93 | 26.83143 | 26.950 | 20.12 | 175.6 | 172.5857 | 172.30 | 169.8 | 94.8 | 80.04286 | 82.80 | 58.0 |
F | gym | 13.46 | 11.317500 | 11.355 | 9.10 | 20.31 | 18.52000 | 18.360 | 17.05 | 158.9 | 153.4250 | 152.95 | 148.9 | 47.8 | 43.62500 | 44.45 | 37.8 |
F | netball | 35.52 | 21.609130 | 21.320 | 11.29 | 26.24 | 22.43957 | 22.630 | 18.26 | 183.3 | 176.0870 | 176.00 | 168.6 | 83.8 | 69.59348 | 68.80 | 51.9 |
F | row | 25.16 | 19.791364 | 19.590 | 12.20 | 25.44 | 22.75136 | 23.010 | 19.76 | 186.3 | 178.8591 | 179.65 | 156.0 | 80.5 | 72.90000 | 73.95 | 49.8 |
F | swim | 18.48 | 13.887778 | 13.350 | 11.22 | 25.17 | 21.89444 | 22.040 | 19.00 | 181.0 | 173.1778 | 173.30 | 165.0 | 75.6 | 65.73333 | 64.80 | 55.1 |
F | t_400m | 16.20 | 11.821818 | 11.070 | 8.07 | 22.76 | 19.96818 | 20.120 | 16.75 | 177.0 | 169.3364 | 170.80 | 162.0 | 65.2 | 57.23636 | 57.30 | 49.2 |
F | tennis | 20.86 | 15.045714 | 15.310 | 8.45 | 25.36 | 20.42571 | 20.530 | 17.06 | 177.5 | 168.5714 | 167.90 | 157.9 | 71.5 | 58.22857 | 56.10 | 45.8 |
F | t_sprnt | 11.64 | 10.895000 | 10.895 | 10.15 | 22.37 | 20.59000 | 20.225 | 19.54 | 178.0 | 170.4750 | 170.00 | 163.9 | 61.9 | 59.72500 | 59.85 | 57.3 |
# Sex and BMI columns only, with Sex recoded as a factor labelled "M"/"F".
# NOTE(review): the labels follow the sorted codes found in `Sex`; presumably
# the male code sorts first -- confirm against the data file.
BMI_and_sex <- athletes_data %>%
  mutate(Sex = factor(Sex, labels = c("M", "F"))) %>%
  select(Sex, BMI)
# 98% confidence interval for the mean BMI of the rows labelled "M".
# pull() hands t.test() the numeric vector it documents as input (instead of a
# one-column data frame), and `conf.level` is spelled out rather than relying
# on partial argument matching.
BMI_and_sex %>%
  filter(Sex == "M") %>%
  pull(BMI) %>%
  t.test(conf.level = 0.98)
##
## One Sample t-test
##
## data: .
## t = 87.233, df = 101, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 98 percent confidence interval:
## 23.25589 24.55137
## sample estimates:
## mean of x
## 23.90363
El intervalo de confianza del 98% para el BMI medio en deportistas Masculinos es \([23.25589, 24.55137]\).
# 98% confidence interval for the mean BMI of the rows labelled "F".
# pull() hands t.test() the numeric vector it documents as input (instead of a
# one-column data frame), and `conf.level` is spelled out rather than relying
# on partial argument matching.
BMI_and_sex %>%
  filter(Sex == "F") %>%
  pull(BMI) %>%
  t.test(conf.level = 0.98)
##
## One Sample t-test
##
## data: .
## t = 83.292, df = 99, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 98 percent confidence interval:
## 21.36494 22.61346
## sample estimates:
## mean of x
## 21.9892
El intervalo de confianza del 98% para el BMI medio en deportistas Femeninos es \([21.36494, 22.61346]\).
\[H_0: \mu BMI_M = \mu BMI_F\] \[H_1: \mu BMI_M \neq \mu BMI_F\]
# Significance level and the matching confidence level for the BMI tests.
alpha <- 0.003
conf <- 1 - alpha
# BMI samples of each sex, extracted as plain numeric vectors: the tests that
# consume them document numeric vectors as input, not one-column data frames.
M_BMI <- BMI_and_sex %>%
  filter(Sex == "M") %>%
  pull(BMI)
F_BMI <- BMI_and_sex %>%
  filter(Sex == "F") %>%
  pull(BMI)
Antes de poder seguir adelante con el contraste de las medias, es necesario saber si hay o no diferencias significativas entre las varianzas.
Para ello, vamos a realizar un nuevo contraste de hipótesis.
\[H_0: \sigma^2 BMI_M = \sigma^2 BMI_F\]
\[H_1: \sigma^2 BMI_M \neq \sigma^2 BMI_F\]
# Two-sided F test of equal BMI variances between the two sexes.
var.test(
  x = as.matrix(M_BMI),
  y = as.matrix(F_BMI),
  ratio = 1,
  alternative = "two.sided",
  conf.level = conf
)
##
## F test to compare two variances
##
## data: as.matrix(M_BMI) and as.matrix(F_BMI)
## F = 1.0989, num df = 101, denom df = 99, p-value = 0.6388
## alternative hypothesis: true ratio of variances is not equal to 1
## 99.7 percent confidence interval:
## 0.6029699 1.9997363
## sample estimates:
## ratio of variances
## 1.098884
En este caso nos da un p-valor de 0.6388, que es mucho mayor que el alpha que hemos elegido de 0.003; por lo tanto no podemos rechazar \(H_0\) y se entiende que no hay diferencias significativas entre las varianzas. Por eso el contraste de medias se realiza asumiendo varianzas iguales.
Finalmente continuamos con el contraste de las medias:
# Two-sided, unpaired two-sample t-test on mean BMI, assuming equal variances.
t.test(
  x = M_BMI,
  y = F_BMI,
  alternative = "two.sided",
  mu = 0,
  paired = FALSE,
  var.equal = TRUE,
  conf.level = conf
)
##
## Two Sample t-test
##
## data: M_BMI and F_BMI
## t = 5.0289, df = 200, p-value = 1.094e-06
## alternative hypothesis: true difference in means is not equal to 0
## 99.7 percent confidence interval:
## 0.7706465 3.0582084
## sample estimates:
## mean of x mean of y
## 23.90363 21.98920
Como el p-valor es menor que el valor de alpha, \(1.094 \cdot 10^{-6} \lt 0.003\), rechazamos \(H_0\); es decir, hay diferencias entre el BMI medio de los deportistas en base a su sexo.
\[H_0: \mu Bfat_M = \mu Bfat_F\] \[H_1: \mu Bfat_M \neq \mu Bfat_F\]
# Significance level and the matching confidence level for the body-fat tests.
alpha <- 0.003
conf <- 1 - alpha
# Sex and Bfat columns only, with Sex recoded as a factor labelled "M"/"F".
# NOTE(review): the labels follow the sorted codes found in `Sex`; presumably
# the male code sorts first -- confirm against the data file.
Bfat_and_sex <- athletes_data %>%
  select(Sex, Bfat) %>%
  mutate(Sex = factor(Sex, labels = c("M", "F")))
# Body-fat samples of each sex, extracted as plain numeric vectors: the tests
# that consume them document numeric vectors as input, not one-column data
# frames.
M_Bfat <- Bfat_and_sex %>%
  filter(Sex == "M") %>%
  pull(Bfat)
F_Bfat <- Bfat_and_sex %>%
  filter(Sex == "F") %>%
  pull(Bfat)
Antes de poder seguir adelante con el contraste de las medias, es necesario saber si hay o no diferencias significativas entre las varianzas.
Para ello, vamos a realizar un nuevo contraste de hipótesis.
\[H_0: \sigma^2 Bfat_M = \sigma^2 Bfat_F\]
\[H_1: \sigma^2 Bfat_M \neq \sigma^2 Bfat_F\]
# Two-sided F test of equal body-fat variances between the two sexes.
var.test(
  x = as.matrix(M_Bfat),
  y = as.matrix(F_Bfat),
  ratio = 1,
  alternative = "two.sided",
  conf.level = conf
)
##
## F test to compare two variances
##
## data: as.matrix(M_Bfat) and as.matrix(F_Bfat)
## F = 0.3411, num df = 101, denom df = 99, p-value = 1.5e-07
## alternative hypothesis: true ratio of variances is not equal to 1
## 99.7 percent confidence interval:
## 0.1871653 0.6207295
## sample estimates:
## ratio of variances
## 0.3410999
En este caso nos da un p-valor de \(1.5 \cdot 10^{-7}\), que es menor que el alpha que hemos elegido de 0.003; por lo tanto rechazamos \(H_0\) y se entiende que sí hay diferencias significativas entre las varianzas. Por eso el contraste de medias se realiza sin asumir varianzas iguales (test de Welch).
Finalmente continuamos con el contraste de las medias:
# Two-sided, unpaired two-sample t-test on mean body fat, without assuming
# equal variances.
t.test(
  x = M_Bfat,
  y = F_Bfat,
  alternative = "two.sided",
  mu = 0,
  paired = FALSE,
  var.equal = FALSE,
  conf.level = conf
)
##
## Welch Two Sample t-test
##
## data: M_Bfat and F_Bfat
## t = -13.65, df = 158.87, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 99.7 percent confidence interval:
## -10.496861 -6.699574
## sample estimates:
## mean of x mean of y
## 9.250882 17.849100
En este caso, el p-valor es menor que \(2.2 \cdot 10^{-16}\), y por lo tanto es menor que el alpha que hemos elegido de 0.003, así que rechazamos \(H_0\) y eso quiere decir que sí hay diferencias en el porcentaje medio de grasa corporal en base al sexo de los deportistas.