#################################################################################################################
#################################################################################################################
#################################################################################################################

#################################################################################################################
############################################ LOAD THE PREPARED DATA #############################################
#################################################################################################################

#################################################################################################################
#################################################################################################################
#################################################################################################################
rm(list = ls())
setwd('C:/Users/malir/Desktop/data/Handstand') #setwd("E:/data/Handstand")
setwd("E:/data/Handstand/Handstand 1")
handstandART <- readxl::read_excel('handstandART.xlsx')
View(handstandART)
head(handstandART)
#################################################################################################################
#################################################################################################################
#################################################################################################################

#################################################################################################################
############################################ DENSITY/NORMALITY OF THE DATA ######################################
#################################################################################################################

#################################################################################################################
#################################################################################################################
#################################################################################################################

#### for using aes() in ggplots
suppressPackageStartupMessages({
  library(ggplot2)
})
library(ggplot2)
#### density of all corresponding variables in the data set
ggplot2::ggplot(handstandART, aes(x = age)) + geom_bar(fill = 'orange')
ggplot2::ggplot(handstandART, aes(x = height)) + geom_density(fill = 'red')
ggplot2::ggplot(handstandART, aes(x = weight)) + geom_density(fill = 'red')
ggplot2::ggplot(handstandART, aes(x = CKCUEST)) + geom_density(fill = 'red') + 
  geom_vline(xintercept = mean(handstandART$CKCUEST))
ggplot2::ggplot(handstandART, aes(x = UQYBTL)) + geom_density(fill = 'red')
ggplot2::ggplot(handstandART, aes(x = UQYBTR)) + geom_density(fill = 'red')
ggplot2::ggplot(handstandART, aes(x = UQYBT)) + geom_density(fill = 'red') + 
  geom_vline(xintercept = median(handstandART$UQYBT))
ggplot2::ggplot(handstandART, aes(x = arm_L)) + geom_density(fill = 'red')
ggplot2::ggplot(handstandART, aes(x = arm_R)) + geom_density(fill = 'red')
ggplot2::ggplot(handstandART, aes(x = laterality_arm)) + geom_bar(fill = 'orange')
ggplot2::ggplot(handstandART, aes(x = laterality_leg)) + geom_bar(fill = 'orange')
ggplot2::ggplot(handstandART, aes(x = sex)) + geom_bar(fill = 'orange')
ggplot2::ggplot(handstandART, aes(x = scale)) + geom_bar(fill = 'orange')
ggplot2::ggplot(handstandART, aes(x = E_score)) + geom_bar(fill = 'orange')
ggplot2::ggplot(handstandART, aes(x = E_score)) + geom_density(fill = 'red') 
ggplot2::ggplot(handstandART, aes(x = `pain_%`)) + geom_density(fill = 'red')

ggplot(handstandART, aes(x = (E_score))) + geom_bar()

#### count of the "Assessment of the quality value" (AQV) among all participants

library("ggpubr")
library("gridExtra")
library("grid")
AQV_count <- ggplot2::ggplot(handstandART, aes(x = AQV)) + geom_bar(fill = 'darkgrey')+
  xlab('AQV')+
  ylab('Frequency')+
  theme(axis.text.x = element_text(size=10, hjust = 1),
        axis.text.y = element_text(size=10, hjust = 1),
        axis.title.x = element_text(size=20, face = 'bold'),
        axis.title.y = element_text(size=20, face = 'bold'))+
  theme(plot.margin = margin(3,1,1,1, "cm"),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))

Escore_count <- ggplot2::ggplot(handstandART, aes(x = E_score)) + geom_bar(fill = 'darkgrey')+
  xlab('E-score')+
  ylab('Frequency')+
  theme(axis.text.x = element_text(size=10, hjust = 1),
        axis.text.y = element_text(size=10, hjust = 1),
        axis.title.x = element_text(size=20, face = 'bold'),
        axis.title.y = element_text(size=20, face = 'bold'))+
  theme(plot.margin = margin(3,1,1,0.5, "cm"),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"))+
  xlim(0, 2.4)

grid.arrange(arrangeGrob(AQV_count, Escore_count, ncol = 2, nrow = 1,
                         top = textGrob("",
                             gp = gpar(fontsize = 20, font = 3),
                             hjust = 0.5, vjust = 3)))


#### density for UQYBTL/UQYBTR
dens <- apply(handstandART[c(18,20)], 2, density)
plot(NA, xlim = range(sapply(dens, "[", "x")), ylim = range(sapply(dens, "[", "y")))
mapply(lines, dens, col = 1:length(dens))
legend("topright", legend = names(dens), fill = 1:length(dens))

#### checking normality of included variables in the regression model by Shapiro-wilk test
shapiro.test(handstandART$UQYBTL) # we can assume normality (p = 0.39)
shapiro.test(handstandART$UQYBTR) # we can assume normality (p = 0.94)
shapiro.test(handstandART$UQYBT) # we can assume normality (p = 0.90)
shapiro.test(handstandART$CKCUEST) # we can assume normality (p = 0.46)
shapiro.test(handstandART$E_score) # we cannot assume normality (p = 0.0001)


#### checking normality of included variables in the regression model 
# all points have to be along the line for normality
car::qqPlot(handstandART$UQYBT) # we can assume normality 
car::qqPlot(handstandART$CKCUEST) # we can assume normality
car::qqPlot(handstandART$E_score) # we cannot assume normality

#################################################################################################################
#################################################################################################################
#################################################################################################################

#################################################################################################################
############################################ SUMMARY STATISTICS #################################################
#################################################################################################################

#################################################################################################################
#################################################################################################################
#################################################################################################################

table(handstandART$laterality_leg)
table(handstandART$laterality_arm)
max(table(handstandART$E_score))
?IQR
IQR(handstandART$E_score)

#################################################################################################################
#################################################################################################################
#################################################################################################################

#################################################################################################################
######################################### CORRELATION OF THE DATA ###############################################
#################################################################################################################

#################################################################################################################
#################################################################################################################
#################################################################################################################

#### Kendall's coefficient of concordance in AQV
(kendall_W_AQV <- irr::kendall(handstandART[c(11,13,15)], correct = F))

# with ci
rcompanion::kendallW(handstandART[c(11,13,15)], correct = F, ci = T)


#### Kendall's coefficient of concordance in E-score
(kendall_W_Escore <- irr::kendall(handstandART[c(12,14,16)], correct = F))

# with ci
rcompanion::kendallW(handstandART[c(12,14,16)], correct = F, ci = T)

citation("irr")
citation("rcompanion")

#### correlation between UQYBTR/UQYBTL
cor.test(handstandART$UQYBTL, handstandART$UQYBTR, method = 'pearson', alternative = "two.sided")
ggplot2::ggplot(handstandART, mapping = aes(UQYBTR, UQYBTL)) + 
  geom_point(pch = 1, col = 'blue', size = 5) +
  geom_abline()

#### correlation between UQYBTR/UQYBTT
cor.test(handstandART$UQYBTR, handstandART$UQYBT, method = 'pearson', alternative = "two.sided")
ggplot2::ggplot(handstandART, mapping = aes(UQYBTR, UQYBT)) + 
  geom_point(pch = 1, col = 'blue', size = 5) +
  geom_abline()
#### correlation between UQYBTL/UQYBTT
cor.test(handstandART$UQYBTL, handstandART$UQYBT, method = 'pearson', alternative = "two.sided")
ggplot2::ggplot(handstandART, mapping = aes(UQYBTL, UQYBT)) + 
  geom_point(pch = 1, col = 'blue', size = 5) +
  geom_abline()

#### correlation between UQYBT & CKCUEST
cor.test(handstandART$UQYBT, handstandART$CKCUEST, method = 'pearson', alternative = "two.sided")
ggplot2::ggplot(handstandART, mapping = aes(CKCUEST, UQYBT)) + 
  geom_point(pch = 1, col = 'blue', size = 5) +
  geom_abline()



#### correlation between  AQV and E_score
AQV_Escore <- cor.test(handstandART$AQV, handstandART$E_score, method = "kendall",
                       conf.level = 0.95)

# CI for AQV_E_score
library(NSM3)
kendall.ci(handstandART$AQV, handstandART$E_score, alpha = 0.05, type = "t")

# Visualization of the relationship between AQV and E-score
ggplot2::ggplot(handstandART, mapping = aes(AQV, E_score)) + 
  geom_point(pch = 20, col = 'black', size = 5)+ 
  theme(axis.text = element_text(size = 10),
        axis.title=element_text(size=20,face="bold"))+
  theme_gray(base_size = 25)+
  ggtitle("Relationship between AQV and E-score")+
  geom_smooth(method = "auto")




#### corrplot of the anthropometrics
corrplot::corrplot.mixed(cor(handstandART[c(2:4,17,19)], method = "pearson"),
                         lower = 'circle', upper = 'circle', order = 'AOE',
                         addrect = 2, addCoef.col = 'orange', number.cex = 1.0, tl.cex = 2,
                         main = "Correlation of age/height/weight/arm length")



#################################################################################################################
#################################################################################################################
#################################################################################################################

#################################################################################################################
####################################### QUALITY OF HANDSTAND ~ STABILITY TESTS ##################################
#################################################################################################################

#################################################################################################################
#################################################################################################################
#################################################################################################################

#### Is there a relationship between quality of handstand and stability of the shoulder joint?
#### Is there a relationship between quality of handstand and flexibility of the shoulder in flexion?

#### checking the variance equality
# F-test or Bartlett`s test for normally distributed
# Levene's or Fligner-Killeen?s test for non-parametric data
# (Levene's test can assess more than two variances/groups)

stats::fligner.test(UQYBT ~ E_score, data = handstandART) # variances are homogeneous
stats::fligner.test(CKCUEST ~ E_score, data = handstandART) # variances are homogeneous

citation("stats")


#### Checking multicollinearity between all predictors UQYBT/CKCUEST.
# If the VIF (variance inflation factor) is grater then 5. the multicollinearity is present.
# If multicollinearity is present, than it depends on the order of predictors.
library(car)
model_001i_vif <- lm(scale(AQV) ~ UQYBT + CKCUEST, data = handstandART)
(VIF_001i <- car::vif(model_001i_vif))

# plot the VIF
barplot(VIF_001i, main = 'VIF values', horiz = T, col = 'orange', xlim = c(0,7) )
citation("car")

#################################################################################################################
##################################################### ORDINAL LOGISTIC REGRESSION ###############################
#################################################################################################################


#### Packages for ordinal logistic regression implementation.
library(foreign)
library(MASS)
library(Hmisc)
library(reshape2)
library(ggplot2)
library(brant)


###################################################### regression model with three predictors ///// AQV #########


# Convert the AQV into as.factor.
handstandART$AQV <- as.factor(handstandART$AQV)
# Check the class of all variables.
str(handstandART)

#### Used formula of the ordinal logistic regression.
formula <- as.formula(AQV ~ UQYBT + CKCUEST)

#### Making the model of ordinal logistic regression.
model_001i <- MASS::polr(formula,
                         data = handstandART, Hess = T, method = "logistic")
#### Print the model.
print(model_001i)
#### Print the summary of the model
summary(model_001i)
#### Making the table with coefficients and cut-points/t values/p values
coef_model_001i <- coef(summary(model_001i))
p_model_001i <- pnorm(abs(coef_model_001i[, "t value"]), lower.tail = FALSE) * 2
coef_model_001i <- cbind(coef_model_001i, 'p value' = p_model_001i)


#### Print out and export information about the model: degrees of freedom of the model,
# residual degrees of freedom of the model, the (effective) number of degrees of freedom,
# residual deviance coefficients, the pseudo R squared (McFadden) and Chi sq.
print(model_001i)
summary(model_001i)
model_001i$df.residual # residual degrees of freedom
model_001i$edf # the (effective) number of degrees of freedom used by the model

profile(model_001i)

plot(profile(model_001i))
?polr

head(fitted(model_001i))

# Package for adjusted R2 calculation.
library(performance)
citation("performance")
r2_mcfadden(model_001i) # McFadden pseudo R2

# Package for Chi sq calculation. 
library(car)
citation("car")
Anova(model_001i) # Chi sq

# Next we see the estimates for the two intercepts, which are sometimes called cut-points. 
# The intercepts indicate where the latent variable is cut to make the five groups that we 
# observe in our data. Note that this latent variable is continuous. In general, these are 
# not used in the interpretation of the results. The cut-points are closely related to thresholds,
# which are reported by other statistical packages.
print(coef_model_001i) # into table and present it - coefficients of independent variables (cut-points)


confint(model_001i) # CI of the model
model_001i$coefficients # coefficients of the model
exp(coef(model_001i)) # exponential of the coefficients of the model



# Exported into table and present it - proportional odds ratios of variables
ORcoef <- exp(cbind(OR = coef(model_001i), confint(model_001i)))

# Exported into table and present it - 
coef_model_001i_df <- as.data.frame(coef_model_001i)
writexl::write_xlsx(coef_model_001i_df, 'C:/Users/malir/Desktop/data/Handstand\\coefficientsOLR.xlsx')

# Exported into table and present it 
ORcoef_df <- as.data.frame(ORcoef)
writexl::write_xlsx(ORcoef_df, 'C:/Users/malir/Desktop/data/Handstand\\ORcoef_df.xlsx')

# Checking the 'proportional odds assumption'/'parallel regression assumption'
# that means we  assumes that the coefficients that describe the relationship 
# between, say, the lowest versus all higher categories of the dependent variable
# are the same as those that describe the relationship between the next 
# lowest category and all higher categories, etc. 
sf <- function(y) {     
  c(`Y>= 0` = qlogis(mean(y >= 1)),
    `Y>= 1` = qlogis(mean(y >= 2)),
    `Y>= 2` = qlogis(mean(y >= 3)),
    `Y>= 3` = qlogis(mean(y >= 4)),
    `Y>= 4` = qlogis(mean(y >= 5))) 
}

(s <- with(handstandART,
           summary(as.numeric(AQV) ~ UQYBT + CKCUEST, fun = sf)))

s[, 4] <- s[, 4] - s[, 3]
s[, 3] <- s[, 3] - s[, 3]
s

# parallel regression assumption plot
plot(s, which = 1:3, pch = 0:4, xlab = "logit",
     main = "Parallel regression assumption of predictor variables",
     xlim = range(s[,3:4]))

# Parallel regression assumption Brant's test. We conclude that the parallel
# assumption holds since the probability (p-values) for all variables are greater than alpha = 0.05.
# The output also contains an Omnibus variable, which stands for the whole model,
# and it has to be still greater than 0.05
Brant_model_001i <-brant(model_001i)
citation("brant")

?brant
####
newdat <- data.frame(UQYBT = rep(seq(from = 71.8, to = 100.4, length.out = 100)),
                     CKCUEST = rep(seq(from = 15.7, to = 39.3,length.out = 100)))


newdat <- cbind(newdat, predict(model_001i, newdat, type = "probs"))

head(newdat)

lnewdat <- melt(newdat, id.vars = c("UQYBT", "CKCUEST"),
                variable.name = "Level",
                value.name = "Probability")

head(lnewdat)

ggplot(lnewdat, aes(x = UQYBT, y = Probability, colour = Level))+
  geom_line()

ggplot(lnewdat, aes(x = CKCUEST, y = Probability, colour = Level))+
  geom_line()


#### predicting and visualization of the ordinal logistic regression
library(effects)
library(car)
library(MASS)
library(splines)

# we can generate predicted values
Effect(focal.predictors = c('UQYBT', 'CKCUEST'), model_001i)


###################################################### Ordinal logistic regression ///// E-score ##########
# E_score variable is not normally distributed 

# Convert the E_score into as.factor.
handstandART$E_score <- as.factor(handstandART$E_score)
# Check the class of all variables.
str(handstandART)

#### Used formula of the ordinal logistic regression.
formula <- as.formula(E_score ~ UQYBT + CKCUEST)

#### Making the model of ordinal logistic regression.
model_002i <- MASS::polr(formula,
                         data = handstandART, Hess = T, method = "logistic")
#### Print the model.
print(model_002i)
#### Print the summary of the model
summary(model_002i)
#### Making the table with coefficients and cut-points/t values/p values
coef_model_002i <- coef(summary(model_002i))
p_model_002i <- pnorm(abs(coef_model_002i[, "t value"]), lower.tail = FALSE) * 2
coef_model_002i <- cbind(coef_model_002i, 'p value' = p_model_002i)


#### Print out and export information about the model: degrees of freedom of the model,
# residual degrees of freedom of the model, the (effective) number of degrees of freedom,
# residual deviance coefficients, the pseudo R squared (McFadden) and Chi sq.
print(model_002i)
summary(model_002i)
model_002i$df.residual # residual degrees of freedom
model_002i$edf # the (effective) number of degrees of freedom used by the model

profile(model_002i)

plot(profile(model_002i))
?polr

head(fitted(model_001i))

# Package for adjusted R2 calculation.
library(performance)
citation("performance")
r2_mcfadden(model_002i) # McFadden pseudo R2

# Package for Chi sq calculation. 
library(car)
citation("car")
Anova(model_002i) # Chi sq

# Next we see the estimates for the two intercepts, which are sometimes called cut-points. 
# The intercepts indicate where the latent variable is cut to make the five groups that we 
# observe in our data. Note that this latent variable is continuous. In general, these are 
# not used in the interpretation of the results. The cut-points are closely related to thresholds,
# which are reported by other statistical packages.
print(coef_model_002i) # into table and present it - coefficients of independent variables (cut-points)


confint(model_001i) # CI of the model
model_002i$coefficients # coefficients of the model
exp(coef(model_002i)) # exponential of the coefficients of the model



# Exported into table and present it - proportional odds ratios of variables
ORcoef2 <- exp(cbind(OR = coef(model_002i), confint(model_002i)))

# Exported into table and present it - 
coef_model_002i_df <- as.data.frame(coef_model_002i)
writexl::write_xlsx(coef_model_002i_df, 'C:/Users/malir/Desktop/data/Handstand\\coefficientsOLR.xlsx')

# Exported into table and present it 
ORcoef_df2 <- as.data.frame(ORcoef)
writexl::write_xlsx(ORcoef_df2, 'C:/Users/malir/Desktop/data/Handstand\\ORcoef_df.xlsx')

# Checking the 'proportional odds assumption'/'parallel regression assumption'
# that means we  assumes that the coefficients that describe the relationship 
# between, say, the lowest versus all higher categories of the dependent variable
# are the same as those that describe the relationship between the next 
# lowest category and all higher categories, etc. 
sf <- function(y) {     
  c(`Y>= 0` = qlogis(mean(y >= 1)),
    `Y>= 1` = qlogis(mean(y >= 2)),
    `Y>= 2` = qlogis(mean(y >= 3)),
    `Y>= 3` = qlogis(mean(y >= 4)),
    `Y>= 4` = qlogis(mean(y >= 5))) 
}

(s <- with(handstandART,
           summary(as.numeric(AQV) ~ UQYBT + CKCUEST, fun = sf)))

s[, 4] <- s[, 4] - s[, 3]
s[, 3] <- s[, 3] - s[, 3]
s

# parallel regression assumption plot
plot(s, which = 1:3, pch = 0:4, xlab = "logit",
     main = "Parallel regression assumption of predictor variables",
     xlim = range(s[,3:4]))

# Parallel regression assumption Brant's test. We conclude that the parallel
# assumption holds since the probability (p-values) for all variables are greater than alpha = 0.05.
# The output also contains an Omnibus variable, which stands for the whole model,
# and it has to be still greater than 0.05
Brant_model_002i <-brant(model_002i)
citation("brant")

?brant
####
newdat <- data.frame(UQYBT = rep(seq(from = 71.8, to = 100.4, length.out = 100)),
                     CKCUEST = rep(seq(from = 15.7, to = 39.3,length.out = 100)))


newdat <- cbind(newdat, predict(model_002i, newdat, type = "probs"))

head(newdat)

lnewdat <- melt(newdat, id.vars = c("UQYBT", "CKCUEST"),
                variable.name = "Level",
                value.name = "Probability")

head(lnewdat)

ggplot(lnewdat, aes(x = UQYBT, y = Probability, colour = Level))+
  geom_line()

ggplot(lnewdat, aes(x = CKCUEST, y = Probability, colour = Level))+
  geom_line()



#### predicting and visualization of the ordinal logistic regression
library(effects)
library(car)
library(MASS)
library(splines)

# we can generate predicted values
Effect(focal.predictors = c('UQYBT', 'CKCUEST'), model_002i)


Hmisc::describe(handstandART[,c(2:4,18,20:25)])
sample_stats<-pastecs::stat.desc(handstandART[,c(2:4,18,20:25)])
sample_stats <- as.data.frame(t(sample_stats))
sample_stats$max
###################################################################################################################
#################################### Make a final plot with contributions of coefficients (within POR) #######


base_AQV <- data.frame(mean = c(0.0545, 0.0148),
                       ci.lb = c(0.98, 0.89),
                       ci.ub = c(1.14, 1.09),
                       variable = c("UQYBT on AQV", "CKCUEST on AQV"),
                       POR = c(1.06, 0.99))

base_Escore <- data.frame(mean = model_002i$coefficients,
                          lower = c(ORcoef2[,2]),
                          upper = c(ORcoef2[,3]),
                          variable = c("UQYBT on E-score", "CKCUEST on E-score"),
                          POR = c(exp(coef(model_002i))))

names(base_AQV)[names(base_AQV) == "ci.lb"] <- "lower"
names(base_AQV)[names(base_AQV) == "ci.ub"] <- "upper"
base <- rbind(base_Escore,base_AQV)

View(base)
rownames <- rownames(base)
rownames(base) <- c("1","2", "3", "4")
print(base)

# fix the order of variables

base$variable <- factor(base$variable, levels = base$variable)

# plot contributions of coefficients with CI's
ggplot(base, aes(x = variable, y = POR, ymin = lower, ymax = upper)) +
  geom_point(position = position_dodge(width = 0.2)) +
  geom_errorbar(position = position_dodge(width = 0.2), width = 0.1) +
  coord_flip()+
  theme_classic()+
  scale_y_continuous(limits = c(0.0,3.0))+
  geom_hline(yintercept = 1.0, lwd = 0.2, lty = 2)+
  theme(text = element_text(size=16, family="Comic Sans MS", face = "bold"),plot.margin = margin(2, 2, 2, 1, "cm"),
        axis.ticks.y = element_blank(),
        panel.grid = element_blank(),element_line(linetype = 1),
        axis.line.y = element_blank(),axis.title.x = element_text(vjust = -5, hjust = 0.2),
        legend.position = "none")+
  ylab(expression(paste("<- Decrement   Improvement ->\n    (Proportional odds ratio)")))+
  xlab("")



# Test for Autocorrelation/non-independence of errors
library(lmtest)
dwtest(model_002d)