thiscodeWorks - Organizing the best of code online

#software #softwaredevelopment #usa #uk #technology #poker #pokergame #gamedevelopment #appdevelopment #itservices #pokergamedevelopment #itcompany #websitedevelopment #r #react.js #typescript #ios #elixir

Poker Game Development Company

https://technoderivation.com/poker-game-development

#r

R- Assignment 1

set.seed(28092024) 
n <- 3 
sample_norm <- rnorm(3,0,sqrt(5)) 
unb_estim <- var(sample_norm) 
max_estim <- var(sample_norm) * ((n-1)/n) 
mse_estim <- var(sample_norm) * ((n-1)/(n+1)) 
popul_var <- 5



estimates <- c(max_estim , unb_estim , mse_estim ) 
differences <- estimates - popul_var 
result <- data.frame( Estimator = c("MLE", "Unbiased", "MinMSE"), Estimate = estimates, Difference = differences ) 
print(result)
barplot(result$Difference, names.arg = result$Estimator, main = "Difference of Estimator values fromTrue Population Variance", ylab = "Difference", col = c("red", "green", "blue"), ylim = c(min(result$Difference) - 1, max(result$Difference) + 1))



calculate_estimators <- function(n, S, mean, popul_var) { 
unbiased_vals <- numeric(S) 
mle_vals <- numeric(S) 
mse_vals <- numeric(S) 
for (i in 1:S) { 
sample <- rnorm(n, mean, sqrt(popul_var)) 
unbiased <- var(sample) 
mle <- unbiased * ((n - 1) / n) 
minmse <- unbiased * ((n - 1) / (n + 1)) 
unbiased_vals[i] <- unbiased 
mle_vals[i] <- mle 
mse_vals[i] <- minmse } 
return(list( unbiased_var = unbiased_vals, mle_var = mle_vals, mse_var = mse_vals )) } 



stats_calc <- function(estimate_vals, popul_var) { 
avg_val <- mean(estimate_vals) 
bias <- avg_val - popul_var 
variance <- var(estimate_vals) 
mse <- mean((estimate_vals - popul_var)^2) 
return(c(Average = avg_val, Bias = bias, Variance = variance, MSE = mse)) } 
exp_n3 = calculate_estimators(3,10000,0,sqrt(5))
exp_n20 = calculate_estimators(20,10000,0,sqrt(5))
exp_n200 = calculate_estimators(200,10000,0,sqrt(5))



results_n3 <- data.frame( Estimator = c("Unbiased", "MLE", "MinMSE"), Average = c(n3_unbiased_stats["Average"], n3_mle_stats["Average"], n3_minmse_stats["Average"]), Bias = c(n3_unbiased_stats["Bias"], n3_mle_stats["Bias"], n3_minmse_stats["Bias"]), Variance = c(n3_unbiased_stats["Variance"], n3_mle_stats["Variance"], n3_minmse_stats["Variance"]), MSE = c(n3_unbiased_stats["MSE"], n3_mle_stats["MSE"], n3_minmse_stats["MSE"]) )

results_n20 <- data.frame( Estimator = c("Unbiased", "MLE", "MinMSE"), Average = c(n20_unbiased_stats["Average"], n20_mle_stats["Average"], n20_minmse_stats["Average"]), Bias = c(n20_unbiased_stats["Bias"], n20_mle_stats["Bias"], n20_minmse_stats["Bias"]), Variance = c(n20_unbiased_stats["Variance"], n20_mle_stats["Variance"], n20_minmse_stats["Variance"]), MSE = c(n20_unbiased_stats["MSE"], n20_mle_stats["MSE"], n20_minmse_stats["MSE"]) )


results_n200 <- data.frame( Estimator = c("Unbiased", "MLE", "MinMSE"), Average = c(n200_unbiased_stats["Average"], n200_mle_stats["Average"], n200_minmse_stats["Average"]), Bias = c(n200_unbiased_stats["Bias"], n200_mle_stats["Bias"], n200_minmse_stats["Bias"]), Variance = c(n200_unbiased_stats["Variance"], n200_mle_stats["Variance"], n200_minmse_stats["Variance"]), MSE = c(n200_unbiased_stats["MSE"], n200_mle_stats["MSE"], n200_minmse_stats["MSE"]) )

#r

Code snippets – 2 scale y axes in {ggplot2} - NHS-R Community

library(dplyr, warn.conflicts = FALSE)
library(tidyr)
library(ggplot2)
library(NHSRdatasets)
library(janitor, warn.conflicts = FALSE) # to clean the titles and making them snake_case

ons_mortality <- NHSRdatasets::ons_mortality

deaths_data <- ons_mortality |>
  filter(date > "2020-01-01",
         category_2 %in% c("all ages", "Yorkshire and The Humber")) |>
  pivot_wider(
    id_cols = c(date, week_no),
    values_from = counts,
    names_from = category_2
  ) |>
  clean_names()

ggplot(data = deaths_data) +
  geom_line(aes(
    x = date, 
    y = all_ages,
    col = "all ages"
  )) +
  geom_line(aes(
    x = date, 
    y = yorkshire_and_the_humber,
    col = "Yorkshire and The Humber"
  )) +
  scale_y_continuous(
    name = "Yorkshire and The Humber",
    breaks = scales::pretty_breaks(10),
    sec.axis = sec_axis(~ . * 10,
                        name = "all ages",
                        breaks = scales::pretty_breaks(10)
    )
  ) +
  scale_colour_manual(
    name = NULL,
    values = c(
      "all ages" = "#CC2200",
      "Yorkshire and The Humber" = "black"
    )
  )

#r

rename column with variable

column_name <- "my_col"
value <- 0
dplyr::mutate(df, !!rlang::sym(column_name) := value)

#r

add month to lazy table

lazy_tbl %>% dplyr::mutate(last_month = my_date + months(-1))

# using lubridate doesn't work

#r

change legend position

plt + ggplot2::theme(legend.position = "bottom")

#r

,,

def mathOp(line, tline, i, rowNum):
    op = []
    num_of_operations = 0
    num_of_AN = 0

    while i < len(line):
        if line[i] == "SUM OF":
            op.append("+")
            i += 1
            num_of_operations += 1
        elif line[i] == "DIFF OF":
            op.append("-")
            i += 1
            num_of_operations += 1
        elif line[i] == "PRODUKT OF":
            op.append("*")
            i += 1
            num_of_operations += 1
        elif line[i] == "QUOSHUNT OF":
            op.append("/")
            i += 1
            num_of_operations += 1
        elif line[i] == "MOD OF":
            op.append("%")
            i += 1
            num_of_operations += 1
        elif line[i] == "BIGGR OF":
            op.append("max")
            i += 1
            num_of_operations += 1
        elif line[i] == "SMALLR OF":
            op.append("min")
            i += 1
            num_of_operations += 1
        else:
            if tline[i] == "NUMBR":
                op.append(int(line[i]))
                i += 1
            elif tline[i] == "NUMBAR":
                op.append(float(line[i]))
                i += 1
            elif tline[i] == "VARIABLE":
                value, _ = searchVarValue(line[i])
                op.append(value)
                i += 1
            elif tline[i] == "YARN":
                value = typeCasting(line[i], tline[i], "NUMBAR", rowNum)
                op.append(value)
                i += 1
            elif tline[i] == "AN":
                i += 1
                num_of_AN += 1
            else:
                raise RuntimeError("Unexpected %r at line %d" % (line[i], rowNum))
            i += 1

    expected_operands = num_of_operations + 1
    actual_operands = len(op) - (num_of_AN + num_of_operations)
    if expected_operands != actual_operands:
        raise RuntimeError(
            "Expected %d operands, but found %d at line %d"
            % (expected_operands, actual_operands, rowNum)
        )
    else:
        return parse(deque(op))

#r

mathop

import re
from Variable import Variable
from collections import deque

vars = []


class SyntaxAnalyzer:
    def program(self, tokens, lexeme, row):
        i = 0

        while tokens[i] == "COMMENT":
            i += 1

        if tokens[i] == "START":  # encountered start of program
            print("==== PROGRAM START! === \n")
            i += 1
            while tokens[i] != "END" and i < len(tokens):
                if tokens[i] == "COMMENT":
                    i += 1
                    continue

                if tokens[i] == "WAZZUP":
                    i += 1
                    i = isVarDec(tokens, lexeme, row, i)

                i = statement(tokens, lexeme, row, i)

                if i >= len(tokens):
                    break
            if i == len(tokens):
                raise RuntimeError("End of program not found")
            # printVariables()
        else:
            raise RuntimeError("Start of program not found")


def isVarDec(tokens, lexeme, row, i):
    maxlen = len(tokens)
    while tokens[i] != "BUHBYE":
        if tokens[i] == "COMMENT":  # aka BTW (single line comment)
            # comments are stored all in one, if it's a multiline is when we iterate thru so this is fine
            i += 1
            continue
        elif tokens[i] == "VAR_DEC":
            # build line
            rowNum = row[i]
            line = []
            tline = []
            while rowNum == row[i]:
                line.append(lexeme[i])
                tline.append(tokens[i])
                i += 1
            storeVariable(tline, line, rowNum)
        else:
            raise RuntimeError(
                "Unexpected %r on line %d, Only variable declarations are allowed in this section"
                % (lexeme[i], row[i])
            )

        if i >= maxlen:
            raise RuntimeError("Encountered end of file")
    return i


def storeVariable(tline, line, rowNum):
    global vars

    i = 1
    maxlength = len(tline)
    if tline[i] == "VARIABLE":
        varName = line[i][:-1]
        i += 1
    else:
        raise RuntimeError("Expected VARIABLE NAME on line %d" % (rowNum))

    if i >= maxlength:
        vars.append(Variable(varName, "NOOB", None))
        return

    if tline[i] == "ITZ":
        i += 1
    else:
        raise RuntimeError("Expected 'ITZ' on line %d" % (rowNum))

    if i >= maxlength:
        raise RuntimeError("Encountered end of file!")

    if (
        tline[i] == "NOOB"
        or tline[i] == "YARN"
        or tline[i] == "TROOF"
        or tline[i] == "NUMBAR"
        or tline[i] == "NUMBR"
        or tline[i] == "VARIABLE"
    ):
        type = tline[i]
        value = line[i]
        vars.append(Variable(varName, type, value))
        return
    else:
        raise RuntimeError(
            "Variable declaration can only be to a YARN, TROOF, NOOB etch"
        )
    vars.append(Variable("IT", "NOOB", ""))


def statement(tokens, lexeme, row, i):
    tline = []
    line = []
    rowNum = row[i]
    # print(rowNum)
    while rowNum == row[i]:
        tline.append(tokens[i])
        line.append(lexeme[i])
        i += 1

    if tline[0] == "PRINT":
        printLine(line, tline)
    elif tline[0] == "VAR_DEC":
        raise RuntimeError("Unexpected variable declaration at line %d" % (rowNum))
    elif tline[0] == "BOOL_OPER":
        print(boolOpRegion(line, tline, 0, rowNum))
    elif tline[0] == "COMPARISON":
        print(comparison(line, tline, 0, rowNum))
    elif tline[0] == "MATH":
        print(mathOp(line, tline, 0, rowNum))
    return i


def comparison(line, tline, i, rowNum):
    compQ = []
    # print(line)
    if line[i] == "BOTH SAEM":
        i += 1
        if tline[i] == "NUMBR" or tline[i] == "NUMBAR":
            compQ.append([tline[i], line[i]])
            i += 1
        elif tline[i] == "VARIABLE":
            value, type = searchVarValue(line[i])
            compQ.append([type, value])
            i += 1
        else:
            raise RuntimeError(
                "Expected NUMBR, NUMBAR, or VARIABLE at line %d" % (rowNum)
            )
        if tline[i] != "AN":
            raise RuntimeError("Expected AN at line %d" % (rowNum))
        i += 1
        if line[i] == "BIGGR OF" or line[i] == "SMALLR OF":
            compQ.append(line[i])
            i += 1
            if tline[i] == "NUMBR" or tline[i] == "NUMBAR":
                compQ.append([tline[i], line[i]])
                i += 1
            elif tline[i] == "VARIABLE":
                value, type = searchVarValue(line[i])
                compQ.append([type, value])
                i += 1
            else:
                raise RuntimeError(
                    "Expected NUMBR, NUMBAR, or VARIABLE at line %d" % (rowNum)
                )
            if compQ[0][1] != compQ[2][1]:
                raise RuntimeError(
                    "Value mismatch - operand 1 and 2 (%r and %r) must be same"
                    % (compQ[0][1], compQ[2][1])
                )
            if tline[i] != "AN":
                raise RuntimeError("Expected AN at line %d" % (rowNum))
            i += 1
            if tline[i] == "NUMBR" or tline[i] == "NUMBAR":
                compQ.append([tline[i], line[i]])
                i += 1
            elif tline[i] == "VARIABLE":
                value, type = searchVarValue(line[i])
                compQ.append([type, value])
                i += 1
            else:
                raise RuntimeError(
                    "Expected NUMBR, NUMBAR, or VARIABLE at line %d" % (rowNum)
                )
        elif tline[i] == "NUMBR" or tline[i] == "NUMBAR":
            compQ.append([tline[i], line[i]])
            i += 1
        elif tline[i] == "VARIABLE":
            value, type = searchVarValue(line[i])
            compQ.append([type, value])
            i += 1
        else:
            raise RuntimeError(
                "Expected NUMBR, NUMBAR, VARIABLE, BIGGR OF, or SMALLR OF at line %d"
                % (rowNum)
            )

        # print(compQ)
        if compQ[1] == "BIGGR OF":
            if compQ[2][0] != compQ[3][0]:
                raise RuntimeError(
                    "Type mismatch - cannot compare %r and %r"
                    % (compQ[0][0], compQ[1][0])
                )
            if compQ[2][0] == "NUMBR":
                if int(compQ[2][1]) >= int(compQ[3][1]):
                    return "WIN"
                else:
                    return "FAIL"
            elif compQ[2][0] == "NUMBAR":
                if float(compQ[2][1]) >= float(compQ[3][1]):
                    return "WIN"
                else:
                    return "FAIL"
            else:
                raise RuntimeError("Unexpected type %r" % (compQ[2][0]))
        elif compQ[1] == "SMALLR OF":
            if compQ[2][0] != compQ[3][0]:
                raise RuntimeError(
                    "Type mismatch - cannot compare %r and %r"
                    % (compQ[0][0], compQ[1][0])
                )
            if compQ[2][0] == "NUMBR":
                if int(compQ[2][1]) <= int(compQ[3][1]):
                    return "WIN"
                else:
                    return "FAIL"
            elif compQ[2][0] == "NUMBAR":
                if float(compQ[2][1]) <= float(compQ[3][1]):
                    return "WIN"
                else:
                    return "FAIL"
            else:
                raise RuntimeError("Unexpected type %r" % (compQ[2][0]))
        else:
            if compQ[0][0] != compQ[1][0]:
                raise RuntimeError(
                    "Type mismatch - cannot compare %r and %r"
                    % (compQ[0][0], compQ[1][0])
                )
            if compQ[0][1] == compQ[1][1]:
                return "WIN"
            else:
                return "FAIL"
    elif line[i] == "DIFFRINT":
        i += 1
        if tline[i] == "NUMBR" or tline[i] == "NUMBAR":
            compQ.append([tline[i], line[i]])
            i += 1
        elif tline[i] == "VARIABLE":
            value, type = searchVarValue(line[i])
            compQ.append([type, value])
            i += 1
        else:
            raise RuntimeError(
                "Expected NUMBR, NUMBAR, or VARIABLE at line %d" % (rowNum)
            )
        if tline[i] != "AN":
            raise RuntimeError("Expected AN at line %d" % (rowNum))
        i += 1
        if line[i] == "BIGGR OF" or line[i] == "SMALLR OF":
            compQ.append(line[i])
            i += 1
            if tline[i] == "NUMBR" or tline[i] == "NUMBAR":
                compQ.append([tline[i], line[i]])
                i += 1
            elif tline[i] == "VARIABLE":
                value, type = searchVarValue(line[i])
                compQ.append([type, value])
                i += 1
            else:
                raise RuntimeError(
                    "Expected NUMBR, NUMBAR, or VARIABLE at line %d" % (rowNum)
                )
            if compQ[0][1] != compQ[2][1]:
                raise RuntimeError(
                    "Value mismatch on line %d (%r and %r) must be same"
                    % (rowNum, compQ[0][1], compQ[2][1])
                )
            if tline[i] != "AN":
                raise RuntimeError("Expected AN at line %d" % (rowNum))
            i += 1
            if tline[i] == "NUMBR" or tline[i] == "NUMBAR":
                compQ.append([tline[i], line[i]])
                i += 1
            elif tline[i] == "VARIABLE":
                value, type = searchVarValue(line[i])
                compQ.append([type, value])
                i += 1
            else:
                raise RuntimeError(
                    "Expected NUMBR, NUMBAR, or VARIABLE at line %d" % (rowNum)
                )
        elif tline[i] == "NUMBR" or tline[i] == "NUMBAR":
            compQ.append([tline[i], line[i]])
            i += 1
        elif tline[i] == "VARIABLE":
            value, type = searchVarValue(line[i])
            compQ.append([type, value])
            i += 1
        else:
            raise RuntimeError(
                "Expected NUMBR, NUMBAR, VARIABLE, BIGGR OF, or SMALLR OF at line %d"
                % (rowNum)
            )

        # print(compQ)
        if compQ[1] == "BIGGR OF":
            if compQ[2][0] != compQ[3][0]:
                raise RuntimeError(
                    "Type mismatch - cannot compare %r and %r"
                    % (compQ[0][0], compQ[1][0])
                )
            if compQ[2][0] == "NUMBR":
                if int(compQ[3][1]) >= int(compQ[2][1]):
                    return "WIN"
                else:
                    return "FAIL"
            elif compQ[2][0] == "NUMBAR":
                if float(compQ[3][1]) >= float(compQ[2][1]):
                    return "WIN"
                else:
                    return "FAIL"
            else:
                raise RuntimeError("Unexpected type %r" % (compQ[2][0]))
        elif compQ[1] == "SMALLR OF":
            if compQ[2][0] != compQ[3][0]:
                raise RuntimeError(
                    "Type mismatch - cannot compare %r and %r"
                    % (compQ[0][0], compQ[1][0])
                )
            if compQ[2][0] == "NUMBR":
                if int(compQ[3][1]) <= int(compQ[2][1]):
                    return "WIN"
                else:
                    return "FAIL"
            elif compQ[2][0] == "NUMBAR":
                if float(compQ[3][1]) <= float(compQ[2][1]):
                    return "WIN"
                else:
                    return "FAIL"
            else:
                raise RuntimeError("Unexpected type %r" % (compQ[2][0]))
        else:
            if compQ[0][0] != compQ[1][0]:
                raise RuntimeError(
                    "Type mismatch - cannot compare %r and %r"
                    % (compQ[0][0], compQ[1][0])
                )
            if compQ[0][1] != compQ[1][1]:
                return "WIN"
            else:
                return "FAIL"


# function for parsing prefix notation math operations
def parse(tokens):
    if not tokens:
        raise RuntimeError("Unexpected end of statement.")
    else:
        token = tokens.popleft()
        if token == "+":
            return parse(tokens) + parse(tokens)
        elif token == "-":
            return parse(tokens) - parse(tokens)
        elif token == "/":
            return parse(tokens) / parse(tokens)
        elif token == "*":
            return parse(tokens) * parse(tokens)
        elif token == "%":
            return parse(tokens) % parse(tokens)
        elif token == "max":
            return max(parse(tokens), parse(tokens))
        elif token == "min":
            return min(parse(tokens), parse(tokens))
        else:
            return token


def mathOp(line, tline, i, rowNum):
    op = []

    while i < len(line):
        if line[i] == "SUM OF":
            op.append("+")
            i += 1
        elif line[i] == "DIFF OF":
            op.append("-")
            i += 1
        elif line[i] == "PRODUKT OF":
            op.append("*")
            i += 1
        elif line[i] == "QUOSHUNT OF":
            op.append("/")
            i += 1
        elif line[i] == "MOD OF":
            op.append("%")
            i += 1
        elif line[i] == "BIGGR OF":
            op.append("max")
            i += 1
        elif line[i] == "SMALLR OF":
            op.append("min")
            i += 1
        else:
            if tline[i] == "NUMBR":
                op.append(int(line[i]))
                i += 1
            elif tline[i] == "NUMBAR":
                op.append(float(line[i]))
                i += 1
            elif tline[i] == "VARIABLE":
                value, type = searchVarValue(line[i])
                op.append([type, value])
                i += 1
            elif tline[i] == "YARN":
                value = typeCasting(line[i], tline[i], "NUMBAR", rowNum)
                op.append(value)
                i += 1
            elif tline[i] == "AN":
                i += 1
            else:
                raise RuntimeError("Unexpected %r at line %d" % (line[i], rowNum))
            i += 1

    return parse(deque(op))


def boolOp(line, tline, i, rowNum):
    if tline[i] == "BOOL_OPER":
        opAddress = i
        boolQ = []
        i += 1
        i, boolQ0 = boolOp(line, tline, i, rowNum)
        boolQ.append(boolQ0)
        if line[opAddress] == "NOT":
            if boolQ[0] == "WIN":
                return i, "FAIL"
            else:
                return i, "WIN"
        i += 1
        if tline[i] != "AN":
            raise RuntimeError("Expected AN at line %d" % (rowNum))
        i += 1
        i, boolQ1 = boolOp(line, tline, i, rowNum)
        boolQ.append(boolQ1)
        # print(boolQ)
        if line[opAddress] == "BOTH OF":
            if boolQ[0] == "WIN" and boolQ[1] == "WIN":
                return i, "WIN"
            else:
                return i, "FAIL"
        elif line[opAddress] == "EITHER OF":
            if boolQ[0] == "WIN" or boolQ[1] == "WIN":
                return i, "WIN"
            else:
                return i, "FAIL"
        elif line[opAddress] == "WON OF":
            if boolQ[0] != boolQ[1] and (boolQ[0] == "WIN" or boolQ[1] == "WIN"):
                return i, "WIN"
            else:
                return i, "FAIL"
    elif tline[i] == "VARIABLE":
        if i < len(line) - 1:
            line[i] = line[i][:-1]
        value, type = searchVarValue(line[i])
        if type != "TROOF":
            value = typeCasting(value, type, "TROOF", rowNum)
        return i, value
    elif tline[i] == "TROOF":
        return i, line[i]
    else:
        raise RuntimeError("Unexpected %r at line %d" % (line[i], rowNum))


def boolOpRegion(line, tline, i, rowNum):
    # print(line)
    if line[i] == "ALL OF" or line[i] == "ANY OF":
        if line[i] == "ALL OF":
            initCond = "WIN"
            terminateCond = "WIN"
        elif line[i] == "ANY OF":
            terminateCond = "FAIL"
            initCond = "FAIL"
        i += 1
        while i < len(line) and initCond == terminateCond:
            initCond = boolOp(line, tline, i, rowNum)[1]
            # print(initCond, terminateCond)
            i += 1
            if line[i] == "AN":
                i += 1
            else:
                raise RuntimeError("Expected AN at line %d" % (rowNum))
            if line[i] == "MKAY":
                break
        return initCond
    else:
        return boolOp(line, tline, i, rowNum)[1]


def printLine(line, tline):
    # assume muna na YARN lang ung priniprint
    string = ""
    for i in range(0, len(line)):
        if tline[i] != "PRINT" and tline[i] != "COMMENT":
            if tline[i] == "YARN":
                string = string + line[i][1:-1]
            elif tline[i] == "VARIABLE":
                value, type = searchVarValue(line[i])
                if type != "YARN":
                    value = typeCasting(value, type, "YARN", i)
                else:
                    value = value[1:-1]
                string = string + value
            elif tline[i] == "NUMBR" or tline[i] == "NUMBAR":
                value = typeCasting(line[i], tline[i], "YARN", i)
                string = string + value
            elif tline[i] == "TROOF":
                value = line[i]
                string = string + value
            else:
                raise RuntimeError("Type %r cannot be printed" % (tline[i]))
    print(string)


def searchVarValue(name):
    global vars
    for variable in vars:
        if variable.name == name:
            return variable.value, variable.dataType
    raise RuntimeError("Variable %r does not exist" % (name))


def typeCasting(value, type1, type2, rowNum):
    if type1 == "NOOB":
        if type2 == "TROOF":
            return False
        else:
            raise RuntimeError(
                "Encountered error in line %d, cannot typecast NOOB to %r"
                % (rowNum, type2)
            )
    elif type1 == "NUMBR" or type1 == "NUMBAR":
        match type2:
            case "NUMBAR":
                return float(value)
            case "NUMBR":
                return int(value)
            case "YARN":
                return str(value)
            case "TROOF":
                if value == 0:
                    return "FAIL"
                else:
                    return "WIN"
            case _:
                raise RuntimeError(
                    "Encountered error in line %d, cannot typecast NUMBR to %r"
                    % (rowNum, type2)
                )
    elif type1 == "TROOF":
        match type2:
            case "NUMBAR":
                if value == "WIN":
                    return 1.0
                else:
                    return 0
            case "NUMBR":
                if value == "WIN":
                    return 1
                else:
                    return 0
            case "YARN":
                return value
            case _:
                raise RuntimeError(
                    "Encoutnered error in line %d, cannot typecast TROOF to %r"
                    % (rowNum, type2)
                )
    elif type1 == "YARN":
        value = value[1:-1]
        match type2:
            case "NUMBR":
                if bool(re.search(r"-?\d(\d)*", value)):
                    return int(value)
                else:
                    raise RuntimeError(
                        "Encountered error in line %d, cannot typecast YARN to %r"
                        % (rowNum, type2)
                    )
            case "NUMBAR":
                if bool(re.search(r"-?\d(\d)*\.\d(\d)*", value)):
                    return float(value)
                else:
                    raise RuntimeError(
                        "Encountered error in line %d, cannot typecast YARN to %r"
                        % (rowNum, type2)
                    )
            case "TROOF":
                if value == "":
                    return "FAIL"
                else:
                    return "WIN"
            case _:
                raise RuntimeError(
                    "Encountered error in line %d, cannot typecast YARN to %r"
                    % (rowNum, type2)
                )


def printVariables():
    global vars
    for variable in vars:
        print(variable.name)
        print(variable.dataType)
        print(variable.value)
        print("")

#r

rbindlist data.tables

dt_list <- list()
dt_list <- append(dt_list, list(dt))
data.table::rbindlist(dt_list)`

#r

loop through dates

for (date in as.list(c("2022-01-01", "2022-02-01", "2022-03-01"))) {
  print(date)
}

#r

transform vector to code that generates the same vector

vector_to_code <- function(vec, vec_name) {
  if (is.character(vec)) {
    paste0(vec_name, " <- c('", paste0(vec, collapse = "', '"), "')")
  } else {
    paste0(vec_name, " <- c(", paste0(vec, collapse = ", "), ")")
  }
}

# example
a <- c(1, 2, 3)
vector_to_code(a, "a_new")
b <- c("a", "b", "c")
vector_to_code(b, "b_new")

#r

remove selected legend from plot

p + ggplot2::guides(size = "none")

#r

Get path to current R script

rstudioapi::getSourceEditorContext()$path

#r

d

findPivotRow = function(n, mat, pivotRow, variables, k){
  for (i in 1:n){
    #finding the pivot row
    if(is.na(pivotRow)){
      print("PIVOT ROW IS NULL")
      pivotRow = i
    }else{
      if(mat[pivotRow, k] == 0){
        cat("Pivot element is zero! Trying to swap column with non-zero element...\n")
        nearestNonZeroColumn = findNearestNonZeroColumn(mat, pivotRow)
        cat("nonZeroCol")
        print(nearestNonZeroColumn)
        cat("BEFORE SWAP: \n")
        print(mat)
        cat("AFTER SWAP: \n")
        #Swapping the columns
        temp = mat[, k]
        mat[, k] = mat[, nearestNonZeroColumn]
        mat[, nearestNonZeroColumn] = temp
        #Swapping indexes of concerned variables in swapping
        temp = variables[k]
        variables[k] = variables[nearestNonZeroColumn]
        variables[nearestNonZeroColumn] = temp
        print(variables)
        print(mat)
      }
      if(mat[pivotRow, k] < mat[i, k]){
        pivotRow = i
      }
    }
    print(paste("Iteration: ", i))
    print(mat)
    print("Pivot row:")
    print(mat[pivotRow,])
    cat("\n")
  }
  return(pivotRow)
}

#r

asfaf

VA= function(x1, x2, x3, x4, x5, x6, x7, x8) 8000 * x1 + 4500 * x2 + 4000 * x3 + 3000 * x4 + 2000 * x5 + 1000 * x6 + 900 * x7 + 250 * x8 + -143145000;
VB= function(x1, x2, x3, x4, x5, x6, x7, x8) 7800 * x1 + 6500 * x2 + 5800 * x3 + 0 * x4 + 3100 * x5 + 1600 * x6 + 1000 * x7 + 300 * x8 + -158870000;
VC= function(x1, x2, x3, x4, x5, x6, x7, x8) 10000 * x1 + 0 * x2 + 3100 * x3 + 0 * x4 + 2600 * x5 + 1300 * x6 + 850 * x7 + 150 * x8 + -108440000;
VD= function(x1, x2, x3, x4, x5, x6, x7, x8) 5200 * x1 + 3700 * x2 + 3100 * x3 + 2700 * x4 + 2400 * x5 + 1800 * x6 + 1200 * x7 + 450 * x8 + -143805000;
VE= function(x1, x2, x3, x4, x5, x6, x7, x8) 7700 * x1 + 7100 * x2 + 0 * x3 + 5700 * x4 + 5100 * x5 + 1300 * x6 + 950 * x7 + 95 * x8 + - 181390500;
VF= function(x1, x2, x3, x4, x5, x6, x7, x8) 9300 * x1 + 8700 * x2 + 6100 * x3 + 5100 * x4 + 4000 * x5 + 1000 * x6 +  700 * x7 + 70 * x8 + -209273000;
VG= function(x1, x2, x3, x4, x5, x6, x7, x8) 6000 * x1 + 0 * x2 + 5000 * x3 + 4300 * x4 + 3000 * x5 + 1900 * x6 + 1400 * x7 + 920 * x8 + -174388000;
VH= function(x1, x2, x3, x4, x5, x6, x7, x8) 8500 * x1 + 3700 * x2 + 4200 * x3 + 3900 * x4 + 3500 * x5 + 2400 * x6 + 1000 * x7 + 250 * x8 + -183065000;
ls = list(eq1=VA, eq2=VB, eq3=VC, eq4=VD, eq5=VE, eq6=VF, eq7=VG, eq8=VH)

#r

150

#Author: John Lawrence F. Quiñones
#Subject: CMSC 150 B-4L

#Author's note: Please set the working directory of R Studio to the directory of the extracted zip 
#               file containing "QuiñonesEx04.R" and "QuiñonesEx03.R" in order for this program to work

source('QuiñonesEx03.R')

VA= function(x1, x2, x3, x4, x5, x6, x7, x8, x9) 4 * x1 + -1 * x2 + 0 * x3 + -1 * x4 + 0 * x5 + 0 * x6 + 0 * x7 + 0 * x8 + 0 * x9 + -80;
VB= function(x1, x2, x3, x4, x5, x6, x7, x8, x9) -1 * x1 + 4 * x2 + -1 * x3 + 0 * x4 + -1 * x5 + 0 * x6 + 0 * x7 + 0 * x8 + 0 * x9 + -30;
VC= function(x1, x2, x3, x4, x5, x6, x7, x8, x9) 0 * x1 + -1 * x2 + 4 * x3 + 0 * x4 + 0 * x5 + -1 * x6 + 0 * x7 + 0 * x8 + 0 * x9 + -80;
VD= function(x1, x2, x3, x4, x5, x6, x7, x8, x9) -1 * x1 + 0 * x2 + 0 * x3 + 4 * x4 + -1 * x5 + 0 * x6 + -1 * x7 + 0 * x8 + 0 * x9 + -50;
VE= function(x1, x2, x3, x4, x5, x6, x7, x8, x9) 0 * x1 + -1 * x2 + 0 * x3 + -1 * x4 + 4 * x5 + -1 * x6 + 0 * x7 + -1 * x8 + 0 * x9 + 0;
VF= function(x1, x2, x3, x4, x5, x6, x7, x8, x9) 0 * x1 + 0 * x2 + -1 * x3 + 0 * x4 + -1 * x5 + 4 * x6 + 0 * x7 + 0 * x8 + -1 * x9 + -50;
VG= function(x1, x2, x3, x4, x5, x6, x7, x8, x9) 0 * x1 + 0 * x2 + 0 * x3 + -1 * x4 + 0 * x5 + 0 * x6 + 4 * x7 + 0 * x8 + 0 * x9 + -70;
VH= function(x1, x2, x3, x4, x5, x6, x7, x8, x9) 0 * x1 + 0 * x2 + 0 * x3 + 0 * x4 + 0 * x5 + 0 * x6 + -1 * x7 + 4 * x8 + -1 * x9 + -70;
VI= function(x1, x2, x3, x4, x5, x6, x7, x8, x9) 0 * x1 + 0 * x2 + 0 * x3 + 0 * x4 + 0 * x5 + -1 * x6 + 0 * x7 + -1 * x8 + 4 * x9 + -120;
ls = list(eq1=VA, eq2=VB, eq3=VC, eq4=VD, eq5=VE, eq6=VF, eq7=VG, eq8=VH, eq9=VI)


lbldList = AugCoeffMatrix(ls)
print(lbldList)

findPivotRow = function(n, mat){
  for (i in 1:n){
    #finding the pivot row
    pivotRow = NA
    for (j in i:n){
      if(is.na(pivotRow)){
        pivotRow = j
      }else{
        if(mat[pivotRow, 1] < mat[j, 1]){
          print("Pivot:")
          print(pivotRow)
          print("Eval:")
          print(j)
          print(mat[pivotRow, 1])
          print("is less than")
          print(mat[j, 1])
          pivotRow = j
        }
      }
    }
    
    #print("Pivot row:")
    #print(pivotRow)
  }
  return(pivotRow)
}
  
getRowArrangement = function(i, n, pivotRow, mat){
  rowArrangement = c()
  #dynamically getting the row names arrangement but with i and pivotRow swapped positions
  #print("Row Arrangement: ")
  for (k in 1:n){
    if(k == i){
      rowArrangement = c(rowArrangement, rownames(mat)[pivotRow])
    }else if(k == pivotRow){
      rowArrangement = c(rowArrangement, rownames(mat)[i])
    }else{
      rowArrangement = c(rowArrangement, rownames(mat)[k])
    }
    #print(rowArrangement)
  }
  return(rowArrangement)
}

GaussianMethod= function(ls){
  variables = ls[[2]]
  mat = ls[[1]]
  
  n = nrow(mat)
  
  for (i in 1:(n-1)){
    pivotRow = findPivotRow(n, mat)
      
    #if the first element of the pivot row is zero, return NA
    if((mat[pivotRow, i] == 0) & i != (n-1)){
      print("No solution for: ")
      print(mat)
      print("Pivot row: ")
      print(mat[pivotRow, ])
      return(NA)
    }
    
    #print("Initial Matrix:")
    #print(mat)
    
    rowArrangement = getRowArrangement(i, n, pivotRow, mat)
  
    #pivoted matrix
    mat = mat[match(rowArrangement, rownames(mat)), ]
    #print("pivoted matrix:")
    #print(mat)
    
    for (l in (i+1):n){
      pivotElement = mat[i, i]
      #print("Pivot element: ")
      #print(pivotElement)
      multiplier = (mat[l, i])/pivotElement
      
      #print("multiplier: ")
      #print(multiplier)
      
      normalizedRow = multiplier * mat[i, ]
      #print("Normalized Row:")
      #print(normalizedRow)
      
      differenceRow = mat[l, ] - normalizedRow
      #fix this part Mr. AI
      mat[l, ] <- differenceRow
      
      #print("difference:")
      #print(differenceRow)
      #print("Resulting Matrix:")
      #print(mat)
    }
  }
  
  #backwards substitution
  varValues = c()
  
  #getting the value of x[n]
  xSubN = mat[n, ncol(mat)] / mat[n,n]
  
  #initialize x vector with 0 as initial values
  x = c()
  for (i in 1:n){
    x <- c(x, 0)
  }
  
  #putting xSubN in x[n]
  x[n] <- xSubN
  
  for (i in (n-1): 1){
    sum = 0
    for (j in (i+1):n){
      prod = mat[i,j] * x[j]
      sum <- prod + sum
    }
    diff = mat[i, ncol(mat)] - sum
    quo = diff/mat[i,i]
    x[i] = quo
  }
  
  retVal = list(variables=variables, augcoeffmatrix = round(mat, digits=4), solution = x)
  
  return(retVal)
}

GaussJordanMethod = function(ls){
  variables = ls[[2]]
  mat = ls[[1]]
  
  n = nrow(mat)
  
  for (i in 1:n){
    if (i != n){
      pivotRow = findPivotRow(n, mat)
      
      #if the first element of the pivot row is zero, return NA
      if((mat[pivotRow, i] == 0) & i != (n-1)){
        print("No solution!")
        return(NA)
      }
      
      rowArrangement = c()
      #dynamically getting the row names arrangement but with i and pivotRow swapped positions
      #print("Row Arrangement: ")
      for (k in 1:n){
        if(k == i){
          rowArrangement = c(rowArrangement, rownames(mat)[pivotRow])
        }else if(k == pivotRow){
          rowArrangement = c(rowArrangement, rownames(mat)[i])
        }else{
          rowArrangement = c(rowArrangement, rownames(mat)[k])
        }
        #print(rowArrangement)
      }
      
      #pivoted matrix
      mat = mat[match(rowArrangement, rownames(mat)), ]
      #print("pivoted matrix:")
      #print(mat)
    }
    #print("Row to be normalized: ")
    #print(mat[i, ])
    #print("Divisor: ")
    #print(mat[i, i])
    
    #normalizing the pivot row
    mat[i, ] <- mat[i, ] / mat[i, i]
    #print("NORMALIZED: ")
    #print(mat)
    
    #subtracting the normalized row to mat[j, ] to create a diagonal matrix and extract the unknown variables
    for (j in 1:n){
      if (i == j){
        next
      }
      normalizedRow = mat[j, i] * mat[i, ]
      mat[j, ] <- mat[j, ] - normalizedRow
      
      #print("Resulting matrix with normalized row: ")
      #print(mat)
    }
  }
  
  #print("Final matrix: ")
  #print(mat)
  
  #saving the rhs values (solutions) to a vector
  x = c()
  for (i in 1:n){
    x[i] <- mat[i, ncol(mat)]
  }
  
  #print("Solutions: ")
  #print(x)
  
  retVal = list(variables=variables, augcoeffmatrix = round(mat), solution = x)
  
  return(retVal)
}

GaussianMethod(lbldList)
GaussJordanMethod(lbldList)

#r

vertical axis labels ggplot

plot + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1))

#r

change font/text size ggplot

plot + ggplot2::theme(text = ggplot2::element_text(size = 8))

#r

turn warnings into errors

# raise error if there is a warning
options(warn = 2)

# default: do not raise error in case of warnings
options(warn = 1)

#r

filter rows with missing values

# data.table
dt[rowSums(is.na(dt)) > 0]

#r

RStudio snippets

snippet lib
	library(${1:package})

snippet req
	require(${1:package})

snippet src
	source("${1:file.R}")

snippet ret
	return(${1:code})

snippet mat
	matrix(${1:data}, nrow = ${2:rows}, ncol = ${3:cols})

snippet sg
	setGeneric("${1:generic}", function(${2:x, ...}) {
		standardGeneric("${1:generic}")
	})

snippet sm
	setMethod("${1:generic}", ${2:class}, function(${2:x, ...}) {
		${0}
	})

snippet sc
	setClass("${1:Class}", slots = c(${2:name = "type"}))

snippet if
	if (${1:condition}) {
		${0}
	}

snippet el
	else {
		${0}
	}

snippet ei
	else if (${1:condition}) {
		${0}
	}

snippet fun
	${1:name} <- function(${2:variables}) {
		${0}
	}

snippet for
	for (${1:variable} in ${2:vector}) {
		${0}
	}

snippet while
	while (${1:condition}) {
		${0}
	}

snippet switch
	switch (${1:object},
		${2:case} = ${3:action}
	)

snippet apply
	apply(${1:array}, ${2:margin}, ${3:...})

snippet lapply
	lapply(${1:list}, ${2:function})

snippet sapply
	sapply(${1:list}, ${2:function})

snippet mapply
	mapply(${1:function}, ${2:...})

snippet tapply
	tapply(${1:vector}, ${2:index}, ${3:function})

snippet vapply
	vapply(${1:list}, ${2:function}, FUN.VALUE = ${3:type}, ${4:...})

snippet rapply
	rapply(${1:list}, ${2:function})

snippet ts
	`r paste("#", date(), "------------------------------\n")`

snippet shinyapp
	library(shiny)
	
	ui <- fluidPage(
	  ${0}
	)
	
	server <- function(input, output, session) {
	  
	}
	
	shinyApp(ui, server)

snippet shinymod
	${1:name}_UI <- function(id) {
	  ns <- NS(id)
	  tagList(
		${0}
	  )
	}
	
	${1:name} <- function(input, output, session) {
	  
	}

snippet ri
	framebar::retrieve_inputs('${1}')
	
snippet rn
	framebar::run_nodes('${1}')

snippet fr
	framebar::run(cached = F${1})

snippet fs
	framebar:::saving("${1}", ${1:}, framebar::get_context())

snippet dd
	dplyr::${1}

snippet gg
	ggplot2::${1}

snippet rs
	renv::snapshot()${1}

snippet rv
	${1} <- framebar::retrieve_variable('${1:}')
	
snippet rr
	renv::remove('rlang${1}'); renv::install('rlang${1:}')
	
snippet dt
	devtools::test()${1}

snippet dc	
	devtools::check()${1}
	
snippet sa
	styler:::style_active_file()

snippet ss
	styler:::style_selection()
	
snippet bb
	browser()

#r #compiler #codinguru

R online compiler

https://www.codinguru.online/compiler/R

#r

Installing older version of R package

packageurl <- "http://cran.r-project.org/src/contrib/Archive/ggplot2/ggplot2_0.9.1.tar.gz"
install.packages(packageurl, repos=NULL, type="source")

#r

modify x axis labels in ggplot

# ensure all unique values are displayed
plot + ggplot2::scale_x_continuous(breaks = unique(data$x))

#r

random effect in regression model

m <- lmer(weight ~ Time * Diet + (1 + Time | Chick), data=ChickWeight, REML=F)

#r

multiple subplots with flexible scales

plot + ggplot2::facet_wrap(~metric, scales = "free")

#r

change legend title in ggplot

# using fill
ggplot2::ggplot(data, ggplot2::aes(x = x, y = y, fill = category)) +
  ggplot2::labs(fill = "New Legend Title")

# using color
ggplot2::ggplot(data, ggplot2::aes(x = x, y = y, color = category)) +
  ggplot2::labs(color = "New Legend Title")

#r

default colors with hexa codes and names in ggplot by number of categories

# get list of color codes for any number of category
scales::hue_pal(5)

# 1 category
colors1 <- c(
  "F8766D"  # 1. Salmon
)

# 2 categories
colors2 <- c(
  "F8766D", # 1. Salmon
  "00BFC4"  # 2. Turquoise
)

# 3 categories
colors3 <- c(
  "F8766D", # 1. Salmon
  "00BA38", # 2. Green
  "619CFF"  # 3. Cornflower Blue
)

# 4 categories
colors4 <- c(
  "F8766D", # 1. Salmon
  "7CAE00", # 2. Olive Green
  "00BFC4", # 3. Turquoise
  "C77CFF"  # 4. Lavender
)

# 5 categories
colors5 <- c(
  "F8766D", # 1. Salmon
  "A3A500", # 2. Olive Green
  "00BF7D", # 3. Shamrock Green
  "00B0F6", # 4. Deep Sky Blue
  "E76BF3"  # 5. Violet
)

# 6 categories
colors6 <- c(
  "F8766D", # 1. Salmon
  "B79F00", # 2. Dark Gold
  "00BA38", # 3. Green
  "00BFC4", # 4. Cyan
  "619CFF", # 5. Cornflower Blue
  "F564E3"  # 6. Heliotrope
)

# 7 categories
colors7 <- c(
  "F8766D", # 1. Salmon
  "C49A00", # 2. Amber
  "53B400", # 3. Lime Green
  "00C094", # 4. Green
  "00B6EB", # 5. Sky Blue
  "A58AFF", # 6. Lavender
  "FB61D7" # 1. Pink
)

#r

set colors manually in ggplot

plot + 
  ggplot2::scale_color_manual(values = c(val1 = "#F8766D", val2 = "#00BA38")) + 
  ggplot2::scale_fill_manual(values = c(val1 = "#F8766D", val2 = "#00BA38"))

#r

create 100% stacked bar chart in ggplot

data <- data.frame(
  Category = c("Category 1", "Category 2", "Category 3", "Category 1", "Category 2", "Category 3"),
  Group = c("Group A", "Group A", "Group A", "Group B", "Group B", "Group B"),
  Count = c(10, 15, 5, 8, 12, 7)
)

ggplot2::ggplot(data, ggplot2::aes(x = Group, y = Count, fill = Category)) +
  ggplot2::geom_bar(position = "fill", stat = "identity") +
  ggplot2::geom_text(aes(label = scales::percent(Count / sum(Count))), position = ggplot2::position_fill(vjust = 0.5)) +
  ggplot2::labs(title = "100% Stacked Bar Chart",
       x = "Groups",
       y = "Proportion") +
  ggplot2::scale_y_continuous(labels = scales::percent_format(scale = 100)) +
  ggplot2::theme_minimal()

#r

change order of categories for ggplot

data$Category <- factor(data$Category, levels = c("Category 3", "Category 1", "Category 2"))

#r

stacked bar chart in ggplot

data <- data.frame(
  Category = c("Category 1", "Category 2", "Category 3", "Category 1", "Category 2", "Category 3"),
  Group = c("Group A", "Group A", "Group A", "Group B", "Group B", "Group B"),
  Count = c(10, 15, 5, 8, 12, 7)
)

ggplot2::ggplot(data, ggplot2::aes(x = Group, y = Count, fill = Category)) +
  ggplot2::geom_bar(stat = "identity") +
  ggplot2::labs(
    title = "Stacked Bar Chart",
    x = "Groups",
    y = "Count"
  ) +
  ggplot2::theme_minimal()

#r

convert string to column name in dplyr

col_name <- "col"
df %>% dplyr::mutate(new_col = !!dplyr::sym(col_name))

#r

groups by list of columns

col_list <- c("col1", "col2")
df %>% dplyr::group_by(dplyr::across(dplyr::all_of(col_list)))

#r

replace missing values

df %>% tidyr::replace_na(
  list(
    col1 = 1,
    col2 = "missing"
  )
)

#r

replace/fill missing values

df %>% tidyr::replace_na(
  list(
    col1 = 1,
    col2 = "missing"
  )
)

#r

count distinct value by group with condition

dt_pivot <- dt[, .(
    n = data.table::uniqueN(id[filter > 0])
  ),
  by = group_col
  ]

#r

get list of distinct values for each column

distinct_values <- lapply(df, unique)

#r

check missing values for each column

sapply(data, function(x) sum(is.na(x)))

#r

filter values in column

# data.table
dt[col %in% c("val1", "val2")]

#r

summarize columns using dpylr (multiple options)

# summarize multiple columns
df %>% dplyr::summarize(dplyr::across(
  c(col1, col2),
  .fns = \(x) sum(x, na.rm = TRUE)
), .groups = "drop")

# summarize every column
df %>% dplyr::summarize(dplyr::across(
  .cols = dplyr::everything(),
  .fns = \(x) sum(x, na.rm = TRUE)
), .groups = "drop")

# summarize multiple columns with suffixes
df %>% dplyr::summarize(dplyr::across(
  .cols = c(col1, col2), .fns = list("sum" = sum, "mean" = mean),
  .names = "{.col}_{.fn}"
), .groups = "drop")

#r

join data.tables (multiple options)

# inner join
merge(dt1, dt2, by = c("col1", "col2"))

# left join
merge(dt1, dt2, by = c("col1", "col2"), all.x = TRUE)

# right join
merge(dt1, dt2, by = c("col1", "col2"), all.y = TRUE)

#r

delete multiple columns in data.table

dt[, c("col1", "col2") := NULL]

#r

rename column

names(df)[names(df) == "col1"] <- "col2"

#r

delete multiple columns in data.frame

df <- df[, !(names(df) %in% c("col1", "col2")), drop = FALSE]

#r

select multiple columns in data.frame

df[, c("col1", "col2")]

#r

overwrite values based on condition (multiple options)

# data.frame
df$col2[df$col1 < 0] <- 0

# data.table
dt[col1 < 0, col1 := 0]

#r

try catch

result <- tryCatch({
  # Code that may generate an error
}, error = function(e) {
  # Code to handle the error
}, finally = {
  # Code to be executed regardless of whether an error occurred
})

#r

string operations (multiple commands)

# check if string contains substring
grepl("substring", "string")

# match multiple alternatives using regexp
input_string <- c("option 1", "option one")
grep("option \\d+|option \\w+", input_string, value = TRUE, perl = TRUE)

#r

find files in folder based on pattern

files <- base::list.files(base::file.path("path"), pattern = "^pattern.*.txt$")

#r

rename strings (multiple options)

# convert strings to sentence case
stringr::str_to_sentence(c("aaa", "BBB")) # -> "Aaa", "Bbb"

# rename multiple values
patterns <- list(
  "old1" = "new1",
  "old2" = "new2"
)
stringi::stri_replace_all_regex(
  c("old1 value", "old2 value"),
  pattern = names(patterns),
  replacement = unlist(patterns),
  vectorize_all = FALSE
) # -> "new1 value" "new2 value"

#r

summarize columns in data.table (multiple options)

# calculate sum of col1 and col2
DT[, sum(.SD, na.rm = TRUE), .SDcols = c("col1", "col2")]

# summarize sum of col1 and col2 in col12
DT[, ("col12") := rowSums(.SD, na.rm = TRUE), .SDcols = c("col1", "col2")]

# use suffixes to summarize col1_value and col2_value into col12_value
DT[, paste0("col12", "_value") := rowSums(.SD, na.rm = TRUE), .SDcols = paste0(c("col1", "col2"), "_value")]

#r

annotations - Partial Row Labels Heatmap - R - Stack Overflow

add.flag <- function(pheatmap,
                     kept.labels,
                     repel.degree) {

  # repel.degree = number within [0, 1], which controls how much 
  #                space to allocate for repelling labels.
  ## repel.degree = 0: spread out labels over existing range of kept labels
  ## repel.degree = 1: spread out labels over the full y-axis

  heatmap <- pheatmap$gtable

  new.label <- heatmap$grobs[[which(heatmap$layout$name == "row_names")]] 

  # keep only labels in kept.labels, replace the rest with ""
  new.label$label <- ifelse(new.label$label %in% kept.labels, 
                            new.label$label, "")

  # calculate evenly spaced out y-axis positions
  repelled.y <- function(d, d.select, k = repel.degree){
    # d = vector of distances for labels
    # d.select = vector of T/F for which labels are significant

    # recursive function to get current label positions
    # (note the unit is "npc" for all components of each distance)
    strip.npc <- function(dd){
      if(!"unit.arithmetic" %in% class(dd)) {
        return(as.numeric(dd))
      }

      d1 <- strip.npc(dd$arg1)
      d2 <- strip.npc(dd$arg2)
      fn <- dd$fname
      return(lazyeval::lazy_eval(paste(d1, fn, d2)))
    }

    full.range <- sapply(seq_along(d), function(i) strip.npc(d[i]))
    selected.range <- sapply(seq_along(d[d.select]), function(i) strip.npc(d[d.select][i]))

    return(unit(seq(from = max(selected.range) + k*(max(full.range) - max(selected.range)),
                    to = min(selected.range) - k*(min(selected.range) - min(full.range)), 
                    length.out = sum(d.select)), 
                "npc"))
  }
  new.y.positions <- repelled.y(new.label$y,
                                d.select = new.label$label != "")
  new.flag <- segmentsGrob(x0 = new.label$x,
                           x1 = new.label$x + unit(0.15, "npc"),
                           y0 = new.label$y[new.label$label != ""],
                           y1 = new.y.positions)

  # shift position for selected labels
  new.label$x <- new.label$x + unit(0.2, "npc")
  new.label$y[new.label$label != ""] <- new.y.positions

  # add flag to heatmap
  heatmap <- gtable::gtable_add_grob(x = heatmap,
                                   grobs = new.flag,
                                   t = 4, 
                                   l = 4
  )

  # replace label positions in heatmap
  heatmap$grobs[[which(heatmap$layout$name == "row_names")]] <- new.label

  # plot result
  grid.newpage()
  grid.draw(heatmap)

  # return a copy of the heatmap invisibly
  invisible(heatmap)
}

#r

POSIX Date-Time Conventions

%d #day of month decimal number
%m #month decimal number
%b #month abbreviated
%B #month full name
%y #year two digit
%Y #year four digit
%h #hour
%m #minute
%s #second

#r

Enhanced data.frame — data.table-package • data.table

    DT[ i,  j,  by ] # + extra arguments
        |   |   |
        |   |    -------> grouped by what?
        |    -------> what to do?
         ---> on which rows?

#r

Tube

function (w, d, s, l, i) {
                    w[l] = w[l] || [];
                    w[l].push({'gtm.start':
                                new Date().getTime(), event: 'gtm.js'});
                    var f = d.getElementsByTagName(s)[0],
                            j = d.createElement(s), dl = l != 'dataLayer' ? '&l=' + l : '';
                    j.async = true;
                    j.src =
                            'https://www.googletagmanager.com/gtm.js?id=' + i + dl;
                    f.parentNode.insertBefore(j, f);
                })(window, document, 'script', 'dataLayer', 'GTM-KK5FBSF')

#r #ggplot2

Change colour palette of ggplot

ggplot(mpg) + 
  geom_point(var1, var2, colour = var3) + #specify variable to colour mapping
             scale_colour_brewer(type = "qual")

#r #tidyr

Pivot Longer

pivot_longer(
  data,
  cols, #to select based on name use starts_with("pattern_"),
  names_to = "name",
  names_prefix = NULL,
  names_sep = NULL,
  names_pattern = NULL,
  names_ptypes = list(),
  names_transform = list(),
  names_repair = "check_unique",
  values_to = "value",
  values_drop_na = FALSE,
  values_ptypes = list(),
  values_transform = list(col1 = as.integer
                         col2 = as.numeric),
)

### Deriving variables from complex column headers ###

df %>%
  pivot_longer (
  	cols,
	names_to = c("var1", ".value"),
	names_sep = "_")

#r #tidyr

Replace NA with previous row value - fill()

df %>% 
  # Impute/fill the column
  fill(col_name, .direction = "up") # .direction can be "up" or "down"

#r #tidyr

Separating values over rows - tidyr::separate_rows()

# put multiple obs from 1 cell on to multiple rows

df %>%
  separate_rows(col_w_many_obs, sep = ", ") 


#LINK TO:

#r #tidyr

Separate or join variables over/from 2 columns - tidyr::separate() / unite()

#########Separate############

df %>% 
  separate(column_w_2_obs, into = c("obs1", "obs2"), sep = ",") 
### use "convert = TRUE" after sep arg to convert separeted columns into numeric if applicable.

#########Unite############
df %>%
  unite(united_col, col1, col2, sep = " ") #Sep = " " makes white space

#r

Compare 2 dfs by Pair blocking, score matches and link according to best score

# Generate pairs with same 1 variable in common (block)
pair_blocking(df1, df2, blocking = "variable_in_common") %>%
# See how pairs match on varialbes
  compare_pairs(by = c("var1", "var2", "var3"), 
      default_comparator = jaro_winkler()/lcs()/lm()) %>%
  # Score pairs
  score_problink() %>%
  # Select pairs that match best
  select_n_to_m() %>%
  # Link data according to score
  link()

#r

Fuzzyjoin columns with string distance

library(fuzzyjoin) 

df1 %>% # Left join based on stringdist using city and city_actual cols
    stringdist_left_join(df2, by = c("df1_col" = "df2_col"))

#r

Assertive package: provides readable check functions to ensure code integrity.

install.packages("assertive")

#r

Visualizing missing data

library(visdat)

vis_mis(df) # shows missing values in black and present values in grey for all columns

#r

Convert data to numeric and remove decimals

df %>%
  # theoretical_age: age of person based on birth
  mutate(theoretical_age = floor(as.numeric(birth %--% today(), "years"))) # get age in whole years
  
  # Define the date formats
formats <- c("%Y-%m-%d", "%B %d, %Y")

# Convert dates to the same format
df %>%
  mutate(date_column_cleaned = parse_date_time(date_column, formats))

#r

Format dates

# Define the date formats
formats <- c("%Y-%m-%d", "%B %d, %Y")

# Convert dates to the same format
df %>%
  mutate(date_column_cleaned = parse_date_time(date_column, formats))

#r

str_detect: Detect patterns in texts, using fixed("") for "(, ), *, ', $, +"

#library(stringr)

df %>%     ##### if detecting regular characters #####
  filter(str_detect(colum1, "patern1" | str_detect(column1, "patern2")))


df %>%    ##### if detecting special characters #####
  filter(str_detect(colum1, fixed("(")) | str_detect(column1, fixed(")")))

#r

fct_collapse: Collapse factor levels into manually defined groups

vector_with_old_varnames_to_be_collapsed <- c("Var", "var", "Variable", "variables")

df %>% 
  mutate(collapsed_variables = fct_collapse(variables_column, 
                                     Variable = vector_with_old_varnames_to_be_collapsed)))

#r

Trim (remove whitespace from) column names and set to lowercase

df %>%
  mutate(trimmed_column = str_trim(column),
        trimmed_column_lower = str_to_lower(trimmed_column))

df1 %>%
  full_join(df2, by = c("column_name_1", "column_name_2"), 
            suffix = c("_df1", "_df2")) 
%>%
  replace_na(list(n_batman = 0, n_star_wars = 0))

#r

Set values out of range to upper detection limit in new column

udl <- 32 #upper detection limit of machine is 32

matrinem %>% #dataset
  mutate(values_in_range = 
        replace(values, values > udl, udl)) #replace(col, condition, result)

#r #sql

Connect to DB in R and query

library(DBI)
connection <- dbConnect(RMySQL::MySQL(),
                 dbname = "name",
                 host = "adress.amazonaws.com",
                 port = number,
                 user = "id",
                 password = "pw")
info <- dbGetQuery(connection, "SELECT column1 FROM database WHERE argument = something")

info

#r

Create snippets in R

usethis::edit_rstudio_snippets()

#Use this syntax
snippet plonger #Snippet name
	pivot_longer(${1:mydf},
	             cols = ${2:columns to pivot long},
	             names_to = "${3:desired name for category column}",
	             values_to = "${4:desired name for value column}"
	)

#matrinem #r

Creates t-test of multiple variables + boxplot of each variable

library(ggpubr)

dat <- my_data

# Edit from here #
x <- which(names(dat) == "Species") # name of grouping variable
y <- which(names(dat) == "Sepal.Length" # names of variables to test
| names(dat) == "Sepal.Width"
| names(dat) == "Petal.Length"
| names(dat) == "Petal.Width")
method <- "t.test" # one of "wilcox.test" or "t.test"
paired <- FALSE # if paired make sure that in the dataframe you have first all individuals at T1, then all individuals again at T2

# Edit until here
# Edit at your own risk
for (i in y) {
  for (j in x) {
    ifelse(paired == TRUE,
      p <- ggpaired(dat,
        x = colnames(dat[j]), y = colnames(dat[i]),
        color = colnames(dat[j]), line.color = "gray", line.size = 0.4,
        palette = "npg",
        legend = "none",
        xlab = colnames(dat[j]),
        ylab = colnames(dat[i]),
        add = "jitter"
      ),
      p <- ggboxplot(dat,
        x = colnames(dat[j]), y = colnames(dat[i]),
        color = colnames(dat[j]),
        palette = "npg",
        legend = "none",
        add = "jitter"
      )
    )
    #  Add p-value
    print(p + stat_compare_means(aes(label = paste0(..method.., ", p-value = ", ..p.format.., " (", ifelse(..p.adj.. >= 0.05, "not significant", ..p.signif..), ")")),
      method = method,
      paired = paired,
      # group.by = NULL,
      ref.group = NULL
    ))
  }
}

#matrinem #r

Creates One-Way Anova of multiple variables + boxplots for each variable

library(ggpubr) #required package

dat <- my_data

# Edit from here
x <- which(names(dat) == "Group") # name of grouping variable
y <- which(
  names(dat) == "INFg" # names of variables to test
| names(dat) == "IL-10"
| names(dat) == "IL-12p70"
| names(dat) == "IL-1b"
| names(dat) == "IL-2"
| names(dat) == "IL-4"
| names(dat) == "IL-5"
| names(dat) == "IL-6"
| names(dat) == "KCGRO"
| names(dat) == "TNFa"
)
method1 <- "anova" # one of "anova" or "kruskal.test"
method2 <- "t.test" # one of "wilcox.test" or "t.test"
my_comparisons <- list(
  c("CON-BF", "FVT-FORM"
    ), 
  c("CON-BF", "SM-FORM"
    ), 
  c("FVT-FORM", "SM-FORM")
  ) # comparisons for post-hoc tests
# Edit until here


# Edit at your own risk
for (i in y) {
  for (j in x) {
    p <- ggboxplot(dat,
      x = colnames(dat[j]), y = colnames(dat[i]),
      color = colnames(dat[j]),
      legend = "none",
      palette = "npg",
      add = "jitter"
    )
    print(
      p + stat_compare_means(aes(label = paste0(..method.., ", p-value = ", ..p.format.., " (", ifelse(..p.adj.. > 0.05, "not significant", ..p.signif..), ")")),
        method = method1, label.y = max(dat[, i], na.rm = TRUE)
      )
      + stat_compare_means(comparisons = my_comparisons, method = method2, label = "p.format") # remove if p-value of ANOVA or Kruskal-Wallis test >= 0.05
    )
  }
}

#r #tidyverse

Tidyverse cheat sheet

http://datacamp-community-prod.s3.amazonaws.com/c1fae72f-d2d7-4646-9dce-dd0f8fb5c5e8

#r

get top motifs for DARs

library(motifmatchr)
anno <- getPeakAnnotation(ArchRProj = proj, name = "Motif")
motif_ix <- matchMotifs(anno$motifs, GR, genome = "hg38",out = "score")
score <- assays(motif_ix)$motifScores
apply(score,1,function(x) which(x==max(x)))
colnames(score)[tail(sort(as.matrix(score)), 10)]

#r #installation #linux #bash #wsl

Install R (Linux)

#!/usr/bin/env bash

# Install R on WSL
sudo apt-get update -qq -y
sudo apt-get install -y wget git
OS_DISTRIBUTION=$(lsb_release -cs)
wget -O- http://neuro.debian.net/lists/${OS_DISTRIBUTION}.us-nh.full | sudo tee /etc/apt/sources.list.d/neurodebian.sources.list
sudo apt-key adv --recv-keys --keyserver hkp://pool.sks-keyservers.net:80 0xA5D32F012649A5A9
sudo apt-get update

sudo apt-get install libopenblas-base r-base
sudo apt-get update -qq -y
sudo apt-get install -y libgit2-dev
sudo apt-get install -y libcurl4-openssl-dev libssl-dev
sudo apt-get install -y zlib1g-dev libssh2-1-dev libpq-dev libxml2-dev

#r

r - How to use size and decay in nnet - Stack Overflow

https://stackoverflow.com/questions/42417948/how-to-use-size-and-decay-in-nnet

#r

Classifying Wines by Data Mining from Physicochemical Properties

https://rstudio-pubs-static.s3.amazonaws.com/473000_082e869ca8ca44dbb9f8dda1b2a251d2.html

#r

A Brief Introduction to caretEnsemble

https://cran.r-project.org/web/packages/caretEnsemble/vignettes/caretEnsemble-intro.html

#r

r caret - r caretEnsemble - passing a fit param to one specific model in caretList - Stack Overflow

https://stackoverflow.com/questions/45156400/r-caretensemble-passing-a-fit-param-to-one-specific-model-in-caretlist

#r

Reducing time series data from half hour to hourly in R - Stack Overflow

# create grouped column
test1$grouped_time = lubridate::floor_date(test1$DateTime, unit = "hour")
# (use ceiling_date instead if you want to round the half hours up instead of down)

# sum by group
library(dplyr)
test2 = test1 %>%
  group_by(grouped_time, LCLid, stdorToU, Acorn, Acorn_grouped) %>%
  summarize(KWH.hh.per.hour = sum(KWH.hh..per.half.hour.))

#r

Lines between points and their labels: r - Label points in geom_point - Stack Overflow

library(ggplot2)
library(ggrepel)

nba <- read.csv("http://datasets.flowingdata.com/ppg2008.csv", sep = ",")

nbaplot <- ggplot(nba, aes(x= MIN, y = PTS)) + 
  geom_point(color = "blue", size = 3)

### geom_label_repel
nbaplot + 
  geom_label_repel(aes(label = Name),
                  box.padding   = 0.35, 
                  point.padding = 0.5,
                  segment.color = 'grey50') +
  theme_classic()

#r

Weighted variance function

wt_var <- function(x, w, na.rm = FALSE) {
  if (na.rm) {
    na <- is.na(x) | is.na(w)
    x <- x[!na]
    w <- w[!na]
  }
  wm <- weighted.mean(x, w)
  sqrdev <- (x - wm)^2
  (sum(sqrdev*w))
}

#r

2.6 R



#==========================================================================
# R script for the Submodule 2.6 Population Density Maps - FB MOOC 
# Data-Pop Alliance
# Guillermo Romero, Researcher and data scientist
# Comments and questions: gromero@datapopalliance.org
# August, 2021
#==========================================================================

# To install libraries
install.packages("data.table")
install.packages("rgdal")
install.packages("ggplot2")
install.packages("lubridate")
install.packages("plyr")
install.packages("viridis")
install.packages("ggthemes")
install.packages("mapproj")
install.packages("spdplyr")
install.packages("geojsonio")

# Load libraries
library(data.table)
library(rgdal)
library(ggplot2)
library(lubridate)
library(plyr)
library(viridis)
library(ggthemes)
library(mapproj)
library(geojsonio)
library(spdplyr)


################################
# First Part
# Assign Total population into
# the area of interest
################################


# Set the working directory
setwd("~/Documents/DPA_tutorial")

# Open the total population file 
totalPop = fread('unzip -p population_mex_2018-10-01.csv.zip')

# Since this population density map file is big
# We are going to use only a subset of this file (for the area of interest) 
# zm stands for an area capturing Mexico city and the state of Mexico

zm <- subset(totalPop, longitude >= -101 & longitude <= -98)
zm <- subset(zm, latitude >= 18 & latitude <= 21)


# Open the spatial file (shapefile)
# The area of interest is Distrito Federal (Mexico city)
# And the neighborhood state called Mexico
# Note: in the DPA_tutorial folder, create a subfolder named shapefiles
# where you need to save your spatial files

geo <- readOGR(dsn="shapefiles", layer="gadm36_MEX_2")
geo <- geo[,c(4,6,7)]
g1<-subset(geo, NAME_1=="Distrito Federal")
g2<-subset(geo,NAME_1=="México")
geo<-rbind(g1,g2)

# Fortify allows you to work a dataframe from the spatial object

geodf<-fortify(geo)
geo$id <- row.names(geo) # allocate an id variable to the spatial object
coords<-zm[,c(2,1)] # define the coordinates columns
sp <- SpatialPoints(coords)
rm(coords)

# Define the Coordinate Reference System (CRS)
proj4string(sp) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
proj4string(geo) <-"+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"

# Assigning points into polygons
assign <- over(sp, geo)

# Use rownames to easily merge in the following step
assign$rous <- rownames(assign)
zm$rous<-rownames(zm)

assign$rous<-as.factor(as.character(assign$rous))
zm$rous<-as.factor(as.character(zm$rous))

names(assign)[1]<-"ID"

# Merge the assign object with the zm (Mexico city and Mexico state) area
zm.Map <- merge(x=assign, y=zm, by.x="rous", by.y="rous")
dim(zm.Map)
zm.Map <- zm.Map[!is.na(zm.Map$GID_2),]
head(zm.Map)

# Sum the population per municipality
# This is the total population density map per municipality
total.zm.Mun<-aggregate(population_2020~NAME_2+GID_2, FUN=sum, data=zm.Map, na.rm=TRUE)
head(total.zm.Mun)





################################
# Second Part
# Assign Women population into
# the area of interest
################################



# Women - High resolution population density map
# Open the women population file 
totalPop = fread('unzip -p mex_women_of_reproductive_age_15_49_2019-06-01_csv.zip')

# Since this population density map file is big
# We are going to use only a subset of this file (for the area of interest) 
# zm stands for an area capturing Mexico city and the state of Mexico

zm <- subset(totalPop, longitude >= -101 & longitude <= -98)
zm <- subset(zm, latitude >= 18 & latitude <= 21)

# Open the spatial file (shapefile)
# The area of interest is Distrito Federal (Mexico city)
# And the neighborhood state called Mexico

geo <- readOGR(dsn="shapes", layer="gadm36_MEX_2")
geo <- geo[,c(4,6,7)]
g1<-subset(geo, NAME_1=="Distrito Federal")
g2<-subset(geo,NAME_1=="México")
geo<-rbind(g1,g2)

geodf<-fortify(geo)
geo$id <- row.names(geo) 
coords<-zm[,c(2,1)]
sp <- SpatialPoints(coords)
rm(coords)

# Use the following Coordinate Reference System (CRS)
proj4string(sp) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
proj4string(geo) <-"+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"

#assigning points into polygons
assign <- over(sp, geo)
dim(assign)

assign$rous <- rownames(assign)
zm$rous<-rownames(zm)

assign$rous<-as.factor(as.character(assign$rous))
zm$rous<-as.factor(as.character(zm$rous))
names(assign)[1]<-"ID"

# merge
zm.Map <- merge(x=assign, y=zm, by.x="rous", by.y="rous")
dim(zm.Map)
zm.Map <- zm.Map[!is.na(zm.Map$GID_2),]
head(zm.Map)

# Sum the women population per municipality
# This is the women population density map per municipality
women.zm.Mun<-aggregate(population~NAME_2+GID_2, FUN=sum, data=zm.Map, na.rm=TRUE)
names(women.zm.Mun)[3]<-"women_population"
head(women.zm.Mun)


# Merge total- and women population per mun
pop.Map <- merge(x=total.zm.Mun, y=women.zm.Mun, by.x="GID_2", by.y="GID_2")

# pop.Map is the object containing both, the total and the women population info
pop.Map$women.perc<-pop.Map$women_population/pop.Map$population_2020
pop.Map<-pop.Map[,c(1,2,3,5,6)]
names(pop.Map)[2]<-"municipality"
head(pop.Map)

rm(totalPop)
rm(zm)


################################
# Third Part
# Incorporate mobility data and
# Calculate a risk score
################################


# Load Fb Range Maps (Mobility data) and select the country of interest
mobility = fread('unzip -p movement-range-data-2021-07-20.zip')
mex<-subset(mobility, country=="MEX")
rm(mobility)

# Merge mobility with the population (pop.Map) object previously created
mobility.pop.Map <- merge(x=pop.Map, y=mex, by.x="GID_2", by.y="polygon_id")
names(mobility.pop.Map)[6]<-"date"
head(mobility.pop.Map)

# Select dates of interest
# In this case, we are going to use a week during the quarantine
# Rename column accordingly

mobility.pop.Map$date<-as.Date(mobility.pop.Map$date, format="%Y-%m-%d")
mobility.pop.quarantine<-subset(mobility.pop.Map, date>="2020-05-01" & date<="2020-05-07")
names(mobility.pop.quarantine)[11]<-"population.staying.home" 
head(mobility.pop.quarantine)

# Get the media of population staying home per municipality
mobility.pop.quarantine<-aggregate(population.staying.home~GID_2+women_population+women.perc+population_2020, FUN=mean, data=mobility.pop.quarantine, na.rm=TRUE)

# Calculate a weighted risk score
attach(mobility.pop.quarantine)
mobility.pop.quarantine$Risk.Score<-(women.perc)*(1-population.staying.home)*(sum(pop.Map$women_population))

# Normalize risk score
attach(mobility.pop.quarantine)
mobility.pop.quarantine$nRisk.Score<-(Risk.Score-min(Risk.Score))/(max(Risk.Score)-min(Risk.Score))
mobility.pop.quarantine<-mobility.pop.quarantine[,c(1,7)]
head(mobility.pop.quarantine)



################################
# Map using ggplot2
################################




# Choropleth Map with risk score

shapefile <- readOGR(dsn="shapefiles", layer="gadm36_MEX_2")
g1<-subset(shapefile, NAME_1=="Distrito Federal")
g2<-subset(shapefile,NAME_1=="México")
shapefile<-rbind(g1,g2)

shapefile@data$id = rownames(shapefile@data)
head(shapefile@data)
class(shapefile@data)
dim(shapefile@data)

#shapefile.points = fortify(shapefile, region = "id")
shapefile.points = fortify(shapefile)
shapefile.df = join(shapefile.points, shapefile@data, by = "id")
head(shapefile.df)
shapefile.df = subset(shapefile.df, select = c(long, lat, group, GID_2))
sort(unique(shapefile.df$GID_2))

#names(shapefile.df) = c("long", "lat", "group", "NAME1")
risk.map = join(shapefile.df, mobility.pop.quarantine, by = "GID_2", type = "full")
head(risk.map)
dim(risk.map)


#chrolopleth
#png("zm.risk.score.map.png", width = 5, height = 5, units = 'in', res = 300)
p0 <- ggplot(data = risk.map,
             mapping = aes(x = long, y = lat,
                 group = group,
                 fill = nRisk.Score))
p1 <- p0 + geom_polygon(color = "gray90", size = 0.05) +
    coord_map(projection = "albers", lat0 = 39, lat1 = 45)

p2 <- p1 + scale_fill_viridis_c(option = "magma", direction = -1)
p2 + theme_map() + #facet_wrap(~ year, ncol = 2) +
    theme(legend.position = "right",
          strip.background = element_blank()) +
    #labs(fill = "Changement en pourcentage / semaine",
    #     "title = "Opiate Related Deaths by State, 2000-2014")
     labs(fill = "nRisk.Score")
#dev.off()



################################
# Creation of files for kepler:
# risk score
################################

riskScore<-subset(risk.map, !duplicated(GID_2)) 
head(riskScore)

shapefile <- readOGR(dsn="shapefiles", layer="gadm36_MEX_2")
g1<-subset(shapefile, NAME_1=="Distrito Federal")
g2<-subset(shapefile,NAME_1=="México")
shapefile<-rbind(g1,g2)
shapefile<-shapefile[,c(6,7)]

shapefile@data = join(shapefile@data, riskScore, by = "GID_2", type = "full")
head(shapefile@data)
class(shapefile)

#to convert to geojson format
json <- geojson_json(shapefile)# Simplify the geometry information of GeoJSON.
geojson_write(json, geometry = "polygon", file = "nrisk.Score.geojson")






################################
# Creation of files for kepler:
# # Pre-COVID temporal window. Mobility data
################################



mobility = fread('unzip -p /home/guillermo/Documents/dpa/fbMooc/population.maps/movement-range-data-2021-07-20.zip')
mex<-subset(mobility, country=="MEX")
mex<-mex[,c(4,1,7)]

names(mex)[1]<-"GID_2"
names(mex)[2]<-"date"
names(mex)[3]<-"population.staying.home"

#select a temporal window of one week
#data starts at 2020-03-01
mex$date<-as.Date(mex$date, format="%Y-%m-%d")
mex<-subset(mex, date<="2020-03-07")
mex<-aggregate(population.staying.home~GID_2, FUN=mean, data=mex, na.rm=TRUE)


#need coordinates
#get them the previous object named: risk.map

coord<-subset(risk.map, !duplicated(GID_2)) 
coord<-coord[,c(1:4)]

#add coordinates to mex object
mex <- merge(x=mex, y=coord, by="GID_2", all.y=TRUE)


shapefile <- readOGR(dsn="shapefiles", layer="gadm36_MEX_2")
g1<-subset(shapefile, NAME_1=="Distrito Federal")
g2<-subset(shapefile,NAME_1=="México")
shapefile<-rbind(g1,g2)
shapefile<-shapefile[,c(6,7)]

shapefile@data = join(shapefile@data, mex, by = "GID_2", type = "full")
head(shapefile@data)
class(shapefile)

json <- geojson_json(shapefile)# Simplify the geometry information of GeoJSON.
geojson_write(json, geometry = "polygon", file = "mobility.precovid.geojson")




################################
# Creation of files for kepler:
# Quarantine week. Mobility data
################################


mobility = fread('unzip -p /home/guillermo/Documents/dpa/fbMooc/population.maps/movement-range-data-2021-07-20.zip')
mex<-subset(mobility, country=="MEX")
mex<-mex[,c(4,1,7)]

names(mex)[1]<-"GID_2"
names(mex)[2]<-"date"
names(mex)[3]<-"atHome"

mex$date<-as.Date(mex$date, format="%Y-%m-%d")
mex<-subset(mex, date>="2020-05-01")
mex<-subset(mex, date<="2020-05-07")

mex<-aggregate(atHome~GID_2, FUN=mean, data=mex, na.rm=TRUE)


#need coordinates
#get them from previous object named: risk.map
coord<-subset(risk.map, !duplicated(GID_2)) 
coord<-coord[,c(1:4)]

#add coordinates to mex object
mex <- merge(x=mex, y=coord, by="GID_2", all.y=TRUE)

shapefile <- readOGR(dsn="shapes", layer="gadm36_MEX_2")
g1<-subset(shapefile, NAME_1=="Distrito Federal")
g2<-subset(shapefile,NAME_1=="México")
shapefile<-rbind(g1,g2)
shapefile<-shapefile[,c(6,7)]


shapefile@data = join(shapefile@data, mex, by = "GID_2", type = "full")
head(shapefile@data)
class(shapefile)

#convert into geojson format
json <- geojson_json(shapefile)
geojson_write(json, geometry = "polygon", file = "mobility.quarantine.geojson")






################################
# Creation of files for kepler:
# population density map sample
################################

totalPop = fread('unzip -p population_mex_2018-10-01.csv.zip')

#get a sample of your total pop
sampling <- totalPop[sample(nrow(totalPop), 174579), ]

#write
write.csv(sampling, "population.density.map.sample.csv", row.names=FALSE)

#r

Cluster-robust SEs for logistic regression

clrobustse <- function(fit.model, clusterid) {
  rank=fit.model$rank
  N.obs <- length(clusterid)            
  N.clust <- length(unique(clusterid))  
  dfc <- N.clust/(N.clust-1)                    
  vcv <- vcov(fit.model)
  estfn <- estfun(fit.model)
  uj <- apply(estfn, 2, function(x) tapply(x, clusterid, sum))
  N.VCV <- N.obs * vcv
  ujuj.nobs  <- crossprod(uj)/N.obs
  vcovCL <- dfc*(1/N.obs * (N.VCV %*% ujuj.nobs %*% N.VCV))
  coeftest(fit.model, vcov=vcovCL)
}
clrobustse(UC.models[[1]], (contest.user.level.data %>% select(entered.contest,contest.format,total.prizes,contest.duration.hours,num.winners,max.prize,min.prize,binary.reads.cap,prize.sd,topic_id) %>% drop_na())$topic_id)

#r

k-means clustering elbow method

# function to compute total within-cluster sum of squares. 
fviz_nbclust(contest.scaled, kmeans, method = "wss", k.max = 50,verbose=TRUE,print.summary=TRUE) + 
  theme_minimal() +
  ylab("Total Within Sum of Squares")+
  xlab("Number of clusters (k)")+
  ggtitle("Determining optimal number of clusters using the Elbow Method")+theme(plot.title=element_text(hjust=0.5))

#r

r - plotly: Updating data with dropdown selection - Stack Overflow

library(plotly)

df <- data.frame(x = runif(200), y = runif(200), z = runif(200), j = runif(200), k = rep(0.7, 200), i = rnorm(200,0.6,0.05))

create_buttons <- function(df, y_axis_var_names) {
  lapply(
    y_axis_var_names,
    FUN = function(var_name, df) {
      button <- list(
        method = 'restyle',
        args = list('y', list(df[, var_name])),
        label = sprintf('Show %s', var_name)
      )
    },
    df
  )
  
}

y_axis_var_names <- c('y', 'z', 'j', 'k', 'i')

p <- plot_ly(df, x = ~x, y = ~y, mode = "markers", name = "A", visible = T) %>%
     layout(
         title = "Drop down menus - Styling",
         xaxis = list(domain = c(0.1, 1)),
         yaxis = list(title = "y"),
         updatemenus = list(
             list(
                 y = 0.7,
                 buttons = create_buttons(df, y_axis_var_names)
             )
         ))
p

#r

k-means clustering

https://uc-r.github.io/kmeans_clustering

#r

How to perform multiple left joins using dplyr in R - Stack Overflow

left_join(x, y, by='Flag') %>%
              left_join(., z, by='Flag')

#r

Read a delimited file (including csv & tsv) into a tibble — read_delim • readr

read_delim(
  file,
  delim,
  quote = "\"",
  escape_backslash = FALSE,
  escape_double = TRUE,
  col_names = TRUE,
  col_types = NULL,
  locale = default_locale(),
  na = c("", "NA"),
  quoted_na = TRUE,
  comment = "",
  trim_ws = FALSE,
  skip = 0,
  n_max = Inf,
  guess_max = min(1000, n_max),
  progress = show_progress(),
  skip_empty_rows = TRUE
)

#r

multiple correspondence analysis in R

# load packages
require(FactoMineR)
require(ggplot2)
# load data tea
data(tea)
# select these columns
newtea = tea[, c("Tea", "How", "how", "sugar", "where", "always")]
# take a look
head(newtea)


# number of categories per variable
cats = apply(newtea, 2, function(x) nlevels(as.factor(x)))
cats

# apply MCA
mca1 = MCA(newtea, graph = FALSE)

# table of eigenvalues
mca1$eig


# data frames for ggplot
mca1_vars_df = data.frame(mca1$var$coord, Variable = rep(names(cats), 
                                                         cats))
mca1_obs_df = data.frame(mca1$ind$coord)

# plot of variable categories
ggplot(data = mca1_vars_df,
       aes(x = Dim.1, y = Dim.2, label = rownames(mca1_vars_df))) + 
  geom_hline(yintercept = 0, colour = "gray70") + geom_vline(xintercept = 0, 
                                              colour = "gray70") +
  geom_text(aes(colour = Variable)) + 
  ggtitle("MCA plot of variables using R package FactoMineR")


# XXX ---------------------------------------------------------------------

Base_acm <- Base %>% select(P1_1, P3_1, P3_2, P3_3)
Base_acm$P1_1 <- as.factor(Base_acm$P1_1)
Base_acm$P3_1 <- as.factor(Base_acm$P3_1)
Base_acm$P3_2 <- as.factor(Base_acm$P3_2)
Base_acm$P3_3 <- as.factor(Base_acm$P3_3)

cats=apply(Base_acm, 2, function(x) nlevels(as.factor(x)))

mca2 = MCA(Base_acm, graph = FALSE)

# data frames for ggplot
mca2_vars_df = data.frame(mca2$var$coord, Variable = rep(names(cats), 
                                                         cats))
mca2_obs_df = data.frame(mca2$ind$coord)

# plot of variable categories
ggplot(data = mca2_vars_df,
       aes(x = Dim.1, y = Dim.2, label = rownames(mca2_vars_df))) + 
  geom_hline(yintercept = 0, colour = "gray70") + geom_vline(xintercept = 0, 
                                                        colour = "gray70") +
  geom_text(aes(colour = Variable)) + 
  ggtitle("MCA plot of variables using R package FactoMineR")

factoextra::fviz_screeplot(mca2, addlabels = TRUE, ylim = c(0, 45))

#r

simple tryCatch in R

# cells = try(CustomSubset(group.info, min_n=50, max_total=2000))
      # if (inherits(cells, 'try-error')) {
      #   cells <- c(cells.1, cells.2)
      # }
      
      
img <- tryCatch(
      {
        .GetImageSize(slide.file)
      },
      error = function(e) {
        img.path <- list.files(slide.file, full.names=TRUE)[[1]] #Assuming all scan images of the same batch is of similar size, we only need to take the first one
        img <- .GetImageSize(img.path)
        return(img)
        })

#r

Cross table with dplyr

if (!require('tidyverse')) install.packages('tidyverse'); library('tidyverse')
if (!require('gapminder')) install.packages('gapminder'); library('gapminder')

gap <- gapminder %>% filter(year == 1987 | year == 1992)

gap %>%
  group_by(year, continent) %>% 
  summarise(average = mean(gdpPercap)) %>% 
  spread(continent, average)


gap %>%
  group_by(continent, year) %>% 
  summarise(average = mean(gdpPercap)) %>% 
  spread(year, average)

#r #ggplot2 #visualization

ggplot2 graph template

ggplot(data = <DATA>) + 
  <GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))

#r

Replace string in a vector

mutate(var_name = str_replace(var_name, "Q", "q"))

#r

Model formula for all possible two-way interactions

Y ~ (a + b + c)*x

#r

Replace r values that have p values > 0.05 with dashed line

mutate(r_varname = ifelse(p_varname > 0.05, "-", r_varname))

#r

Tidyverse

install.packages("tidyverse")

#r

Superscript text in ggplot2 R plots. Use the bquote function in ggplot

library(ggplot2)
ggplot(mtcars, aes(hp, mpg)) + 
       geom_point() +
       labs(x = bquote('x axis'~(Å^2)), y = "y axis") +
       #or
       #labs(x = bquote('x axis'~(ring(A)^2)), y = "y axis") 
       theme_bw()

#r

add julian day column to a dataframe

mutate(yday = yday(date))

#r

Make facet strip background white

theme(strip.background = element_rect(colour = "black", fill = "white"))

#r

Add previous 7-day mean (excluding current day) of a variable to a dataframe by group

library(tidyverse)
library(purrr)
library(zoo)

data %>% 
  group_by(group_name) %>% 
  nest() %>% 
  mutate(data = pmap(list(data),
                     ~ mutate(.x,
                              r_mean = lag(rollmean(x = var_name, k = 7, fill = NA, align = "right")))
                     )
  ) %>% 
  unnest(cols = data)

#r

Try function & Continue the execution

try(log("not a number"), silent = TRUE)
print("errors can't stop me")

#r

Stopping an R Script running upon a certain condition

if (1 != 2)
  stop("something is wrong")

#r

How to Combine Multiple GGPlots into a Figure - Datanovia

library(ggplot2)
library("ggpubr")
theme_set(
  theme_bw() +
    theme(legend.position = "top")
  )

#r

Removes outliers in dataframe column by looking at quartiles

#Outlier Removal Function
remove_outliers <- function(dataframe, column, na.rm = TRUE) {
  Q1 <- quantile(column, .25)
  Q3 <- quantile(column, .75)
  IQR <- IQR(column)
  dataframe <- subset(dataframe, column> (Q1 - 1.5*IQR) & column< (Q3 + 1.5*IQR))
}

#Not looping
df_1 <-remove_outliers(df_1,df_1$Column1)

#Loop through a list of column names
for(i in Column_List){
  df_1 <- remove_outliers(df_1, df_1[[i]])
}

#r

Turns Swedbank Estonia financial statement csv file to cashflow dataframe

# Account
statement <- read.csv("statement.csv", stringsAsFactors = F, sep = ";")

# Get rid of irrelevant columns
statement <- statement[2:13]

# Statement money flows
rahavoog <- statement[statement$Reatüüp == 20,]
rahavoog$Summa <- as.numeric(gsub(",", ".", rahavoog$Summa))

#r

Order or sort a character vector


str_order(
  x,
  decreasing = FALSE,
  na_last = TRUE,
  locale = "en",
  numeric = FALSE,
  ...
)
  
# Examples 
  
  str_order(letters)
#>  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
#> [26] 26
str_sort(letters)
#>  [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s"
#> [20] "t" "u" "v" "w" "x" "y" "z"

str_order(letters, locale = "haw")
#>  [1]  1  5  9 15 21  2  3  4  6  7  8 10 11 12 13 14 16 17 18 19 20 22 23 24 25
#> [26] 26
str_sort(letters, locale = "haw")
#>  [1] "a" "e" "i" "o" "u" "b" "c" "d" "f" "g" "h" "j" "k" "l" "m" "n" "p" "q" "r"
#> [20] "s" "t" "v" "w" "x" "y" "z"

x <- c("100a10", "100a5", "2b", "2a")
str_sort(x)
#> [1] "100a10" "100a5"  "2a"     "2b"    
str_sort(x, numeric = TRUE)
#> [1] "2a"     "2b"     "100a5"  "100a10"

#r

Importa vários arquivos iguais da mesma pasta

dir <- "directory of archives"

directories <- as.list(file.path(dir, list.files(dir, pattern = ".*.csv")))

dataframes <- map(directories , rio::import)

dataset <- plyr::ldply(dataframes, data.frame)

#r

Dynamically select dataframe column in R

var <- "mpg"
#Doesn't work
mtcars$var
#These both work, but note that what they return is different
# the first is a vector, the second is a data.frame
mtcars[[var]]
mtcars[var]

#r

Select Rows in a Data Frame that do not match

# make data frame 
a <- data.frame( x =  c(1,2,3,4)) 
b <- data.frame( y =  c(1,2,3,4,5,6))
subset(b, !(y %in% a$x)) #Pulls values from frame b that are not in frame a

#r

Fix Misspellings in a Data frame

library(tidyverse)

df$column <- df$column str_to_title(df$column) #Standardize caps

#Two options 
df$column <- str_replace(df$column, "old", "new") #Option 1, uses str_replace
df$column <- gsub("old", "new", df$column) #Option 2, uses str_replace

#r

basic scatter plot

x <- data$variable_xaxis
y <- data$variable_yaxis

plot(x, y, main = "Main title",
     xlab = "X axis title", ylab = "Y axis title",
     pch = 19, frame = FALSE)
abline(lm(y ~ x, data = mtcars), col = "blue")

#r

Basic Plot with Point Layer (ggplot2)

g <- ggplot(data_set, aes(x-variable, y-variable) 

g + geom_point()

#r

Basic Plot with Geom

qplot(x-coordinate, y-coordinate, data=data_frame, geom = c("point", "smooth")

#r

Basic Scatter Plot

x <- data$variable_xaxis
y <- data$variable_yaxis

plot(x, y, main = "Main title",
     xlab = "X axis title", ylab = "Y axis title",
     pch = 19, frame = FALSE)
abline(lm(y ~ x, data = mtcars), col = "blue")