R snippet
Initial
install.packages("spAddins")
install.packages("remedy")
Useful website
Package
Repos
options(repos = "http://cran.rstudio.org")
# options(repos = "https://cloud.r-project.org")
Offline install package
install.packages(".../path/to/package.tar.gz", type="source", repos=NULL)
ggplot
Awesome ggplot2
Themes and aesthetics
- gghighlight{Eg: gghighlight(max(value) > 19)}
- ggsci {Eg: scale_color_palname(), scale_fill_palname()}
- ggfittext {Eg. geom_fit_text(reflow = TRUE, grow = TRUE)}
- hrbrtheme {Eg: scale_y_comma(), theme_ipsum_rc()}
- bbplot {https://bbc.github.io/rcookbook/, Eg: bbc_style(), finalise_plot()}
- ggthemr {https://github.com/cttobin/ggthemr}
Presentation, composition and scales
- tagger
format axis
scale_y_continuous(label=scales::comma)
scale_x_date(date_breaks = "2 month", date_labels = "%m-%Y")
theme(legend.title = element_blank(),
legend.position = "bottom",
axis.text.x = element_text(angle = 30))
format axis text
theme(axis.text.x = element_text(size = 14), axis.title.x = element_text(size = 16),
axis.text.y = element_text(size = 14), axis.title.y = element_text(size = 16),
plot.title = element_text(size = 20, face = "bold", color = "darkgreen"))
theme(axis.ticks.length.y = unit(nc * 0.15,"cm"))
geom_line with x is factor
geom_line(aes(group = 1))
combine graphs
https://cran.r-project.org/web/packages/patchwork/vignettes/patchwork.html
vietnamese font
Sys.setlocale(category = "LC_ALL", locale = "vietnamese")
eval(parse("R/graph_pqr_201911.R", encoding = "UTF-8"))
plot(1:4,rep(1,4), pch=c("\u0111","\u01B0","\u01A1","\u0103"),cex=4)
# Uppercase
plot(1:4,rep(1,4), pch=c("\U0110","\u01AF","\u01A0","\u0102"),cex=4)
File
edit file
="blah text blah blah etc etc"
linewrite(line,file=paste0("vignettes/", ticker, ".Rmd"),append=TRUE)
data.table
https://atrebas.github.io/post/2019-03-03-datatable-dplyr/
dplyr
# select na vars
<- function(x) {any(is.na(x))}
fnc_na_vars <- dt %>%
na_vars select_if(fnc_na_vars) %>%
names()
group_by(xxx) %>% count()
<- enquo(category)
category <- df %>%
p group_by(!!category) %>%
summarise(cnt = n())
::quo_text(category)
rlang
<- sym(.symbol) .var
top_n_by function
<- function(df, n, top_by){
f_top_n <- enquo(top_by)
top_by
<- df %>%
top_val filter(is.finite(!!top_by)) %>%
pull(!!top_by) %>%
unique() %>%
sort(decreasing = TRUE) %>%
head(n)
filter(df, !!top_by %in% top_val) %>% return()
}
lubridate
<- as.Date("2009-09-02")
x wday(ymd(080101), label = TRUE, abbr = TRUE)
month(x)
year(x)
zoo
::as.yearmon("Mar 2012", "%b %Y") zoo
file and folder
<- Sys.glob(paste0(my_folder, "2017/*.xlsx"))
r18_2017
<- list.files(paste0(my_folder, "2017/"), full.names = T)
r18_2017
<- rio::import_list(r18_files, rbind = TRUE) dt
string
Bỏ dấu
::stri_trans_general('Nguyễn Ngọc Bình', "latin-ascii" ) stringi
optimize code with provis
## Generate data
<- 4e5
times <- 150
cols <- as.data.frame(x = matrix(rnorm(times * cols, mean = 5), ncol = cols))
data <- cbind(id = paste0("g", seq_len(times)), data)
data
::profvis({
profvis<- data # Store in another variable for this run
data1
### Get column means
<- apply(data1[, names(data1) != "id"], 2, mean)
means
### Subtract mean from each column
for (i in seq_along(means)) {
names(data1) != "id"][, i] <- data1[, names(data1) != "id"][, i] - means[i]
data1[,
} })
purrr
compose
<- compose(tidy, lm)
tidy_lm tidy_lm(Sepal.Length ~ Species, data = iris)
partial
<- partial(mean, na.rm = TRUE) mean_na_rm
reduce
<- list(
dfs age = tibble(name = "John", age = 30),
sex = tibble(name = c("John", "Mary"), sex = c("M", "F")),
trt = tibble(name = "Mary", treatment = "A")
)
%>% reduce(full_join) dfs
Hàm xử lý outlier
<- function(x){
f_outlier <- quantile(x, probs = c(0.005, 0.95), na.rm = TRUE, type = 3)
threshold
<- case_when(x > threshold[2] ~ threshold[2],
y < threshold[1] ~ threshold[1],
x TRUE ~ x)
return(y)
}
So sánh khác biệt giữa 2 file
library(diffr)
diffr("D:/TMP/new 1.txt", "D:/TMP/new 2.txt", contextSize = 0, minJumpSize = 500)
Optical Character Recognition (OCR)
if(!require("tesseract")) {install.packages("tesseract")}
library(tesseract)
library(dplyr)
<- ocr("D:/tmp/image2.png", engine = tesseract("eng"))
text cat(text)
%>% strsplit(split = "\n") %>% rio::export("x.xlsx") text
Markdown
rpub
---
title: "Correlation analysis"
author: "Nguyễn Ngọc Bình"
date: "`r format(Sys.Date(),'%Y-%m-%d')`"
output:
html_document:
code_download: true
code_folding: show
number_sections: yes
theme: "default"
toc: TRUE
toc_float: TRUE
dev: 'svg'
editor_options:
chunk_output_type: console
---
image

bookdown::html_document2, bookdown::word_document2

@ref(fig:nnet2) \
or
::opts_chunk$set(echo = FALSE, fig.height = 5, fig.width = 7, out.width = "70%")
knitr::include_graphics("figures/d_i_d_graph.png") knitr
Format number in rmarkdown
<- function(df) {
fnc_kbl %>%
df mutate_if(
is.numeric,
format,digits = 2,
nsmall = 2,
big.mark = ".",
decimal.mark = ","
%>%
) kbl() %>%
kable_classic(full_width = F)
}
Format table in word
<- function(tbl_name) {
fnc_print_tbl_df <- tbl_name %>%
out ungroup() %>%
mutate_if(is.numeric, round, 2) %>%
flextable() %>%
autofit() %>%
theme_zebra(odd_header = '#8064A2') %>%
font(fontname = 'Tahoma', part = 'all') %>%
fontsize(size = 10, part = 'all')%>%
border(border = officer::fp_border(color = "#8064A2"))
return(out)
}
import data
::import(xml2::xml2_example("cd_catalog.xml")) rio
Reticulate
file.edit(file.path("~", ".Rprofile"))
# RETICULATE_PYTHON="C/Users/nguye/anaconda3"