1. Freeze experimental features
# Import every COCA evaluation workbook in the "evaluation" directory and
# store each result in the global environment under the file's base name
# (e.g. COCA_acad_4000541), tagged with its fileID, register and corpus.
# importEval() is defined elsewhere; it is called here exactly as in the
# per-file imports (file, fileID, register, corpus).

# Only COCA Excel workbooks are listed. Other files kept in the same directory
# (e.g. saved .rds/.csv results) are skipped so they are neither handed to
# read_excel() nor labelled as COCA.
files <- list.files(here("evaluation"), pattern = "^COCA_.*\\.xlsx$")

# The data frame name is the part of each file name before the first dot.
data_names <- vapply(
  strsplit(files, ".", fixed = TRUE),
  `[[`,
  character(1),
  1L
)

# Register label for each COCA sub-corpus, keyed by the second
# underscore-separated part of the file name (COCA_<subcorpus>_<id>).
coca_registers <- c(
  acad = "academic",
  blog = "e-language",
  fict = "fiction",
  mag = "news",
  News = "news",
  Opinion = "news",
  Spoken = "spoken",
  tvm = "TV/movies",
  web = "e-language"
)

# seq_along() keeps the loop from running when no files are found.
for (i in seq_along(files)) {
  fileName <- files[[i]] # file name of element i
  dataName <- data_names[[i]] # data frame name of element i

  # Look up the register from the file name instead of hard-coding one value
  # for every file; fail loudly rather than mislabel an unknown sub-corpus.
  subcorpus <- strsplit(dataName, "_", fixed = TRUE)[[1]][2]
  register <- unname(coca_registers[subcorpus])
  if (is.na(register)) {
    stop("No register defined for COCA file: ", fileName, call. = FALSE)
  }

  # fileName is passed as a variable (not the literal string "fileName") so
  # that the actual workbook is read.
  tempData <- importEval(
    file = read_excel(here("evaluation", fileName), col_types = "text"),
    fileID = dataName,
    register = register,
    corpus = "COCA"
  )

  # Store the imported data under the name derived from the file.
  assign(dataName, tempData, envir = .GlobalEnv)
}
# Import the 30 COCA evaluation workbooks one by one. Each entry maps a file ID
# (also the workbook's base name and the name of the resulting object) to the
# register label passed to importEval(). importEval() is defined elsewhere.
coca_file_registers <- c(
  COCA_acad_4000541 = "academic",
  COCA_acad_4017541 = "academic",
  COCA_acad_4170341 = "academic",
  COCA_blog_5157941 = "e-language",
  COCA_blog_5174141 = "e-language",
  COCA_blog_5176541 = "e-language",
  COCA_fict_1000441 = "fiction",
  COCA_fict_1003141 = "fiction",
  COCA_fict_5003241 = "fiction",
  COCA_mag_2029741 = "news",
  COCA_mag_2030941 = "news",
  COCA_mag_4180341 = "news",
  COCA_News_4087357 = "news",
  COCA_News_4087464 = "news",
  COCA_News_4087649 = "news",
  COCA_News_4087995 = "news",
  COCA_Opinion_4061065 = "news",
  COCA_Opinion_4062489 = "news",
  COCA_Opinion_4079063 = "news",
  COCA_Opinion_4090647 = "news",
  COCA_Spoken_4082518 = "spoken",
  COCA_Spoken_4082551 = "spoken",
  COCA_Spoken_4082571 = "spoken",
  COCA_Spoken_4082646 = "spoken",
  COCA_tvm_5208241 = "TV/movies",
  COCA_tvm_5215441 = "TV/movies",
  COCA_tvm_5246241 = "TV/movies",
  COCA_web_5026941 = "e-language",
  COCA_web_5035341 = "e-language",
  COCA_web_5080941 = "e-language"
)

for (file_id in names(coca_file_registers)) {
  # environment() is evaluated at the top level here, so each object is
  # created in the same environment a plain `<-` assignment would use.
  assign(
    file_id,
    importEval(
      file = read_excel(
        here("evaluation", paste0(file_id, ".xlsx")),
        col_types = "text"
      ),
      fileID = file_id,
      register = coca_file_registers[[file_id]],
      corpus = "COCA"
    ),
    envir = environment()
  )
}
# Combine all BNC and COCA evaluation objects into a single data frame.

# Print a comma-separated list of every object whose name contains "BNC" or
# "COCA", as a reference for the explicit rbind() call below.
list_of_dataframes <- paste(ls(pattern = "BNC|COCA"), collapse = ", ")
print(list_of_dataframes)

# Row-bind the 30 BNC and 30 COCA objects in a fixed, explicit order.
EvalData <- rbind(
  BNC_AcaHumBk34,
  BNC_BAcjH78,
  BNC_BAcjM107,
  BNC_BEBl293,
  BNC_BEEm76,
  BNC_BERe31,
  BNC_BFict_b2,
  BNC_BMass311,
  BNC_BReg495,
  BNC_BSer145,
  BNC_ElanBlogBla12,
  BNC_ElanBlogSlu30,
  BNC_ElanEmail102,
  BNC_ElanForumCar5,
  BNC_ElanForumRig1,
  BNC_ElanRev27,
  BNC_ElanSms33,
  BNC_ElanSocFac4_pt1,
  BNC_ElanSocTwi49_pt7,
  BNC_ElanSocTwi6_pt4,
  BNC_FictFan41,
  BNC_FictMis228,
  BNC_MagAut1397,
  BNC_MagPc275,
  BNC_NewMaDas2819,
  BNC_NewReBet1393,
  BNC_NewSeGua553,
  BNC_Sp2m0f33,
  BNC_Sp2m2f63,
  BNC_Sp3m1f10,
  COCA_acad_4000541,
  COCA_acad_4017541,
  COCA_acad_4170341,
  COCA_blog_5157941,
  COCA_blog_5174141,
  COCA_blog_5176541,
  COCA_fict_1000441,
  COCA_fict_1003141,
  COCA_fict_5003241,
  COCA_mag_2029741,
  COCA_mag_2030941,
  COCA_mag_4180341,
  COCA_News_4087357,
  COCA_News_4087464,
  COCA_News_4087649,
  COCA_News_4087995,
  COCA_Opinion_4061065,
  COCA_Opinion_4062489,
  COCA_Opinion_4079063,
  COCA_Opinion_4090647,
  COCA_Spoken_4082518,
  COCA_Spoken_4082551,
  COCA_Spoken_4082571,
  COCA_Spoken_4082646,
  COCA_tvm_5208241,
  COCA_tvm_5215441,
  COCA_tvm_5246241,
  COCA_web_5026941,
  COCA_web_5035341,
  COCA_web_5080941
)
# Quick look at the combined data: overall summary, then the distinct file IDs,
# gold-standard tags and assigned tags.
summary(EvalData)
unique(EvalData$FileID)
unique(EvalData$TagGold)
unique(EvalData$Tag)

# Normalise the gold tag spellings "none" and "unclear" to upper case, leaving
# every other value (including NA) untouched, then store TagGold as a factor.
EvalData <- EvalData %>%
  mutate(
    TagGold = as.character(TagGold),
    TagGold = replace(TagGold, TagGold %in% "none", "NONE"),
    TagGold = replace(TagGold, TagGold %in% "unclear", "UNCLEAR"),
    TagGold = as.factor(TagGold)
  )
# Persist the combined evaluation data as RDS and as CSV in "evaluation".
# Each file is written once: the previous version issued both calls twice in
# a row with identical arguments, which only rewrote the same two files.
saveRDS(EvalData, here("evaluation", "MFTE_Python_Eval_Results.rds")) # Last saved 10 August 2023
write.csv(EvalData, here("evaluation", "MFTE_Python_Eval_Results.csv")) # Last saved 9 August 2023
# Total number of rows in the combined evaluation data.
dim(EvalData)[1L]

# Counts per gold tag; at the time of writing this showed 293 UNCLEAR.
summary(EvalData$TagGold)
BinomCI(293, 61140,