Put your code in a function:
## Read one text file and turn it into a data frame with one row per
## paragraph, each labelled with the section heading it appeared under.
##
## Arguments:
##   file  Path of the file to read.
##
## Returns:
##   A data frame with two columns:
##     category    the most recent chunk that did NOT start with a digit
##     paragraphs  one non-empty piece of a chunk that DID start with a digit
##
## Needs dplyr, tidyr and stringr attached (for %>%, mutate(), filter(),
## rename(), fill(), unnest() and str_replace_all()).
extract_info <- function(file) {
  ## NOTE(review): assumes a plain-text file -- swap in another reader if
  ## your files need one. Lines are re-joined with "\n" so the whole document
  ## is a single string, which is what the "*" split below expects.
  raw_text <- paste(readLines(file, warn = FALSE), collapse = "\n")

  ## One row per "*"-delimited chunk, with line breaks flattened to spaces.
  chunks <- data.frame(
    text = strsplit(raw_text, "\\*")[[1]],
    stringsAsFactors = FALSE
  ) %>%
    mutate(
      text = str_replace_all(text, "\n", " "),
      ## Presumably re-joins words hyphenated across a line break.
      text = str_replace_all(text, "- ", ""),
      text = str_replace_all(text, "^\\s", "")
    ) %>%
    filter(text != " ")

  ## Chunks starting with a digit are paragraph text; every other chunk is a
  ## heading that is carried down onto the paragraph rows below it.
  ## NA_character_ keeps both columns character even when nothing matches.
  labelled <- chunks %>%
    mutate(
      paragraphs = ifelse(grepl("^[[:digit:]]", text), text, NA_character_)
    ) %>%
    rename(category = text) %>%
    mutate(
      category = ifelse(
        grepl("^[[:digit:]]", category),
        NA_character_,
        category
      )
    ) %>%
    fill(category) %>%
    filter(!is.na(paragraphs))

  ## Split on the "<number>." markers and on the "Download as PDF" text, then
  ## clean up: drop tabs, blank anything mentioning "javascript", trim leading
  ## whitespace and discard the rows left empty.
  ## The pipeline is the last expression, so the result is returned visibly
  ## and prints when the function is called at the console.
  labelled %>%
    mutate(
      paragraphs = strsplit(
        paragraphs,
        "^[[:digit:]]{1,3}\\.|\\t\\s[[:digit:]]{1,3}\\."
      )
    ) %>%
    unnest(paragraphs) %>%
    mutate(paragraphs = strsplit(paragraphs, "Download as PDF")) %>%
    unnest(paragraphs) %>%
    mutate(
      paragraphs = str_replace_all(paragraphs, "\t", ""),
      paragraphs = ifelse(grepl("javascript", paragraphs), "", paragraphs),
      paragraphs = str_replace_all(paragraphs, "^\\s+", "")
    ) %>%
    filter(paragraphs != "")
}
Test your function to make sure it works on one file:
## Try the function on a single file first.
## Does the result work and look right?
## Keep working on your function until it does.
extract_info(file = "your_file_name.txt")
Get a list of all the files you want to process:
## With no `pattern`, this returns every file in your working directory.
## Pass the `pattern` argument if you only want files that follow a certain
## naming convention.
my_files <- list.files(path = ".")
Apply your function to those files:
## Run extract_info() on each file; `results` is a list with one element per
## entry of `my_files`, in the same order.
results <- lapply(X = my_files, FUN = extract_info)