# Load data into an SQLite database library(tidyverse) src <- src_sqlite("datathon_plus_missing.sqlite", create=T) patients <- read_tsv("MelbDatathon2017/Lookups/patients.txt") copy_to(src, patients, "patients", temporary=F) drug <- read_tsv("MelbDatathon2017/Lookups/Drug_LookUp.txt") copy_to(src, drug, "drug", temporary=F) atc <- read_tsv("MelbDatathon2017/Lookups/ATC_LookUp.txt") copy_to(src, atc, "atc", temporary=F) stores <- read_tsv("MelbDatathon2017/Lookups/stores.txt") copy_to(src, stores, "stores", temporary=F) illness <- read_tsv("MelbDatathon2017/Lookups/ChronicIllness_LookUp.txt") copy_to(src, illness, "illness", temporary=F) filenames <- list.files("MelbDatathon2017/Transactions", full.names=TRUE) # SourceSystem often starts with all F, but it is not actually a logical column # Dates got mangled somehow, so treating as character col_types <- list(SourceSystem_Code=col_character(), Prescription_Week=col_character(), Dispense_Week=col_character()) trans <- read_tsv("MelbDatathon2017/Transactions/patients_1.txt", col_types=col_types) #db_drop_table(src$con, "trans") copy_to(src, trans, "trans", temporary=F, indexes=list( c("Patient_ID","Drug_ID") )) for(i in 2:50) { print(i) trans <- read_tsv(paste0("MelbDatathon2017/Transactions/patients_",i,".txt"), col_types=col_types) db_insert_into(src$con, "trans", trans) } # Some further data was provided after the competition started for(i in 1:50) { print(i) trans <- read_tsv(paste0("unzip/missing_patients_",i,".txt"), col_types=col_types) db_insert_into(src$con, "trans", trans) }