Title: | Data Extract, Transform, Test and Load |
---|---|
Description: | Data extract, transform, test and load tool for sanitising your workflow. |
Authors: | Robert Ashton [aut, cre] |
Maintainer: | Robert Ashton <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.1.0 |
Built: | 2024-11-17 03:25:04 UTC |
Source: | https://github.com/vimc/dettl |
Create an import object using functions defined at specified path
dettl(path, db_name = NULL)
dettl(path, db_name = NULL)
path |
Path to directory containing functions for import. |
db_name |
The name of the db to connect to. Connection info must be
configured via the |
An Import object.
The automatic load function loops over the transformed data and appends each data frame to the matching table in the database. If the appended table contains a key referenced by one of the foreign key constraints then when the data is inserted into the database this returns the value of the key for the new rows. Then loop over all tables in which this is used as a foreign key and update the previous values to use the returned actual values for the referenced key.
dettl_auto_load(transformed_data, con)
dettl_auto_load(transformed_data, con)
transformed_data |
The list of transformed data frames to append to tables in the database. |
con |
Connection to the database to add data to. |
Expect that this should only be called from within a custom load function if we want to load data to the database in the automatic way but have some special edge cases which we need to add some custom handling for before or after running the automatic load.
path <- dettl:::prepare_test_import( system.file("examples", "person_information", package = "dettl"), system.file("examples", "dettl_config.yml", package = "dettl") ) import <- dettl::dettl(file.path(path, "person_information"), "test") con <- import$get_connection() data <- list("people" = data.frame( name = c("Alice", "Bob"), age = c(25, 43), height = c(175, 187), stringsAsFactors = FALSE)) dettl_auto_load(data, con)
path <- dettl:::prepare_test_import( system.file("examples", "person_information", package = "dettl"), system.file("examples", "dettl_config.yml", package = "dettl") ) import <- dettl::dettl(file.path(path, "person_information"), "test") con <- import$get_connection() data <- list("people" = data.frame( name = c("Alice", "Bob"), age = c(25, 43), height = c(175, 187), stringsAsFactors = FALSE)) dettl_auto_load(data, con)
Initialise the database by creating log table if it doesn't already exist
dettl_create_log_table(path, db_name)
dettl_create_log_table(path, db_name)
path |
Path to import directory containing db connection configuration. |
db_name |
The name of the db to connect to. Connection info must be
configured via the |
path <- dettl:::prepare_test_import( system.file("examples", "person_information", package = "dettl"), system.file("examples", "dettl_config.yml", package = "dettl"), add_log_table = FALSE ) dettl::dettl_create_log_table(file.path(path, "person_information"), "test")
path <- dettl:::prepare_test_import( system.file("examples", "person_information", package = "dettl"), system.file("examples", "dettl_config.yml", package = "dettl"), add_log_table = FALSE ) dettl::dettl_create_log_table(file.path(path, "person_information"), "test")
Create new directory and templated code for new dettl process.
dettl_new(name)
dettl_new(name)
name |
The name of the project directory to be created. Should be human readable and meaningful. Any non a-z,0-9,_ characters will be stripped and replaced with _s. Directory name will be prepended with created date. |
t <- tempfile() dir.create(t) withr::with_dir(t, { dettl::dettl_new("test import") })
t <- tempfile() dir.create(t) withr::with_dir(t, { dettl::dettl_new("test import") })
Run specified stages of an import
dettl_run( import, db_name = NULL, comment = NULL, dry_run = FALSE, allow_dirty_git = FALSE, stage = c("extract", "transform"), ... )
dettl_run( import, db_name = NULL, comment = NULL, dry_run = FALSE, allow_dirty_git = FALSE, stage = c("extract", "transform"), ... )
import |
Path to import directory. |
db_name |
The name of the db to connect to. Connection info must be configured via the 'dettl_config.yml'. If name is left blank this will default to using the first db configured. |
comment |
Optional comment to be written to db log table when import is run. |
dry_run |
If TRUE then any changes to the database will be rolled back. |
allow_dirty_git |
If TRUE then skips check that the import is up to date with remote git repo. |
stage |
The stage or stages of the import to be run. |
... |
Additional args passed to run_import for a specific import type
see |
The import object
path <- dettl:::prepare_test_import( system.file("examples", "person_information", package = "dettl"), system.file("examples", "dettl_config.yml", package = "dettl") ) dettl::dettl_run(file.path(path, "person_information/"), "test", comment = "Example import") dettl::dettl_run(file.path(path, "person_information/"), "test", comment = "Example import", save = tempfile()) import <- dettl::dettl_run(file.path(path, "person_information/"), "test", stage = "extract") dettl::dettl_run(file.path(path, "person_information/"), "test", stage = c("extract", "transform", "load"), comment = "Example import")
path <- dettl:::prepare_test_import( system.file("examples", "person_information", package = "dettl"), system.file("examples", "dettl_config.yml", package = "dettl") ) dettl::dettl_run(file.path(path, "person_information/"), "test", comment = "Example import") dettl::dettl_run(file.path(path, "person_information/"), "test", comment = "Example import", save = tempfile()) import <- dettl::dettl_run(file.path(path, "person_information/"), "test", stage = "extract") dettl::dettl_run(file.path(path, "person_information/"), "test", stage = c("extract", "transform", "load"), comment = "Example import")
Saves any extracted and/or transformed data as separate sheet of an xlsx file.
dettl_save(import, file, stage)
dettl_save(import, file, stage)
import |
The import object to save the data for. |
file |
File path at which to save the data. |
stage |
The stage or stages to save. 'extract' and/or 'transform' |
path <- dettl:::prepare_test_import( system.file("examples", "person_information", package = "dettl"), system.file("examples", "dettl_config.yml", package = "dettl") ) import <- dettl::dettl(file.path(path, "person_information"), "test") import$extract() import$transform() t <- tempfile() dettl::dettl_save(import, t, "extract") t2 <- tempfile() dettl::dettl_save(import, t2, "transform") t3 <- tempfile() dettl::dettl_save(import, t3, c("extract", "transform"))
path <- dettl:::prepare_test_import( system.file("examples", "person_information", package = "dettl"), system.file("examples", "dettl_config.yml", package = "dettl") ) import <- dettl::dettl(file.path(path, "person_information"), "test") import$extract() import$transform() t <- tempfile() dettl::dettl_save(import, t, "extract") t2 <- tempfile() dettl::dettl_save(import, t2, "transform") t3 <- tempfile() dettl::dettl_save(import, t3, c("extract", "transform"))
Copies an example import to a new temp directory, sets up git for the directory and creates a test SQLite DB in the temp directory as test.sqlite.
prepare_test_import( example_dir = "example", dettl_config = "dettl_config.yml", create_db = TRUE, add_data = FALSE, add_job_table = FALSE, add_log_table = TRUE, add_fk_data = FALSE, add_cyclic_fks = FALSE )
prepare_test_import( example_dir = "example", dettl_config = "dettl_config.yml", create_db = TRUE, add_data = FALSE, add_job_table = FALSE, add_log_table = TRUE, add_fk_data = FALSE, add_cyclic_fks = FALSE )
example_dir |
The example directory to copy to temp. |
dettl_config |
Path to the dettl config file. |
create_db |
If TRUE then test SQLite db will be created |
add_data |
If TRUE data is bootstrapped to people table in test DB. |
add_job_table |
If TRUE also bootstrap job table related to people table. |
add_log_table |
If TRUE then also bootstrap log table. |
add_fk_data |
If TRUE then bootstrap three tables with foreign key |
add_cyclic_fks |
If TRUE then bootstrap two tables with cyclic foreign key constraints. constraints for testing automatic reading of foreign key constraints from db. |
This should only be called from a test, vignette or roxygen example.
dettl:::prepare_test_import( system.file("examples", "person_information", package = "dettl"), system.file("examples", "dettl_config.yml", package = "dettl") )
dettl:::prepare_test_import( system.file("examples", "person_information", package = "dettl"), system.file("examples", "dettl_config.yml", package = "dettl") )
Manage R based data import.
Manage R based data import.
This object should not be initialised directly. Use dettl
to
create the object.
Import can be run by working with import object returned by
dettl
or by running top-level functions. Run the import by
working with this object if you want to step through the import process
stage by stage and inspect the data after each stage.
dettl::Import
-> RImport
reload()
Reload the objects sources to refresh source code or repair a broken Postgres connection.
RImport$reload()
read_config()
Read and parse config from path.
RImport$read_config()
get_extracted_data()
Get the extracted data created by the extract step
RImport$get_extracted_data()
The extracted data
get_transformed_data()
Get the transformed data created by the transform step
RImport$get_transformed_data()
The transformed data
extract()
Run the extract stage of the data import
RImport$extract()
transform()
Run the transform stage of the data import
RImport$transform()
pre_modify_checks()
Run suite of checks to verify that db can be modified
RImport$pre_modify_checks(dry_run, allow_dirty_git)
dry_run
Whether to run in dry run mode. If TRUE then any database changes will be rolled back. Defaults to FALSE.
allow_dirty_git
If TRUE then skips check that the import is up to date with remote git repo. FALSE by default.
run_import()
Run multiple stages of the data import
RImport$run_import( comment = NULL, dry_run = FALSE, allow_dirty_git = FALSE, stage = c("extract", "transform"), save = FALSE )
comment
Optional comment to be written to db log table when import is run.
dry_run
If TRUE then any changes to the database will be rolled back.
allow_dirty_git
If TRUE then skips check that the import is up to date
stage
The stage or stages of the import to be run.
save
Path and name to save data from each stage at, if TRUE then will save to a tempfile.
path <- dettl:::prepare_test_import( system.file("examples", "person_information", package = "dettl"), system.file("examples", "dettl_config.yml", package = "dettl")) import_path <- file.path(path, "person_information") import <- dettl::dettl(import_path, db_name = "test") import$extract() import$transform() import$load()
path <- dettl:::prepare_test_import( system.file("examples", "person_information", package = "dettl"), system.file("examples", "dettl_config.yml", package = "dettl")) import_path <- file.path(path, "person_information") import <- dettl::dettl(import_path, db_name = "test") import$extract() import$transform() import$load()
Manage SQL based data import.
Manage SQL based data import.
This object should not be initialised directly. Use dettl
to
create the object.
Import can be run by working with import object returned by
dettl
or by running top-level functions. Run the import by
working with this object if you want to step through the import process
stage by stage and inspect the data after each stage.
dettl::Import
-> SqlImport
dettl::Import$begin_transaction()
dettl::Import$commit_transaction()
dettl::Import$extract()
dettl::Import$format()
dettl::Import$get_connection()
dettl::Import$get_log_table()
dettl::Import$help()
dettl::Import$initialize()
dettl::Import$load()
dettl::Import$pre_modify_checks()
dettl::Import$rollback_transaction()
dettl::Import$run_import()
dettl::Import$transform()
reload()
Reload the objects sources to refresh source code or repair a broken Postgres connection.
SqlImport$reload()
read_config()
Read and parse config from path.
SqlImport$read_config()
path <- dettl:::prepare_test_import( system.file("examples", "sql_example", package = "dettl"), system.file("examples", "dettl_config.yml", package = "dettl")) import_path <- file.path(path, "sql_example") import <- dettl::dettl(import_path, db_name = "test") import$run_import(stage = c("extract", "transform", "load"))
path <- dettl:::prepare_test_import( system.file("examples", "sql_example", package = "dettl"), system.file("examples", "dettl_config.yml", package = "dettl")) import_path <- file.path(path, "sql_example") import <- dettl::dettl(import_path, db_name = "test") import$run_import(stage = c("extract", "transform", "load"))