install.packages('frictionless')
25 Create Tabular Data Package with R
The code examples in this chapter must be interpreted by R instead of Bash.
You need to have the R frictionless
package installed. To install the package, run `install.packages('frictionless')`.
After installing the package, load it and the tidyverse
package:
library(tidyverse)
library(frictionless)
Load our data:
# Read the iris measurements from a local CSV file.
df <- read_csv("iris.csv")
# Store Species as a factor rather than plain character data.
df$Species <- as.factor(df$Species)
Create the Tabular Data Package in memory:
# Start from an empty package and attach the data frame as a resource
# named "iris".
my_data_package <-
  create_package() %>%
  add_resource(
    resource_name = "iris",
    data = df
  )
Inspect the Tabular Data Package:
# Print the nested structure of the package object.
str(my_data_package)
List of 3
$ profile : chr "tabular-data-package"
$ resources:List of 1
..$ :List of 4
.. ..$ name : chr "iris"
.. ..$ data : spec_tbl_df [150 × 5] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
.. .. ..$ Sepal.Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
.. .. ..$ Sepal.Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
.. .. ..$ Petal.Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
.. .. ..$ Petal.Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
.. .. ..$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
.. .. ..- attr(*, "spec")=
.. .. .. .. cols(
.. .. .. .. Sepal.Length = col_double(),
.. .. .. .. Sepal.Width = col_double(),
.. .. .. .. Petal.Length = col_double(),
.. .. .. .. Petal.Width = col_double(),
.. .. .. .. Species = col_character()
.. .. .. .. )
.. .. ..- attr(*, "problems")=<externalptr>
.. ..$ profile: chr "tabular-data-resource"
.. ..$ schema :List of 1
.. .. ..$ fields:List of 5
.. .. .. ..$ :List of 2
.. .. .. .. ..$ name: chr "Sepal.Length"
.. .. .. .. ..$ type: chr "number"
.. .. .. ..$ :List of 2
.. .. .. .. ..$ name: chr "Sepal.Width"
.. .. .. .. ..$ type: chr "number"
.. .. .. ..$ :List of 2
.. .. .. .. ..$ name: chr "Petal.Length"
.. .. .. .. ..$ type: chr "number"
.. .. .. ..$ :List of 2
.. .. .. .. ..$ name: chr "Petal.Width"
.. .. .. .. ..$ type: chr "number"
.. .. .. ..$ :List of 3
.. .. .. .. ..$ name : chr "Species"
.. .. .. .. ..$ type : chr "string"
.. .. .. .. ..$ constraints:List of 1
.. .. .. .. .. ..$ enum: chr [1:3] "setosa" "versicolor" "virginica"
$ directory: chr "."
Now, it’s time to share our Tabular Data Package. Write our Tabular Data Package to our disk:
# Write the package to the target directory.
write_package(
  my_data_package,
  directory = "tabular-data-package/iris-r"
)

# Bundle the descriptor and the data file into one archive for sharing.
# "--junk-paths" is passed through to the external archiver as an extra flag.
# NOTE(review): `zip` names the external command that utils::zip() runs;
# confirm that "gzip" accepts these arguments on your system.
zip(
  zipfile = "tabular-data-package/iris-r.zip",
  files = c(
    "tabular-data-package/iris-r/datapackage.json",
    "tabular-data-package/iris-r/iris.csv"
  ),
  extras = "--junk-paths",
  zip = "gzip" # This might not be required on many machines
)
Last, deposit our Tabular Data Package on a research repository.
25.1 Improve Tabular Data Package
You can add more information to our Tabular Data Package:
# Package-level descriptive metadata.
my_data_package$title <- "Collection of Edgar Anderson's Iris Data"
my_data_package$description <- "Collection with the measurements in centimeters of the variables sepal length and width and petal length and width."
my_data_package$contributors <- list(
  role = "author",
  title = "Edgar Anderson"
)
# Use `$` to attach the license to the package; `my_data_package.licenses`
# would create an unrelated variable and leave the package unchanged.
my_data_package$licenses <- list(
  name = "CC-BY-4.0",
  path = "https://creativecommons.org/licenses/by/4.0/",
  title = "Creative Commons Attribution 4.0"
)
Inspect the Tabular Data Package:
# Print the nested structure of the package object.
str(my_data_package)
List of 6
$ profile : chr "tabular-data-package"
$ resources :List of 1
..$ :List of 4
.. ..$ name : chr "iris"
.. ..$ data : spec_tbl_df [150 × 5] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
.. .. ..$ Sepal.Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
.. .. ..$ Sepal.Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
.. .. ..$ Petal.Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
.. .. ..$ Petal.Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
.. .. ..$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
.. .. ..- attr(*, "spec")=
.. .. .. .. cols(
.. .. .. .. Sepal.Length = col_double(),
.. .. .. .. Sepal.Width = col_double(),
.. .. .. .. Petal.Length = col_double(),
.. .. .. .. Petal.Width = col_double(),
.. .. .. .. Species = col_character()
.. .. .. .. )
.. .. ..- attr(*, "problems")=<externalptr>
.. ..$ profile: chr "tabular-data-resource"
.. ..$ schema :List of 1
.. .. ..$ fields:List of 5
.. .. .. ..$ :List of 2
.. .. .. .. ..$ name: chr "Sepal.Length"
.. .. .. .. ..$ type: chr "number"
.. .. .. ..$ :List of 2
.. .. .. .. ..$ name: chr "Sepal.Width"
.. .. .. .. ..$ type: chr "number"
.. .. .. ..$ :List of 2
.. .. .. .. ..$ name: chr "Petal.Length"
.. .. .. .. ..$ type: chr "number"
.. .. .. ..$ :List of 2
.. .. .. .. ..$ name: chr "Petal.Width"
.. .. .. .. ..$ type: chr "number"
.. .. .. ..$ :List of 3
.. .. .. .. ..$ name : chr "Species"
.. .. .. .. ..$ type : chr "string"
.. .. .. .. ..$ constraints:List of 1
.. .. .. .. .. ..$ enum: chr [1:3] "setosa" "versicolor" "virginica"
$ directory : chr "."
$ title : chr "Collection of Edgar Anderson's Iris Data"
$ description : chr "Collection with the measurements in centimeters of the variables sepal length and width and petal length and width."
$ contributors:List of 2
..$ role : chr "author"
..$ title: chr "Edgar Anderson"
25.2 Improve Tabular Data Resource
You can add more information to resources in our Tabular Data Package:
# Describe the first (and only) resource in the package.
my_data_package$resources[[1]]$title <- "Edgar Anderson's Iris Data"
my_data_package$resources[[1]]$description <- "The measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are Iris setosa, versicolor, and virginica."
Inspect the Tabular Data Package:
# Print the nested structure of the package object.
str(my_data_package)
List of 6
$ profile : chr "tabular-data-package"
$ resources :List of 1
..$ :List of 6
.. ..$ name : chr "iris"
.. ..$ data : spec_tbl_df [150 × 5] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
.. .. ..$ Sepal.Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
.. .. ..$ Sepal.Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
.. .. ..$ Petal.Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
.. .. ..$ Petal.Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
.. .. ..$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
.. .. ..- attr(*, "spec")=
.. .. .. .. cols(
.. .. .. .. Sepal.Length = col_double(),
.. .. .. .. Sepal.Width = col_double(),
.. .. .. .. Petal.Length = col_double(),
.. .. .. .. Petal.Width = col_double(),
.. .. .. .. Species = col_character()
.. .. .. .. )
.. .. ..- attr(*, "problems")=<externalptr>
.. ..$ profile : chr "tabular-data-resource"
.. ..$ schema :List of 1
.. .. ..$ fields:List of 5
.. .. .. ..$ :List of 2
.. .. .. .. ..$ name: chr "Sepal.Length"
.. .. .. .. ..$ type: chr "number"
.. .. .. ..$ :List of 2
.. .. .. .. ..$ name: chr "Sepal.Width"
.. .. .. .. ..$ type: chr "number"
.. .. .. ..$ :List of 2
.. .. .. .. ..$ name: chr "Petal.Length"
.. .. .. .. ..$ type: chr "number"
.. .. .. ..$ :List of 2
.. .. .. .. ..$ name: chr "Petal.Width"
.. .. .. .. ..$ type: chr "number"
.. .. .. ..$ :List of 3
.. .. .. .. ..$ name : chr "Species"
.. .. .. .. ..$ type : chr "string"
.. .. .. .. ..$ constraints:List of 1
.. .. .. .. .. ..$ enum: chr [1:3] "setosa" "versicolor" "virginica"
.. ..$ title : chr "Edgar Anderson's Iris Data"
.. ..$ description: chr "The measurements in centimeters of the variables sepal length and width and petal length and width, respectivel"| __truncated__
$ directory : chr "."
$ title : chr "Collection of Edgar Anderson's Iris Data"
$ description : chr "Collection with the measurements in centimeters of the variables sepal length and width and petal length and width."
$ contributors:List of 2
..$ role : chr "author"
..$ title: chr "Edgar Anderson"
25.3 Improve Table Schema
The R frictionless package already added a Table Schema to the resources in our Tabular Data Package, including the constraints on the column Species. If needed, the Table Schema can be modified:
# Human-readable titles for the four measurement fields.
my_data_package$resources[[1]]$schema$fields[[1]]$title <- "Sepal's Length"
my_data_package$resources[[1]]$schema$fields[[2]]$title <- "Sepal's Width"
my_data_package$resources[[1]]$schema$fields[[3]]$title <- "Petal's Length"
my_data_package$resources[[1]]$schema$fields[[4]]$title <- "Petal's Width"

# The four measurement fields share the same description.
my_data_package$resources[[1]]$schema$fields[[1]]$description <- "Measurements in centimeters"
my_data_package$resources[[1]]$schema$fields[[2]]$description <- "Measurements in centimeters"
my_data_package$resources[[1]]$schema$fields[[3]]$description <- "Measurements in centimeters"
my_data_package$resources[[1]]$schema$fields[[4]]$description <- "Measurements in centimeters"

# Title for the fifth field (Species).
my_data_package$resources[[1]]$schema$fields[[5]]$title <- "Species"
Inspect the Tabular Data Package:
# Print the nested structure of the package object.
str(my_data_package)
List of 6
$ profile : chr "tabular-data-package"
$ resources :List of 1
..$ :List of 6
.. ..$ name : chr "iris"
.. ..$ data : spec_tbl_df [150 × 5] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
.. .. ..$ Sepal.Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
.. .. ..$ Sepal.Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
.. .. ..$ Petal.Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
.. .. ..$ Petal.Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
.. .. ..$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
.. .. ..- attr(*, "spec")=
.. .. .. .. cols(
.. .. .. .. Sepal.Length = col_double(),
.. .. .. .. Sepal.Width = col_double(),
.. .. .. .. Petal.Length = col_double(),
.. .. .. .. Petal.Width = col_double(),
.. .. .. .. Species = col_character()
.. .. .. .. )
.. .. ..- attr(*, "problems")=<externalptr>
.. ..$ profile : chr "tabular-data-resource"
.. ..$ schema :List of 1
.. .. ..$ fields:List of 5
.. .. .. ..$ :List of 4
.. .. .. .. ..$ name : chr "Sepal.Length"
.. .. .. .. ..$ type : chr "number"
.. .. .. .. ..$ title : chr "Sepal's Length"
.. .. .. .. ..$ description: chr "Measurements in centimeters"
.. .. .. ..$ :List of 4
.. .. .. .. ..$ name : chr "Sepal.Width"
.. .. .. .. ..$ type : chr "number"
.. .. .. .. ..$ title : chr "Sepal's Width"
.. .. .. .. ..$ description: chr "Measurements in centimeters"
.. .. .. ..$ :List of 4
.. .. .. .. ..$ name : chr "Petal.Length"
.. .. .. .. ..$ type : chr "number"
.. .. .. .. ..$ title : chr "Petal's Length"
.. .. .. .. ..$ description: chr "Measurements in centimeters"
.. .. .. ..$ :List of 4
.. .. .. .. ..$ name : chr "Petal.Width"
.. .. .. .. ..$ type : chr "number"
.. .. .. .. ..$ title : chr "Petal's Width"
.. .. .. .. ..$ description: chr "Measurements in centimeters"
.. .. .. ..$ :List of 4
.. .. .. .. ..$ name : chr "Species"
.. .. .. .. ..$ type : chr "string"
.. .. .. .. ..$ constraints:List of 1
.. .. .. .. .. ..$ enum: chr [1:3] "setosa" "versicolor" "virginica"
.. .. .. .. ..$ title : chr "Species"
.. ..$ title : chr "Edgar Anderson's Iris Data"
.. ..$ description: chr "The measurements in centimeters of the variables sepal length and width and petal length and width, respectivel"| __truncated__
$ directory : chr "."
$ title : chr "Collection of Edgar Anderson's Iris Data"
$ description : chr "Collection with the measurements in centimeters of the variables sepal length and width and petal length and width."
$ contributors:List of 2
..$ role : chr "author"
..$ title: chr "Edgar Anderson"
25.4 Replace Data Resource Location
You can replace local resources with a copy hosted on the internet:
# Drop the in-memory data from the resource and point it at a remote CSV.
my_data_package$resources[[1]]$data <- NULL
my_data_package$resources[[1]]$path <- "https://zenodo.org/record/1319069/files/iris.csv?download=1"
Inspect the Tabular Data Package:
# Print the nested structure of the package object.
str(my_data_package)
List of 6
$ profile : chr "tabular-data-package"
$ resources :List of 1
..$ :List of 6
.. ..$ name : chr "iris"
.. ..$ profile : chr "tabular-data-resource"
.. ..$ schema :List of 1
.. .. ..$ fields:List of 5
.. .. .. ..$ :List of 4
.. .. .. .. ..$ name : chr "Sepal.Length"
.. .. .. .. ..$ type : chr "number"
.. .. .. .. ..$ title : chr "Sepal's Length"
.. .. .. .. ..$ description: chr "Measurements in centimeters"
.. .. .. ..$ :List of 4
.. .. .. .. ..$ name : chr "Sepal.Width"
.. .. .. .. ..$ type : chr "number"
.. .. .. .. ..$ title : chr "Sepal's Width"
.. .. .. .. ..$ description: chr "Measurements in centimeters"
.. .. .. ..$ :List of 4
.. .. .. .. ..$ name : chr "Petal.Length"
.. .. .. .. ..$ type : chr "number"
.. .. .. .. ..$ title : chr "Petal's Length"
.. .. .. .. ..$ description: chr "Measurements in centimeters"
.. .. .. ..$ :List of 4
.. .. .. .. ..$ name : chr "Petal.Width"
.. .. .. .. ..$ type : chr "number"
.. .. .. .. ..$ title : chr "Petal's Width"
.. .. .. .. ..$ description: chr "Measurements in centimeters"
.. .. .. ..$ :List of 4
.. .. .. .. ..$ name : chr "Species"
.. .. .. .. ..$ type : chr "string"
.. .. .. .. ..$ constraints:List of 1
.. .. .. .. .. ..$ enum: chr [1:3] "setosa" "versicolor" "virginica"
.. .. .. .. ..$ title : chr "Species"
.. ..$ title : chr "Edgar Anderson's Iris Data"
.. ..$ description: chr "The measurements in centimeters of the variables sepal length and width and petal length and width, respectivel"| __truncated__
.. ..$ path : chr "https://zenodo.org/record/1319069/files/iris.csv?download=1"
$ directory : chr "."
$ title : chr "Collection of Edgar Anderson's Iris Data"
$ description : chr "Collection with the measurements in centimeters of the variables sepal length and width and petal length and width."
$ contributors:List of 2
..$ role : chr "author"
..$ title: chr "Edgar Anderson"
Write our Tabular Data Package to our disk:
# Write the updated package to the output directory.
write_package(
  my_data_package,
  directory = "tabular-data-package/iris-r"
)
And last, let’s deposit our Tabular Data Package on a research repository.