From b2c9942e5c918b5f8831918e6aa4b4d85d841d86 Mon Sep 17 00:00:00 2001 From: Gregor Gorjanc Date: Tue, 27 Jan 2026 02:20:39 +0000 Subject: [PATCH 1/7] Prepping for release --- README.md | 9 +- RcppTskit/.Rbuildignore | 1 + RcppTskit/DESCRIPTION | 28 +- RcppTskit/LICENSE | 2 +- RcppTskit/LICENSE.note | 2 + RcppTskit/R/Class-TreeSequence.R | 62 +++- RcppTskit/R/RcppExports.R | 4 - RcppTskit/R/RcppTskit-package.R | 9 +- RcppTskit/cran-comments.md | 5 + RcppTskit/man/RcppTskit-package.Rd | 21 +- RcppTskit/man/TreeSequence.Rd | 314 +++++++++++++++--- RcppTskit/notes_pkg_dev.Rmd | 52 +++ RcppTskit/src/RcppExports.cpp | 12 - RcppTskit/src/RcppTskit.cpp | 36 -- .../testthat/test_load_summary_and_dump.R | 14 + RcppTskit/vignettes/RcppTskit_intro.qmd | 89 ++--- 16 files changed, 488 insertions(+), 172 deletions(-) create mode 100644 RcppTskit/LICENSE.note create mode 100644 RcppTskit/cran-comments.md diff --git a/README.md b/README.md index 802ba14..a430bf6 100644 --- a/README.md +++ b/README.md @@ -2,12 +2,13 @@ ## Overview -Tskit enables performant storage, manipulation, and analysis of ancestral +`Tskit` enables performant storage, manipulation, and analysis of ancestral recombination graphs (ARGs) using succinct tree sequence encoding. See https://tskit.dev for project news, documentation, and tutorials. -Tskit provides Python, C, and Rust APIs. The Python API can be called from R -via the `reticulate` R package to seamlessly load and analyse a tree sequence, -as described at https://tskit.dev/tutorials/RcppTskit.html. +`Tskit` provides Python, C, and Rust application programming interfaces (APIs). +The Python API can be called from R via the `reticulate` R package to +seamlessly load and analyse a tree sequence, as described at +https://tskit.dev/tutorials/tskitr.html. `RcppTskit` provides R access to the `tskit` C API for use cases where the `reticulate` option is not optimal. For example, for high-performance and low-level work with tree sequences. 
Currently, `RcppTskit` provides a limited diff --git a/RcppTskit/.Rbuildignore b/RcppTskit/.Rbuildignore index 3d4c74b..c48f6cd 100644 --- a/RcppTskit/.Rbuildignore +++ b/RcppTskit/.Rbuildignore @@ -10,6 +10,7 @@ ^\.covrignore$ ^\.github$ ^codecov\.yaml$ +^cran-comments\.md$ ^inst/examples/create_test\.trees\.R$ ^inst/examples/create_test\.trees\.py$ ^notes_pkg_dev\.Rmd$ diff --git a/RcppTskit/DESCRIPTION b/RcppTskit/DESCRIPTION index 18e10ed..e9c4cb3 100644 --- a/RcppTskit/DESCRIPTION +++ b/RcppTskit/DESCRIPTION @@ -1,18 +1,22 @@ Type: Package Package: RcppTskit -Title: R access to the tskit C API +Title: R Access to the Tskit C API Version: 0.1.0 -Date: 2026-01-2607 -Authors@R: - person("Gregor", "Gorjanc", , "gregor.gorjanc@gmail.com", role = c("aut", "cre"), - comment = c(ORCID = "0000-0001-8008-2787")) -Description: Tskit enables performant storage, manipulation, and analysis - of ancestral recombination graphs (ARGs) using succinct tree sequence - encoding. See https://tskit.dev for project news, documentation, and - tutorials. Tskit provides Python, C, and Rust APIs. The Python API - can be called from R via the `reticulate` R package to seamlessly load - and analyse a tree sequence as described at - https://tskit.dev/tutorials/RcppTskit.html. `RcppTskit` provides R +Date: 2026-01-27 +Authors@R: c( + person("Gregor", "Gorjanc", , "gregor.gorjanc@gmail.com", role = c("aut", "cre", "cph"), + comment = c(ORCID = "0000-0001-8008-2787")), + person("Tskit Developers", role = "cph", + comment = "Authors of included tskit C library") + ) +Description: `Tskit` enables performant storage, manipulation, and + analysis of ancestral recombination graphs (ARGs) using succinct tree + sequence encoding. See https://tskit.dev for project news, + documentation, and tutorials. `Tskit` provides Python, C, and Rust + application programming interfaces (APIs). 
The Python API can be + called from R via the `reticulate` R package to seamlessly load and + analyse a tree sequence as described at + https://tskit.dev/tutorials/tskitr.html. `RcppTskit` provides R access to the `tskit` C API for use cases where the `reticulate` option is not optimal. For example, for high-performance and low-level work with tree sequences. Currently, `RcppTskit` provides a limited diff --git a/RcppTskit/LICENSE b/RcppTskit/LICENSE index 3c9b7a0..28bd2b6 100644 --- a/RcppTskit/LICENSE +++ b/RcppTskit/LICENSE @@ -1,2 +1,2 @@ -YEAR: 2025 +YEAR: 2026 COPYRIGHT HOLDER: Gregor Gorjanc diff --git a/RcppTskit/LICENSE.note b/RcppTskit/LICENSE.note new file mode 100644 index 0000000..e1e89a0 --- /dev/null +++ b/RcppTskit/LICENSE.note @@ -0,0 +1,2 @@ +The tskit C library is distributed under the MIT license. +Full copies of the license are included in `inst/include/tskit/LICENSE` and `src/tskit/LICENSE`. diff --git a/RcppTskit/R/Class-TreeSequence.R b/RcppTskit/R/Class-TreeSequence.R index 504b572..ba15c79 100644 --- a/RcppTskit/R/Class-TreeSequence.R +++ b/RcppTskit/R/Class-TreeSequence.R @@ -71,8 +71,8 @@ TreeSequence <- R6Class( }, #' @description Alias for \code{\link[=TreeSequence]{TreeSequence$dump}}. - #' @param file see - #' @param options see + #' @param file see \code{\link[=TreeSequence]{TreeSequence$dump}}. + #' @param options see \code{\link[=TreeSequence]{TreeSequence$dump}}. write = function(file, options = 0L) { self$dump(file = file, options = options) }, @@ -120,107 +120,137 @@ TreeSequence <- R6Class( }, #' @description Get the number of provenances in a tree sequence. + #' @examples #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") #' ts <- ts_load(ts_file) - #' ts_num_provenances(ts) + #' ts$num_provenances() num_provenances = function() { ts_num_provenances_ptr(self$pointer) }, #' @description Get the number of populations in a tree sequence. 
+ #' @examples #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") #' ts <- ts_load(ts_file) - #' ts_num_populations(ts) + #' ts$num_populations() num_populations = function() { ts_num_populations_ptr(self$pointer) }, #' @description Get the number of migrations in a tree sequence. + #' @examples #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") #' ts <- ts_load(ts_file) - #' ts_num_migrations(ts) + #' ts$num_migrations() num_migrations = function() { ts_num_migrations_ptr(self$pointer) }, #' @description Get the number of individuals in a tree sequence. + #' @examples #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") #' ts <- ts_load(ts_file) - #' ts_num_individuals(ts) + #' ts$num_individuals() num_individuals = function() { ts_num_individuals_ptr(self$pointer) }, #' @description Get the number of samples (of nodes) in a tree sequence. + #' @examples #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") #' ts <- ts_load(ts_file) - #' ts_num_samples(ts) + #' ts$num_samples() num_samples = function() { ts_num_samples_ptr(self$pointer) }, #' @description Get the number of nodes in a tree sequence. + #' @examples #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") #' ts <- ts_load(ts_file) - #' ts_num_nodes(ts) + #' ts$num_nodes() num_nodes = function() { ts_num_nodes_ptr(self$pointer) }, #' @description Get the number of edges in a tree sequence. + #' @examples #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") #' ts <- ts_load(ts_file) - #' ts_num_edges(ts) + #' ts$num_edges() num_edges = function() { ts_num_edges_ptr(self$pointer) }, #' @description Get the number of trees in a tree sequence. 
+ #' @examples #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") #' ts <- ts_load(ts_file) - #' ts_num_trees(ts) + #' ts$num_trees() num_trees = function() { ts_num_trees_ptr(self$pointer) }, #' @description Get the number of sites in a tree sequence. + #' @examples #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") #' ts <- ts_load(ts_file) - #' ts_num_sites(ts) + #' ts$num_sites() num_sites = function() { ts_num_sites_ptr(self$pointer) }, #' @description Get the number of mutations in a tree sequence. + #' @examples #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") #' ts <- ts_load(ts_file) - #' ts_num_mutations(ts) + #' ts$num_mutations() num_mutations = function() { ts_num_mutations_ptr(self$pointer) }, #' @description Get the sequence length. + #' @examples #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") #' ts <- ts_load(ts_file) - #' ts_sequence_length(ts) + #' ts$sequence_length() sequence_length = function() { ts_sequence_length_ptr(self$pointer) }, #' @description Get the time units string. + #' @examples #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") #' ts <- ts_load(ts_file) - #' ts_time_units(ts) + #' ts$time_units() time_units = function() { ts_time_units_ptr(self$pointer) }, + #' @description Get the min time in node table and mutation table. + #' @examples + #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") + #' ts <- ts_load(ts_file) + #' ts$min_time() + min_time = function() { + ts_min_time_ptr(self$pointer) + }, + + #' @description Get the max time in node table and mutation table. + #' @examples + #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") + #' ts <- ts_load(ts_file) + #' ts$max_time() + max_time = function() { + ts_max_time_ptr(self$pointer) + }, + #' @description Get the length of metadata in a tree sequence and its tables. #' @return A named list with the length of metadata. 
#' @examples - #'ts_file <- system.file("examples/test.trees", package = "RcppTskit") + #' ts_file <- system.file("examples/test.trees", package = "RcppTskit") #' ts <- ts_load(ts_file) - #'ts$metadata_length() + #' ts$metadata_length() metadata_length = function() { ts_metadata_length_ptr(self$pointer) } diff --git a/RcppTskit/R/RcppExports.R b/RcppTskit/R/RcppExports.R index 9c267ce..ac3b830 100644 --- a/RcppTskit/R/RcppExports.R +++ b/RcppTskit/R/RcppExports.R @@ -95,10 +95,6 @@ ts_metadata_length_ptr <- function(ts) { .Call(`_RcppTskit_ts_metadata_length_ptr`, ts) } -ts_grow <- function(ts) { - .Call(`_RcppTskit_ts_grow`, ts) -} - test_tsk_bug_assert_c <- function() { invisible(.Call(`_RcppTskit_test_tsk_bug_assert_c`)) } diff --git a/RcppTskit/R/RcppTskit-package.R b/RcppTskit/R/RcppTskit-package.R index 037512c..4876a8e 100644 --- a/RcppTskit/R/RcppTskit-package.R +++ b/RcppTskit/R/RcppTskit-package.R @@ -1,12 +1,13 @@ # Contains the package description and .onLoad() function #' @description -#' Tskit enables performant storage, manipulation, and analysis of +#' `Tskit` enables performant storage, manipulation, and analysis of #' ancestral recombination graphs (ARGs) using succinct tree sequence encoding. #' See https://tskit.dev for project news, documentation, and tutorials. -#' Tskit provides Python, C, and Rust APIs. The Python API can be called from R -#' via the `reticulate` R package to seamlessly load and analyse a tree sequence -#' as described at https://tskit.dev/tutorials/tskitr.html. +#' `Tskit` provides Python, C, and Rust application programming interfaces (APIs). +#' The Python API can be called from R via the `reticulate` R package to +#' seamlessly load and analyse a tree sequence as described at +#' https://tskit.dev/tutorials/tskitr.html. #' `RcppTskit` provides R access to the `tskit` C API for use cases where the #' `reticulate` option is not optimal. For example, for high-performance #' and low-level work with tree sequences. 
Currently, `RcppTskit` provides a diff --git a/RcppTskit/cran-comments.md b/RcppTskit/cran-comments.md new file mode 100644 index 0000000..858617d --- /dev/null +++ b/RcppTskit/cran-comments.md @@ -0,0 +1,5 @@ +## R CMD check results + +0 errors | 0 warnings | 1 note + +* This is a new release. diff --git a/RcppTskit/man/RcppTskit-package.Rd b/RcppTskit/man/RcppTskit-package.Rd index ece18c5..308d531 100644 --- a/RcppTskit/man/RcppTskit-package.Rd +++ b/RcppTskit/man/RcppTskit-package.Rd @@ -4,14 +4,15 @@ \name{RcppTskit-package} \alias{RcppTskit} \alias{RcppTskit-package} -\title{RcppTskit: R access to the tskit C API} +\title{RcppTskit: R Access to the Tskit C API} \description{ -Tskit enables performant storage, manipulation, and analysis of +`Tskit` enables performant storage, manipulation, and analysis of ancestral recombination graphs (ARGs) using succinct tree sequence encoding. See https://tskit.dev for project news, documentation, and tutorials. -Tskit provides Python, C, and Rust APIs. The Python API can be called from R -via the `reticulate` R package to seamlessly load and analyse a tree sequence -as described at https://tskit.dev/tutorials/tskitr.html. +`Tskit` provides Python, C, and Rust application programming interfaces (APIs). +The Python API can be called from R via the `reticulate` R package to +seamlessly load and analyse a tree sequence as described at +https://tskit.dev/tutorials/tskitr.html. `RcppTskit` provides R access to the `tskit` C API for use cases where the `reticulate` option is not optimal. For example, for high-performance and low-level work with tree sequences. 
Currently, `RcppTskit` provides a @@ -24,12 +25,18 @@ vignette(package="RcppTskit") \seealso{ Useful links: \itemize{ - \item \url{https://github.com/highlanderlab/RcppTskit} + \item \url{https://github.com/HighlanderLab/RcppTskit} + \item Report bugs at \url{https://github.com/HighlanderLab/RcppTskit/issues} } } \author{ -\strong{Maintainer}: Gregor Gorjanc \email{gregor.gorjanc@gmail.com} (\href{https://orcid.org/0000-0001-8008-2787}{ORCID}) +\strong{Maintainer}: Gregor Gorjanc \email{gregor.gorjanc@gmail.com} (\href{https://orcid.org/0000-0001-8008-2787}{ORCID}) [copyright holder] + +Other contributors: +\itemize{ + \item Tskit Developers (Authors of included tskit C library) [copyright holder] +} } \keyword{internal} diff --git a/RcppTskit/man/TreeSequence.Rd b/RcppTskit/man/TreeSequence.Rd index e833360..da180b6 100644 --- a/RcppTskit/man/TreeSequence.Rd +++ b/RcppTskit/man/TreeSequence.Rd @@ -60,6 +60,118 @@ ts_py$num_samples # 160 ts2_py <- ts_py$simplify(samples = c(0L, 1L, 2L, 3L)) ts2_py$num_samples # 4 +## ------------------------------------------------ +## Method `TreeSequence$num_provenances` +## ------------------------------------------------ + +ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_provenances() + +## ------------------------------------------------ +## Method `TreeSequence$num_populations` +## ------------------------------------------------ + +ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_populations() + +## ------------------------------------------------ +## Method `TreeSequence$num_migrations` +## ------------------------------------------------ + +ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_migrations() + +## ------------------------------------------------ +## Method `TreeSequence$num_individuals` +## ------------------------------------------------ + +ts_file <- 
system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_individuals() + +## ------------------------------------------------ +## Method `TreeSequence$num_samples` +## ------------------------------------------------ + +ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_samples() + +## ------------------------------------------------ +## Method `TreeSequence$num_nodes` +## ------------------------------------------------ + +ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_nodes() + +## ------------------------------------------------ +## Method `TreeSequence$num_edges` +## ------------------------------------------------ + +ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_edges() + +## ------------------------------------------------ +## Method `TreeSequence$num_trees` +## ------------------------------------------------ + +ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_trees() + +## ------------------------------------------------ +## Method `TreeSequence$num_sites` +## ------------------------------------------------ + +ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_sites() + +## ------------------------------------------------ +## Method `TreeSequence$num_mutations` +## ------------------------------------------------ + +ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_mutations() + +## ------------------------------------------------ +## Method `TreeSequence$sequence_length` +## ------------------------------------------------ + +ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$sequence_length() + +## ------------------------------------------------ +## Method 
`TreeSequence$time_units` +## ------------------------------------------------ + +ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$time_units() + +## ------------------------------------------------ +## Method `TreeSequence$min_time` +## ------------------------------------------------ + +ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$min_time() + +## ------------------------------------------------ +## Method `TreeSequence$max_time` +## ------------------------------------------------ + +ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$max_time() + ## ------------------------------------------------ ## Method `TreeSequence$metadata_length` ## ------------------------------------------------ @@ -101,6 +213,8 @@ ts$metadata_length() \item \href{#method-TreeSequence-num_mutations}{\code{TreeSequence$num_mutations()}} \item \href{#method-TreeSequence-sequence_length}{\code{TreeSequence$sequence_length()}} \item \href{#method-TreeSequence-time_units}{\code{TreeSequence$time_units()}} +\item \href{#method-TreeSequence-min_time}{\code{TreeSequence$min_time()}} +\item \href{#method-TreeSequence-max_time}{\code{TreeSequence$max_time()}} \item \href{#method-TreeSequence-metadata_length}{\code{TreeSequence$metadata_length()}} \item \href{#method-TreeSequence-clone}{\code{TreeSequence$clone()}} } @@ -193,9 +307,9 @@ Alias for \code{\link[=TreeSequence]{TreeSequence$dump}}. \subsection{Arguments}{ \if{html}{\out{
}} \describe{ -\item{\code{file}}{see} +\item{\code{file}}{see \code{\link[=TreeSequence]{TreeSequence$dump}}.} -\item{\code{options}}{see} +\item{\code{options}}{see \code{\link[=TreeSequence]{TreeSequence$dump}}.} } \if{html}{\out{
}} } @@ -273,156 +387,280 @@ ts2_py$num_samples # 4 \if{latex}{\out{\hypertarget{method-TreeSequence-num_provenances}{}}} \subsection{Method \code{num_provenances()}}{ Get the number of provenances in a tree sequence. -ts_file <- system.file("examples/test.trees", package = "RcppTskit") -ts <- ts_load(ts_file) -ts_num_provenances(ts) \subsection{Usage}{ \if{html}{\out{
}}\preformatted{TreeSequence$num_provenances()}\if{html}{\out{
}} } +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_provenances() +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-TreeSequence-num_populations}{}}} \subsection{Method \code{num_populations()}}{ Get the number of populations in a tree sequence. -ts_file <- system.file("examples/test.trees", package = "RcppTskit") -ts <- ts_load(ts_file) -ts_num_populations(ts) \subsection{Usage}{ \if{html}{\out{
}}\preformatted{TreeSequence$num_populations()}\if{html}{\out{
}} } +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_populations() +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-TreeSequence-num_migrations}{}}} \subsection{Method \code{num_migrations()}}{ Get the number of migrations in a tree sequence. -ts_file <- system.file("examples/test.trees", package = "RcppTskit") -ts <- ts_load(ts_file) -ts_num_migrations(ts) \subsection{Usage}{ \if{html}{\out{
}}\preformatted{TreeSequence$num_migrations()}\if{html}{\out{
}} } +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_migrations() +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-TreeSequence-num_individuals}{}}} \subsection{Method \code{num_individuals()}}{ Get the number of individuals in a tree sequence. -ts_file <- system.file("examples/test.trees", package = "RcppTskit") -ts <- ts_load(ts_file) -ts_num_individuals(ts) \subsection{Usage}{ \if{html}{\out{
}}\preformatted{TreeSequence$num_individuals()}\if{html}{\out{
}} } +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_individuals() +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-TreeSequence-num_samples}{}}} \subsection{Method \code{num_samples()}}{ Get the number of samples (of nodes) in a tree sequence. -ts_file <- system.file("examples/test.trees", package = "RcppTskit") -ts <- ts_load(ts_file) -ts_num_samples(ts) \subsection{Usage}{ \if{html}{\out{
}}\preformatted{TreeSequence$num_samples()}\if{html}{\out{
}} } +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_samples() +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-TreeSequence-num_nodes}{}}} \subsection{Method \code{num_nodes()}}{ Get the number of nodes in a tree sequence. -ts_file <- system.file("examples/test.trees", package = "RcppTskit") -ts <- ts_load(ts_file) -ts_num_nodes(ts) \subsection{Usage}{ \if{html}{\out{
}}\preformatted{TreeSequence$num_nodes()}\if{html}{\out{
}} } +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_nodes() +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-TreeSequence-num_edges}{}}} \subsection{Method \code{num_edges()}}{ Get the number of edges in a tree sequence. -ts_file <- system.file("examples/test.trees", package = "RcppTskit") -ts <- ts_load(ts_file) -ts_num_edges(ts) \subsection{Usage}{ \if{html}{\out{
}}\preformatted{TreeSequence$num_edges()}\if{html}{\out{
}} } +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_edges() +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-TreeSequence-num_trees}{}}} \subsection{Method \code{num_trees()}}{ Get the number of trees in a tree sequence. -ts_file <- system.file("examples/test.trees", package = "RcppTskit") -ts <- ts_load(ts_file) -ts_num_trees(ts) \subsection{Usage}{ \if{html}{\out{
}}\preformatted{TreeSequence$num_trees()}\if{html}{\out{
}} } +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_trees() +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-TreeSequence-num_sites}{}}} \subsection{Method \code{num_sites()}}{ Get the number of sites in a tree sequence. -ts_file <- system.file("examples/test.trees", package = "RcppTskit") -ts <- ts_load(ts_file) -ts_num_sites(ts) \subsection{Usage}{ \if{html}{\out{
}}\preformatted{TreeSequence$num_sites()}\if{html}{\out{
}} } +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_sites() +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-TreeSequence-num_mutations}{}}} \subsection{Method \code{num_mutations()}}{ Get the number of mutations in a tree sequence. -ts_file <- system.file("examples/test.trees", package = "RcppTskit") -ts <- ts_load(ts_file) -ts_num_mutations(ts) \subsection{Usage}{ \if{html}{\out{
}}\preformatted{TreeSequence$num_mutations()}\if{html}{\out{
}} } +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$num_mutations() +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-TreeSequence-sequence_length}{}}} \subsection{Method \code{sequence_length()}}{ Get the sequence length. -ts_file <- system.file("examples/test.trees", package = "RcppTskit") -ts <- ts_load(ts_file) -ts_sequence_length(ts) \subsection{Usage}{ \if{html}{\out{
}}\preformatted{TreeSequence$sequence_length()}\if{html}{\out{
}} } +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$sequence_length() +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-TreeSequence-time_units}{}}} \subsection{Method \code{time_units()}}{ Get the time units string. -ts_file <- system.file("examples/test.trees", package = "RcppTskit") -ts <- ts_load(ts_file) -ts_time_units(ts) \subsection{Usage}{ \if{html}{\out{
}}\preformatted{TreeSequence$time_units()}\if{html}{\out{
}} } +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$time_units() +} +\if{html}{\out{
}} + +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-TreeSequence-min_time}{}}} +\subsection{Method \code{min_time()}}{ +Get the min time in node table and mutation table. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{TreeSequence$min_time()}\if{html}{\out{
}} +} + +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$min_time() +} +\if{html}{\out{
}} + +} + +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-TreeSequence-max_time}{}}} +\subsection{Method \code{max_time()}}{ +Get the max time in node table and mutation table. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{TreeSequence$max_time()}\if{html}{\out{
}} +} + +\subsection{Examples}{ +\if{html}{\out{
}} +\preformatted{ts_file <- system.file("examples/test.trees", package = "RcppTskit") +ts <- ts_load(ts_file) +ts$max_time() +} +\if{html}{\out{
}} + +} + } \if{html}{\out{
}} \if{html}{\out{}} diff --git a/RcppTskit/notes_pkg_dev.Rmd b/RcppTskit/notes_pkg_dev.Rmd index cc73e1b..230ceb8 100644 --- a/RcppTskit/notes_pkg_dev.Rmd +++ b/RcppTskit/notes_pkg_dev.Rmd @@ -2,6 +2,11 @@ ## Next TODOs +TODO: Add citation for tskit to DESCRIPTION file #46 + https://github.com/HighlanderLab/RcppTskit/issues/46 + +# TODO: Create a minimal package to demonstrate how to link against RcppTskit and call tskit C API + # Release (TODO) # TODO: Tag a release #15 # https://github.com/HighlanderLab/RcppTskit/issues/15 @@ -17,6 +22,42 @@ remotes::install_github("HighlanderLab/RcppTskit/RcppTskit") # TODO: Publish on CRAN #14 # https://github.com/HighlanderLab/RcppTskit/issues/14 +// # nocov start +int table_collection_num_nodes_zero_check() { + tsk_table_collection_t tables; + int ret = tsk_table_collection_init(&tables, 0); + if (ret != 0) { + tsk_table_collection_free(&tables); + Rcpp::stop(tsk_strerror(ret)); + } + int n = static_cast<int>(tables.nodes.num_rows); + tsk_table_collection_free(&tables); + return n; +} +// # nocov end + +// TODO: This will go into AlphaSimR +// [[Rcpp::export]] +SEXP ts_grow(SEXP ts) { + RcppTskit_treeseq_xptr ts_xptr(ts); + int ret; + ret = 0; + // TODO: What do we need to do here now? How do we grow a tree sequence? + // Look into the simple example in C code online or look into what SLiM + // is doing!? + if (ret != 0) { + // TODO: What should we do if something goes wrong? We can clearly throw an + // error using Rcpp::stop(), but should we also do something with the + // ts pointer and object? If we delete, we discard/delete past work, + // but if we don't, do we risk of returning a corrupted ts? 
+ // tsk_treeseq_free(ts_ptr); + // delete ts_ptr; + Rcpp::stop(tsk_strerror(ret)); + } + return ts_xptr; + // return ret; +} + * This PR nicely shows how slendr is dealing with ts objects (it saves various ts information as attributes) https://github.com/bodkan/slendr/pull/191/changes#diff-f46eb0da2f9267022ecc6e09316598fde6bdcd2f980963906dc041b5096f344f @@ -101,6 +142,17 @@ PATCH version when you make backward compatible bug fixes ### Security (vulnerabilities) ``` +## Prepare for release + +``` +use_upkeep_issue(year = NULL) # https://usethis.r-lib.org/reference/use_upkeep_issue.html +use_release_issue(version = NULL) # https://usethis.r-lib.org/reference/use_release_issue.html +use_cran_comments() # https://usethis.r-lib.org/reference/use_cran_comments.html +use_github_release(publish = TRUE) # https://usethis.r-lib.org/reference/use_github_release.html +devtools::release(pkg = ".", check = TRUE) +# pkg The package to use, can be a file path to the package or a package object. See as.package() for more information +``` + ## Setup ``` diff --git a/RcppTskit/src/RcppExports.cpp b/RcppTskit/src/RcppExports.cpp index cfeebb1..0769daf 100644 --- a/RcppTskit/src/RcppExports.cpp +++ b/RcppTskit/src/RcppExports.cpp @@ -230,17 +230,6 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// ts_grow -SEXP ts_grow(SEXP ts); -RcppExport SEXP _RcppTskit_ts_grow(SEXP tsSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< SEXP >::type ts(tsSEXP); - rcpp_result_gen = Rcpp::wrap(ts_grow(ts)); - return rcpp_result_gen; -END_RCPP -} // test_tsk_bug_assert_c void test_tsk_bug_assert_c(); RcppExport SEXP _RcppTskit_test_tsk_bug_assert_c() { @@ -299,7 +288,6 @@ static const R_CallMethodDef CallEntries[] = { {"_RcppTskit_ts_max_time_ptr", (DL_FUNC) &_RcppTskit_ts_max_time_ptr, 1}, {"_RcppTskit_ts_summary_ptr", (DL_FUNC) &_RcppTskit_ts_summary_ptr, 1}, {"_RcppTskit_ts_metadata_length_ptr", (DL_FUNC) 
&_RcppTskit_ts_metadata_length_ptr, 1}, - {"_RcppTskit_ts_grow", (DL_FUNC) &_RcppTskit_ts_grow, 1}, {"_RcppTskit_test_tsk_bug_assert_c", (DL_FUNC) &_RcppTskit_test_tsk_bug_assert_c, 0}, {"_RcppTskit_test_tsk_bug_assert_cpp", (DL_FUNC) &_RcppTskit_test_tsk_bug_assert_cpp, 0}, {"_RcppTskit_test_tsk_trace_error_c", (DL_FUNC) &_RcppTskit_test_tsk_trace_error_c, 0}, diff --git a/RcppTskit/src/RcppTskit.cpp b/RcppTskit/src/RcppTskit.cpp index 6c722e0..6e1863b 100644 --- a/RcppTskit/src/RcppTskit.cpp +++ b/RcppTskit/src/RcppTskit.cpp @@ -341,39 +341,3 @@ Rcpp::String ts_metadata_ptr(const SEXP ts) { return Rcpp::String(metadata); } // # nocov end - -// # nocov start -int table_collection_num_nodes_zero_check() { - tsk_table_collection_t tables; - int ret = tsk_table_collection_init(&tables, 0); - if (ret != 0) { - tsk_table_collection_free(&tables); - Rcpp::stop(tsk_strerror(ret)); - } - int n = static_cast<int>(tables.nodes.num_rows); - tsk_table_collection_free(&tables); - return n; -} -// # nocov end - -// TODO: This will go into AlphaSimR -// [[Rcpp::export]] -SEXP ts_grow(SEXP ts) { - RcppTskit_treeseq_xptr ts_xptr(ts); - int ret; - ret = 0; - // TODO: What do we need to do here now? How do we grow a tree sequence? - // Look into the simple example in C code online or look into what SLiM - // is doing!? - if (ret != 0) { - // TODO: What should we do if something goes wrong? We can clearly throw an - // error using Rcpp::stop(), but should we also do something with the - // ts pointer and object? If we delete, we discard/delete past work, - // but if we don't, do we risk of returning a corrupted ts? 
- // tsk_treeseq_free(ts_ptr); - // delete ts_ptr; - Rcpp::stop(tsk_strerror(ret)); - } - return ts_xptr; - // return ret; -} diff --git a/RcppTskit/tests/testthat/test_load_summary_and_dump.R b/RcppTskit/tests/testthat/test_load_summary_and_dump.R index d13cb05..4bfe6b4 100644 --- a/RcppTskit/tests/testthat/test_load_summary_and_dump.R +++ b/RcppTskit/tests/testthat/test_load_summary_and_dump.R @@ -42,84 +42,98 @@ test_that("ts_load(), ts_summary*(), and ts_dump(x) work", { n_ptr <- ts_num_provenances_ptr(ts_ptr) expect_true(is.integer(n_ptr)) expect_equal(n_ptr, 2L) + expect_equal(ts$num_provenances(), 2L) expect_error(ts_num_populations_ptr()) expect_error(ts_num_populations_ptr(ts)) n_ptr <- ts_num_populations_ptr(ts_ptr) expect_true(is.integer(n_ptr)) expect_equal(n_ptr, 1L) + expect_equal(ts$num_populations(), 1L) expect_error(ts_num_migrations_ptr()) expect_error(ts_num_migrations_ptr(ts)) n_ptr <- ts_num_migrations_ptr(ts_ptr) expect_true(is.integer(n_ptr)) expect_equal(n_ptr, 0L) + expect_equal(ts$num_migrations(), 0L) expect_error(ts_num_individuals_ptr()) expect_error(ts_num_individuals_ptr(ts)) n_ptr <- ts_num_individuals_ptr(ts_ptr) expect_true(is.integer(n_ptr)) expect_equal(n_ptr, 80L) + expect_equal(ts$num_individuals(), 80L) expect_error(ts_num_samples_ptr()) expect_error(ts_num_samples_ptr(ts)) n_ptr <- ts_num_samples_ptr(ts_ptr) expect_true(is.integer(n_ptr)) expect_equal(n_ptr, 160L) + expect_equal(ts$num_samples(), 160L) expect_error(ts_num_nodes_ptr()) expect_error(ts_num_nodes_ptr(ts)) n_ptr <- ts_num_nodes_ptr(ts_ptr) expect_true(is.integer(n_ptr)) expect_equal(n_ptr, 344L) + expect_equal(ts$num_nodes(), 344L) expect_error(ts_num_edges_ptr()) expect_error(ts_num_edges_ptr(ts)) n_ptr <- ts_num_edges_ptr(ts_ptr) expect_true(is.integer(n_ptr)) expect_equal(n_ptr, 414L) + expect_equal(ts$num_edges(), 414L) expect_error(ts_num_trees_ptr()) expect_error(ts_num_trees_ptr(ts)) n_ptr <- ts_num_trees_ptr(ts_ptr) expect_true(is.integer(n_ptr)) 
expect_equal(n_ptr, 26L) + expect_equal(ts$num_trees(), 26L) expect_error(ts_num_sites_ptr()) expect_error(ts_num_sites_ptr(ts)) n_ptr <- ts_num_sites_ptr(ts_ptr) expect_true(is.integer(n_ptr)) expect_equal(n_ptr, 2376L) + expect_equal(ts$num_sites(), 2376L) expect_error(ts_num_mutations_ptr()) expect_error(ts_num_mutations_ptr(ts)) n_ptr <- ts_num_mutations_ptr(ts_ptr) expect_true(is.integer(n_ptr)) expect_equal(n_ptr, 2700L) + expect_equal(ts$num_mutations(), 2700L) expect_error(ts_sequence_length_ptr()) expect_error(ts_sequence_length_ptr(ts)) n_ptr <- ts_sequence_length_ptr(ts_ptr) expect_true(is.numeric(n_ptr)) expect_equal(n_ptr, 10000) + expect_equal(ts$sequence_length(), 10000) expect_error(ts_time_units_ptr()) expect_error(ts_time_units_ptr(ts)) c_ptr <- ts_time_units_ptr(ts_ptr) expect_true(is.character(c_ptr)) expect_equal(c_ptr, "generations") + expect_equal(ts$time_units(), "generations") expect_error(ts_min_time_ptr()) expect_error(ts_min_time_ptr(ts)) d_ptr <- ts_min_time_ptr(ts_ptr) expect_true(is.numeric(d_ptr)) expect_equal(d_ptr, 0.0) + expect_equal(ts$min_time(), 0.0) expect_error(ts_max_time_ptr()) expect_error(ts_max_time_ptr(ts)) d_ptr <- ts_max_time_ptr(ts_ptr) expect_true(is.numeric(d_ptr)) expect_equal(d_ptr, 7.470281689748594) + expect_equal(ts$max_time(), 7.470281689748594) # ---- ts_print_ptr() and ts$print() ---- diff --git a/RcppTskit/vignettes/RcppTskit_intro.qmd b/RcppTskit/vignettes/RcppTskit_intro.qmd index 54df229..0877dfc 100644 --- a/RcppTskit/vignettes/RcppTskit_intro.qmd +++ b/RcppTskit/vignettes/RcppTskit_intro.qmd @@ -17,7 +17,7 @@ knitr: The aim of this vignette is to introduce you to working with tree sequences in R using `RcppTskit` package, -which provides R access to the `tskit` C API +which provides R access to the `tskit` C application programming interface (API) [@jeffrey2026population] (https://tskit.dev/tskit/docs/stable/c-api.html). 
If you are new to tree sequences and the more general concept of ancestral recombination graphs (ARGs), @@ -35,7 +35,7 @@ Python is the most widely used environment to work with tree sequences. We are of the view that most R users can and should leverage this large ecosystem of Python packages, in particular the `tskit` Python API [@jeffrey2026population] (https://tskit.dev/tskit/docs/stable/python-api.html), -via the R package `reticulate` [@ushey2025reticulate] (https://rstudio.github.io/reticulate). +via the R package `reticulate` [@ushey2025reticulate] (https://rstudio.github.io/reticulate/). With this in mind, `RcppTskit` provides R access to the `tskit` C API [@jeffrey2026population] for use cases where the `reticulate` option is not optimal. @@ -51,59 +51,60 @@ hence our recommendation to use `reticulate` in the first instance. ## State of the tree sequence ecosystem The tree sequence ecosystem is rapidly evolving. -The website https://tskit.dev/software lists tools that closely interoperate with `tskit`, +The website https://tskit.dev/software/ lists tools that closely interoperate with `tskit`, while @jeffrey2026population lists several other tools that depend on `tskit` functionality. Thence, there are now multiple tools for the generation and analysis of tree sequences. Below is a quick summary of some of the tools relevant to `RcppTskit` as of January 2026. -- `tskit` (https://tskit.dev/tskit/docs, https://github.com/tskit-dev/tskit) +- `tskit` (https://tskit.dev/tskit/docs/, https://github.com/tskit-dev/tskit) is the core toolkit for working with tree sequences. It has an efficient C API and user-friendly Python API. The python API is a popular entry point for most users and expands the C API in certain aspects (for example metadata encoding/decoding). There is also a Rust API that wraps the C API. 
-- `msprime` (https://tskit.dev/msprime/docs, https://github.com/tskit-dev/msprime) +- `msprime` (https://tskit.dev/msprime/docs/, https://github.com/tskit-dev/msprime) generates tree sequences with backward in time simulation. It has a Python API and command line interface. -- `SLiM` (https://messerlab.org/slim, https://github.com/MesserLab/SLiM) +- `SLiM` (https://messerlab.org/slim/, https://github.com/MesserLab/SLiM) generates tree sequences with forward in time simulation. It is written in C++ (with embedded `tskit` C library) and has a command line and a GUI interface. Its tree sequence recording is described in detail at https://github.com/MesserLab/SLiM/blob/master/treerec/implementation.md. -- `pyslim` (https://tskit.dev/pyslim/docs, https://github.com/tskit-dev/pyslim) - provides a Python API for reading and modifying `tskit` tree sequence files from SLiM, - or modifying files produced by other programs (e.g., msprime) for use in SLiM. +- `pyslim` (https://tskit.dev/pyslim/docs/, https://github.com/tskit-dev/pyslim) + provides a Python API for reading and modifying `tskit` SLiM tree sequences, + or modifying tree sequences from other programs (e.g., msprime) for use in SLiM. -- `fwdpy11` (https://molpopgen.github.io/fwdpy11, https://github.com/molpopgen/fwdpy11) +- `fwdpy11` (https://molpopgen.github.io/fwdpy11/, https://github.com/molpopgen/fwdpy11) generates tree sequences with forward in time simulation. It has a Python API, which is built on a C++ API (`fwdpp`). -- `stdpopsim` (https://popsim-consortium.github.io/stdpopsim-docs, https://github.com/popsim-consortium/stdpopsim) - is a standard library of population genetic models used in simulations with `msprime` and `SLiM`. +- `stdpopsim` (https://popsim-consortium.github.io/stdpopsim-docs/, https://github.com/popsim-consortium/stdpopsim) + is a standard library of population genetic models used in simulations with + `msprime` and `SLiM`. It has a Python API and command line interface. 
-- `slendr` (https://bodkan.net/slendr, https://github.com/bodkan/slendr) +- `slendr` (https://bodkan.net/slendr/, https://github.com/bodkan/slendr) is an R package for describing population genetic models, simulating them with either `msprime` or `SLiM`, and analysing resulting tree sequences using `tskit`. -- `slimr` (https://rdinnager.github.io/slimr, https://github.com/rdinnager/slimr) +- `slimr` (https://rdinnager.github.io/slimr/, https://github.com/rdinnager/slimr) provides an R API for specifying and running SLiM scripts and analysing results in R. It runs `SLiM` via the R package `processx`. The above tools enable work with tree sequences and/or generate them via simulation. -There is a growing list of tools that generate (estimate/infer) ARGs from observed genomic data -and can export it in the tree sequence file format. +There is a growing list of tools that estimate ARGs from observed genomic data +and can export them in the tree sequence file format. Notable mentions are: -`tsinfer` (https://tskit.dev/tsinfer/docs, https://github.com/tskit-dev/tsinfer), -`Relate` (https://myersgroup.github.io/relate, https://github.com/MyersGroup/relate), +`tsinfer` (https://tskit.dev/tsinfer/docs/, https://github.com/tskit-dev/tsinfer), +`Relate` (https://myersgroup.github.io/relate/, https://github.com/MyersGroup/relate), `SINGER` (https://github.com/popgenmethods/SINGER), -`ARGNeedle` (https://palamaralab.github.io/software/argneedle, https://github.com/PalamaraLab/arg-needle-lib), and -`Threads` (https://palamaralab.github.io/software/threads, https://github.com/palamaraLab/threads). +`ARGNeedle` (https://palamaralab.github.io/software/argneedle/, https://github.com/PalamaraLab/arg-needle-lib), and +`Threads` (https://palamaralab.github.io/software/threads/, https://github.com/palamaraLab/threads). As described above, the tree sequence ecosystem is extensive. 
Python is the most widely used platform to interact with tree sequences @@ -113,14 +114,17 @@ There is interest in working with tree sequences in R. Because we can call Python from within R using the `reticulate` R package, there is no pressing need for a dedicated R support for work with tree sequences. See https://tskit.dev/tutorials/tskitr.html on how this option looks like. -In a way, this situation will positively focus the community on the Python collection of packages. +In a way, this situation will positively focus the community on +the Python collection of packages. While there are differences between Python and R, many R users should be able to follow the extensive Python API documentation, examples, and tutorials listed above, -in particular those at https://tskit.dev/tutorials. +in particular those at https://tskit.dev/tutorials/. -To provide idiomatic R interface to some population genetic simulation steps and operations with tree sequences, -`slendr` implemented bespoke functions and wrapper functions to interact with `msprime`, `SLiM`, and `tskit`. +To provide idiomatic R interface to some population genetic simulation steps and +operations with tree sequences, +`slendr` implemented bespoke functions and wrapper functions to interact with +`msprime`, `SLiM`, and `tskit`. It uses `reticulate` for the interaction with Python APIs of these packages. As such, `slendr` further lowers barriers for R users to work with tree sequences. @@ -155,11 +159,14 @@ after we describe the implemented data and class model. `RcppTskit` represents a tree sequence as a lightweight R object of R6 class `TreeSequence`. R6 class was partially chosen so the R code calls resemble Python code. -`TreeSequence` wraps an external pointer (`externalptr`) to the `tskit` C data structure (`tsk_treeseq_t`). -Most methods (for example, `ts$num_individuals()`, `ts$dump()`, etc.) 
call the `tskit` C API via `Rcpp`, -so the calls are fast and the object is not copied unless you explicitly write/read or change it. -The underlying pointer is exposed as `TreeSequence$pointer` for developers and advanced users -that can write C++ code. +`TreeSequence` wraps an external pointer (`externalptr`) to the `tskit` +C data structure (`tsk_treeseq_t`). +Most methods (for example, `ts$num_individuals()`, `ts$dump()`, etc.) +call the `tskit` C API via `Rcpp`, +so the calls are fast and the object is not copied +unless you explicitly write/read or change it. +The underlying pointer is exposed as `TreeSequence$pointer` +for developers and advanced users that can write C++ code. ## For typical use cases @@ -264,10 +271,12 @@ ts_num_individuals2 <- Rcpp::cppFunction( ts_file <- system.file("examples/test.trees", package = "RcppTskit") ts <- ts_load(ts_file) -# Apply the compiled function (on the pointer) +# Apply the compiled function +# (on the pointer) ts_num_individuals2(ts$pointer) -# An identical RcppTskit implementation (available as the method of the TreeSequence class) +# An identical RcppTskit implementation +# (available as the method of the TreeSequence class) ts$num_individuals() ``` @@ -280,22 +289,25 @@ at this commit (proof of concept of using `tskit` C API via `tskitr`[^1]): https://github.com/HighlanderLab/AlphaSimR/commit/12657b08e7054d88bc214413d13f36c7cde60d95 (with time this implementation might require changes). -[^1]: At that time we named the package as `tskitr`. Simply replace `tskitr` with `RcppTskit`, +[^1]: At that time we named the package as `tskitr`. +Simply replace `tskitr` with `RcppTskit`, respecting the lower and upper case depending on the file. a) Open `DESCRIPTION` file and add `RcppTskit` to the `LinkingTo:` field. b) Add `#include ` as needed to your C++ header files in `src` directory. -c) Call `tskit` C API as needed in your C++ code in `src` directory (see examples in `RcppTskit`). 
+c) Call `tskit` C API as needed in your C++ code in `src` directory + (see examples in `RcppTskit`). -d) Configure your package build to use the `RcppTskit` library file using the following steps: +d) Configure your package build to use the `RcppTskit` library file + with the following steps: - - Add `src/Makevars.in` and `src/Makevars.win.in` files with `PKG_LIB = @RCPPTSKIT_LIB@` flag, - in addition to other flags. + - Add `src/Makevars.in` and `src/Makevars.win.in` files with + `PKG_LIB = @RCPPTSKIT_LIB@` flag, in addition to other flags. - Add `tools/configure.R` file, - which will replace `@RCPPTSKIT_LIB@` in `src/Makevars.in` and `src/Makevars.win.in` files + which will replace `@RCPPTSKIT_LIB@` in `src/Makevars.in` and `src/Makevars.win.in` with the installed `RcppTskit` library file (including appropriate flags), and generate `src/Makevars` and `src/Makevars.win`. @@ -306,7 +318,8 @@ d) Configure your package build to use the `RcppTskit` library file using the fo to remove `src/Makevars` and `src/Makevars.win` as well as compilation files. e) You should now be ready to build, check, and install your package using - `devtools::build()`, `devtools::check()`, and `devtools::install()` or their `R CMD` equivalents. + `devtools::build()`, `devtools::check()`, and `devtools::install()` + or their `R CMD` equivalents. 
```{r} #| label: use_case_4 From c4d652bc59590685e55313fa238f6072b6d65f24 Mon Sep 17 00:00:00 2001 From: Gregor Gorjanc Date: Tue, 27 Jan 2026 11:59:16 +0000 Subject: [PATCH 2/7] More prepping for release --- RcppTskit/DESCRIPTION | 16 ++--- RcppTskit/NAMESPACE | 1 + RcppTskit/R/Class-TreeSequence.R | 13 ++-- RcppTskit/R/RcppTskit.R | 67 +++++++++++++------ RcppTskit/man/TreeSequence.Rd | 26 ++++--- RcppTskit/man/get_tskit_py.Rd | 23 +++++-- RcppTskit/man/ts_py_to_r.Rd | 20 +++--- RcppTskit/notes_pkg_dev.Rmd | 7 +- RcppTskit/tests/testthat/test_TreeSequence.R | 2 - RcppTskit/tests/testthat/test_get_tskit_py.R | 37 ++++++++-- .../testthat/test_load_summary_and_dump.R | 2 - RcppTskit/tests/testthat/test_misc.R | 2 - .../tests/testthat/test_r_to_py_and_py_to_r.R | 6 +- RcppTskit/vignettes/RcppTskit_intro.qmd | 21 +++--- 14 files changed, 160 insertions(+), 83 deletions(-) diff --git a/RcppTskit/DESCRIPTION b/RcppTskit/DESCRIPTION index e9c4cb3..36b8ee0 100644 --- a/RcppTskit/DESCRIPTION +++ b/RcppTskit/DESCRIPTION @@ -9,19 +9,19 @@ Authors@R: c( person("Tskit Developers", role = "cph", comment = "Authors of included tskit C library") ) -Description: `Tskit` enables performant storage, manipulation, and +Description: 'Tskit' enables performant storage, manipulation, and analysis of ancestral recombination graphs (ARGs) using succinct tree - sequence encoding. See https://tskit.dev for project news, - documentation, and tutorials. `Tskit` provides Python, C, and Rust + sequence encoding. See <https://tskit.dev> for project news, + documentation, and tutorials. 'Tskit' provides Python, C, and Rust application programming interfaces (APIs). The Python API can be - called from R via the `reticulate` R package to seamlessly load and + called from R via the 'reticulate' R package to seamlessly load and analyse a tree sequence as described at - https://tskit.dev/tutorials/tskitr.html. `RcppTskit` provides R - access to the `tskit` C API for use cases where the `reticulate` + <https://tskit.dev/tutorials/tskitr.html>.
'RcppTskit' provides R + access to the 'tskit' C API for use cases where the 'reticulate' option is not optimal. For example, for high-performance and low-level - work with tree sequences. Currently, `RcppTskit` provides a limited + work with tree sequences. Currently, 'RcppTskit' provides a limited number of R functions due to the availability of extensive Python API - and the `reticulate` option. + and the 'reticulate' option. License: MIT + file LICENSE URL: https://github.com/HighlanderLab/RcppTskit BugReports: https://github.com/HighlanderLab/RcppTskit/issues diff --git a/RcppTskit/NAMESPACE b/RcppTskit/NAMESPACE index 47987d1..c0a1db7 100644 --- a/RcppTskit/NAMESPACE +++ b/RcppTskit/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export(TreeSequence) +export(check_tskit_py) export(get_tskit_py) export(kastore_version) export(ts_load) diff --git a/RcppTskit/R/Class-TreeSequence.R b/RcppTskit/R/Class-TreeSequence.R index ba15c79..137f681 100644 --- a/RcppTskit/R/Class-TreeSequence.R +++ b/RcppTskit/R/Class-TreeSequence.R @@ -106,11 +106,14 @@ TreeSequence <- R6Class( #' ts_r$num_samples() # 160 #' #' # Transfer the tree sequence to reticulate Python and use tskit Python API - #' ts_py <- ts_r$r_to_py() - #' is(ts_py) - #' ts_py$num_samples # 160 - #' ts2_py <- ts_py$simplify(samples = c(0L, 1L, 2L, 3L)) - #' ts2_py$num_samples # 4 + #' tskit <- get_tskit_py() + #' if (check_tskit_py(tskit)) { + #' ts_py <- ts_r$r_to_py() + #' is(ts_py) + #' ts_py$num_samples # 160 + #' ts2_py <- ts_py$simplify(samples = c(0L, 1L, 2L, 3L)) + #' ts2_py$num_samples # 4 + #' } r_to_py = function(tskit_module = get_tskit_py(), cleanup = TRUE) { ts_r_to_py_ptr( self$pointer, diff --git a/RcppTskit/R/RcppTskit.R b/RcppTskit/R/RcppTskit.R index d55c511..5c50f13 100644 --- a/RcppTskit/R/RcppTskit.R +++ b/RcppTskit/R/RcppTskit.R @@ -3,27 +3,32 @@ #' @title Get the reticulate Python tskit module #' @description This function imports the reticulate Python 
\code{tskit} module #' and if it is not yet installed, then it attempts to install it first. -#' @param obj_name character name of the object holding \code{tskit} reticulate +#' @param object_name character name of the object holding \code{tskit} reticulate #' Python module. If this object exists in the global R environment and is a #' reticulate Python object, then it is returned. Otherwise, the function #' attempts to install and import tskit before returning it. If \code{NULL}, #' then the function directly attempts to install and import tskit before #' returning it. +#' @param object reticulate Python module object, hopefully. +#' @param stop logical for throwing an error in \code{check_tskit_py}. #' @details This function is meant for users running \code{tskit <- get_tskit_py()} #' or similar code, but also by other functions in this package that need the #' \code{tskit} reticulate Python module. The point of \code{get_tskit_py} is #' to avoid importing the module repeatedly, if it has been imported already. -#' @return \code{tskit} reticulate Python module. +#' @return \code{get_tskit_py} returns \code{tskit} reticulate Python module. 
+#' \code{check_tskit_py} returns \code{TRUE} if \code{object} is a reticulate Python module object; otherwise it returns \code{FALSE} (or throws an error when \code{stop = TRUE}). #' @examples #' tskit <- get_tskit_py() #' is(tskit) -#' tskit$ALLELES_01 +#' if (check_tskit_py(tskit)) { +#' tskit$ALLELES_01 +#' } #' @export -get_tskit_py <- function(obj_name = "tskit") { - test <- !is.null(obj_name) && - exists(obj_name, envir = .GlobalEnv, inherits = FALSE) +get_tskit_py <- function(object_name = "tskit") { + test <- !is.null(object_name) && + exists(object_name, envir = .GlobalEnv, inherits = FALSE) if (test) { - tskit <- get(obj_name, envir = .GlobalEnv, inherits = FALSE) + tskit <- get(object_name, envir = .GlobalEnv, inherits = FALSE) test <- reticulate::is_py_object(tskit) && is(tskit) == "python.builtin.module" if (test) { @@ -31,8 +36,8 @@ get_tskit_py <- function(obj_name = "tskit") { } else { txt <- paste0( "Object '", - obj_name, - "' exists in the global environment but is not a reticulate Python module" + object_name, + "' exists in the global environment but is not a reticulate Python module!" ) stop(txt) } @@ -42,13 +47,31 @@ get_tskit_py <- function(obj_name = "tskit") { # nocov start if (!reticulate::py_module_available("tskit")) { txt <- "Python module 'tskit' is not available. Attempting to install it ..." - cat(txt) + message(txt) reticulate::py_require("tskit") } # nocov end return(reticulate::import("tskit", delay_load = TRUE)) } +#' @describeIn get_tskit_py Test if \code{get_tskit_py} returned a reticulate Python module object +#' @export +check_tskit_py <- function(object, stop = FALSE) { + test <- reticulate::is_py_object(object) && + ("python.builtin.module" %in% is(object)) + if (test) { + return(TRUE) + } else { + msg <- "object must be a reticulate Python module object!" + if (stop) { + stop(msg) + } else { + message(msg) + } + return(FALSE) + } +} + #' @title Load a tree sequence from a file #' @param file a string specifying the full path of the tree sequence file.
#' @param options integer bitwise options (see details at @@ -182,9 +205,7 @@ ts_r_to_py_ptr <- function(ts, tskit_module = get_tskit_py(), cleanup = TRUE) { if (!is(ts, "externalptr")) { stop("ts must be an object of externalptr class!") } - if (!reticulate::is_py_object(tskit_module)) { - stop("tskit_module must be a reticulate Python module object!") - } + check_tskit_py(tskit_module, stop = TRUE) ts_file <- tempfile(fileext = ".trees") if (cleanup) { on.exit(file.remove(ts_file)) @@ -247,16 +268,18 @@ ts_py_to_r_ptr <- function(ts, cleanup = TRUE) { #' #' # Use the tskit Python API to work with a tree sequence (via reticulate) #' tskit <- get_tskit_py() -#' ts_py <- tskit$load(ts_file) -#' is(ts_py) -#' ts_py$num_samples # 160 -#' ts2_py <- ts_py$simplify(samples = c(0L, 1L, 2L, 3L)) -#' ts2_py$num_samples # 4 +#' if (check_tskit_py(tskit)) { +#' ts_py <- tskit$load(ts_file) +#' is(ts_py) +#' ts_py$num_samples # 160 +#' ts2_py <- ts_py$simplify(samples = c(0L, 1L, 2L, 3L)) +#' ts2_py$num_samples # 4 #' -#' # Transfer the tree sequence to R and use RcppTskit -#' ts2_r <- ts_py_to_r(ts2_py) -#' is(ts2_r) -#' ts2_r$num_samples() # 4 +#' # Transfer the tree sequence to R and use RcppTskit +#' ts2_r <- ts_py_to_r(ts2_py) +#' is(ts2_r) +#' ts2_r$num_samples() # 4 +#' } #' @export ts_py_to_r <- function(ts, cleanup = TRUE) { ptr <- ts_py_to_r_ptr(ts = ts, cleanup = cleanup) diff --git a/RcppTskit/man/TreeSequence.Rd b/RcppTskit/man/TreeSequence.Rd index da180b6..febf10d 100644 --- a/RcppTskit/man/TreeSequence.Rd +++ b/RcppTskit/man/TreeSequence.Rd @@ -54,11 +54,14 @@ is(ts_r) ts_r$num_samples() # 160 # Transfer the tree sequence to reticulate Python and use tskit Python API -ts_py <- ts_r$r_to_py() -is(ts_py) -ts_py$num_samples # 160 -ts2_py <- ts_py$simplify(samples = c(0L, 1L, 2L, 3L)) -ts2_py$num_samples # 4 +tskit <- get_tskit_py() +if (check_tskit_py(tskit)) { + ts_py <- ts_r$r_to_py() + is(ts_py) + ts_py$num_samples # 160 + ts2_py <- ts_py$simplify(samples = 
c(0L, 1L, 2L, 3L)) + ts2_py$num_samples # 4 +} ## ------------------------------------------------ ## Method `TreeSequence$num_provenances` @@ -371,11 +374,14 @@ is(ts_r) ts_r$num_samples() # 160 # Transfer the tree sequence to reticulate Python and use tskit Python API -ts_py <- ts_r$r_to_py() -is(ts_py) -ts_py$num_samples # 160 -ts2_py <- ts_py$simplify(samples = c(0L, 1L, 2L, 3L)) -ts2_py$num_samples # 4 +tskit <- get_tskit_py() +if (check_tskit_py(tskit)) { + ts_py <- ts_r$r_to_py() + is(ts_py) + ts_py$num_samples # 160 + ts2_py <- ts_py$simplify(samples = c(0L, 1L, 2L, 3L)) + ts2_py$num_samples # 4 +} } \if{html}{\out{}} diff --git a/RcppTskit/man/get_tskit_py.Rd b/RcppTskit/man/get_tskit_py.Rd index f47bc55..7ab5501 100644 --- a/RcppTskit/man/get_tskit_py.Rd +++ b/RcppTskit/man/get_tskit_py.Rd @@ -2,20 +2,28 @@ % Please edit documentation in R/RcppTskit.R \name{get_tskit_py} \alias{get_tskit_py} +\alias{check_tskit_py} \title{Get the reticulate Python tskit module} \usage{ -get_tskit_py(obj_name = "tskit") +get_tskit_py(object_name = "tskit") + +check_tskit_py(object, stop = FALSE) } \arguments{ -\item{obj_name}{character name of the object holding \code{tskit} reticulate +\item{object_name}{character name of the object holding \code{tskit} reticulate Python module. If this object exists in the global R environment and is a reticulate Python object, then it is returned. Otherwise, the function attempts to install and import tskit before returning it. If \code{NULL}, then the function directly attempts to install and import tskit before returning it.} + +\item{object}{reticulate Python module object, hopefully.} + +\item{stop}{logical for throwing an error in \code{check_tskit_py}.} } \value{ -\code{tskit} reticulate Python module. +\code{get_tskit_py} returns \code{tskit} reticulate Python module. 
+ \code{check_tskit_py} returns \code{TRUE} if \code{object} is a reticulate Python module object; otherwise it returns \code{FALSE} (or throws an error when \code{stop = TRUE}). } \description{ This function imports the reticulate Python \code{tskit} module @@ -27,8 +35,15 @@ This function is meant for users running \code{tskit <- get_tskit_py()} \code{tskit} reticulate Python module. The point of \code{get_tskit_py} is to avoid importing the module repeatedly, if it has been imported already. } +\section{Functions}{ +\itemize{ +\item \code{check_tskit_py()}: Test if \code{get_tskit_py} returned a reticulate Python module object + +}} \examples{ tskit <- get_tskit_py() is(tskit) -tskit$ALLELES_01 +if (check_tskit_py(tskit)) { + tskit$ALLELES_01 +} } diff --git a/RcppTskit/man/ts_py_to_r.Rd b/RcppTskit/man/ts_py_to_r.Rd index 15547e6..90483f6 100644 --- a/RcppTskit/man/ts_py_to_r.Rd +++ b/RcppTskit/man/ts_py_to_r.Rd @@ -23,16 +23,18 @@ ts_file <- system.file("examples/test.trees", package = "RcppTskit") # Use the tskit Python API to work with a tree sequence (via reticulate) tskit <- get_tskit_py() -ts_py <- tskit$load(ts_file) -is(ts_py) -ts_py$num_samples # 160 -ts2_py <- ts_py$simplify(samples = c(0L, 1L, 2L, 3L)) -ts2_py$num_samples # 4 +if (check_tskit_py(tskit)) { + ts_py <- tskit$load(ts_file) + is(ts_py) + ts_py$num_samples # 160 + ts2_py <- ts_py$simplify(samples = c(0L, 1L, 2L, 3L)) + ts2_py$num_samples # 4 -# Transfer the tree sequence to R and use RcppTskit -ts2_r <- ts_py_to_r(ts2_py) -is(ts2_r) -ts2_r$num_samples() # 4 + # Transfer the tree sequence to R and use RcppTskit + ts2_r <- ts_py_to_r(ts2_py) + is(ts2_r) + ts2_r$num_samples() # 4 +} } \seealso{ \code{\link[=TreeSequence]{TreeSequence$r_to_py}} diff --git a/RcppTskit/notes_pkg_dev.Rmd b/RcppTskit/notes_pkg_dev.Rmd index 230ceb8..4d1f660 100644 --- a/RcppTskit/notes_pkg_dev.Rmd +++ b/RcppTskit/notes_pkg_dev.Rmd @@ -2,11 +2,14 @@ ## Next TODOs +# TODO: Create a minimal package to demonstrate how to link against RcppTskit and call tskit C API + https://github.com/HighlanderLab/RcppTskit/issues/48 + + + TODO: Add citation for
tskit to DESCRIPTION file #46 https://github.com/HighlanderLab/RcppTskit/issues/46 -# TODO: Create a minimal package to demonstrate how to link against RcppTskit and call tskit C API - # Release (TODO) # TODO: Tag a release #15 # https://github.com/HighlanderLab/RcppTskit/issues/15 diff --git a/RcppTskit/tests/testthat/test_TreeSequence.R b/RcppTskit/tests/testthat/test_TreeSequence.R index ec299cf..cc9abfb 100644 --- a/RcppTskit/tests/testthat/test_TreeSequence.R +++ b/RcppTskit/tests/testthat/test_TreeSequence.R @@ -1,5 +1,3 @@ -context("TreeSequence$new()") - test_that("TreeSequence$new() works", { ts_file <- system.file("examples/test.trees", package = "RcppTskit") expect_error(TreeSequence$new(), regexp = "Provide a file name or a pointer!") diff --git a/RcppTskit/tests/testthat/test_get_tskit_py.R b/RcppTskit/tests/testthat/test_get_tskit_py.R index 97b2529..cbc0604 100644 --- a/RcppTskit/tests/testthat/test_get_tskit_py.R +++ b/RcppTskit/tests/testthat/test_get_tskit_py.R @@ -1,5 +1,3 @@ -context("get_tskit_py()") - test_that("get_tskit_py() works", { # Testing that get_tskit_py() fails with a non-module object # Next two lines ensure that testthat is looking into the global environment @@ -7,7 +5,7 @@ test_that("get_tskit_py() works", { assign("rubbish", "something_else_than_a_py_module", envir = .GlobalEnv) on.exit(rm("rubbish", envir = .GlobalEnv), add = TRUE) expect_error( - get_tskit_py(obj_name = "rubbish"), + get_tskit_py(object_name = "rubbish"), regexp = "Object 'rubbish' exists in the global environment but is not a reticulate Python module" ) @@ -42,9 +40,40 @@ test_that("get_tskit_py() works", { expect_equal(tskit$`__name__`, tskit2$`__name__`) # Re-importing - tskit3 <- get_tskit_py(obj_name = NULL) + tskit3 <- get_tskit_py(object_name = NULL) # lobstr::obj_addr(tskit3) # "0x161ec00f0" --> different address because we are obtaining a new object # but it is still the same module expect_equal(tskit$`__name__`, tskit3$`__name__`) }) + 
+test_that("check_tskit_py() validates python module objects", { + expect_message( + expect_false(check_tskit_py(1)), + "object must be a reticulate Python module object!" + ) + expect_error( + check_tskit_py(1, stop = TRUE), + "object must be a reticulate Python module object!" + ) + + if (!reticulate::py_available(initialize = FALSE)) { + skip("Python not available for reticulate tests.") + } + + obj <- reticulate::py_eval("1") + expect_message( + expect_false(check_tskit_py(obj)), + "object must be a reticulate Python module object" + ) + + sys <- reticulate::import("sys") + expect_silent(expect_true(check_tskit_py(sys))) + + if (reticulate::py_module_available("tskit")) { + tskit <- get_tskit_py() + expect_true(check_tskit_py(tskit)) + } else { + skip("tskit module not available for reticulate tests.") + } +}) diff --git a/RcppTskit/tests/testthat/test_load_summary_and_dump.R b/RcppTskit/tests/testthat/test_load_summary_and_dump.R index 4bfe6b4..eadef5e 100644 --- a/RcppTskit/tests/testthat/test_load_summary_and_dump.R +++ b/RcppTskit/tests/testthat/test_load_summary_and_dump.R @@ -1,5 +1,3 @@ -context("ts_load(), ts_summary*(), and ts_dump()") - test_that("ts_load(), ts_summary*(), and ts_dump(x) work", { # ---- ts_load() ---- diff --git a/RcppTskit/tests/testthat/test_misc.R b/RcppTskit/tests/testthat/test_misc.R index ffaf94d..0e35918 100644 --- a/RcppTskit/tests/testthat/test_misc.R +++ b/RcppTskit/tests/testthat/test_misc.R @@ -1,5 +1,3 @@ -context("misc") - test_that("kastore_version() works", { v <- kastore_version() expect_true(is.integer(v)) diff --git a/RcppTskit/tests/testthat/test_r_to_py_and_py_to_r.R b/RcppTskit/tests/testthat/test_r_to_py_and_py_to_r.R index d62862e..15dc56c 100644 --- a/RcppTskit/tests/testthat/test_r_to_py_and_py_to_r.R +++ b/RcppTskit/tests/testthat/test_r_to_py_and_py_to_r.R @@ -1,5 +1,3 @@ -context("r_to_py() and py_to_r()") - skip_if_no_tskit_py <- function() { if (!covr::in_covr()) { # To get_tskit_py() we need internet 
connection @@ -22,7 +20,7 @@ test_that("ts_r_to_py() and ts_py_to_r() work", { expect_error( ts_r$r_to_py(tskit_module = "bla"), - regexp = "tskit_module must be a reticulate Python module object!" + regexp = "object must be a reticulate Python module object!" ) ts_py <- ts_r$r_to_py() @@ -68,7 +66,7 @@ test_that("ts_r_to_py() and ts_py_to_r() work", { ) expect_error( ts_r_to_py_ptr(ts_r$pointer, tskit_module = "not_a_module"), - regexp = "tskit_module must be a reticulate Python module object!" + regexp = "object must be a reticulate Python module object!" ) ts_py <- ts_r_to_py_ptr(ts_r$pointer) diff --git a/RcppTskit/vignettes/RcppTskit_intro.qmd b/RcppTskit/vignettes/RcppTskit_intro.qmd index 0877dfc..32e61f4 100644 --- a/RcppTskit/vignettes/RcppTskit_intro.qmd +++ b/RcppTskit/vignettes/RcppTskit_intro.qmd @@ -201,7 +201,7 @@ if (!test) { # Load a tree sequence ts_file <- system.file("examples/test.trees", package = "RcppTskit") ts <- ts_load(ts_file) -is(ts) +methods::is(ts) # Print the summary of the tree sequence ts$print() @@ -222,14 +222,17 @@ ts_file <- system.file("examples/test.trees", package = "RcppTskit") ts <- ts_load(ts_file) # If you now want to use tskit Python API in reticulate Python, use -ts_py <- ts$r_to_py() -# ... continue in reticulate Python ... -ts_py$num_individuals # 80 -ts2_py = ts_py$simplify(samples = c(0L, 1L, 2L, 3L)) -ts2_py$num_individuals # 2 -# ... and to bring it back to R, use ... -ts2 <- ts_py_to_r(ts2_py) -ts2$num_individuals() # 2 +tskit <- get_tskit_py() +if (check_tskit_py(tskit)) { + ts_py <- ts$r_to_py() + # ... continue in reticulate Python ... + ts_py$num_individuals # 80 + ts2_py = ts_py$simplify(samples = c(0L, 1L, 2L, 3L)) + ts2_py$num_individuals # 2 + # ... and to bring it back to R, use ... 
+ ts2 <- ts_py_to_r(ts2_py) + ts2$num_individuals() # 2 +} # If you prefer standard (non-reticulate) Python, use ts_file <- tempfile() From 9eedf949d6b7eef720861c9aa6010cbd6b284e43 Mon Sep 17 00:00:00 2001 From: Gregor Gorjanc Date: Tue, 27 Jan 2026 14:38:27 +0000 Subject: [PATCH 3/7] Missed one bit of unstaged code --- README.md | 3 ++- RcppTskit/R/RcppTskit.R | 20 +++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index a430bf6..5d0e4f5 100644 --- a/README.md +++ b/README.md @@ -92,10 +92,11 @@ By contributing to this project, you agree to abide by its terms. ### Clone -First clone the repository: +First clone the repository and step into the directory: ``` git clone https://github.com/HighlanderLab/RcppTskit.git +cd RcppTskit ``` ### Pre-commit install diff --git a/RcppTskit/R/RcppTskit.R b/RcppTskit/R/RcppTskit.R index 5c50f13..2619317 100644 --- a/RcppTskit/R/RcppTskit.R +++ b/RcppTskit/R/RcppTskit.R @@ -232,16 +232,18 @@ ts_r_to_py_ptr <- function(ts, tskit_module = get_tskit_py(), cleanup = TRUE) { # # # Use the tskit Python API to work with a tree sequence (via reticulate) # tskit <- get_tskit_py() -# ts_py <- tskit$load(ts_file) -# is(ts_py) -# ts_py$num_samples # 160 -# ts2_py <- ts_py$simplify(samples = c(0L, 1L, 2L, 3L)) -# ts2_py$num_samples # 4 +# if (check_tskit_py(tskit)) { +# ts_py <- tskit$load(ts_file) +# is(ts_py) +# ts_py$num_samples # 160 +# ts2_py <- ts_py$simplify(samples = c(0L, 1L, 2L, 3L)) +# ts2_py$num_samples # 4 # -# # Transfer the tree sequence to R and use RcppTskit -# ts2_ptr_r <- RcppTskit:::ts_py_to_r_ptr(ts2_py) -# is(ts2_ptr_r) -# RcppTskit:::ts_num_samples_ptr(ts2_ptr_r) # 4 +# # Transfer the tree sequence to R and use RcppTskit +# ts2_ptr_r <- RcppTskit:::ts_py_to_r_ptr(ts2_py) +# is(ts2_ptr_r) +# RcppTskit:::ts_num_samples_ptr(ts2_ptr_r) # 4 +# } ts_py_to_r_ptr <- function(ts, cleanup = TRUE) { if (!reticulate::is_py_object(ts)) { stop("ts must be a reticulate Python 
object!") From 1f15a9ca4560747b3b08ad8a047a635dfd981458 Mon Sep 17 00:00:00 2001 From: Gregor Gorjanc Date: Tue, 27 Jan 2026 17:43:16 +0000 Subject: [PATCH 4/7] More polish for different platforms and checks --- RcppTskit/.Rbuildignore | 1 + RcppTskit/LICENSE.md | 21 ++++++++++++++++ RcppTskit/R/RcppTskit.R | 26 +++++++++++++++----- RcppTskit/tests/testthat/test_get_tskit_py.R | 2 +- 4 files changed, 43 insertions(+), 7 deletions(-) create mode 100644 RcppTskit/LICENSE.md diff --git a/RcppTskit/.Rbuildignore b/RcppTskit/.Rbuildignore index c48f6cd..e746244 100644 --- a/RcppTskit/.Rbuildignore +++ b/RcppTskit/.Rbuildignore @@ -13,6 +13,7 @@ ^cran-comments\.md$ ^inst/examples/create_test\.trees\.R$ ^inst/examples/create_test\.trees\.py$ +^LICENSE.md$ ^notes_pkg_dev\.Rmd$ ^pkg_dev_notes\.md$ ^tests/testthat/_snaps$ diff --git a/RcppTskit/LICENSE.md b/RcppTskit/LICENSE.md new file mode 100644 index 0000000..a18d7b9 --- /dev/null +++ b/RcppTskit/LICENSE.md @@ -0,0 +1,21 @@ +# MIT License + +Copyright (c) 2026 Gregor Gorjanc + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/RcppTskit/R/RcppTskit.R b/RcppTskit/R/RcppTskit.R index 2619317..7ca49ee 100644 --- a/RcppTskit/R/RcppTskit.R +++ b/RcppTskit/R/RcppTskit.R @@ -30,7 +30,7 @@ get_tskit_py <- function(object_name = "tskit") { if (test) { tskit <- get(object_name, envir = .GlobalEnv, inherits = FALSE) test <- reticulate::is_py_object(tskit) && - is(tskit) == "python.builtin.module" + is(tskit, "python.builtin.module") if (test) { return(tskit) } else { @@ -43,22 +43,36 @@ get_tskit_py <- function(object_name = "tskit") { } } # else - # These lines are hard to hit with tests with cached reticulate Python and modules + # These lines are hard to hit with tests and cached reticulate Python and modules # nocov start + msgSuccess <- 'reticulate::py_require("tskit") succeded!' + msgFail <- 'reticulate::py_require("tskit") failed!' + e <- simpleError(msgFail) if (!reticulate::py_module_available("tskit")) { txt <- "Python module 'tskit' is not available. Attempting to install it ..." message(txt) - reticulate::py_require("tskit") + out <- tryCatch( + reticulate::py_require("tskit"), + error = function(s) e + ) + if (is(out, "simpleError")) { + return(msgFail) + } } - # nocov end - return(reticulate::import("tskit", delay_load = TRUE)) + msgFail <- 'reticulate::import("tskit") failed!' 
+ e <- simpleError(msgFail) + out <- tryCatch( + reticulate::import("tskit", delay_load = TRUE), + error = function(e) e + ) + return(out) } #' @describeIn get_tskit_py Test if \code{get_tskit_py} returned a reticulate Python module object #' @export check_tskit_py <- function(object, stop = FALSE) { test <- reticulate::is_py_object(object) && - ("python.builtin.module" %in% is(object)) + (is(object, "python.builtin.module")) if (test) { return(TRUE) } else { diff --git a/RcppTskit/tests/testthat/test_get_tskit_py.R b/RcppTskit/tests/testthat/test_get_tskit_py.R index cbc0604..3547fa4 100644 --- a/RcppTskit/tests/testthat/test_get_tskit_py.R +++ b/RcppTskit/tests/testthat/test_get_tskit_py.R @@ -26,7 +26,7 @@ test_that("get_tskit_py() works", { # lobstr::obj_addr(tskit) # "0x12218b910" expect_true(is_py_object(tskit)) - expect_true(is(tskit) == "python.builtin.module") + expect_true(is(tskit, "python.builtin.module")) expect_equal(tskit$`__name__`, "tskit") # Testing that get_tskit_py() returns the same tskit object if it already exists From 911f82b763e6142d3add45625d80f72a0c1272bb Mon Sep 17 00:00:00 2001 From: Gregor Gorjanc Date: Tue, 27 Jan 2026 17:58:13 +0000 Subject: [PATCH 5/7] End of day --- RcppTskit/vignettes/RcppTskit_intro.qmd | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/RcppTskit/vignettes/RcppTskit_intro.qmd b/RcppTskit/vignettes/RcppTskit_intro.qmd index 32e61f4..058e97a 100644 --- a/RcppTskit/vignettes/RcppTskit_intro.qmd +++ b/RcppTskit/vignettes/RcppTskit_intro.qmd @@ -286,24 +286,23 @@ ts$num_individuals() ### 4) Call `tskit` C API in C++ code in another R package To call the `tskit` C API in your own R package via `Rcpp` you can leverage `RcppTskit`. -Just follow the steps below. 
-To see details of each step, see the files in the R package `AlphaSimR` -at this commit (proof of concept of using `tskit` C API via `tskitr`[^1]): -https://github.com/HighlanderLab/AlphaSimR/commit/12657b08e7054d88bc214413d13f36c7cde60d95 -(with time this implementation might require changes). +Just follow the steps below and check how these were implemented in +the test R package `RcppTskitTestPkgLinkingTo` at TODO. -[^1]: At that time we named the package as `tskitr`. -Simply replace `tskitr` with `RcppTskit`, -respecting the lower and upper case depending on the file. +a) Open `DESCRIPTION` file and add `RcppTskit` to the `LinkingTo:` field, + TODO: likely also `Imports:` field. -a) Open `DESCRIPTION` file and add `RcppTskit` to the `LinkingTo:` field. + TODO: And Rcpp too? b) Add `#include ` as needed to your C++ header files in `src` directory. -c) Call `tskit` C API as needed in your C++ code in `src` directory - (see examples in `RcppTskit`). +c) Add `// [[Rcpp::depends(RcppTskit)]]` to your C++ files in `src` directory. -d) Configure your package build to use the `RcppTskit` library file +d) Add `// [[Rcpp::plugins(RcppTskit)]]` to your C++ files in `src` directory. + +e) Call `tskit` C API as needed in your C++ code in `src` directory. + +f) Configure your package build to use the `RcppTskit` library file with the following steps: - Add `src/Makevars.in` and `src/Makevars.win.in` files with @@ -320,7 +319,7 @@ d) Configure your package build to use the `RcppTskit` library file - Add `cleanup` and `cleanup.win` scripts (and make them executable) to remove `src/Makevars` and `src/Makevars.win` as well as compilation files. -e) You should now be ready to build, check, and install your package using +g) You should now be ready to build, check, and install your package using `devtools::build()`, `devtools::check()`, and `devtools::install()` or their `R CMD` equivalents. 
From 308152421cab313aaff070f515c594d3d0ea40fd Mon Sep 17 00:00:00 2001 From: Gregor Gorjanc Date: Tue, 27 Jan 2026 21:45:36 +0000 Subject: [PATCH 6/7] More work on get_tskit_py() and tests --- RcppTskit/DESCRIPTION | 2 +- RcppTskit/R/RcppTskit.R | 75 +++++++++++--------- RcppTskit/man/get_tskit_py.Rd | 22 ++++-- RcppTskit/notes_pkg_dev.Rmd | 3 + RcppTskit/tests/testthat/test_get_tskit_py.R | 23 ++++-- 5 files changed, 79 insertions(+), 46 deletions(-) diff --git a/RcppTskit/DESCRIPTION b/RcppTskit/DESCRIPTION index 36b8ee0..53c305f 100644 --- a/RcppTskit/DESCRIPTION +++ b/RcppTskit/DESCRIPTION @@ -39,7 +39,7 @@ Suggests: spelling, testthat (>= 3.0.0) LinkingTo: - Rcpp + Rcpp (>= 1.1.0) VignetteBuilder: quarto Config/testthat/edition: 3 diff --git a/RcppTskit/R/RcppTskit.R b/RcppTskit/R/RcppTskit.R index 7ca49ee..7460c75 100644 --- a/RcppTskit/R/RcppTskit.R +++ b/RcppTskit/R/RcppTskit.R @@ -6,17 +6,24 @@ #' @param object_name character name of the object holding \code{tskit} reticulate #' Python module. If this object exists in the global R environment and is a #' reticulate Python object, then it is returned. Otherwise, the function -#' attempts to install and import tskit before returning it. If \code{NULL}, -#' then the function directly attempts to install and import tskit before -#' returning it. +#' attempts to install and import tskit before returning it. +#' @param force logical force installation and/or import before returning the +#' reticulate Python module. #' @param object reticulate Python module object, hopefully. #' @param stop logical for throwing an error in \code{check_tskit_py}. #' @details This function is meant for users running \code{tskit <- get_tskit_py()} #' or similar code, but also by other functions in this package that need the #' \code{tskit} reticulate Python module. The point of \code{get_tskit_py} is -#' to avoid importing the module repeatedly, if it has been imported already. 
-#' @return \code{get_tskit_py} returns \code{tskit} reticulate Python module. -#' \code{check_tskit_py} returns \code{TRUE} if +#' to avoid importing the module repeatedly, if it has been imported already +#' in which case we use that imported module. Since this process can be +#' finicky (it depends on the availability of reticulate Python, module already +#' instaled, internet access, etc.) +#' @return \code{get_tskit_py} returns \code{tskit} a reticulate Python module +#' if succesful or otherwise throws an error (when \code{object_name} exists +#' but is not a reticulate Python module) or returns \code{simpleError} +#' (when installation or import failed). \code{check_tskit_py} returns +#' \code{TRUE} if \code{object} is a reticulate Python module or \code{FALSE} +#' otherwise. #' @examples #' tskit <- get_tskit_py() #' is(tskit) @@ -24,45 +31,49 @@ #' tskit$ALLELES_01 #' } #' @export -get_tskit_py <- function(object_name = "tskit") { - test <- !is.null(object_name) && - exists(object_name, envir = .GlobalEnv, inherits = FALSE) - if (test) { - tskit <- get(object_name, envir = .GlobalEnv, inherits = FALSE) - test <- reticulate::is_py_object(tskit) && - is(tskit, "python.builtin.module") +get_tskit_py <- function(object_name = "tskit", force = FALSE) { + if (!force) { + test <- !is.null(object_name) && + exists(object_name, envir = .GlobalEnv, inherits = FALSE) if (test) { - return(tskit) - } else { - txt <- paste0( - "Object '", - object_name, - "' exists in the global environment but is not a reticulate Python module!" - ) - stop(txt) + tskit <- get(object_name, envir = .GlobalEnv, inherits = FALSE) + test <- reticulate::is_py_object(tskit) && + is(tskit, "python.builtin.module") + if (test) { + return(tskit) + } else { + txt <- paste0( + "Object '", + object_name, + "' exists in the global environment but is not a reticulate Python module!" 
+ ) + stop(txt) + } } } - # else - # These lines are hard to hit with tests and cached reticulate Python and modules - # nocov start - msgSuccess <- 'reticulate::py_require("tskit") succeded!' - msgFail <- 'reticulate::py_require("tskit") failed!' + + msgSuccess <- paste0('reticulate::py_require("', object_name, '") succeded!') + msgFail <- paste0('reticulate::py_require("', object_name, '") failed!') e <- simpleError(msgFail) - if (!reticulate::py_module_available("tskit")) { - txt <- "Python module 'tskit' is not available. Attempting to install it ..." + if (!reticulate::py_module_available(object_name)) { + txt <- paste0( + 'Python module ', + object_name, + ' is not available. Attempting to install it ...' + ) message(txt) out <- tryCatch( - reticulate::py_require("tskit"), + reticulate::py_require(object_name), error = function(s) e ) if (is(out, "simpleError")) { - return(msgFail) + return(out) # hard to hit with tests! } } - msgFail <- 'reticulate::import("tskit") failed!' + msgFail <- paste0('reticulate::import("', object_name, '") failed!') e <- simpleError(msgFail) out <- tryCatch( - reticulate::import("tskit", delay_load = TRUE), + reticulate::import(object_name, delay_load = TRUE), error = function(e) e ) return(out) diff --git a/RcppTskit/man/get_tskit_py.Rd b/RcppTskit/man/get_tskit_py.Rd index 7ab5501..36fc2e7 100644 --- a/RcppTskit/man/get_tskit_py.Rd +++ b/RcppTskit/man/get_tskit_py.Rd @@ -5,7 +5,7 @@ \alias{check_tskit_py} \title{Get the reticulate Python tskit module} \usage{ -get_tskit_py(object_name = "tskit") +get_tskit_py(object_name = "tskit", force = FALSE) check_tskit_py(object, stop = FALSE) } @@ -13,17 +13,22 @@ check_tskit_py(object, stop = FALSE) \item{object_name}{character name of the object holding \code{tskit} reticulate Python module. If this object exists in the global R environment and is a reticulate Python object, then it is returned. Otherwise, the function -attempts to install and import tskit before returning it. 
If \code{NULL}, -then the function directly attempts to install and import tskit before -returning it.} +attempts to install and import tskit before returning it.} + +\item{force}{logical force installation and/or import before returning the +reticulate Python module.} \item{object}{reticulate Python module object, hopefully.} \item{stop}{logical for throwing an error in \code{check_tskit_py}.} } \value{ -\code{get_tskit_py} returns \code{tskit} reticulate Python module. - \code{check_tskit_py} returns \code{TRUE} if +\code{get_tskit_py} returns \code{tskit} a reticulate Python module + if succesful or otherwise throws an error (when \code{object_name} exists + but is not a reticulate Python module) or returns \code{simpleError} + (when installation or import failed). \code{check_tskit_py} returns + \code{TRUE} if \code{object} is a reticulate Python module or \code{FALSE} + otherwise. } \description{ This function imports the reticulate Python \code{tskit} module @@ -33,7 +38,10 @@ This function imports the reticulate Python \code{tskit} module This function is meant for users running \code{tskit <- get_tskit_py()} or similar code, but also by other functions in this package that need the \code{tskit} reticulate Python module. The point of \code{get_tskit_py} is - to avoid importing the module repeatedly, if it has been imported already. + to avoid importing the module repeatedly, if it has been imported already + in which case we use that imported module. Since this process can be + finicky (it depends on the availability of reticulate Python, module already + instaled, internet access, etc.) 
} \section{Functions}{ \itemize{ diff --git a/RcppTskit/notes_pkg_dev.Rmd b/RcppTskit/notes_pkg_dev.Rmd index 4d1f660..b60e8ad 100644 --- a/RcppTskit/notes_pkg_dev.Rmd +++ b/RcppTskit/notes_pkg_dev.Rmd @@ -172,6 +172,9 @@ install.packages("covr") # https://usethis.r-lib.org/reference/use_coverage.html usethis::use_coverage(type = "codecov") +# Build the report & report +cov <- covr::package_coverage(clean = TRUE); covr::report(cov) + # Build the report cov <- covr::package_coverage(clean = TRUE) diff --git a/RcppTskit/tests/testthat/test_get_tskit_py.R b/RcppTskit/tests/testthat/test_get_tskit_py.R index 3547fa4..4fdfea8 100644 --- a/RcppTskit/tests/testthat/test_get_tskit_py.R +++ b/RcppTskit/tests/testthat/test_get_tskit_py.R @@ -14,10 +14,17 @@ test_that("get_tskit_py() works", { skip_if_offline() # The tests below take quite a bit of time since they pull in installation of - # Python modules, hence skipping on CRAN due to time limits on CRAN. + # Python modules, hence skipping on CRAN due to time limits on CRAN skip_on_cran() } + # Uncomment the below to explore test behaviour, but note that the removal + # doesn't work when you try to run the tests multiple times in the same session! + # Hence we are commenting this next line out. 
+ # try(reticulate::py_require("tskit", action = "remove")) + if (!reticulate::py_available(initialize = TRUE)) { + skip("Python not available for get_tskit_py tests.") + } # Install (if not already installed) & import tskit on the first call if (exists("tskit", envir = .GlobalEnv)) { rm("tskit", envir = .GlobalEnv) @@ -40,14 +47,18 @@ test_that("get_tskit_py() works", { expect_equal(tskit$`__name__`, tskit2$`__name__`) # Re-importing - tskit3 <- get_tskit_py(object_name = NULL) + tskit3 <- get_tskit_py(force = TRUE) # lobstr::obj_addr(tskit3) # "0x161ec00f0" --> different address because we are obtaining a new object # but it is still the same module expect_equal(tskit$`__name__`, tskit3$`__name__`) + + # Installing a non-existent module + out <- get_tskit_py(object_name = "havent_seen_such_a_python_module") + expect_false(is(out, "python.builtin.module")) }) -test_that("check_tskit_py() validates python module objects", { +test_that("check_tskit_py() validates reticulate Python module objects", { expect_message( expect_false(check_tskit_py(1)), "object must be a reticulate Python module object!" @@ -57,8 +68,8 @@ test_that("check_tskit_py() validates python module objects", { "object must be a reticulate Python module object!" 
) - if (!reticulate::py_available(initialize = FALSE)) { - skip("Python not available for reticulate tests.") + if (!reticulate::py_available(initialize = TRUE)) { + skip("Python not available for check_tskit_py tests.") } obj <- reticulate::py_eval("1") @@ -74,6 +85,6 @@ test_that("check_tskit_py() validates python module objects", { tskit <- get_tskit_py() expect_true(check_tskit_py(tskit)) } else { - skip("tskit module not available for reticulate tests.") + skip("tskit module not available for check_tskit_py tests.") } }) From afce3c66c985b16e44fb92dacca451531d21e804 Mon Sep 17 00:00:00 2001 From: Gregor Gorjanc Date: Tue, 27 Jan 2026 23:56:17 +0000 Subject: [PATCH 7/7] Exposing RcppTskit_treeseq_xptr in a header for #40 plus polish --- RcppTskit/DESCRIPTION | 6 +-- RcppTskit/R/RcppTskit.R | 6 +-- RcppTskit/inst/include/RcppTskit.hpp | 21 ++++++++++ RcppTskit/man/get_tskit_py.Rd | 4 +- RcppTskit/src/RcppTskit.cpp | 19 +-------- RcppTskit/vignettes/RcppTskit_intro.qmd | 55 ++++++++++++++----------- 6 files changed, 62 insertions(+), 49 deletions(-) create mode 100644 RcppTskit/inst/include/RcppTskit.hpp diff --git a/RcppTskit/DESCRIPTION b/RcppTskit/DESCRIPTION index 53c305f..f486320 100644 --- a/RcppTskit/DESCRIPTION +++ b/RcppTskit/DESCRIPTION @@ -30,8 +30,8 @@ Depends: Imports: methods, R6, - Rcpp (>= 1.1.0), - reticulate + Rcpp (>= 0.12.10), + reticulate (>= 1.41.0) Suggests: covr, knitr, @@ -39,7 +39,7 @@ Suggests: spelling, testthat (>= 3.0.0) LinkingTo: - Rcpp (>= 1.1.0) + Rcpp (>= 0.12.10) VignetteBuilder: quarto Config/testthat/edition: 3 diff --git a/RcppTskit/R/RcppTskit.R b/RcppTskit/R/RcppTskit.R index 7460c75..93e66b6 100644 --- a/RcppTskit/R/RcppTskit.R +++ b/RcppTskit/R/RcppTskit.R @@ -17,9 +17,9 @@ #' to avoid importing the module repeatedly, if it has been imported already #' in which case we use that imported module. 
Since this process can be #' finicky (it depends on the availability of reticulate Python, module already -#' instaled, internet access, etc.) +#' installed, internet access, etc.) #' @return \code{get_tskit_py} returns \code{tskit} a reticulate Python module -#' if succesful or otherwise throws an error (when \code{object_name} exists +#' if successful or otherwise throws an error (when \code{object_name} exists #' but is not a reticulate Python module) or returns \code{simpleError} #' (when installation or import failed). \code{check_tskit_py} returns #' \code{TRUE} if \code{object} is a reticulate Python module or \code{FALSE} @@ -52,7 +52,7 @@ get_tskit_py <- function(object_name = "tskit", force = FALSE) { } } - msgSuccess <- paste0('reticulate::py_require("', object_name, '") succeded!') + msgSuccess <- paste0('reticulate::py_require("', object_name, '") succeeded!') msgFail <- paste0('reticulate::py_require("', object_name, '") failed!') e <- simpleError(msgFail) if (!reticulate::py_module_available(object_name)) { diff --git a/RcppTskit/inst/include/RcppTskit.hpp b/RcppTskit/inst/include/RcppTskit.hpp new file mode 100644 index 0000000..675743a --- /dev/null +++ b/RcppTskit/inst/include/RcppTskit.hpp @@ -0,0 +1,21 @@ +#ifndef RCPPTSKIT_H +#define RCPPTSKIT_H + +#include +#include + +// Finaliser to free tsk_treeseq_t when it is garbage collected +// See \url{https://tskit.dev/tskit/docs/stable/c-api.html#c.tsk_treeseq_free} +// for more details. 
+static void RcppTskit_treeseq_xptr_delete(tsk_treeseq_t *ptr) { + if (ptr != NULL) { + tsk_treeseq_free(ptr); + delete ptr; + } +} + +// Define the external pointer type for tsk_treeseq_t with the finaliser +using RcppTskit_treeseq_xptr = Rcpp::XPtr; + +#endif diff --git a/RcppTskit/man/get_tskit_py.Rd b/RcppTskit/man/get_tskit_py.Rd index 36fc2e7..584e96b 100644 --- a/RcppTskit/man/get_tskit_py.Rd +++ b/RcppTskit/man/get_tskit_py.Rd @@ -24,7 +24,7 @@ reticulate Python module.} } \value{ \code{get_tskit_py} returns \code{tskit} a reticulate Python module - if succesful or otherwise throws an error (when \code{object_name} exists + if successful or otherwise throws an error (when \code{object_name} exists but is not a reticulate Python module) or returns \code{simpleError} (when installation or import failed). \code{check_tskit_py} returns \code{TRUE} if \code{object} is a reticulate Python module or \code{FALSE} @@ -41,7 +41,7 @@ This function is meant for users running \code{tskit <- get_tskit_py()} to avoid importing the module repeatedly, if it has been imported already in which case we use that imported module. Since this process can be finicky (it depends on the availability of reticulate Python, module already - instaled, internet access, etc.) + installed, internet access, etc.) } \section{Functions}{ \itemize{ diff --git a/RcppTskit/src/RcppTskit.cpp b/RcppTskit/src/RcppTskit.cpp index 6e1863b..801b947 100644 --- a/RcppTskit/src/RcppTskit.cpp +++ b/RcppTskit/src/RcppTskit.cpp @@ -1,21 +1,4 @@ -#include -#include - -// using namespace Rcpp; // to omit Rcpp:: prefix for whole Rcpp API -// using Rcpp::IntegerVector; // to omit Rcpp:: prefix for IntegerVector - -// Finaliser to free tsk_treeseq_t when it is garbage collected -// See \url{https://tskit.dev/tskit/docs/stable/c-api.html#c.tsk_treeseq_free} -// for more details. 
-static void RcppTskit_treeseq_xptr_delete(tsk_treeseq_t *ptr) { - if (ptr != NULL) { - tsk_treeseq_free(ptr); - delete ptr; - } -} -// Define the external pointer type for tsk_treeseq_t with the finaliser -using RcppTskit_treeseq_xptr = Rcpp::XPtr; +#include //' @title Report the version of installed kastore C API //' @details The version is stored in the installed header \code{kastore.h}. diff --git a/RcppTskit/vignettes/RcppTskit_intro.qmd b/RcppTskit/vignettes/RcppTskit_intro.qmd index 058e97a..1081ff8 100644 --- a/RcppTskit/vignettes/RcppTskit_intro.qmd +++ b/RcppTskit/vignettes/RcppTskit_intro.qmd @@ -157,16 +157,20 @@ after we describe the implemented data and class model. ## Data and class model -`RcppTskit` represents a tree sequence as a lightweight R object of R6 class `TreeSequence`. -R6 class was partially chosen so the R code calls resemble Python code. -`TreeSequence` wraps an external pointer (`externalptr`) to the `tskit` -C data structure (`tsk_treeseq_t`). -Most methods (for example, `ts$num_individuals()`, `ts$dump()`, etc.) +`RcppTskit` represents a tree sequence as a lightweight R6 object of class `TreeSequence`. +The R6 class was chosen in part so that `TreeSequence` method calls in R +resemble the tskit Python API. +`TreeSequence` wraps an external pointer (`externalptr`) to +the `tskit` C object structure `tsk_treeseq_t`. +Most methods (for example, `ts$num_individuals()` and `ts$dump()`) call the `tskit` C API via `Rcpp`, -so the calls are fast and the object is not copied -unless you explicitly write/read or change it. +so calls are fast and the object is not copied +unless you explicitly modify it. The underlying pointer is exposed as `TreeSequence$pointer` -for developers and advanced users that can write C++ code. +for developers and advanced users who work with C++. 
+In C++, the pointer has type `RcppTskit_treeseq_xptr`, +and the tree sequence memory is released by the `Rcpp::XPtr` +finaliser when the pointer is garbage-collected in R. ## For typical use cases @@ -256,9 +260,9 @@ ts$num_individuals() # 2 (if you have ran the above Python code) #| label: use_case_3 # Write a C++ function as multi-line character string codeString <- ' - #include + #include <RcppTskit.hpp> int ts_num_individuals(SEXP ts) { - Rcpp::XPtr ts_xptr(ts); + RcppTskit_treeseq_xptr ts_xptr(ts); return (int) tsk_treeseq_get_num_individuals(ts_xptr); }' @@ -285,28 +289,33 @@ ts$num_individuals() ### 4) Call `tskit` C API in C++ code in another R package -To call the `tskit` C API in your own R package via `Rcpp` you can leverage `RcppTskit`. -Just follow the steps below and check how these were implemented in -the test R package `RcppTskitTestPkgLinkingTo` at TODO. +To call the `tskit` C API in your own R package via `Rcpp` +you can leverage `RcppTskit`, which will simplify your installation +and enable you to quickly call the `tskit` C API from your C++ code. +To do this, follow the steps below and check how these were implemented in +the test R package `RcppTskitTestLinkingTo` at https://github.com/HighlanderLab/RcppTskitTestLinking. -a) Open `DESCRIPTION` file and add `RcppTskit` to the `LinkingTo:` field, - TODO: likely also `Imports:` field. +a) Open `DESCRIPTION` file and + add `RcppTskit` to the `Imports:` and `LinkingTo:` fields, and further + add `Rcpp` to the `LinkingTo:` field as a minimum. - TODO: And Rcpp too? +b) Create `R/YourPackage-package.R` file and add to it at a minimum: + `#' @import RcppTskit` in one line and `"_PACKAGE"` in another line, + so that `devtools` will manage your package `NAMESPACE` imports. -b) Add `#include ` as needed to your C++ header files in `src` directory. +c) Add `#include <RcppTskit.hpp>` as needed to your C++ header files in `src` directory. -c) Add `// [[Rcpp::depends(RcppTskit)]]` to your C++ files in `src` directory. 
+d) Add `// [[Rcpp::depends(RcppTskit)]]` to your C++ files in `src` directory. -d) Add `// [[Rcpp::plugins(RcppTskit)]]` to your C++ files in `src` directory. +e) Add `// [[Rcpp::plugins(RcppTskit)]]` to your C++ files in `src` directory. -e) Call `tskit` C API as needed in your C++ code in `src` directory. +f) Call `RcppTskit` C++ API and `tskit` C API as needed in your C++ files in `src` directory. -f) Configure your package build to use the `RcppTskit` library file +g) Configure your package build to use the `RcppTskit` library file with the following steps: - Add `src/Makevars.in` and `src/Makevars.win.in` files with - `PKG_LIB = @RCPPTSKIT_LIB@` flag, in addition to other flags. + `PKG_LIBS = @RCPPTSKIT_LIB@` flag, in addition to other flags. - Add `tools/configure.R` file, which will replace `@RCPPTSKIT_LIB@` in `src/Makevars.in` and `src/Makevars.win.in` @@ -319,7 +328,7 @@ f) Configure your package build to use the `RcppTskit` library file - Add `cleanup` and `cleanup.win` scripts (and make them executable) to remove `src/Makevars` and `src/Makevars.win` as well as compilation files. -g) You should now be ready to build, check, and install your package using +h) You should now be ready to build, check, and install your package using `devtools::build()`, `devtools::check()`, and `devtools::install()` or their `R CMD` equivalents.