Tutorial 4: Compare the predicted sample-cfRNA between the disease group and the normal group

CellFreeGMF employs a single-cell reference dataset to computationally association matrices that quantify relationships between pathological/healthy samples and specific cell types

[1]:

rm(list = ls())

print(R.home())
print(.libPaths())

[1] "/home/zhangwenxiang/anaconda3/envs/CellFreeGMF/lib/R"
[1] "/home/zhangwenxiang/anaconda3/envs/CellFreeGMF/lib/R/library"

Preparation

[2]:

library(reticulate)
library(Seurat)
library(dplyr)
library(future)
library(ggplot2)
library(dplyr)
library(scales)
library(ggpubr)

Attaching SeuratObject

Warning message:
“程辑包‘dplyr’是用R版本4.2.3 来建造的”

载入程辑包：‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Warning message:
“程辑包‘scales’是用R版本4.2.3 来建造的”

Setting python environment

[3]:

use_python("/home/zhangwenxiang/anaconda3/envs/CellFreeGMF/bin/python", required = TRUE)

py_config()

python:         /home/zhangwenxiang/anaconda3/envs/CellFreeGMF/bin/python
libpython:      /home/zhangwenxiang/anaconda3/envs/CellFreeGMF/lib/libpython3.10.so
pythonhome:     /home/zhangwenxiang/anaconda3/envs/CellFreeGMF:/home/zhangwenxiang/anaconda3/envs/CellFreeGMF
version:        3.10.16 | packaged by conda-forge | (main, Apr  8 2025, 20:53:32) [GCC 13.3.0]
numpy:          /home/zhangwenxiang/anaconda3/envs/CellFreeGMF/lib/python3.10/site-packages/numpy
numpy_version:  2.2.6

NOTE: Python version was forced by use_python() function

Hyperparameter Configuration

[4]:

setwd('/home/zhangwenxiang/workspace/python/cfRNA/v16')
disease_name = 'PDAC'

Load predicted data

[5]:

np <- import("numpy")

Decon_sampletype_disease_cfRNAtype_Target_gene <- np$load(paste("./save_data/", disease_name, "/Decon_sampletype_disease_cfRNAtype_Target_gene.npz", sep = ""), allow_pickle = TRUE)
Decon_sampletype_normal_cfRNAtype_Target_gene <- np$load(paste("./save_data/", disease_name, "/Decon_sampletype_normal_cfRNAtype_Target_gene.npz", sep = ""), allow_pickle = TRUE)

Normal_Target_decon_Result_sample_cfRNA <- py_to_r(Decon_sampletype_normal_cfRNAtype_Target_gene$f['Result_sample_cfRNA'])
Normal_Target_decon_Result_sample_cell <- py_to_r(Decon_sampletype_normal_cfRNAtype_Target_gene$f['Result_sample_cell'])
Normal_Target_decon_Result_cfRNA_cell <- py_to_r(Decon_sampletype_normal_cfRNAtype_Target_gene$f['Result_cfRNA_cell'])
Normal_Target_decon_sample_name <- py_to_r(Decon_sampletype_normal_cfRNAtype_Target_gene$f['sample_name'])
Normal_Target_decon_cfRNA_name <- py_to_r(Decon_sampletype_normal_cfRNAtype_Target_gene$f['cfRNA_name'])
Normal_Target_decon_cell_name <- py_to_r(Decon_sampletype_normal_cfRNAtype_Target_gene$f['cell_name_cell_ontology_class'])
Normal_Target_decon_cell_id <- py_to_r(Decon_sampletype_normal_cfRNAtype_Target_gene$f['cell_id'])
Normal_Target_cell_meta <- data.frame(cell_name = Normal_Target_decon_cell_name,
                                      cell_id = Normal_Target_decon_cell_id)

Disease_Target_decon_Result_sample_cfRNA <- py_to_r(Decon_sampletype_disease_cfRNAtype_Target_gene$f['Result_sample_cfRNA'])
Disease_Target_decon_Result_sample_cell <- py_to_r(Decon_sampletype_disease_cfRNAtype_Target_gene$f['Result_sample_cell'])
Disease_Target_decon_Result_cfRNA_cell <- py_to_r(Decon_sampletype_disease_cfRNAtype_Target_gene$f['Result_cfRNA_cell'])
Disease_Target_decon_sample_name <- py_to_r(Decon_sampletype_disease_cfRNAtype_Target_gene$f['sample_name'])
Disease_Target_decon_cfRNA_name <- py_to_r(Decon_sampletype_disease_cfRNAtype_Target_gene$f['cfRNA_name'])
Disease_Target_decon_cell_name <- py_to_r(Decon_sampletype_disease_cfRNAtype_Target_gene$f['cell_name_cell_ontology_class'])
Disease_Target_decon_cell_id <- py_to_r(Decon_sampletype_disease_cfRNAtype_Target_gene$f['cell_id'])
Disease_Target_cell_meta <- data.frame(cell_name = Disease_Target_decon_cell_name,
                                      cell_id = Disease_Target_decon_cell_id)

Extracting sample-cell matrix

[6]:

combined_mat <- data.frame(rbind(Normal_Target_decon_Result_sample_cell,
                                 Disease_Target_decon_Result_sample_cell))
rownames(combined_mat) <- c(paste(Normal_Target_decon_sample_name,'_0', sep=""),
                            paste(Disease_Target_decon_sample_name, '_1', sep = ""))
colnames(combined_mat) <- c(Disease_Target_decon_cell_id)

sample_cell <- as.matrix(combined_mat) %*% as.matrix(table(Normal_Target_decon_cell_id, Disease_Target_decon_cell_name))
disease_sample_cell <- sample_cell[1:length(Normal_Target_decon_sample_name),]
normal_sample_cell <- sample_cell[(length(Normal_Target_decon_sample_name) + 1):length(Disease_Target_decon_sample_name), ]

Calculate p value

[7]:

label <- c(rep("Normal", length(Normal_Target_decon_sample_name)),
           rep("Tumor", length(Disease_Target_decon_sample_name)))

p_values_wilcox <- sapply(1:ncol(sample_cell), function(i) {
  wilcox.test(sample_cell[, i] ~ label)$p.value
})
names(p_values_wilcox) <- colnames(sample_cell)
p_values_wilcox <- p_values_wilcox[p_values_wilcox < 0.05]
p_values_wilcox

endothelial cell: 0.00341875403591417
myeloid cell: 0.00755291893270264
pancreatic acinar cell: 0.00404973271118355
pancreatic alpha cell: 0.0144512147404731
pancreatic ductal cell: 0.00337730511333343
pancreatic stellate cell: 0.0415214231472042
t cell: 0.00604448694117583

The analysis identified eight cell types with significant differences in sample–cell associations between PDAC and healthy conditions, notably including pancreas-associated cells, myeloid cells, and T cells

[8]:

for(cell_name in names(p_values_wilcox)){

  expr_df <- as.data.frame(sample_cell[, cell_name])
  colnames(expr_df) <- 'Cell1'

  cell1_df <- data.frame(
    Expression = expr_df$Cell1,
    SampleType = factor(label, levels = c("Tumor", "Normal"))
  )

  p <- ggplot(cell1_df, aes(x = SampleType, y = Expression, fill = SampleType)) +
    geom_boxplot(width = 0.4, alpha = 0.6, outlier.shape = NA, color = "black", lwd = 0.6) +
    geom_jitter(width = 0.2, size = 2, alpha = 0.7, shape = 21, color = "black") +
    stat_compare_means(
      method = "wilcox.test",
      label = "p.format",
      label.y.npc = "top",
      size = 5,
      fontface = "bold",
      color = "red"
    ) +
    scale_fill_manual(values = c("Tumor" = "#1f77b4", "Normal" = "#ff7f0e")) +
    theme_minimal(base_size = 14) +
    theme(
      plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
      axis.title = element_text(face = "bold"),
      legend.position = "none",
      panel.grid.major.x = element_blank(),
      panel.grid.minor = element_blank()
    ) +
    labs(
      title = paste(cell_name, "\n Tumor vs Normal", sep = ""),
      x = "Sample Type",
      y = "Expression Level"
    )
  print(p)
}