R：对 data.frame 每一行中的每两个元素应用函数 -> 生成边列表（edgelist）

问题描述 投票:1回答:2

我有一个 data.frame，其中每一行是一组人，人数在 2 到 8 之间。我想创建一个图（graph），把所有人作为顶点；如果两个人在我的原始 data.frame 中出现在同一行，就用一条边把他们连接起来。画图本身不是问题，问题只在于如何创建边列表。我的问题似乎与 “Creating an edgelist from Patent data in R” 非常接近，但由于这是我使用 R 的第一天，我没能把那里的解法套用到我的数据上。


name_1  name_2  name_3  name_4  name_5
jan     tim         
tom     tim     bernd       
stefen  tom     tim     jan     bernd
marcel  bernd


# dput() of the question's data: four rows, one group of people per row,
# factor columns, with "" marking an unused cell.
structure(list(name_1 = structure(c(1L, 4L, 3L, 2L), .Label = c("jan", 
"marcel", "stefen", "tom"), class = "factor"), name_2 = structure(c(2L, 
2L, 3L, 1L), .Label = c("bernd", "tim", "tom"), class = "factor"), 
    name_3 = structure(c(1L, 2L, 3L, 1L), .Label = c("", "bernd", 
    "tim"), class = "factor"), name_4 = structure(c(1L, 1L, 2L, 
    1L), .Label = c("", "jan"), class = "factor"), name_5 = structure(c(1L, 
    1L, 2L, 1L), .Label = c("", "bernd"), class = "factor")), .Names = c("name_1", 
"name_2", "name_3", "name_4", "name_5"), class = "data.frame", row.names = c(NA, 
-4L))


jan     tim
tom     tim
tom     bernd
tim     bernd
stefen  tom
stefen  tim
stefen  jan
stefen  bernd
tom     tim
tom     jan
tom     bernd
tim     jan
tim     bernd
marcel  bernd


r apply

这里我们使用 gtools 包中的 combinations 函数。df1 是给定的数据框（data.frame）。

# Build an edge list from df1: every pair of names that share a row becomes
# one edge. df1 is the data.frame from the question (one group of people per
# row, "" marks an unused cell).
library(gtools) # provides combinations()

# convert factor columns to character
# as.matrix() keeps the matrix shape even for a one-row df1, unlike sapply()
df1 <- as.matrix(df1)

# get names per row which are not blank, joined as "a, b, c"
df1 <- apply(df1, 1, function(x) toString(x[x != '']))

# one two-column matrix of name pairs per row of the original data
# lapply() always returns a list; sapply() would collapse the result into a
# single matrix whenever every row holds the same number of names
answer <- lapply(seq_along(df1), function(x) {
    # NOTE(review): toString() joins with ", " while the split is on ",", so
    # every name after the first keeps a leading space (visible in the printed
    # result); apply trimws() to val if the names are used as vertex ids
    val <- unlist(strsplit(df1[[x]], split = ','))
    combinations(n = length(val), r = 2, v = val, repeats.allowed = FALSE)
})

# convert the list to a single matrix with one edge per row
df2 <- do.call('rbind', answer)

[1,] " tim"   "jan"   
 [2,] " bernd" " tim"  
 [3,] " bernd" "tom"   
 [4,] " tim"   "tom"   
 [5,] " bernd" " jan"  
 [6,] " bernd" " tim"  
 [7,] " bernd" " tom"  
 [8,] " bernd" "stefen"
 [9,] " jan"   " tim"  
[10,] " jan"   " tom"  
[11,] " jan"   "stefen"
[12,] " tim"   " tom"  
[13,] " tim"   "stefen"
[14,] " tom"   "stefen"
[15,] " bernd" "marcel"



# Example data: one group of people per row; "" marks an unused cell.
df <- data.frame(name_1 = c("jan", "tom", "stefen", "marcel"), 
                 name_2 = c("tim", "tim", "tom", "bernd"), 
                 name_3 = c("", "bernd", "tim", ""), 
                 name_4 = c("", "", "jan", ""), 
                 name_5 = c("", "", "bernd", ""))

# Approach 1: walk the rows with apply(), drop the blanks and let combn()
# enumerate every pair; the per-row pair matrices are then stacked by rbind.
# NOTE(review): combn(x, 2) needs at least two names per row, and apply()
# only returns a list here because the rows yield different numbers of pairs.
as.data.frame(do.call(rbind, 
            apply(df, 1, function(x){
                x <- x[x != '']; 
                t(combn(x, 2))
            })), 
    stringsAsFactors = FALSE)
#>        V1    V2
#> 1     jan   tim
#> 2     tom   tim
#> 3     tom bernd
#> 4     tim bernd
#> 5  stefen   tom
#> 6  stefen   tim
#> 7  stefen   jan
#> 8  stefen bernd
#> 9     tom   tim
#> 10    tom   jan
#> 11    tom bernd
#> 12    tim   jan
#> 13    tim bernd
#> 14    jan bernd
#> 15 marcel bernd

# Approach 2: the same idea without apply(). Map() hands the i-th element of
# every column to the function, so `...` holds the names of one row; Map()
# always returns a list, whatever the number of names per row.
as.data.frame(do.call(rbind, 
                      do.call(Map, 
                              c(function(...){
                                  x <- c(...);
                                  x <- x[x != '']; 
                                  t(combn(x, 2))
                              }, 
                              lapply(df, as.character)))), 
              stringsAsFactors = FALSE)
#>        V1    V2
#> 1     jan   tim
#> 2     tom   tim
#> 3     tom bernd
#> 4     tim bernd
#> 5  stefen   tom
#> 6  stefen   tim
#> 7  stefen   jan
#> 8  stefen bernd
#> 9     tom   tim
#> 10    tom   jan
#> 11    tom bernd
#> 12    tim   jan
#> 13    tim bernd
#> 14    jan bernd
#> 15 marcel bernd




library(tidyverse)

# Reshape to long form: one line per (row id, column, name). Blank cells are
# turned into NA first so that they can be dropped at the end of the pipe.
df_tidy <- df %>% 
    mutate_all(as.character) %>% 
    mutate_all(na_if, '') %>% 
    rowid_to_column() %>% 
    gather(col, name, -rowid) %>% 
    drop_na()

df_tidy
#>    rowid    col   name
#> 1      1 name_1    jan
#> 2      2 name_1    tom
#> 3      3 name_1 stefen
#> 4      4 name_1 marcel
#> 5      1 name_2    tim
#> 6      2 name_2    tim
#> 7      3 name_2    tom
#> 8      4 name_2  bernd
#> 10     2 name_3  bernd
#> 11     3 name_3    tim
#> 15     3 name_4    jan
#> 19     3 name_5  bernd


# Base R on the long data: build the pair matrix of every rowid group with
# combn() and stack the per-group matrices into one edge list.
as.data.frame(do.call(rbind, 
    aggregate(name ~ rowid, df_tidy, 
              function(x){list(t(combn(x, 2)))})$name))


# Per rowid: combn() turns every pair of names into a one-row data frame
# (transpose first, then convert), bind_rows() stacks the pairs of one group,
# and the final unnest() flattens the list column into one edge per line.
df_tidy %>% 
    group_by(rowid) %>% 
    summarise(name = list(combn(name, 2, compose(as_data_frame, t), simplify = FALSE)), 
              name = map(name, bind_rows)) %>% 
    unnest(name)
#> # A tibble: 15 x 3
#>    rowid V1     V2   
#>    <int> <chr>  <chr>
#>  1     1 jan    tim  
#>  2     2 tom    tim  
#>  3     2 tom    bernd
#>  4     2 tim    bernd
#>  5     3 stefen tom  
#>  6     3 stefen tim  
#>  7     3 stefen jan  
#>  8     3 stefen bernd
#>  9     3 tom    tim  
#> 10     3 tom    jan  
#> 11     3 tom    bernd
#> 12     3 tim    jan  
#> 13     3 tim    bernd
#> 14     3 jan    bernd
#> 15     4 marcel bernd


# Pair every name with all names of the same row instead of calling combn():
# each member gets the full name vector of its group as a list column, which
# unnest() expands into all ordered pairs. Keeping only name < name2 leaves
# each unordered pair once and drops the pairs of a name with itself.
# The list column is named explicitly because unnest() without a column
# selection is deprecated in current tidyr.
df_tidy %>% 
    group_by(rowid) %>% 
    mutate(name2 = list(name)) %>% 
    unnest(name2) %>% 
    filter(name < name2)
#> # A tibble: 15 x 4
#> # Groups:   rowid [4]
#>    rowid col    name   name2 
#>    <int> <chr>  <chr>  <chr> 
#>  1     1 name_1 jan    tim   
#>  2     3 name_1 stefen tom   
#>  3     3 name_1 stefen tim   
#>  4     2 name_2 tim    tom   
#>  5     4 name_2 bernd  marcel
#>  6     2 name_3 bernd  tom   
#>  7     2 name_3 bernd  tim   
#>  8     3 name_3 tim    tom   
#>  9     3 name_4 jan    stefen
#> 10     3 name_4 jan    tom   
#> 11     3 name_4 jan    tim   
#> 12     3 name_5 bernd  stefen
#> 13     3 name_5 bernd  tom   
#> 14     3 name_5 bernd  tim   
#> 15     3 name_5 bernd  jan


© www.soinside.com 2019 - 2024. All rights reserved.