在列表中找到最长的重复序列

Question

如果我有这样的列表：

[i;i;i;a;b;b;a;i;i;c] 
(*the longest repeating sequence would be [i;i]*)
[i;i;i;i] 
(*here the max_pattern would be [i;i] (has to repeat, no overlapping*)

[t;f;f;t] 
(*here it would be [t] as t is, in this case, 
the first element that has a repeating pattern in the list*)

我的想法：

从列表中取出第一个元素并划分列表 - 其中 list_one 包含第一个元素左侧的所有元素。并列出右侧的所有元素。
然后检查该元素是否在两个列表之一中匹配。
如果它确实将当前最大值设置为元素。
现在将原始列表中当前元素右侧的下一个元素连接到当前元素，并再次在 list_one 和 list_two 中查找是否有匹配。
在串联长度达到
```
>(size_of list / 2)
```
点后停止。
现在转到第一步，但使用下一个元素并重复，直到检查列表中的每个元素。

示例：

[t;f;f;t]
(*first round*)
[t][][f;f;t]
(*match in last elem*)
current_max = [t]
(*second round*)
[t;f][][f;t]
(*from here on no more matches*)
(*go to the next element, split lists again, and proceed with mentioned steps*)
[f][t][f;t]
(*match in f*) 
(*repeat from here on...*)

不知道这个算法有没有缺陷。我正在尝试在 OCaml 中实现这一点，但我认为可能有一个更简单的方法来做到这一点。

Answer 1

根据你的例子，我不确定我是否理解这个问题。如果您正在尝试查找重复值的序列，这非常简单。让我们看看用

List.fold_left

来解决这个问题的方法。

List.fold_left 
  (fun (max_seq, cur_seq) x ->
     match (max_seq, cur_seq) with
     (* Set the info up on the first iteration. *)
     | None, None -> (Some (1, x), Some (1, x))
     (* These should basically never occur. *)
     | None, Some (cur_len, cur_val) -> (cur_seq, cur_seq)
     | Some (max_len, max_val), None -> (max_seq, max_seq)
     (* Where the real magic happens. *)
     | Some (max_len, max_val), Some (cur_len, cur_val) -> 
       if x = cur_val && cur_len >= max_len then
         let new_val = Some (cur_len + 1, cur_val) in
         (new_val, new_val)
       else if x = cur_val then   
         (max_seq, Some (cur_len + 1, cur_val))
       else
         (max_seq, Some (1, x))
   )
  (None, None)
  [1; 1; 5; 7; 2; 2; 2; 2; 2; 2; 8; 8; 1; 1; 1; 1; 1]

结果：

(Some (6, 2), Some (5, 1))

因为我们需要处理空列表的前景，所以我们将使用

option

类型来表示观察到的最大序列以及我们正在跟踪的当前序列。

考虑到当我们观察到两个值都是

None

时，我们将最大序列和当前序列设置为迄今为止观察到的序列长度为

的唯一值，接下来的两种情况基本上只是为了确保彻底匹配：

     | None, Some (cur_len, cur_val) -> (cur_seq, cur_seq)
     | Some (max_len, max_val), None -> (max_seq, max_seq)

真正的魔法发生在这里：

     | Some (max_len, max_val), Some (cur_len, cur_val) -> 
       if x = cur_val && cur_len >= max_len then
         let new_val = Some (cur_len + 1, cur_val) in
         (new_val, new_val)
       else if x = cur_val then   
         (max_seq, Some (cur_len + 1, cur_val))
       else
         (max_seq, Some (1, x))

当我们折叠列表中的每个值时：

如果它是当前序列的延续，并且长度等于或大于最大序列，那么我们就有一个new最大序列。
否则，我们有序列的延续，但它不是新的最大值。
否则，我们有一个新的序列要跟踪。

最终结果将给出两个值，分别表示最大序列长度和值，以及当前序列和值。我们可以使用模式匹配来提取这些信息并剔除我们需要的内容。

例如：

let max_seq lst =
  let (max, _) = List.fold_left 
    (fun (max_seq, cur_seq) x ->
       match (max_seq, cur_seq) with
       | None, None -> (Some (1, x), Some (1, x))
       | None, Some (cur_len, cur_val) -> (cur_seq, cur_seq)
       | Some (max_len, max_val), None -> (max_seq, max_seq)
       | Some (max_len, max_val), Some (cur_len, cur_val) -> 
         if x = cur_val && cur_len >= max_len then
           let new_val = Some (cur_len + 1, cur_val) in
           (new_val, new_val)
         else if x = cur_val then   
           (max_seq, Some (cur_len + 1, cur_val))
         else
           (max_seq, Some (1, x))
     )
    (None, None)
    lst
  in
  max

现在我们可以简单地在列表中调用它。

# max_seq [1; 1; 5; 7; 2; 2; 2; 2; 2; 2; 8; 8; 1; 1; 1; 1; 1];;
- : (int * int) option = Some (6, 2)

作为该语言的新手，如果它可以帮助您理解

List.fold_left

，那么这是一个非常容易实现的功能，并且当您试图集中精力时，查看该实现通常很有用。我将调用我的版本

foldl

。

let rec foldl f init lst =
  match lst with
  | [] -> init
  | x::xs -> foldl f (f init x) xs

在列表中找到最长的重复序列

问题描述投票：0回答：1

1个回答

最新问题

在列表中找到最长的重复序列

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1