我使用这个答案https://stackoverflow.com/a/49672348/832783来解析csv文件。
这是代码:
open Microsoft.VisualBasic.FileIO
open System.Text
/// Lazily reads a comma-delimited file and yields one string[] of fields per record.
///
/// fileName: path of the CSV file to read.
/// encoding: text encoding handed to the TextFieldParser constructor.
///
/// Returns a seq<string[]>. The parser is created when enumeration starts, and the
/// `use` binding inside the sequence expression disposes it when the enumeration
/// finishes or the enumerator itself is disposed.
let parseCsv (fileName: string) (encoding: Encoding) =
    // Swapping `seq { ... }` for `[ ... ]` turns this into an eagerly built list.
    //[
    seq {
        // `use` must stay inside the sequence expression so that the parser's
        // lifetime is tied to the enumeration rather than to this function call.
        use csvParser = new TextFieldParser(fileName, encoding)
        csvParser.SetDelimiters([|","|])
        csvParser.TextFieldType <- FieldType.Delimited
        // Lets quoted fields contain the delimiter, e.g. "a,b" is a single field.
        csvParser.HasFieldsEnclosedInQuotes <- true
        while not (csvParser.EndOfData) do
            yield csvParser.ReadFields()
    }
    //]
// Enumerate the whole sequence to count its rows, pass the count to Dump and
// discard Dump's return value. The trailing comment shows an alternative that
// prints every row instead.
parseCsv @"<the path to your csv file>" Encoding.UTF8 |> Seq.length |> Dump |> ignore //iter(fun it -> printfn "%A" it)
它使用 Microsoft.VisualBasic 库 - 您需要将其添加到您的项目中,并且还需要指定 csv 文件的路径。
Dump 来自 LINQPad,但您可以将其替换为 Console.WriteLine。
F# 编译器如何确保资源(在本例中为 TextFieldParser 对象)在序列处理完毕后被释放?
不得不说,这个语法结构太棒了。就像该帖子中 OP 的回答一样,我的第一反应是实现某个类,或许是实现某个允许遍历序列的接口。
我还尝试移动以下块:
use csvParser = new TextFieldParser(fileName, encoding)
csvParser.SetDelimiters([|","|])
csvParser.TextFieldType <- FieldType.Delimited
csvParser.HasFieldsEnclosedInQuotes <- true
移到序列表达式之上,这样做对序列不起作用,但对列表和数组起作用,这是有道理的。
我更喜欢带有 seq 的选项,因为我认为它不像列表或数组选项那样占用内存。
提前致谢(TIA)。
好的,我使用 ILSpy 将代码反编译为 C#,这回答了我的问题。编译器创建了下面这个继承自 GeneratedSequenceBase 的类,并实现了一个状态机来跟踪它在序列处理中所处的位置。当到达序列末尾时,它会释放(Dispose)该对象。
这是:
// query_jpvgrt, Version=0.0.0.0, Culture=neutral, PublicKeyToken=null
// Query_jpvgrt
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using LINQPad;
using Microsoft.FSharp.Core;
using Microsoft.FSharp.Core.CompilerServices;
using Microsoft.VisualBasic.FileIO;
// Decompiled container for the script's top-level definitions: the nested
// parseCsv@30 enumerator class, the Dump helper and the parseCsv entry point.
[CompilationMapping(SourceConstructFlags.Module)]
public static class Query_jpvgrt
{
// State machine that produces the CSV rows, derived from GeneratedSequenceBase<string[]>.
// The pc field selects the branch taken in GenerateNext, Close and get_CheckClose:
//   0 - initial value passed by parseCsv and GetFreshEnumerator; no parser exists yet
//   1 - set right after the TextFieldParser has been constructed
//   2 - set each time a row has been read into current
//   3 - set once the parser has been disposed or Close has run
[Serializable]
[SpecialName]
[StructLayout(LayoutKind.Auto, CharSet = CharSet.Auto)]
[CompilationMapping(SourceConstructFlags.Closure)]
internal sealed class parseCsv@30 : GeneratedSequenceBase<string[]>
{
// Arguments captured from the parseCsv call.
public string fileName;
public Encoding encoding;
// The parser; null until GenerateNext creates it, and reset to null after disposal there.
public TextFieldParser csvParser;
// Current state of the machine (see the state list on the class).
[DebuggerBrowsable(/*Could not decode attribute arguments.*/)]
[CompilerGenerated]
[DebuggerNonUserCode]
public int pc;
// The row most recently read by GenerateNext; returned by get_LastGenerated.
[DebuggerBrowsable(/*Could not decode attribute arguments.*/)]
[CompilerGenerated]
[DebuggerNonUserCode]
public string[] current;
// Stores every argument in the field of the same name, then calls the base constructor.
public parseCsv@30(string fileName, Encoding encoding, TextFieldParser csvParser, int pc, string[] current)
{
this.fileName = fileName;
this.encoding = encoding;
this.csvParser = csvParser;
this.pc = pc;
this.current = current;
base..ctor();
}
// Advances the state machine by one step.
// Returns 1 after storing a freshly read row in current, and 0 (with current
// cleared) once the data is exhausted. The next parameter is never assigned here.
// NOTE(review): presumably the base class maps 1/0 to MoveNext true/false - confirm
// against the GeneratedSequenceBase documentation.
public override int GenerateNext(ref IEnumerable<string[]> next)
{
switch (pc)
{
default:
// First call (pc is 0): create and configure the parser, then fall into the read loop.
csvParser = new TextFieldParser(fileName, encoding);
pc = 1;
csvParser.SetDelimiters(",");
csvParser.TextFieldType = FieldType.Delimited;
csvParser.HasFieldsEnclosedInQuotes = true;
goto case 2;
case 2:
// Loop body: read one row per call while data remains.
if (!csvParser.EndOfData)
{
pc = 2;
current = csvParser.ReadFields();
return 1;
}
goto case 1;
case 1:
// End of data: dispose the parser. pc is set to 3 before the Dispose call,
// so Close sees the finished state even if Dispose throws.
pc = 3;
LanguagePrimitives.IntrinsicFunctions.Dispose(csvParser);
csvParser = null;
pc = 3;
break;
case 3:
// Already finished: nothing left to do.
break;
}
current = null;
return 0;
}
// Releases the parser when the enumeration is closed before reaching the end.
// Loops until pc is 3; an exception thrown during cleanup is remembered and
// rethrown once the finished state has been reached.
public override void Close()
{
Exception ex = default(Exception);
while (true)
{
switch (pc)
{
case 3:
// Finished state: surface any exception captured below, otherwise return.
if (ex != null)
{
throw ex;
}
return;
}
try
{
switch (pc)
{
default:
// States 1 and 2: a parser exists, so dispose it.
pc = 3;
LanguagePrimitives.IntrinsicFunctions.Dispose(csvParser);
break;
case 0:
case 3:
// State 0 has not created a parser and state 3 has already disposed it.
break;
}
pc = 3;
current = null;
}
catch (object obj)
{
// Remember the failure; the next loop iteration sees pc == 3 and rethrows it.
Exception e = (Exception)obj;
ex = e;
}
}
}
// Returns false in states 0 and 3, and true in every other state,
// i.e. exactly the states in which Close disposes the parser.
public bool get_CheckClose()
{
switch (pc)
{
default:
return true;
case 1:
return true;
case 0:
case 3:
return false;
}
}
// Returns the row stored by the most recent GenerateNext call.
[CompilerGenerated]
[DebuggerNonUserCode]
public string[] get_LastGenerated()
{
return current;
}
// Creates a new enumerator in state 0 with the same fileName and encoding,
// and with no parser and no current row.
[CompilerGenerated]
[DebuggerNonUserCode]
public override IEnumerator<string[]> GetFreshEnumerator()
{
return new parseCsv@30(fileName, encoding, null, 0, null);
}
}
// Calls the Dump extension method on o and returns its result.
public static a Dump<a>(a o)
{
return o.Dump();
}
// Compiled form of the F# parseCsv function: returns a parseCsv@30 in state 0.
// No TextFieldParser is constructed here; that happens in GenerateNext.
[CompilationArgumentCounts(new int[] { 1, 1 })]
public static IEnumerable<string[]> parseCsv(string fileName, Encoding encoding)
{
return new parseCsv@30(fileName, encoding, null, 0, null);
}
}
和:
// query_jpvgrt, Version=0.0.0.0, Culture=neutral, PublicKeyToken=null
// <StartupCode$query_jpvgrt>.$Query_jpvgrt
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Text;
using Microsoft.FSharp.Collections;
// Decompiled startup class holding the script's top-level statement.
internal static class $Query_jpvgrt
{
// NOTE(review): compiler-generated field; its purpose is not visible in this listing.
[DebuggerBrowsable(/*Could not decode attribute arguments.*/)]
[CompilerGenerated]
[DebuggerNonUserCode]
internal static int init@;
// Compiled form of the script's last line: builds the row sequence, counts it
// with SeqModule.Length and passes the count to Dump. The locals num and num2
// receive Dump's result and are not used afterwards.
public static void main@()
{
IEnumerable<string[]> source = Query_jpvgrt.parseCsv("<the path to your csv file>", Encoding.UTF8);
int o = SeqModule.Length(source);
int num = Query_jpvgrt.Dump(o);
int num2 = num;
}
}