声明和实例化时抽象类和泛型问题的结合

问题描述 投票:0回答:1

我正在用 C# 开发一个文档审阅应用程序,用于在文档中查找一组搜索词。它需要能够读取 PDF 或 WORD 文档,将来可能还需要支持其他文档类型。

我有一个枚举

DocumentType

/// <summary>
/// Kinds of document the caller can select for review.
/// </summary>
public enum DocumentType
{
    /// <summary>No document type selected (presumably the "unset" state).</summary>
    NotSpecified = 0,

    /// <summary>A PDF document.</summary>
    PDF = 1,

    /// <summary>A WORD document.</summary>
    WORD = 2,
}

和基类

DocumentInterface<DocumentType>
:

/// <summary>
/// Base class for a document that is read one page at a time. Opening the document,
/// counting its pages, reading a page and closing the document are delegated to
/// abstract members implemented by format-specific derived classes.
/// </summary>
/// <typeparam name="DocumentType">
/// Type of the underlying document object returned by <see cref="OpenDocument"/>.
/// </typeparam>
/// <remarks>
/// NOTE(review): the type parameter has the same name as the <c>DocumentType</c> enum,
/// so inside this class <c>DocumentType</c> means the type parameter, not the enum.
/// Consider renaming it (for example to <c>TDocument</c>) to avoid confusing the two.
/// </remarks>
public abstract class DocumentInterface<DocumentType>
{
    private string _documentPath;
    private int _pageCount;
    private int _currentPage;

    /// <summary>Path to the document being reviewed.</summary>
    public string DocumentPath
    {
        get { return _documentPath; }
        set { _documentPath = value; }
    }

    /// <summary>
    /// The underlying document object. Storage is provided by the derived class;
    /// the constructor assigns the result of <see cref="OpenDocument"/> to it.
    /// </summary>
    public abstract DocumentType Document { get; set; }

    /// <summary>Count of pages in the review document.</summary>
    public int PageCount
    {
        get { return _pageCount; }
        set { _pageCount = value; }
    }

    /// <summary>Number of the page most recently returned; 0 before any page was read.</summary>
    public int CurrentPage
    {
        get { return _currentPage; }
        set { _currentPage = value; }
    }

    /// <summary>True while there are pages after the current position.</summary>
    public bool HasMorePages { get { return _currentPage < _pageCount; } }

    /// <summary>
    /// Advances to the next page and returns its contents.
    /// </summary>
    /// <returns>
    /// The text returned by <see cref="GetPageContents"/> for the next page, or
    /// <see cref="string.Empty"/> when there are no more pages.
    /// </returns>
    public string GetNextPageContents()
    {
        if (!HasMorePages)
        {
            return string.Empty;
        }

        _currentPage++;
        return GetPageContents(_currentPage);
    }


    #region Constructor & Destructor

    /// <summary>
    /// Opens the document at <paramref name="documentpath"/> and, when that succeeds,
    /// reads its page count and resets the current position.
    /// </summary>
    /// <param name="documentpath">Path of the document to open.</param>
    public DocumentInterface(string documentpath)
    {
        // Remember the path so that DocumentPath reflects what was opened.
        _documentPath = documentpath;

        // Hand the opened document to the derived class through the Document setter;
        // GetPageCount/GetPageContents/CloseDocument reach it through that property.
        // (Previously it was kept in a private field that no derived class could read.)
        Document = OpenDocument(documentpath);

        // Make sure we opened the document successfully
        if (Document != null)
        {
            _pageCount = GetPageCount();
            _currentPage = 0;
        }
    }

    /// <summary>
    /// Finalizer: closes the document if one was opened.
    /// </summary>
    ~DocumentInterface()
    {
        // Skip the abstract call when nothing was opened, so the finalizer
        // does not ask a derived class to close a null document.
        if (Document != null)
        {
            CloseDocument();
        }
    }

    #endregion


    #region Abstract Methods

    /// <summary>Opens the document at <paramref name="path"/>; null signals failure.</summary>
    public abstract DocumentType OpenDocument(string path);

    /// <summary>Returns the number of pages in the opened document.</summary>
    public abstract int GetPageCount();

    /// <summary>Returns the text of the page with the given number.</summary>
    public abstract string GetPageContents(int pageNumber);

    /// <summary>Closes the opened document.</summary>
    public abstract void CloseDocument();

    #endregion

}

和派生类

DocumentInterfacePDF
:

/// <summary>
/// PDF implementation of <see cref="DocumentInterface{DocumentType}"/> that uses
/// <c>PdfDocument</c> as the underlying document object.
/// </summary>
public class DocumentInterfacePDF : DocumentInterface<PdfDocument>
{
    // Backing storage for the Document property.
    private PdfDocument _document;

    /// <summary>The underlying PDF document, or null when none is open.</summary>
    public override PdfDocument Document
    {
        get => _document;
        set => _document = value;
    }

    /// <summary>
    /// Opens the PDF at <paramref name="documentpath"/>.
    /// </summary>
    /// <param name="documentpath">Path of the file to open; must end in ".pdf" (any casing).</param>
    /// <returns>
    /// The opened document, or null when the path is null, empty or does not end in ".pdf".
    /// </returns>
    public override PdfDocument OpenDocument(string documentpath)
    {
        // EndsWith copes with paths shorter than the extension, where
        // Substring(Length - 4) would throw ArgumentOutOfRangeException.
        if (string.IsNullOrEmpty(documentpath)
            || !documentpath.EndsWith(".pdf", System.StringComparison.OrdinalIgnoreCase))
        {
            return null;
        }

        // Open the PDF
        PdfReader reader = new PdfReader(documentpath);

        // return to base class to assign to the Document property
        return new PdfDocument(reader);
    }

    #region Base Class Overrides to implement as a iText7 PDF Interface

    /// <summary>
    /// Gets the number of pages in the PDF document
    /// </summary>
    /// <returns>The value reported by <c>Document.GetNumberOfPages()</c>.</returns>
    public override int GetPageCount()
    {
        return Document.GetNumberOfPages();
    }

    /// <summary>
    /// Gets the page contents for a specific page number
    /// </summary>
    /// <param name="pageNumber">Number of the page to read.</param>
    /// <returns>The text extracted from the scanned region of the page.</returns>
    public override string GetPageContents(int pageNumber)
    {
        // Default scanning extents for the PDF reader,
        // in points (72 to the inch).
        const int pageWidth    = 595;   // A4 width,  210mm
        const int pageHeight   = 842;   // A4 height, 297mm
        const int headerMargin =  57;   //  20mm
        const int footerMargin =  57;   //  20mm

        // NOTE(review): if Rectangle takes (x, y, width, height), this region spans
        // y = 57..842 and so leaves out only one 57pt margin, not both — confirm the
        // intended extents against the Rectangle constructor documentation.
        var region = new Rectangle(0, headerMargin, pageWidth, pageHeight - footerMargin);

        PdfPage page = Document.GetPage(pageNumber);

        // Extract only the text that falls inside the region.
        FilteredTextEventListener listener =
                new FilteredTextEventListener(
                new LocationTextExtractionStrategy(),
                new TextRegionEventFilter(region)
                );

        return PdfTextExtractor.GetTextFromPage(page, listener);
    }

    /// <summary>
    /// Closes the PDF Document; does nothing when no document is open.
    /// </summary>
    public override void CloseDocument()
    {
        // Document is null when OpenDocument rejected the path.
        Document?.Close();
    }

    #endregion

    #region Constructor

    /// <summary>
    /// Constructor
    /// Call the base class constructor to setup everything in the predefined way.
    /// </summary>
    /// <param name="documentpath">Path of the PDF file to open.</param>
    public DocumentInterfacePDF(string documentpath) : base(documentpath)
    {
        // all of the implementation is taken care of in the Base Class
    }

    #endregion
}

在我的

DocumentParser
类中,我想使用基类实例化文档,以便可以在运行时根据用户选择的文档类型决定派生类型。

// Holds the search terms, results and document wrapper for one review run.
// NOTE: this listing is an excerpt; part of the class (including its closing brace) is omitted.
internal class ReviewDocumentParser
{
    #region Properties

    private List<SearchTerm> _searchterms;
    private SearchResults _results;
    private string _documentPath;
    private string _searchTermsPath;
    // NOTE(review): the type argument here is the DocumentType enum, not a document
    // class such as PdfDocument, so this field's type is unrelated to
    // DocumentInterface<PdfDocument> (the base of DocumentInterfacePDF).
    private DocumentInterface<DocumentType> _documentInterface;

    // Search terms to look for.
    public List<SearchTerm> SearchTerms
    {
        get { return _searchterms; }
        set { _searchterms = value; }
    }

    // Results object; created in the constructor from the search-terms path.
    public SearchResults Results
    {
        get { return _results; }
        set { _results = value; }
    }

    // Path of the document under review.
    public string DocumentPath
    {
        get { return _documentPath; }
        set { _documentPath = value; }
    }

    // Path passed to SearchResults in the constructor.
    public string SearchTermsPath
    {
        get { return _searchTermsPath; }
        set { _searchTermsPath = value; }
    }

    private DocumentType _documentType;

    // Document type selected by the caller (enum value).
    public DocumentType DocumentType
    {
        get { return _documentType; }
        set { _documentType = value; }
    }

    // Document wrapper created in the constructor according to the document type.
    public DocumentInterface<DocumentType> DocumentInterface
    {
        get { return _documentInterface; }
        set { _documentInterface = value; }
    }

    #endregion

//... unnecessary code omitted

    #region Constructor

    // Stores the arguments, creates the SearchResults and instantiates the document
    // wrapper that matches documenttype.
    // Throws NoDocumentParserFoundException when documenttype is NotSpecified.
    public ReviewDocumentParser(DocumentType documenttype, string documentpath, string searchtermspath)
    {
        _documentType = documenttype;
        _documentPath = documentpath;
        _searchTermsPath = searchtermspath;

        // Hook the Search Terms element up
        _results = new SearchResults(_searchTermsPath);

        // NOTE(review): there is no default case, so a value outside the three listed
        // ones leaves _documentInterface unassigned.
        switch (documenttype)
        {
            case DocumentType.PDF:
                // This assignment is the one reported as error CS0029 below.
                _documentInterface = new DocumentInterfacePDF(_documentPath);
                break;
            case DocumentType.WORD:
                _documentInterface = new DocumentInterfaceWORD(_documentPath);
                break;
            case DocumentType.NotSpecified:
                throw new NoDocumentParserFoundException("No suitable document parser found.");
        }
    }

    #endregion

我试图解决以下错误,但没有运气,我一圈又一圈地用一个问题替换另一个问题。我开始认为我正在尝试做一些不可能的事情。我已经24小时处于停滞状态了。

错误 CS0029 无法将类型“PDF_Reader_Test.Model.DocumentInterfacePDF”隐式转换为“PDF_Reader_Test.Model.DocumentInterface<PDF_Reader_Test.Model.DocumentType>”

我尝试将类型描述符添加到实例化语句中。 PdfDocument 是 iText7 包中定义的类型,我用它仅从 PDF 文档中提取文本。

case DocumentType.PDF:
    _documentInterface = new DocumentInterfacePDF<PdfDocument>(_documentPath);
    break;

但这只会产生不同的警告。我也尝试过修改基类和派生类定义,但我只是遇到了更大的问题,我认为它们(几乎)是正确的。

我期望能够把派生类的实例赋值给基类类型的变量。我想在基类中定义通用的行为,使其保持一致,只在 PDF 和 Word 的派生类中重写处理不同文件类型的部分。我还没有着手处理 Word 版本。

c# generics abstract derived-class
1个回答
0
投票

所以,这里有几个问题,有些已经在评论中提到了。

  • 命名约定:不要将你的基类称为“接口”。它不是一个接口,接口通常以“I”为前缀。更好的选择就是简单地

    Document
    或可能
    DocumentBase
    。此外,通常的做法是在泛型参数前面加上
    T
    ,因此更好的选择可能是:
    class DocumentBase<TDocument>

  • 资源释放:根据经验,不要依赖终结器(析构函数)。你的类应该实现

    IDisposable
    。然后,如果您的底层实现(例如
    PdfDocument
    )需要处置,您应该在
    Dispose()
    方法中执行此操作。

  • ReviewDocumentParser:在构造函数内完成所有昂贵的 IO 和文档加载是糟糕的设计。要么不把文档作为解析器状态的一部分,而是提供一个返回文档实现的方法;要么将这部分逻辑放入工厂方法中。

直接解决你的问题:你把

DocumentType
既用作枚举类型的名称,又用作泛型参数的名称,这很不幸,因为这样一来,你声明的
DocumentInterface<DocumentType> _documentInterface
实际上是在无意中把枚举类型用作了泛型参数,这就是为什么你无法把
DocumentInterfacePDF
赋值给它——后者以
PdfDocument
作为泛型参数。

如您所见,遵守正确的命名约定可以帮助您避免此类错误。

此外,我认为您可能根本不需要泛型参数。您可以将其保留为派生类型的实现细节。

这是我如何解决这个问题的简化版本。 (免责声明:我对itext不熟悉,所以这只是关于类设计。)

/// <summary>
/// Format-independent view of a paged document. Derived classes supply the page
/// count, the page text and the clean-up logic for one concrete file format.
/// </summary>
public abstract class Document : IDisposable
{
    /// <summary>Number of pages in the document.</summary>
    public abstract int PageCount { get; }
    
    /// <summary>Returns the text of the page with the given number.</summary>
    /// <param name="pageNumber">Number of the page to read.</param>
    public abstract string GetPageContents(int pageNumber);
    
    // [...]

    /// <summary>Releases whatever the derived class holds open for the document.</summary>
    public abstract void Dispose();
}

/// <summary>
/// <see cref="Document"/> implementation backed by an iText reader and document.
/// </summary>
public sealed class PdfDocument : Document
{
    // full namespace to avoid name collisions
    private readonly iText.Kernel.Pdf.PdfReader _reader;
    private readonly iText.Kernel.Pdf.PdfDocument _document;
    private bool _disposed;

    /// <summary>
    /// Opens the PDF file at <paramref name="path"/>.
    /// </summary>
    /// <param name="path">Path of the PDF file to open.</param>
    public PdfDocument(string path)
    {
        _reader = new (path);
        try
        {
            _document = new (_reader);
        }
        catch
        {
            // The caller never receives an instance to dispose when construction
            // fails, so release the already-opened reader here before rethrowing.
            _reader.Close();
            throw;
        }
    }

    /// <summary>Number of pages reported by the underlying iText document.</summary>
    public override int PageCount => _document.GetNumberOfPages();

    /// <summary>Returns the text of the given page (implementation elided in this sample).</summary>
    /// <param name="pageNumber">Number of the page to read.</param>
    public override string GetPageContents(int pageNumber)
    {
        // [...]
        return null;
    }

    /// <summary>
    /// Closes the document and the reader. Calls after the first one do nothing.
    /// </summary>
    public override void Dispose()
    {
        if (_disposed)
        {
            return;
        }

        // Mark as disposed first so a failing Close() is not retried on a second call.
        _disposed = true;
        try
        {
            _document.Close();
        }
        finally
        {
            // Close the reader even when closing the document throws.
            _reader.Close();
        }
    }
}

/// <summary>
/// Creates the <see cref="Document"/> implementation that matches a document type.
/// </summary>
internal class ReviewDocumentParser
{
    /// <summary>
    /// Opens the document at <paramref name="documentpath"/> with the implementation
    /// selected by <paramref name="documenttype"/>.
    /// </summary>
    /// <param name="documenttype">Kind of document to open.</param>
    /// <param name="documentpath">Path of the document to open.</param>
    /// <param name="searchtermspath">Path of the search terms (not used by the code shown here).</param>
    /// <returns>
    /// The opened document, or null when no case in the switch handles
    /// <paramref name="documenttype"/>.
    /// </returns>
    public Document Parse(DocumentType documenttype, string documentpath, string searchtermspath)
    {
        // [...]
        switch (documenttype)
        {
            case DocumentType.PDF:
                // Use the method parameter; this class declares no _documentPath field.
                return new PdfDocument(documentpath);
            // [...]
        }
        return null;
    }
}
© www.soinside.com 2019 - 2024. All rights reserved.