我的要求是从网站上的文档库中检索文档并提取其内容以将其显示为 HTML,而不是直接使用 SharePoint 作为 CMS 显示文档。
我需要为此创建自定义 Web 部件吗?您能否提供任何 C# 示例来说明如何实现此目的,或者是否有更简单的方法。
答案:
是的,您可以通过创建自定义 Web 部件来实现此目的。根据您使用的文档类型(Word、PDF 等),有几种方法可以提取内容并将其显示为 HTML。
如果您使用 SharePoint Online,则可以使用 Microsoft Graph API 检索文档,将其转换为 HTML(如果是 Word 文档),然后在 Web 部件中呈现 HTML 内容。
以下是如何在 C# 中执行此操作的示例:
using Microsoft.Graph;
using System;
using System.IO;
using System.Threading.Tasks;
/// <summary>
/// Downloads files from a SharePoint document library through Microsoft Graph
/// and returns their content as an HTML string.
/// </summary>
public class SharePointDocumentService
{
    private readonly GraphServiceClient _graphClient;

    /// <summary>Creates the service around an already-authenticated Graph client.</summary>
    /// <param name="graphClient">Graph client used for every request made by this service.</param>
    /// <exception cref="ArgumentNullException"><paramref name="graphClient"/> is null.</exception>
    public SharePointDocumentService(GraphServiceClient graphClient)
    {
        // Fail fast here instead of with a NullReferenceException on the first request.
        if (graphClient == null)
        {
            throw new ArgumentNullException(nameof(graphClient));
        }

        _graphClient = graphClient;
    }

    /// <summary>
    /// Downloads a document's content and returns it converted to HTML.
    /// </summary>
    /// <param name="siteId">Graph site id.</param>
    /// <param name="documentLibraryId">Id of the document library; it is used as the Graph drive id.</param>
    /// <param name="documentId">Id of the drive item (the document) to download.</param>
    /// <returns>The HTML produced by <see cref="ConvertWordDocumentToHtml"/>.</returns>
    public async Task<string> GetDocumentAsHtml(string siteId, string documentLibraryId, string documentId)
    {
        // The content stream is only needed while converting, so dispose it
        // deterministically instead of leaving it to the garbage collector.
        using (var documentStream = await _graphClient.Sites[siteId]
            .Drives[documentLibraryId]
            .Items[documentId]
            .Content
            .Request()
            .GetAsync())
        {
            // Conversion assumes the item is a Word document.
            return ConvertWordDocumentToHtml(documentStream);
        }
    }

    /// <summary>
    /// Converts a Word document to HTML. This is a placeholder: it does not read
    /// <paramref name="documentStream"/> and always returns fixed sample markup.
    /// Replace the body with a real conversion (OpenXML SDK, Aspose.Words, etc.).
    /// </summary>
    /// <param name="documentStream">Stream holding the Word document bytes.</param>
    /// <returns>An HTML string.</returns>
    private string ConvertWordDocumentToHtml(Stream documentStream)
    {
        // Conversion logic goes here (OpenXML SDK, Aspose.Words, etc.)
        string htmlContent = "<html><body>Your HTML content goes here</body></html>";
        return htmlContent;
    }
}
在此示例中,Microsoft Graph API 用于以流的形式检索文档。然后,您可以使用 OpenXML SDK 或 Aspose.Words 等库将 Word 文档转换为 HTML 并呈现内容。
如果您更喜欢基于 JavaScript 的解决方案,您可以使用 SPFx 构建 Web 部件,并利用 PnP.js 等库来获取文档内容,并在客户端直接将 Word 文档转换为 HTML。
这是一个使用 mammoth.js 的 JavaScript 示例 (SPFx):
如果您正在处理 Word 文档并且更喜欢客户端解决方案,则此方法非常有效。
import { sp } from "@pnp/sp/presets/all";
import * as mammoth from "mammoth";
// Fetch the file from SharePoint
// Downloads the file at the given server-relative URL and resolves to its
// content converted to HTML by mammoth.js.
async function getDocumentContent(fileUrl: string): Promise<string> {
    // Look the file up first, then pull its raw bytes in a separate step.
    const file = sp.web.getFileByServerRelativeUrl(fileUrl);
    const docxBytes = await file.getBuffer();

    // mammoth takes the bytes via the `arrayBuffer` option; the generated
    // markup is on the `value` property of the resolved result.
    const { value: html } = await mammoth.convertToHtml({ arrayBuffer: docxBytes });
    return html;
}
mammoth.js 库将 DOCX 文件转换为干净的 HTML。
总结:您可以使用 mammoth.js 等 JavaScript 库将文档转换为 HTML。
// Fetch PowerPoint presentation and convert to HTML
/// <summary>
/// Downloads a PowerPoint presentation's content through Microsoft Graph and
/// returns the HTML produced by ConvertPowerPointToHtml.
/// </summary>
/// <param name="siteId">Graph site id.</param>
/// <param name="documentLibraryId">Id of the document library; it is used as the Graph drive id.</param>
/// <param name="documentId">Id of the drive item (the presentation) to download.</param>
/// <returns>The converted HTML string.</returns>
public async Task<string> GetPowerPointAsHtml(string siteId, string documentLibraryId, string documentId)
{
    // The content stream is only needed while converting, so dispose it
    // deterministically instead of leaving it to the garbage collector.
    using (var presentationStream = await _graphClient.Sites[siteId]
        .Drives[documentLibraryId]
        .Items[documentId]
        .Content
        .Request()
        .GetAsync())
    {
        return ConvertPowerPointToHtml(presentationStream);
    }
}
// Convert PowerPoint to HTML using a third-party tool like Aspose.Slides
/// <summary>
/// Placeholder PowerPoint-to-HTML converter. It does not read
/// <paramref name="presentationStream"/> yet and always returns fixed sample markup.
/// </summary>
/// <param name="presentationStream">Stream holding the presentation bytes (currently unused).</param>
/// <returns>An HTML string.</returns>
private string ConvertPowerPointToHtml(Stream presentationStream)
{
    // TODO: plug in a real converter here (Aspose.Slides, OpenXML or another library).
    const string placeholderHtml = "<html><body>PowerPoint content here</body></html>";
    return placeholderHtml;
}
// Fetch Excel document and convert to HTML
/// <summary>
/// Downloads an Excel workbook's content through Microsoft Graph and returns
/// the HTML produced by ConvertExcelToHtml.
/// </summary>
/// <param name="siteId">Graph site id.</param>
/// <param name="documentLibraryId">Id of the document library; it is used as the Graph drive id.</param>
/// <param name="documentId">Id of the drive item (the workbook) to download.</param>
/// <returns>The converted HTML string.</returns>
public async Task<string> GetExcelAsHtml(string siteId, string documentLibraryId, string documentId)
{
    // The content stream is only needed while converting, so dispose it
    // deterministically instead of leaving it to the garbage collector.
    using (var excelStream = await _graphClient.Sites[siteId]
        .Drives[documentLibraryId]
        .Items[documentId]
        .Content
        .Request()
        .GetAsync())
    {
        return ConvertExcelToHtml(excelStream);
    }
}
// Convert Excel to HTML using Aspose.Cells or OpenXML SDK
/// <summary>
/// Placeholder Excel-to-HTML converter. It does not read
/// <paramref name="excelStream"/> yet and always returns fixed sample markup.
/// </summary>
/// <param name="excelStream">Stream holding the workbook bytes (currently unused).</param>
/// <returns>An HTML string.</returns>
private string ConvertExcelToHtml(Stream excelStream)
{
    // TODO: plug in a real converter here (Aspose.Cells or the OpenXML SDK).
    const string placeholderHtml = "<html><body>Excel content here</body></html>";
    return placeholderHtml;
}
对于 PDF 文件,您可以使用 iframe 标签直接以 HTML 形式呈现它们,或使用 iTextSharp 等库将它们转换为 HTML。
4. SharePoint 列表
// Fetch PDF and render in iframe or convert to HTML
/// <summary>
/// Downloads a PDF's content through Microsoft Graph and returns the HTML
/// produced by RenderPdfInIframe.
/// </summary>
/// <param name="siteId">Graph site id.</param>
/// <param name="documentLibraryId">Id of the document library; it is used as the Graph drive id.</param>
/// <param name="documentId">Id of the drive item (the PDF) to download.</param>
/// <returns>An HTML string that embeds the PDF.</returns>
public async Task<string> GetPdfAsHtml(string siteId, string documentLibraryId, string documentId)
{
    // The content stream is fully consumed while building the markup, so
    // dispose it deterministically instead of leaving it to the garbage collector.
    using (var pdfStream = await _graphClient.Sites[siteId]
        .Drives[documentLibraryId]
        .Items[documentId]
        .Content
        .Request()
        .GetAsync())
    {
        return RenderPdfInIframe(pdfStream);
    }
}
// Render PDF using iframe
/// <summary>
/// Builds an iframe element whose source is a base64 <c>data:</c> URI
/// containing the PDF bytes.
/// </summary>
/// <param name="pdfStream">Stream holding the PDF bytes.</param>
/// <returns>The iframe markup as a string.</returns>
private string RenderPdfInIframe(Stream pdfStream)
{
    // ReadFully is defined outside this snippet; presumably it returns the
    // whole stream as a byte array - confirm against its definition.
    var pdfBytes = ReadFully(pdfStream);
    var encodedPdf = Convert.ToBase64String(pdfBytes);

    return $"<iframe src='data:application/pdf;base64,{encodedPdf}' width='100%' height='600px'></iframe>";
}
// Fetch SharePoint list items
/// <summary>
/// Fetches the items of a SharePoint list through Microsoft Graph and returns
/// them rendered by ConvertListToHtml.
/// </summary>
/// <param name="siteId">Graph site id.</param>
/// <param name="listId">Id of the list whose items are fetched.</param>
/// <returns>The HTML string produced by ConvertListToHtml.</returns>
public async Task<string> GetSharePointListAsHtml(string siteId, string listId)
{
    // Column values (e.g. Title) are only included in the response when the
    // "fields" relationship is expanded; without it the items carry metadata only.
    // NOTE(review): a single request is issued, so only the page returned by that
    // request is rendered; follow the next-page request if every item is needed.
    var listItems = await _graphClient.Sites[siteId].Lists[listId].Items
        .Request()
        .Expand("fields")
        .GetAsync();

    return ConvertListToHtml(listItems);
}
// Convert list items to HTML table
/// <summary>
/// Renders list items as an HTML table with "Title" and "Modified" columns.
/// </summary>
/// <param name="listItems">Items to render; null entries are skipped.</param>
/// <returns>The table markup. Cell values are HTML-encoded.</returns>
/// <exception cref="ArgumentNullException"><paramref name="listItems"/> is null.</exception>
private string ConvertListToHtml(IListItemsCollectionPage listItems)
{
    if (listItems == null)
    {
        throw new ArgumentNullException(nameof(listItems));
    }

    // StringBuilder avoids re-allocating the whole string for every row.
    var html = new System.Text.StringBuilder(
        "<table><thead><tr><th>Title</th><th>Modified</th></tr></thead><tbody>");

    foreach (var item in listItems)
    {
        if (item == null)
        {
            continue;
        }

        // ListItem has no indexer: column values live in Fields.AdditionalData,
        // which is only populated when the request expanded "fields".
        object titleValue = null;
        var fieldValues = item.Fields?.AdditionalData;
        if (fieldValues != null)
        {
            fieldValues.TryGetValue("Title", out titleValue);
        }

        // List content is user-supplied, so encode it before writing it into markup.
        // Convert.ToString(object) yields an empty string for null values.
        string title = System.Net.WebUtility.HtmlEncode(Convert.ToString(titleValue));
        string modified = System.Net.WebUtility.HtmlEncode(Convert.ToString(item.LastModifiedDateTime));

        html.Append("<tr><td>").Append(title)
            .Append("</td><td>").Append(modified)
            .Append("</td></tr>");
    }

    html.Append("</tbody></table>");
    return html.ToString();
}
// Load Office document (PPT, Excel, Word) in SPFx web part
import { sp } from "@pnp/sp/presets/all";
import * as Office from '@microsoft/office-js';
// Fetches the file at the given server-relative URL with PnP.js, then asks
// Office.js to open the same URL, logging the outcome to the console.
// Resolves with no value.
async function loadOfficeDocument(fileUrl) {
// Fetch the document using PnP.js
// NOTE(review): fileBuffer is never used below, so this download only serves
// to reject early when the file cannot be read - confirm it is intended.
const fileBuffer = await sp.web.getFileByServerRelativeUrl(fileUrl).getBuffer();
// Render the document in an iframe using Office.js
// NOTE(review): no iframe is created in this function, and
// Office.context.document.open does not appear to be a documented Office.js
// API - verify it exists in the targeted runtime before relying on it.
Office.context.document.open(fileUrl, {
success: function () { console.log('Document opened successfully'); },
error: function (error) { console.error('Error opening document', error); }
});
}