我正在尝试在我的 AI 扩充管道中使用文本拆分(Text Split)认知技能。
我的技能组定义如下:
{
"@odata.context": "https://<redacted>/$metadata#skillsets/$entity",
"@odata.etag": "\"<redacted>\"",
"name": "documentindexingskillset",
"description": "",
"skills": [
{
"@odata.type": "#Microsoft.Skills.Util.DocumentExtractionSkill",
"name": "doc_extract_skill",
"description": "",
"context": "/document",
"parsingMode": "default",
"dataToExtract": "contentAndMetadata",
"inputs": [
{
"name": "file_data",
"source": "/document/file_data"
}
],
"outputs": [
{
"name": "content",
"targetName": "extracted_content"
},
{
"name": "normalized_images",
"targetName": "extracted_normalized_images"
}
],
"configuration": {
"imageAction": "generateNormalizedImages",
"normalizedImageMaxWidth@odata.type": "#Int64",
"normalizedImageMaxWidth": 2000,
"normalizedImageMaxHeight@odata.type": "#Int64",
"normalizedImageMaxHeight": 2000
}
},
{
"@odata.type": "#Microsoft.Skills.Vision.OcrSkill",
"name": "doc_ocr_skill",
"description": "Extracts text (plain and structured) from image.",
"context": "/document/extracted_normalized_images/*",
"textExtractionAlgorithm": null,
"lineEnding": "Space",
"defaultLanguageCode": "en",
"detectOrientation": true,
"inputs": [
{
"name": "image",
"source": "/document/extracted_normalized_images/*"
}
],
"outputs": [
{
"name": "text",
"targetName": "imageText"
},
{
"name": "layoutText",
"targetName": "imageLayoutText"
}
]
},
{
"@odata.type": "#Microsoft.Skills.Text.MergeSkill",
"name": "doc_merge_skill",
"description": "Create merged_text, which includes all the textual representation of each image inserted at the right location in the content field.",
"context": "/document",
"insertPreTag": " ",
"insertPostTag": " ",
"inputs": [
{
"name": "text",
"source": "/document/extracted_content"
},
{
"name": "itemsToInsert",
"source": "/document/extracted_normalized_images/*/imageText"
}
],
"outputs": [
{
"name": "mergedText",
"targetName": "mergedText"
}
]
},
{
"@odata.type": "#Microsoft.Skills.Text.LanguageDetectionSkill",
"name": "doc_language_detection",
"description": "",
"context": "/document",
"defaultCountryHint": "",
"modelVersion": null,
"inputs": [
{
"name": "text",
"source": "/document/mergedText"
}
],
"outputs": [
{
"name": "languageCode",
"targetName": "languageCode"
},
{
"name": "languageName",
"targetName": "languageName"
},
{
"name": "score",
"targetName": "score"
}
]
},
{
"@odata.type": "#Microsoft.Skills.Text.SplitSkill",
"name": "doc_text_split",
"description": "",
"context": "/document",
"defaultLanguageCode": "en",
"textSplitMode": "pages",
"maximumPageLength": 1000,
"pageOverlapLength": 100,
"maximumPagesToTake": 0,
"inputs": [
{
"name": "text",
"source": "/document/mergedText"
},
{
"name": "languageCode",
"source": "/document/languageCode"
}
],
"outputs": [
{
"name": "textItems",
"targetName": "mypages"
}
]
}
],
"cognitiveServices": {
"@odata.type": "#Microsoft.Azure.Search.DefaultCognitiveServices",
"description": null
},
"knowledgeStore": null,
"indexProjections": null,
"encryptionKey": null
}
我的索引器定义如下:
{
"@odata.context": "<redacted>/$metadata#indexers/$entity",
"@odata.etag": "\"<redacted>\"",
"name": "cs-ai-uks-01-ixr-02",
"description": null,
"dataSourceName": "ds-cs-ai-uks-02-saaiuksstg01",
"skillsetName": "documentindexingskillset",
"targetIndexName": "cs-ai-uks-02-is-02",
"disabled": null,
"schedule": null,
"parameters": {
"batchSize": null,
"maxFailedItems": null,
"maxFailedItemsPerBatch": null,
"base64EncodeKeys": null,
"configuration": {
"imageAction": "generateNormalizedImages",
"allowSkillsetToReadFileData": true
}
},
"fieldMappings": [],
"outputFieldMappings": [
{
"sourceFieldName": "/document/mypages",
"targetFieldName": "Content"
}
],
"cache": null,
"encryptionKey": null
}
最后,我的索引定义如下:
{
"@odata.context": "<redacted>/$metadata#indexes/$entity",
"@odata.etag": "\"<redacted>\"",
"name": "cs-ai-uks-02-is-02",
"defaultScoringProfile": null,
"fields": [
{
"name": "id",
"type": "Edm.String",
"searchable": false,
"filterable": false,
"retrievable": true,
"sortable": false,
"facetable": false,
"key": true,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"synonymMaps": []
},
{
"name": "Content",
"type": "Edm.String",
"searchable": true,
"filterable": false,
"retrievable": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": "standard.lucene",
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"synonymMaps": []
}
],
"scoringProfiles": [],
"corsOptions": null,
"suggesters": [],
"analyzers": [],
"normalizers": [],
"tokenizers": [],
"tokenFilters": [],
"charFilters": [],
"encryptionKey": null,
"similarity": {
"@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
"k1": null,
"b": null
},
"semantic": null,
"vectorSearch": null
}
我的 AI 搜索服务当前配置为基本定价层 (SKU:B)。
我遇到两个问题:
无法创建调试会话“new-debug-session”,错误:“InvalidSkillset:一项或多项技能无效。详细信息:在 Skill 上发现意外属性。参数:Debug.Skillset”
当我从技能组中删除文本拆分技能后,就可以顺利保存并运行调试会话。
我不明白“在 Skill 上发现意外属性。参数:Debug.Skillset”这条消息的含义,因为我并没有定义任何这样的参数。
如能提供任何建议,我将不胜感激,在此先行致谢。
问候。
即使我在不使用文本拆分技能的索引器上运行调试会话,也会遇到相同的错误。