我有一个包含大量文件(数百万个)的 Azure Blob 容器。我需要找出其中是否存在大小为 0 字节的文件。有没有简单的方法?
补充说明:该容器没有应用任何标签。
如果这对其他人有帮助,下面是我用 PowerShell 编写的脚本:
# Scans every blob in an Azure Storage container and reports each blob whose
# length is 0 bytes. Blobs are listed in pages of at most $MaxReturn entries;
# the continuation token carried by the last blob of a page is used to request
# the next page, and the loop ends when no token is returned.
#
# Before running, replace the "<AccountName>" / "<PrimaryKey>" placeholders and
# the container name with real values.
$ctx = New-AzStorageContext -StorageAccountName "<AccountName>" -StorageAccountKey "<PrimaryKey>"
$ContainerName = "containerName"
$MaxReturn = 10000   # maximum number of blobs requested per listing call
$Total = 0           # running count of blobs inspected across all pages
$Token = $Null       # $Null on the first call means "start from the beginning"
do {
    # @(...) forces an array so .Count and indexing behave the same whether the
    # page holds zero, one, or many blobs.
    $Blobs = @(Get-AzStorageBlob -Context $ctx -Container $ContainerName -MaxCount $MaxReturn -ContinuationToken $Token)
    # An empty page has no last element to read a token from, so stop here.
    if ($Blobs.Count -le 0) { Break }
    $Total += $Blobs.Count
    $loopCount = 0   # position inside the current page, for the progress line
    foreach ($blobitem in $Blobs) {
        $loopCount += 1
        # "`r" returns to the start of the line so the progress text overwrites itself.
        Write-Host -NoNewline "`rValidating batch: " $loopCount "/" $Blobs.Count " (Current total:" $Total ")"
        if ($blobitem.Length -eq 0) {
            # Terminate the progress line first so the finding gets its own line.
            Write-Host ""
            Write-Host "Zero bytes file found: " $blobitem.Name
        }
    }
    # The token for the next page is attached to the last blob of this page.
    $Token = $Blobs[$Blobs.Count - 1].ContinuationToken
}
While ($Null -ne $Token)
Write-Host ""
Write-Host "Finished!"
# Databricks notebook snippet: print every zero-byte file found under blob_path.
# `dbutils` is the utility object that Databricks notebooks provide; it is not
# imported here.
blob_path = "give/your/blob/path/here"

# dbutils.fs.ls lists only one directory level, and directory entries are
# returned with size 0. Walk the tree with an explicit stack so nested files
# are checked and directories are never reported as empty files.
pending = [blob_path]
while pending:
    current = pending.pop()
    for file in dbutils.fs.ls(current):
        if file.isDir():
            pending.append(file.path)
            continue
        file_path = file.path
        file_size = file.size
        if file_size == 0:
            print("File size is {0} for file {1}".format(file_size, file_path))