我正在制作一个模型,其中有一个
FileField
。我想将文件内容本身存储在数据库列中,而不是只存储文件路径。有什么建议吗?
无视那些反对者。如果您想完全控制内容,请将文件放入数据库的 blob 字段中。 我通常还将文件名保留在单独的字段中,以便我可以根据需要重建文件(这样您就可以保留扩展名,这将其与大多数操作系统中的文件类型联系起来)。
请确保将实际的 blob 数据存储在单独的表中,仅通过 id 与您的文件名/额外信息表关联……这样,在处理与文件相关的任何信息(内容本身除外)时,就不会牺牲任何性能。
反对者没有意识到,数据库只是一种极其优化的文件系统形式。 字节是字节,磁盘扇区是磁盘扇区。 数据库在组织和搜索这些字节方面比文件系统要好得多。 更不用说,数据库实现了比大多数文件系统更严格的安全性,并且得到了更好的维护(通过备份、支持人员等)。
我知道这是一个老问题,但从那时起已经编写了一些很好的代码来允许此选项。特别地,请参阅django-database-files,它将使用 Django 的存储 API 使所有 FileFields 和 ImageFields 将其内容存储在数据库中。还有一个 fork 可以将文件本地缓存在文件系统上,以克服使用数据库的最大问题,即延迟。
那么,将其简单地存储在二进制列中怎么样?然后您可以存储字节集合。如果文件名对您也很重要,您可以将其存储在附加名称列中。
我想要一种干净的方式来提供全部功能……此方案仅适用于 PostgreSQL,并使用大对象(Large Object)来避免单个表膨胀。另一种方法是在模型中使用 BinaryField,但会有 base64 编码/解码的转换时间开销,并且存储的值大约是正常 256 进制(原始字节)大小的 3 倍。
这种做法有以下注意事项:
没有包含的视图来下载任何存储的文件。另外,如果您不做额外的工作就编写一个,您将无法获得正确的 mime 类型或 http-resume
文件不是静态存储的,因此它们不会出现在典型的静态或媒体文件夹中
数据库范围内最大对象数为 (2^32)-1 个大对象,因此请记住这一点
from django.core.files.storage import Storage
from django.core.files.base import File
from django.db import connection
from django.db import transaction
from django.utils.deconstruct import deconstructible
@deconstructible
class PostgreSQLLargeObjectStorage(Storage):
    """Django storage backend that keeps file contents in PostgreSQL large objects.

    A helper table, ``storage_largeobject``, maps each storage name to the OID
    of the large object holding its bytes, together with creation and update
    timestamps. Large objects are opened through
    ``connection.connection.lobject``, i.e. the raw driver connection
    (psycopg2 provides this API).
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Ensure the mapping table exists
        self._ensure_mapping_table()

    def _ensure_mapping_table(self):
        """Create the ``storage_largeobject`` mapping table if it is missing."""
        with connection.cursor() as cursor:
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS storage_largeobject (
                    name VARCHAR PRIMARY KEY,
                    oid OID,
                    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
                    updated_at TIMESTAMP NOT NULL DEFAULT NOW()
                )
            ''')

    def _get_oid(self, name):
        """Return the large-object OID mapped to ``name``, or ``None`` if unmapped."""
        with connection.cursor() as cursor:
            cursor.execute('SELECT oid FROM storage_largeobject WHERE name = %s', [name])
            row = cursor.fetchone()
        return row[0] if row else None

    def _fetch_timestamp(self, sql, name):
        """Run a single-column timestamp query for ``name``.

        Raises:
            FileNotFoundError: if no mapping row exists for ``name``.
        """
        with connection.cursor() as cursor:
            cursor.execute(sql, [name])
            row = cursor.fetchone()
        if not row:
            raise FileNotFoundError(f"No such file: '{name}'")
        return row[0]

    def _open(self, name, mode='rb'):
        """Return a ``PostgreSQLLargeObjectFile`` for the stored file ``name``.

        Raises:
            FileNotFoundError: if ``name`` has no mapped large object.
        """
        oid = self._get_oid(name)
        if oid is None:
            raise FileNotFoundError(f"No such file: '{name}'")
        # The file class takes (oid, name, mode); passing only (oid, mode)
        # would bind the mode string to ``name``.
        return PostgreSQLLargeObjectFile(oid, name, mode)

    def _save(self, name, content):
        """Write ``content`` into a new large object and map it to ``name``.

        Returns the name under which the content was stored.
        """
        # Large objects can only be used inside a transaction; the atomic
        # block also keeps the new object and its mapping row consistent.
        with transaction.atomic():
            previous_oid = self._get_oid(name)

            # Create a new Large Object
            lo = connection.connection.lobject(0, 'wb')
            oid = lo.oid
            # Write content to the Large Object in chunks
            while True:
                chunk = content.read(65536)
                if not chunk:
                    break
                lo.write(chunk)
            lo.close()

            with connection.cursor() as cursor:
                # Insert or update the mapping
                cursor.execute('''
                    INSERT INTO storage_largeobject (name, oid, created_at, updated_at)
                    VALUES (%s, %s, NOW(), NOW())
                    ON CONFLICT (name) DO UPDATE SET oid = EXCLUDED.oid, updated_at = NOW()
                ''', [name, oid])
                # When an existing name is overwritten, the old large object
                # is no longer referenced by the mapping; unlink it so it does
                # not linger as an orphan.
                if previous_oid is not None and previous_oid != oid:
                    cursor.execute('SELECT lo_unlink(%s)', [previous_oid])
        return name

    def delete(self, name):
        """Remove the large object and the mapping row for ``name`` (no-op if unmapped)."""
        # Atomic so the unlink and the row delete succeed or fail together.
        with transaction.atomic():
            oid = self._get_oid(name)
            if oid is None:
                return
            with connection.cursor() as cursor:
                cursor.execute('SELECT lo_unlink(%s)', [oid])
                cursor.execute('DELETE FROM storage_largeobject WHERE name = %s', [name])

    def exists(self, name):
        """Return ``True`` when ``name`` is mapped to a large object."""
        return self._get_oid(name) is not None

    def listdir(self, path):
        """List the immediate children of ``path``.

        Returns a ``(directories, files)`` tuple of name lists, derived from
        the ``/``-separated names stored in the mapping table.
        """
        # Match on a directory boundary so that "foo" does not also pick up
        # entries stored under "foobar/".
        prefix = path
        if prefix and not prefix.endswith('/'):
            prefix += '/'
        # Escape LIKE wildcards so "%" and "_" in the path are taken literally
        # (backslash is the default LIKE escape character in PostgreSQL).
        pattern = (
            prefix.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_') + '%'
        )
        with connection.cursor() as cursor:
            cursor.execute('SELECT name FROM storage_largeobject WHERE name LIKE %s', [pattern])
            rows = cursor.fetchall()

        directories, files = [], []
        seen_directories = set()
        for (name,) in rows:
            head, separator, _rest = name[len(prefix):].lstrip('/').partition('/')
            if not head:
                continue
            if not separator:
                files.append(head)
            elif head not in seen_directories:
                seen_directories.add(head)
                directories.append(head)
        return directories, files

    @transaction.atomic
    def size(self, name):
        """Return the size in bytes of the large object mapped to ``name``.

        Raises:
            FileNotFoundError: if ``name`` has no mapped large object.
        """
        oid = self._get_oid(name)
        if oid is None:
            raise FileNotFoundError(f"No such file: '{name}'")
        lo = connection.connection.lobject(oid, 'rb')
        lo.seek(0, 2)  # Seek to the end
        size = lo.tell()
        lo.close()
        return size

    def url(self, name):
        """Return ``name`` unchanged; no URL is generated for database-stored files."""
        # Not implemented since it's database storage... A view could return the content instead.
        # Returning None would hide the name in the admin, which we don't want.
        return name

    def accessed_time(self, name):
        """Return the ``updated_at`` timestamp of ``name`` (access time is not tracked).

        Raises:
            FileNotFoundError: if no mapping row exists for ``name``.
        """
        return self._fetch_timestamp(
            'SELECT updated_at FROM storage_largeobject WHERE name = %s', name
        )

    def created_time(self, name):
        """Return the ``created_at`` timestamp of ``name``.

        Raises:
            FileNotFoundError: if no mapping row exists for ``name``.
        """
        return self._fetch_timestamp(
            'SELECT created_at FROM storage_largeobject WHERE name = %s', name
        )

    def modified_time(self, name):
        """Return the ``updated_at`` timestamp of ``name``."""
        return self.accessed_time(name)

    # Aliases under the names used by Django's current Storage API.
    # NOTE(review): the columns are TIMESTAMP without time zone, so the values
    # are presumably naive datetimes - confirm this is acceptable if USE_TZ is on.
    def get_accessed_time(self, name):
        """Alias of ``accessed_time``."""
        return self.accessed_time(name)

    def get_created_time(self, name):
        """Alias of ``created_time``."""
        return self.created_time(name)

    def get_modified_time(self, name):
        """Alias of ``modified_time``."""
        return self.modified_time(name)
from django.core.files.base import File
from django.db import connection, transaction
class PostgreSQLLargeObjectFile(File):
    """File-like wrapper around one PostgreSQL large object.

    The large object is opened lazily on first use. Opening also enters a
    ``transaction.atomic()`` block that stays active until ``close()``,
    because a large-object handle is only usable inside the transaction that
    opened it. Callers must therefore close the file (or use it as a context
    manager) to end that transaction.
    """

    def __init__(self, oid, name, mode='rb'):
        self.name = name  # The name of the file
        self.oid = oid  # The OID of the large object
        self.mode = mode
        self._is_dirty = False
        self._lo = None
        self._atomic = None  # Atomic block entered by open(), exited by close()
        self._opened = False
        self._size = None  # Cache the size of the file

    def open(self, mode=None):
        """Open the large object if it is not already open and return ``self``.

        ``mode``, when given, replaces the mode chosen at construction time.
        """
        if not self._opened:
            if mode:
                self.mode = mode
            atomic = transaction.atomic()
            atomic.__enter__()
            try:
                # Open the large object
                self._lo = connection.connection.lobject(self.oid, self.mode)
            except Exception as exc:
                # Do not leave a dangling transaction if the open fails.
                atomic.__exit__(type(exc), exc, exc.__traceback__)
                raise
            self._atomic = atomic
            self._opened = True
        return self

    def close(self):
        """Close the large object and finish the transaction started by ``open()``."""
        if not self._opened:
            return
        lo, self._lo = self._lo, None
        atomic, self._atomic = self._atomic, None
        self._opened = False
        try:
            if lo is not None:
                lo.close()
        except Exception as exc:
            # Roll back rather than commit when closing the handle failed.
            if atomic is not None:
                atomic.__exit__(type(exc), exc, exc.__traceback__)
            raise
        # Commit or rollback the transaction
        if atomic is not None:
            atomic.__exit__(None, None, None)

    @property
    def size(self):
        """Size of the large object in bytes, cached until the next write."""
        if self._size is None:
            # Save the current position
            pos = self.tell()
            # Seek to the end to get the size
            self.seek(0, 2)
            self._size = self.tell()
            # Return to the original position
            self.seek(pos)
        return self._size

    @property
    def closed(self):
        """``True`` while the large object is not open."""
        return not self._opened

    def read(self, size=-1):
        """Read up to ``size`` bytes (everything remaining by default)."""
        self.open()
        return self._lo.read(size)

    def write(self, data):
        """Write ``data`` at the current position and return the driver's result."""
        self.open()
        self._is_dirty = True
        self._size = None  # A write may change the size; drop the cached value.
        return self._lo.write(data)

    def seek(self, offset, whence=0):
        """Move the position within the large object and return the driver's result."""
        self.open()
        return self._lo.seek(offset, whence)

    def tell(self):
        """Return the current position within the large object."""
        self.open()
        return self._lo.tell()

    def flush(self):
        """Clear the dirty flag; no separate flush call is made on the large object."""
        if self._is_dirty:
            self._is_dirty = False
您可以将其与以下内容一起使用
class TrackingFileDescriptor(FileDescriptor):
    """File descriptor that notifies the model before a stored file is replaced.

    When the attribute is assigned a value that differs from the current,
    non-empty file name, ``instance.file_will_change(field_name, old_name,
    new_value)`` is called before the assignment takes effect.
    """

    def __set__(self, instance, value):
        # Get the current value of the file field
        current_value = instance.__dict__.get(self.field.name)
        # The instance dict may hold either a file object (which has ``.name``)
        # or a plain string name that has not been wrapped yet; accept both
        # instead of assuming ``.name`` exists.
        if current_value:
            current_name = getattr(current_value, 'name', current_value)
        else:
            current_name = None
        if current_name and current_name != value:
            instance.file_will_change(self.field.name, current_name, value)
        super().__set__(instance, value)
class TrackingFileField(FileField):
    """``FileField`` whose model attribute is a ``TrackingFileDescriptor``."""

    def contribute_to_class(self, cls, name, **kwargs):
        """Register the field on ``cls``, then install the tracking descriptor."""
        super().contribute_to_class(cls, name, **kwargs)
        # Replace whatever descriptor the parent installed under this field's name.
        tracking_descriptor = TrackingFileDescriptor(self)
        setattr(cls, self.name, tracking_descriptor)
class TestModel(models.Model):
    """Example model whose ``file`` field deletes the old file when replaced."""

    file = TrackingFileField(blank=True, null=True)

    def file_will_change(self, field_name, old_value, new_value):
        """Hook called by the tracking descriptor before ``file`` is reassigned.

        Logs the pending change and deletes the currently stored file.
        """
        print(f"The file '{field_name}' is about to be cleared, or change from '{old_value}' to '{new_value}'")
        if old_value is None:
            return
        self.file.delete()
@receiver(post_delete, sender=TestModel)
def delete_file(sender, instance, **kwargs):
    """Delete the stored file after its ``TestModel`` row has been deleted."""
    stored_file = instance.file
    if not stored_file:
        return
    # save=False: the model row is already gone, so do not write it back.
    stored_file.delete(save=False)
和settings.py
# Database file storage: dotted path of the storage class used as the
# project's default file storage.
# NOTE(review): DEFAULT_FILE_STORAGE was deprecated in Django 4.2 in favour of
# the STORAGES setting - confirm against the project's Django version.
DEFAULT_FILE_STORAGE = 'liberty.storage.postgres.PostgreSQLLargeObjectStorage'
非常简单
只需覆盖管理中的保存方法
# Body of the admin save override: copy the uploaded file's bytes into the
# ``filecontent`` column of the ``filecontent`` row for ``obj``.
# ``form`` and ``obj`` come from the enclosing admin method.
from django.db import connection

filecontent = form.cleaned_data.get('upload_file')
# Skip the update when nothing was uploaded instead of failing on None.read().
if filecontent is not None:
    data = filecontent.read()
    # The context manager closes the cursor even if the statement raises.
    # No manual commit or connection.close(): committing the raw driver
    # connection or closing Django's connection here would bypass Django's
    # transaction management for the surrounding request.
    with connection.cursor() as cursor:
        cursor.execute(
            "update filecontent set filecontent=(%s) where id=(%s)",
            [data, obj.id],
        )
这会将文件内容存储在 filecontent 表的 filecontent 数据库列中