打印 Polars DataFrame 时,pl.Duration 列默认以“人类可读格式”打印。
Polars 内部使用什么函数来执行此转换?我们可以直接调用它吗?
尝试使用 "{}".format() 会返回可读、但不如默认打印格式理想的内容。
import polars as pl
# Raw timestamps as strings; "end" and "start" entries are paired by position.
data = {
    "end": [
        "2024/11/13 10:28:00",
        "2024/10/10 10:10:10",
        "2024/09/13 09:12:29",
        "2024/08/31 14:57:02",
    ],
    "start": [
        "2024/11/13 10:27:33",
        "2024/10/10 10:01:01",
        "2024/09/13 07:07:07",
        "2024/08/25 13:48:28",
    ],
}

# Each with_columns step reads columns produced by the previous step, so the
# steps are chained in order rather than merged into a single call.
df = (
    pl.DataFrame(data)
    # Parse both string columns into datetimes.
    .with_columns(
        pl.col("end").str.to_datetime(),
        pl.col("start").str.to_datetime(),
    )
    # Subtracting two datetime columns yields the duration column.
    .with_columns(duration=pl.col("end") - pl.col("start"))
    # Render every duration value through Python's str.format.
    .with_columns(
        pl.col("duration")
        .map_elements(lambda t: "{}".format(t), return_dtype=pl.String())
        .alias("duration_str")
    )
)
print(df)
shape: (4, 4)
┌─────────────────────┬─────────────────────┬──────────────┬─────────────────┐
│ end ┆ start ┆ duration ┆ duration_str │
│ --- ┆ --- ┆ --- ┆ --- │
│ datetime[μs] ┆ datetime[μs] ┆ duration[μs] ┆ str │
╞═════════════════════╪═════════════════════╪══════════════╪═════════════════╡
│ 2024-11-13 10:28:00 ┆ 2024-11-13 10:27:33 ┆ 27s ┆ 0:00:27 │
│ 2024-10-10 10:10:10 ┆ 2024-10-10 10:01:01 ┆ 9m 9s ┆ 0:09:09 │
│ 2024-09-13 09:12:29 ┆ 2024-09-13 07:07:07 ┆ 2h 5m 22s ┆ 2:05:22 │
│ 2024-08-31 14:57:02 ┆ 2024-08-25 13:48:28 ┆ 6d 1h 8m 34s ┆ 6 days, 1:08:34 │
└─────────────────────┴─────────────────────┴──────────────┴─────────────────┘
有趣!!我不久前遇到了同样的问题,起初也采用了您最初的解决方案,但发现它并没有真正达到我想要的效果。于是我写了一个函数来“模仿” Polars 的持续时间显示格式。将它应用到您的数据上是可行的:
import polars as pl
# Raw timestamps as strings; "end" and "start" entries are paired by position.
data = {
    "end": [
        "2024/11/13 10:28:00",
        "2024/10/10 10:10:10",
        "2024/09/13 09:12:29",
        "2024/08/31 14:57:02",
    ],
    "start": [
        "2024/11/13 10:27:33",
        "2024/10/10 10:01:01",
        "2024/09/13 07:07:07",
        "2024/08/25 13:48:28",
    ],
}

# Parse both timestamp columns in place using the same explicit format.
df = pl.DataFrame(data).with_columns(
    [
        pl.col(name).str.strptime(pl.Datetime, format="%Y/%m/%d %H:%M:%S").alias(name)
        for name in ("end", "start")
    ]
)

# Elapsed time between the two parsed timestamps.
df = df.with_columns(duration=pl.col("end") - pl.col("start"))
def format_duration(duration_microseconds):
    """Format a duration given in microseconds as a compact string.

    The result looks like ``"6d 1h 8m 34s"``. Leading units that are zero
    are omitted, but once a larger unit is shown every smaller unit is
    shown too (``"1h 0m 0s"``). Seconds are always present. Sub-second
    precision is discarded (truncated toward zero).

    Args:
        duration_microseconds: Length of the duration in microseconds.
            May be negative.

    Returns:
        The formatted duration. Negative durations of at least one second
        are prefixed with ``"-"``.
    """
    # Work on the magnitude: floor division and divmod on a negative value
    # borrow from the larger units (e.g. -27 s would come out as
    # "23h 59m 33s"), so the sign is handled separately.
    total_seconds = abs(duration_microseconds) // 1_000_000
    days, rem = divmod(total_seconds, 86_400)
    hours, rem = divmod(rem, 3_600)
    minutes, seconds = divmod(rem, 60)

    parts = []
    if days:
        parts.append(f"{days}d")
    if days or hours:
        parts.append(f"{hours}h")
    if days or hours or minutes:
        parts.append(f"{minutes}m")
    parts.append(f"{seconds}s")

    # Avoid "-0s" for negative values that truncate to zero seconds.
    sign = "-" if duration_microseconds < 0 and total_seconds else ""
    return sign + " ".join(parts)
# format_duration takes microseconds. dt.total_microseconds() yields that
# regardless of the column's time unit, whereas a plain Int64 cast returns the
# value in whatever unit (ms/us/ns) the Duration column happens to use.
# return_dtype is given explicitly so Polars does not have to infer the output
# type of the Python callback.
df = df.with_columns(
    pl.col("duration")
    .dt.total_microseconds()
    .map_elements(format_duration, return_dtype=pl.String)
    .alias("formatted_duration")
)
print(df)
并返回
shape: (4, 4)
┌─────────────────────┬─────────────────────┬──────────────┬────────────────────┐
│ end ┆ start ┆ duration ┆ formatted_duration │
│ --- ┆ --- ┆ --- ┆ --- │
│ datetime[μs] ┆ datetime[μs] ┆ duration[μs] ┆ str │
╞═════════════════════╪═════════════════════╪══════════════╪════════════════════╡
│ 2024-11-13 10:28:00 ┆ 2024-11-13 10:27:33 ┆ 27s ┆ 27s │
│ 2024-10-10 10:10:10 ┆ 2024-10-10 10:01:01 ┆ 9m 9s ┆ 9m 9s │
│ 2024-09-13 09:12:29 ┆ 2024-09-13 07:07:07 ┆ 2h 5m 22s ┆ 2h 5m 22s │
│ 2024-08-31 14:57:02 ┆ 2024-08-25 13:48:28 ┆ 6d 1h 8m 34s ┆ 6d 1h 8m 34s │
└─────────────────────┴─────────────────────┴──────────────┴────────────────────┘