我正在尝试绘制以下示例数据:x 轴是飞机已经检修的天数(c_num 天),y 轴是每天在该飞机上工作的时间(MTC_Daily_Lbr_percent)。我写了下面的代码来尝试做曲线拟合,但我对曲线拟合和 Matplotlib 都非常陌生。有什么方法可以用这些数据绘制直方图吗?我知道它应该是一个左偏的钟形(在接近结束日期时可能还会有一次轻微的上升)。此外,有没有某种方法可以得到一个方程,这样我就可以做预测了?
数据量太大,所以我把它放在下面的评论中。
由于您有两个轴(天和 Lbr),您既可以分别绘制每个变量的直方图,也可以绘制一个同时查看两个变量的二维直方图。下图展示了这两种做法。
第一个图是原始图的修改版本,其中所有样本都用于拟合(我认为这就是你想要做的,但由于数据未排序,`x[[0, -1]]` 不一定对应于最小值和最大值)。
您可以使用这些数据对未来几天进行预测。一个简单的 ARMA 类模型可以作为起点;根据它的表现,您可以再尝试其他技术。如果您说明具体目标,我可以就此进一步展开。
下面代码中包含的数据来自提问者(OP)。
import pandas as pd
import numpy as np
import datetime as dt
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
data={
"ID": ["ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID10140728165.2155"," ID10140728165.2155"," ID10140728165.2155"," ID10140728165.2155"," ID10140728165.2155"," ID10140728165.2155"," ID10140728165.2155"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID31366029674.2499"," ID31366029674.2499"," ID31366029674.2499"," ID31366029674.2499"," ID31366029674.2499"," ID31366029674.2499"," ID31366029674.2499"," ID31366029674.2499"," ID42645674424.4224"," ID42645674424.4224"," ID20635322422.8366"," ID20635322422.8366"," ID20635322422.8366"," ID20635322422.8366"," ID20635322422.8366"," ID30380414661.1556"," ID30380414661.1556"," ID30380414661.1556"," ID30380414661.1556"," ID37511661093.7047"," ID37511661093.7047"," ID37511661093.7047"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID46915488293.8591"," ID46915488293.8591"," ID46915488293.8591"," ID46915488293.8591"," ID46915488293.8591"," ID46915488293.8591"," ID46915488293.8591"," ID46915488293.8591"," ID46915488293.8591"," ID46915488293.8591"," ID46915488293.8591"," ID46915488293.8591"," ID46915488293.8591"," ID46915488293.8591"," ID46915488293.8591"," ID46915488293.8591"," ID46915488293.8591"," ID46915488293.8591"," ID18689129034.7377"," ID18689129034.7377"," ID18689129034.7377"," ID18689129034.7377"," ID18689129034.7377"," ID4830501589.4234"," ID4830501589.4234"," ID42645674424.4224"," 
ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID42645674424.4224"," ID24100797148.6495"," ID24100797148.6495"," ID24100797148.6495"," ID18093459046.6984"," ID18093459046.6984"," ID18093459046.6984","ID18093459046.6984"],
"MODEL_CD": [ "A320"," A320"," A320"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," A321"," A321"," A321"," A321"," A321"," A321"," A321"," A321"," A321"," A321"," A321"," A321"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," A350"," A350"," B717"," B717"," B717"," B717"," B717"," B737"," B737"," B737"," B737"," B767"," B767"," B767"," B717"," B717"," B717"," B717"," B717"," B717"," B717"," B717"," B717"," B717"," B717"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A320"," A320"," A320"," A320"," A320"," A350"," A350"," A350"," A350"," B717"," B717"," B717"," A320"," A320"," A320"," A320"," A320"," A320"," A320"],
"Check_Type": ["IFC"," IFC"," IFC"," IFC"," IFC"," IFC"," IFC"," IFC"," IFC"," IFC"," IFC"," IFC"," IFC"," IFC"," IFC"," IFC"," IFC"," IFC"," IFC"," PTS"," PTS"," PTS"," PTS"," PTS"," PTS"," PTS"," PTS"," PTS"," PTS"," PTS"," PTS"," SV"," SV"," SV"," SV"," SV"," SV"," SV"," SV"," 121"," 121"," REL"," REL"," REL"," REL"," REL"," IFC"," IFC"," IFC"," IFC"," FDA"," FDA"," FDA"," REL"," REL"," REL"," REL"," REL"," REL"," REL"," REL"," REL"," REL"," REL"," C"," C"," C"," C"," C"," C"," C"," C"," C"," C"," C"," C"," C"," C"," C"," C"," C"," C"," IFC"," IFC"," IFC"," IFC"," IFC"," SV"," SV"," SV"," SV"," REL"," REL"," REL"," IFC"," IFC"," IFC"," IFC"," IFC"," IFC"," IFC"],
"Start_Date": [ "2023-01-12"," 2023-01-12"," 2023-01-12"," 2023-01-24"," 2023-01-24"," 2023-01-24"," 2023-01-24"," 2023-01-24"," 2023-01-24"," 2023-01-24"," 2023-01-24"," 2023-01-24"," 2023-01-19"," 2023-01-19"," 2023-01-19"," 2023-01-19"," 2023-01-19"," 2023-01-19"," 2023-01-19"," 2023-01-06"," 2023-01-06"," 2023-01-06"," 2023-01-06"," 2023-01-06"," 2023-01-06"," 2023-01-06"," 2023-01-06"," 2023-01-06"," 2023-01-06"," 2023-01-06"," 2023-01-06"," 2023-01-01"," 2023-01-01"," 2023-01-01"," 2023-01-01"," 2023-01-01"," 2023-01-01"," 2023-01-01"," 2023-01-01"," 2023-01-04"," 2023-01-04"," 2023-01-18"," 2023-01-18"," 2023-01-18"," 2023-01-18"," 2023-01-18"," 2023-01-04"," 2023-01-04"," 2023-01-04"," 2023-01-04"," 2023-02-19"," 2023-02-19"," 2023-02-19"," 2023-01-03"," 2023-01-03"," 2023-01-03"," 2023-01-03"," 2023-01-03"," 2023-01-07"," 2023-01-07"," 2023-01-07"," 2023-01-07"," 2023-01-07"," 2023-01-07"," 2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-01-25"," 2023-01-25"," 2023-01-25"," 2023-01-25"," 2023-01-25"," 2023-01-16"," 2023-01-16"," 2023-01-19"," 2023-01-19"," 2023-01-27"," 2023-01-27"," 2023-01-27"," 2023-01-12"," 2023-01-12"," 2023-01-12"," 2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-01-15"],
"End_Date": ["2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-02-02"," 2023-02-02"," 2023-02-02"," 2023-02-02"," 2023-02-02"," 2023-02-02"," 2023-02-02"," 2023-02-02"," 2023-02-02"," 2023-01-26"," 2023-01-26"," 2023-01-26"," 2023-01-26"," 2023-01-26"," 2023-01-26"," 2023-01-26"," 2023-01-17"," 2023-01-17"," 2023-01-17"," 2023-01-17"," 2023-01-17"," 2023-01-17"," 2023-01-17"," 2023-01-17"," 2023-01-17"," 2023-01-17"," 2023-01-17"," 2023-01-17"," 2023-01-08"," 2023-01-08"," 2023-01-08"," 2023-01-08"," 2023-01-08"," 2023-01-08"," 2023-01-08"," 2023-01-08"," 2023-01-05"," 2023-01-05"," 2023-01-22"," 2023-01-22"," 2023-01-22"," 2023-01-22"," 2023-01-22"," 2023-01-10"," 2023-01-10"," 2023-01-10"," 2023-01-10"," 2023-02-21"," 2023-02-21"," 2023-02-21"," 2023-01-07"," 2023-01-07"," 2023-01-07"," 2023-01-07"," 2023-01-07"," 2023-01-12"," 2023-01-12"," 2023-01-12"," 2023-01-12"," 2023-01-12"," 2023-01-12"," 2023-02-01"," 2023-02-01"," 2023-02-01"," 2023-02-01"," 2023-02-01"," 2023-02-01"," 2023-02-01"," 2023-02-01"," 2023-02-01"," 2023-02-01"," 2023-02-01"," 2023-02-01"," 2023-02-01"," 2023-02-01"," 2023-02-01"," 2023-02-01"," 2023-02-01"," 2023-02-01"," 2023-01-29"," 2023-01-29"," 2023-01-29"," 2023-01-29"," 2023-01-29"," 2023-01-17"," 2023-01-17"," 2023-01-20"," 2023-01-20"," 2023-01-30"," 2023-01-30"," 2023-01-30"," 2023-01-15"," 2023-01-15"," 2023-01-15"," 2023-01-20"," 2023-01-20"," 2023-01-20"," 2023-01-20"],
"Tot_Lbr_Hrs": ["56.61"," 56.61"," 56.61"," 182.36"," 182.36"," 182.36"," 182.36"," 182.36"," 182.36"," 182.36"," 182.36"," 182.36"," 192.8"," 192.8"," 192.8"," 192.8"," 192.8"," 192.8"," 192.8"," 1558.57"," 1558.57"," 1558.57"," 1558.57"," 1558.57"," 1558.57"," 1558.57"," 1558.57"," 1558.57"," 1558.57"," 1558.57"," 1558.57"," 234.16"," 234.16"," 234.16"," 234.16"," 234.16"," 234.16"," 234.16"," 234.16"," 23.3"," 23.3"," 418.43"," 418.43"," 418.43"," 418.43"," 418.43"," 91.62"," 91.62"," 91.62"," 91.62"," 59.67"," 59.67"," 59.67"," 871.52"," 871.52"," 871.52"," 871.52"," 871.52"," 764.58"," 764.58"," 764.58"," 764.58"," 764.58"," 764.58"," 4671.04"," 4671.04"," 4671.04"," 4671.04"," 4671.04"," 4671.04"," 4671.04"," 4671.04"," 4671.04"," 4671.04"," 4671.04"," 4671.04"," 4671.04"," 4671.04"," 4671.04"," 4671.04"," 4671.04"," 4671.04"," 103.47"," 103.47"," 103.47"," 103.47"," 103.47"," 56.47"," 56.47"," 25.21"," 25.21"," 327.29"," 327.29"," 327.29"," 61"," 61"," 61"," 59.07"," 59.07"," 59.07"," 59.07"],
"Daily_Tot_Lbr_Hrs": [ "29.87"," 0.25"," 26.49"," 6.24"," 60.93"," 48.08"," 7.33"," 1.38"," 10.74"," 7.48"," 33.55"," 6.63"," 40.88"," 42.75"," 23.44"," 40.04"," 12.69"," 32.67"," 0.33"," 5.88"," 99.49"," 153.22"," 177.87"," 159.05"," 163.57"," 143.58"," 224.45"," 230.6"," 161.06"," 34.14"," 5.66"," 17.75"," 41.77"," 4.84"," 7.16"," 53.12"," 47.16"," 34.13"," 28.23"," 1.99"," 21.31"," 44.26"," 178.55"," 121.66"," 73.63"," 0.33"," 64.73"," 6.15"," 2.69"," 18.05"," 1.47"," 30.43"," 27.77"," 255.82"," 306.65"," 198.57"," 91.9"," 18.58"," 65.61"," 98.63"," 205.67"," 222.87"," 151.92"," 19.88"," 151"," 245.66"," 239.89"," 307.71"," 286.46"," 301.59"," 368.02"," 451.74"," 300.24"," 369.01"," 442.61"," 341.18"," 328.63"," 187.87"," 153.8"," 113.14"," 49.89"," 32.6"," 10.13"," 23.8"," 36.36"," 22.01"," 11.17"," 44.52"," 11.95"," 2.21"," 23"," 125.27"," 127.97"," 74.05"," 18.75"," 27.53"," 14.72"," 17.55"," 6.72"," 17.13"," 17.4"],
"Day_Counter": ["1"," 2"," 3"," 1"," 2"," 3"," 4"," 5"," 6"," 7"," 8"," 9"," 1"," 2"," 3"," 4"," 5"," 6"," 7"," 1"," 2"," 3"," 4"," 5"," 6"," 7"," 8"," 9"," 10"," 11"," 12"," 1"," 2"," 3"," 4"," 5"," 6"," 7"," 8"," 1"," 2"," 1"," 2"," 3"," 4"," 5"," 1"," 2"," 3"," 4"," 1"," 2"," 3"," 1"," 2"," 3"," 4"," 5"," 1"," 2"," 3"," 4"," 5"," 6"," 1"," 2"," 3"," 4"," 5"," 6"," 7"," 8"," 9"," 10"," 11"," 12"," 13"," 14"," 15"," 16"," 17"," 18"," 1"," 2"," 3"," 4"," 5"," 1"," 2"," 1"," 2"," 1"," 2"," 3"," 1"," 2"," 3"," 1"," 2"," 3"," 4"],
"MTC_Daily_Lbr_percent": ["0.53"," 0"," 0.47"," 0.03"," 0.33"," 0.26"," 0.04"," 0.01"," 0.06"," 0.04"," 0.18"," 0.04"," 0.21"," 0.22"," 0.12"," 0.21"," 0.07"," 0.17"," 0"," 0"," 0.06"," 0.1"," 0.11"," 0.1"," 0.1"," 0.09"," 0.14"," 0.15"," 0.1"," 0.02"," 0"," 0.08"," 0.18"," 0.02"," 0.03"," 0.23"," 0.2"," 0.15"," 0.12"," 0.09"," 0.91"," 0.11"," 0.43"," 0.29"," 0.18"," 0"," 0.71"," 0.07"," 0.03"," 0.2"," 0.02"," 0.51"," 0.47"," 0.29"," 0.35"," 0.23"," 0.11"," 0.02"," 0.09"," 0.13"," 0.27"," 0.29"," 0.2"," 0.03"," 0.03"," 0.05"," 0.05"," 0.07"," 0.06"," 0.06"," 0.08"," 0.1"," 0.06"," 0.08"," 0.09"," 0.07"," 0.07"," 0.04"," 0.03"," 0.02"," 0.01"," 0.01"," 0.1"," 0.23"," 0.35"," 0.21"," 0.11"," 0.79"," 0.21"," 0.09"," 0.91"," 0.38"," 0.39"," 0.23"," 0.31"," 0.45"," 0.24"," 0.3"," 0.11"," 0.29"," 0.29"]
}
# Build the working frame from the raw records; df_orig is left untouched so
# the unconverted text values stay available.
df_orig = pd.DataFrame(data)
df = df_orig.copy()

# The raw values are all strings, so cast the numeric columns explicitly.
df = df.astype({
    'Tot_Lbr_Hrs': float,
    'Daily_Tot_Lbr_Hrs': float,
    'Day_Counter': int,
    'MTC_Daily_Lbr_percent': float,
})

# Parse the date columns; surrounding whitespace is stripped before parsing.
for col in ['Start_Date', 'End_Date']:
    df[col] = pd.to_datetime(df[col].str.strip())

# c_days: length of each check in calendar days, counting both the start and
# the end date (hence the +1 day).
df["c_days"] = (df["End_Date"] - df["Start_Date"] + timedelta(days=1)).dt.days

# day_normalized: Day_Counter divided by c_days, i.e. how far through the
# check a given day is.
df['day_normalized'] = df['Day_Counter'] / df['c_days']

# In a few rows Day_Counter exceeds the Start/End span, giving a normalized
# day above 1. Those rows are excluded from the rest of the analysis.
optimal = df.loc[(df['day_normalized'] <= 1)]
#
# Pull out the two series to analyse and show them as a scatter plot
#
x_col = 'day_normalized'
y_col = 'MTC_Daily_Lbr_percent'
x = optimal[x_col].to_numpy()
y = optimal[y_col].to_numpy()

# Four stacked panels: scatter + fit, two 1D histograms, one 2D histogram.
f, axs = plt.subplots(nrows=4, figsize=(6, 8), layout='tight')

ax = axs[0]
ax.scatter(x, y, marker='x', s=25, color='tab:brown', label='data')
ax.set_xlabel(x_col)
ax.set_ylabel(y_col)

#
# Cubic polynomial fit through all samples
#
from numpy.polynomial import Polynomial as P

p = P.fit(x, y, deg=3)

# Evaluate the fit on an even grid between the smallest and largest x;
# min()/max() are used because the samples are not sorted.
x_new = np.linspace(x.min(), x.max(), 100)
y_new = p(x_new)

# Draw the fitted curve on top of the scatter.
ax.plot(
    x_new,
    y_new,
    color='tab:green',
    linewidth=3,
    label=f'poly fit (deg={p.degree()})',
)
ax.legend(ncol=2, loc='upper left', fontsize=8.5)
#
# One-dimensional histograms: x on the second panel, y on the third
#
for ax, values, col_name in zip(axs[1:3], (x, y), (x_col, y_col)):
    # 20 equal-width bins per variable.
    ax.hist(values, bins=20, color='tab:brown')
    ax.set_xlabel(col_name)
    ax.set_ylabel('counts')
    ax.set_title('histogram of ' + col_name)
#
# Two-dimensional (joint) histogram of x against y
#
# 15 bins along x and 5 along y.
H_xy, x_edges, y_edges = np.histogram2d(x, y, bins=(15, 5))

ax = axs[3]

# Discrete colormap with one level per distinct count in the histogram.
n_levels = np.unique(H_xy).size
cmap = plt.get_cmap('Greens', n_levels)

# histogram2d puts x along the first axis, so transpose for pcolormesh.
im = ax.pcolormesh(x_edges, y_edges, H_xy.T, cmap=cmap)
ax.set_xlabel(x_col)
ax.set_ylabel(y_col)
ax.set_title('2D histogram')
f.colorbar(im, label='counts', aspect=5)

# Drop the top and right spines on every panel except the last one.
for ax in axs[:-1]:
    ax.spines[['right', 'top']].set_visible(False)