使用 Matplotlib 在对数刻度上绘制直方图

问题描述 投票:0回答:1

我正在尝试绘制以下示例数据:x 轴是飞机检修所经历的天数(c_num 天),y 轴是每天在该飞机上投入的工时占比(MTC_Daily_Lbr_percent)。我写了下面的代码来尝试做曲线拟合,但我对曲线拟合和 Matplotlib 都非常陌生。有什么方法可以用这些数据绘制直方图吗?我知道它应该是一个左偏的钟形(在接近结束日期时可能还有一次轻微的上升)。此外,有没有某种方法可以得到一个方程式,让我可以用它来做预测?

数据太大所以我可以把它放在下面的评论中

python numpy matplotlib histogram curve-fitting
1个回答
1
投票

由于您有两个轴(天数和 Lbr),您既可以为每个变量分别绘制直方图,也可以绘制一个同时查看两个变量的二维直方图。下图展示了这两种做法。

第一个图是原始图的修改版本,其中所有样本都用于拟合(我认为这就是你想要做的;但由于数据未排序,`x[[0, -1]]` 不一定对应于最小值和最大值)。

enter image description here

您可以使用这些数据来对未来几天进行预测。一个简单的 ARMA 类型模型可以作为起点,然后根据它的表现再尝试其他技术。如果您说明具体目标是什么,我可以就此展开更多讨论。

可重现的示例

包含的数据来自OP。

import pandas as pd
import numpy as np
import datetime as dt
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

# Raw sample records supplied by the original poster, one list per column.
# Every value is a string exactly as it was pasted (many carry leading
# whitespace), so the numeric and date columns are converted later on.
#
# NOTE: the pasted source also had a "Check_Type" key, but it was truncated
# with no values, so that column cannot be recovered and is omitted here.
# Nothing later in the script reads it.
data = {
    "ID": ["ID42645674424.4224","  ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID10140728165.2155","   ID10140728165.2155","   ID10140728165.2155","   ID10140728165.2155","   ID10140728165.2155","   ID10140728165.2155","   ID10140728165.2155","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID31366029674.2499","   ID31366029674.2499","   ID31366029674.2499","   ID31366029674.2499","   ID31366029674.2499","   ID31366029674.2499","   ID31366029674.2499","   ID31366029674.2499","   ID42645674424.4224","   ID42645674424.4224","   ID20635322422.8366","   ID20635322422.8366","   ID20635322422.8366","   ID20635322422.8366","   ID20635322422.8366","   ID30380414661.1556","   ID30380414661.1556","   ID30380414661.1556","   ID30380414661.1556","   ID37511661093.7047","   ID37511661093.7047","   ID37511661093.7047","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID46915488293.8591","   ID18689129034.7377","   ID18689129034.7377","   ID18689129034.7377","   ID18689129034.7377","   ID18689129034.7377","   ID4830501589.4234","    ID4830501589.4234","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID42645674424.4224","   ID24100797148.6495","   ID24100797148.6495","   ID24100797148.6495","   ID18093459046.6984","   ID18093459046.6984","   ID18093459046.6984","ID18093459046.6984"],
    "MODEL_CD": [  "A320","    A320"," A320"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," A321"," A321"," A321"," A321"," A321"," A321"," A321"," A321"," A321"," A321"," A321"," A321"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," B737"," A350"," A350"," B717"," B717"," B717"," B717"," B717"," B737"," B737"," B737"," B737"," B767"," B767"," B767"," B717"," B717"," B717"," B717"," B717"," B717"," B717"," B717"," B717"," B717"," B717"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A330"," A320"," A320"," A320"," A320"," A320"," A350"," A350"," A350"," A350"," B717"," B717"," B717"," A320"," A320"," A320"," A320"," A320"," A320"," A320"],
    "Start_Date": [ "2023-01-12","  2023-01-12","   2023-01-12","   2023-01-24","   2023-01-24","   2023-01-24","   2023-01-24","   2023-01-24","   2023-01-24","   2023-01-24","   2023-01-24","   2023-01-24","   2023-01-19","   2023-01-19","   2023-01-19","   2023-01-19","   2023-01-19","   2023-01-19","   2023-01-19","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-06","   2023-01-01","   2023-01-01","   2023-01-01","   2023-01-01","   2023-01-01","   2023-01-01","   2023-01-01","   2023-01-01","   2023-01-04","   2023-01-04","   2023-01-18","   2023-01-18","   2023-01-18","   2023-01-18","   2023-01-18","   2023-01-04","   2023-01-04","   2023-01-04","   2023-01-04","   2023-02-19","   2023-02-19","   2023-02-19","   2023-01-03","   2023-01-03","   2023-01-03","   2023-01-03","   2023-01-03","   2023-01-07","   2023-01-07","   2023-01-07","   2023-01-07","   2023-01-07","   2023-01-07","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-25","   2023-01-25","   2023-01-25","   2023-01-25","   2023-01-25","   2023-01-16","   2023-01-16","   2023-01-19","   2023-01-19","   2023-01-27","   2023-01-27","   2023-01-27","   2023-01-12","   2023-01-12","   2023-01-12","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-15"],
    "End_Date": ["2023-01-15"," 2023-01-15","   2023-01-15","   2023-02-02","   2023-02-02","   2023-02-02","   2023-02-02","   2023-02-02","   2023-02-02","   2023-02-02","   2023-02-02","   2023-02-02","   2023-01-26","   2023-01-26","   2023-01-26","   2023-01-26","   2023-01-26","   2023-01-26","   2023-01-26","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-17","   2023-01-08","   2023-01-08","   2023-01-08","   2023-01-08","   2023-01-08","   2023-01-08","   2023-01-08","   2023-01-08","   2023-01-05","   2023-01-05","   2023-01-22","   2023-01-22","   2023-01-22","   2023-01-22","   2023-01-22","   2023-01-10","   2023-01-10","   2023-01-10","   2023-01-10","   2023-02-21","   2023-02-21","   2023-02-21","   2023-01-07","   2023-01-07","   2023-01-07","   2023-01-07","   2023-01-07","   2023-01-12","   2023-01-12","   2023-01-12","   2023-01-12","   2023-01-12","   2023-01-12","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-02-01","   2023-01-29","   2023-01-29","   2023-01-29","   2023-01-29","   2023-01-29","   2023-01-17","   2023-01-17","   2023-01-20","   2023-01-20","   2023-01-30","   2023-01-30","   2023-01-30","   2023-01-15","   2023-01-15","   2023-01-15","   2023-01-20","   2023-01-20","   2023-01-20","   2023-01-20"],
    "Tot_Lbr_Hrs": ["56.61","   56.61","    56.61","    182.36","   182.36","   182.36","   182.36","   182.36","   182.36","   182.36","   182.36","   182.36","   192.8","    192.8","    192.8","    192.8","    192.8","    192.8","    192.8","    1558.57","  1558.57","  1558.57","  1558.57","  1558.57","  1558.57","  1558.57","  1558.57","  1558.57","  1558.57","  1558.57","  1558.57","  234.16","   234.16","   234.16","   234.16","   234.16","   234.16","   234.16","   234.16","   23.3"," 23.3"," 418.43","   418.43","   418.43","   418.43","   418.43","   91.62","    91.62","    91.62","    91.62","    59.67","    59.67","    59.67","    871.52","   871.52","   871.52","   871.52","   871.52","   764.58","   764.58","   764.58","   764.58","   764.58","   764.58","   4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  4671.04","  103.47","   103.47","   103.47","   103.47","   103.47","   56.47","    56.47","    25.21","    25.21","    327.29","   327.29","   327.29","   61","   61","   61","   59.07","    59.07","    59.07","    59.07"],
    "Daily_Tot_Lbr_Hrs": [ "29.87","   0.25"," 26.49","    6.24"," 60.93","    48.08","    7.33"," 1.38"," 10.74","    7.48"," 33.55","    6.63"," 40.88","    42.75","    23.44","    40.04","    12.69","    32.67","    0.33"," 5.88"," 99.49","    153.22","   177.87","   159.05","   163.57","   143.58","   224.45","   230.6","    161.06","   34.14","    5.66"," 17.75","    41.77","    4.84"," 7.16"," 53.12","    47.16","    34.13","    28.23","    1.99"," 21.31","    44.26","    178.55","   121.66","   73.63","    0.33"," 64.73","    6.15"," 2.69"," 18.05","    1.47"," 30.43","    27.77","    255.82","   306.65","   198.57","   91.9"," 18.58","    65.61","    98.63","    205.67","   222.87","   151.92","   19.88","    151","  245.66","   239.89","   307.71","   286.46","   301.59","   368.02","   451.74","   300.24","   369.01","   442.61","   341.18","   328.63","   187.87","   153.8","    113.14","   49.89","    32.6"," 10.13","    23.8"," 36.36","    22.01","    11.17","    44.52","    11.95","    2.21"," 23","   125.27","   127.97","   74.05","    18.75","    27.53","    14.72","    17.55","    6.72"," 17.13","    17.4"],
    "Day_Counter": ["1","   2","    3","    1","    2","    3","    4","    5","    6","    7","    8","    9","    1","    2","    3","    4","    5","    6","    7","    1","    2","    3","    4","    5","    6","    7","    8","    9","    10","   11","   12","   1","    2","    3","    4","    5","    6","    7","    8","    1","    2","    1","    2","    3","    4","    5","    1","    2","    3","    4","    1","    2","    3","    1","    2","    3","    4","    5","    1","    2","    3","    4","    5","    6","    1","    2","    3","    4","    5","    6","    7","    8","    9","    10","   11","   12","   13","   14","   15","   16","   17","   18","   1","    2","    3","    4","    5","    1","    2","    1","    2","    1","    2","    3","    1","    2","    3","    1","    2","    3","    4"],
    "MTC_Daily_Lbr_percent": ["0.53","    0","    0.47"," 0.03"," 0.33"," 0.26"," 0.04"," 0.01"," 0.06"," 0.04"," 0.18"," 0.04"," 0.21"," 0.22"," 0.12"," 0.21"," 0.07"," 0.17"," 0","    0","    0.06"," 0.1","  0.11"," 0.1","  0.1","  0.09"," 0.14"," 0.15"," 0.1","  0.02"," 0","    0.08"," 0.18"," 0.02"," 0.03"," 0.23"," 0.2","  0.15"," 0.12"," 0.09"," 0.91"," 0.11"," 0.43"," 0.29"," 0.18"," 0","    0.71"," 0.07"," 0.03"," 0.2","  0.02"," 0.51"," 0.47"," 0.29"," 0.35"," 0.23"," 0.11"," 0.02"," 0.09"," 0.13"," 0.27"," 0.29"," 0.2","  0.03"," 0.03"," 0.05"," 0.05"," 0.07"," 0.06"," 0.06"," 0.08"," 0.1","  0.06"," 0.08"," 0.09"," 0.07"," 0.07"," 0.04"," 0.03"," 0.02"," 0.01"," 0.01"," 0.1","  0.23"," 0.35"," 0.21"," 0.11"," 0.79"," 0.21"," 0.09"," 0.91"," 0.38"," 0.39"," 0.23"," 0.31"," 0.45"," 0.24"," 0.3","  0.11"," 0.29"," 0.29"]
}

# Keep the untouched records in df_orig; all cleaning happens on the copy, df.
df_orig = pd.DataFrame(data)
df = df_orig.copy()

# Every column starts out as strings, so cast the numeric ones explicitly.
numeric_dtypes = dict(
    Tot_Lbr_Hrs=float,
    Daily_Tot_Lbr_Hrs=float,
    Day_Counter=int,
    MTC_Daily_Lbr_percent=float,
)
df = df.astype(numeric_dtypes)

# Parse the date columns, trimming the padding whitespace around each value first.
for date_col in ('Start_Date', 'End_Date'):
    trimmed = df[date_col].str.strip()
    df[date_col] = pd.to_datetime(trimmed)

# c_days: length of the check in whole days, counting both the start and the
# end date (hence the extra day added to the difference).
check_span = df["End_Date"] - df["Start_Date"] + timedelta(days=1)
df["c_days"] = check_span.dt.days

# day_normalized: the day counter expressed as a fraction of the check length.
df['day_normalized'] = df['Day_Counter'] / df['c_days']

# In a few rows the day counter runs past the start/end window (ratio > 1).
# Those rows are left out of the rest of the analysis.
within_window = df['day_normalized'] <= 1
optimal = df.loc[within_window]

#
# Choose the two columns to analyse and pull them out as plain arrays
#
x_col = 'day_normalized'
y_col = 'MTC_Daily_Lbr_percent'

x, y = (optimal[name].values for name in (x_col, y_col))

# Four stacked panels; this block draws on the first, the rest are filled later.
f, axs = plt.subplots(nrows=4, figsize=(6, 8), layout='tight')
ax = axs[0]

# Raw samples
scatter_style = dict(marker='x', s=25, color='tab:brown', label='data')
ax.scatter(x, y, **scatter_style)
ax.set(xlabel=x_col, ylabel=y_col)

#
# Cubic polynomial fitted to all of the samples
#
from numpy.polynomial import Polynomial as P
p = P.fit(x, y, deg=3)

# Evaluate the fit on 100 evenly spaced points spanning the observed x range
x_new = np.linspace(x.min(), x.max(), num=100)
y_new = p(x_new)

# Draw the fitted curve over the scatter
fit_label = f'poly fit (deg={p.degree()})'
ax.plot(x_new, y_new, color='tab:green', linewidth=3, label=fit_label)
ax.legend(ncol=2, loc='upper left', fontsize=8.5)

#
# 1D histograms: x on the second panel, y on the third
#
for ax, values, col_name in ((axs[1], x, x_col), (axs[2], y, y_col)):
    ax.hist(values, bins=20, color='tab:brown')
    ax.set(xlabel=col_name, ylabel='counts', title='histogram of ' + col_name)

#
# Joint (2D) histogram of x and y on the last panel
#
H_xy, x_edges, y_edges = np.histogram2d(x, y, bins=[15, 5])

# Discrete colormap with one colour per distinct count value in the histogram
n_levels = np.unique(H_xy).size
cmap = plt.get_cmap('Greens', n_levels)

ax = axs[3]
# H_xy is indexed [x_bin, y_bin]; it is transposed before being handed to pcolormesh
im = ax.pcolormesh(x_edges, y_edges, H_xy.T, cmap=cmap)
ax.set(xlabel=x_col, ylabel=y_col, title='2D histogram')
f.colorbar(im, label='counts', aspect=5)

# Hide the top and right spines on every panel except the 2D histogram
for ax in axs[:-1]:
    ax.spines[['right', 'top']].set_visible(False)
© www.soinside.com 2019 - 2024. All rights reserved.