我想通过使用 @(posedge clk) 给 valid_dat 引入一个时钟周期的延迟,但它不起作用。有人知道为什么吗?计数器与时钟配合良好,每个时钟周期可以累加一次,但 valid_delay 却与 valid_dat 同时变化。我不知道哪里出了问题,有人可以告诉我原因吗?
// Butterfly
// 1. U <- (X + Y) MOD P
// 2. V <- ((X - Y) * W * INV) MOD P
// Macro for number of input data, `width groups = 2 x `width numbers in total
`define width 4
// Top-Level Design
// BF: butterfly top level. Buffers incoming data and twiddle words, feeds them
// through the Add256 / Sub256 / Mont256 components under control of a small
// state machine, and streams the buffered results out on dat_out.
//
// Parameters:
//   inv, M, Mp - 256-bit constants forwarded unchanged to Mont256 (M is also
//                forwarded to Add256 and Sub256). Presumably M is the modulus
//                and inv/Mp are Montgomery constants - TODO confirm against
//                the component definitions, which are not in this file.
// Ports:
//   clk        - clock; every clocked block is posedge-triggered.
//   nrst       - active-low asynchronous reset for valid_delay and the control unit.
//   valid_dat  - qualifies dat_in as a data word (loaded into A_buffer/B_buffer).
//   valid_tw   - qualifies dat_in as a twiddle word (loaded into TW_buffer).
//   dat_in     - shared 256-bit input bus for both data and twiddle words.
//   valid_out  - mirrors valid_out_s; asserted by the control unit on entering state_out.
//   dat_out    - mirrors dat_out_s; alternates between U_buffer and V_buffer heads.
//   calc_done  - mirrors calc_done_s.
module BF
#(parameter inv = 256'h8fac2de6599e1a9fa53ddf925918fe6fff2d6e9209dacc5e13f75b68fe75c041,
M = 256'h73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001,
Mp = 256'h3d443ab0d7bf2839181b2c170004ec0653ba5bfffffe5bfdfffffffeffffffff
)
(input clk, input nrst, input valid_dat, input valid_tw, input[255:0] dat_in, output valid_out, output [255:0] dat_out, output calc_done);
// Counter width: $clog2 of the `width macro (2 bits when `width is 4).
localparam wd = $clog2(`width);
// Control-unit state encodings. state_halt is declared but not referenced
// anywhere else in this module.
localparam state_rst = 3'b000,
state_ld = 3'b001,
state_sub = 3'b010,
state_mont = 3'b011,
state_out = 3'b100,
state_done = 3'b101,
state_halt = 3'b111;
// Indicators
// pkt_end is derived combinationally from the state and the load counters.
reg pkt_end;
// NOTE(review): pkt_start is read below (sensitivity lists, counter clear,
// state_rst exit) but no assignment to it is visible in this module, so it
// stays X in simulation unless it is driven elsewhere - confirm.
// valid_delay is valid_dat registered once on clk; it is not read anywhere
// else in this module.
reg pkt_start, valid_delay;
// For loop index
integer i;
// Buffers: `width-deep shift registers of 256-bit words. New words enter at
// index 0 and the oldest word sits at index `width-1.
reg [255:0] A_buffer[`width-1:0], B_buffer[`width-1:0], TW_buffer[`width-1:0], U_buffer[`width-1:0], V_buffer[`width-1:0];
// Counters
// cnt_tw / cnt_calc are wd bits wide; cnt_dat / cnt_out carry one extra bit.
reg [wd-1:0] cnt_tw, cnt_calc;
reg [wd:0] cnt_dat, cnt_out;
// Start signals
// NOTE(review): AddStart is written by the control unit but is not connected
// to any instance below (the adder's start pin is tied to SubStart).
reg AddStart, SubStart, MontStart;
// Component Inputs
wire [255:0] AddA, AddB, SubA, SubB, MontA, MontB;
// Component Outputs -- Busy/Done signals
// The *Busy outputs are wired to the instances but never read in this module.
wire AddBusy, AddDone, SubBusy, SubDone, MontBusy, MontDone;
// Component Outputs -- Calculation results
wire [255:0] AddC, SubC, MontC;
// Outputs (registered/combinational shadows of the output ports)
reg valid_out_s;
reg calc_done_s;
reg [255:0] dat_out_s;
// Control Unit state
reg [2:0] state;
// Test signal: declared but not referenced anywhere else in this module.
reg [255:0] dat_test;
// Instantiation of Calculation components
// Mont256, Add256 and Sub256 are defined outside this file; their start/busy/
// done handshake behaviour cannot be verified from here.
Mont256 #(.inv(inv), .M(M), .Mp(Mp)) mont (.clk(clk), .start(MontStart), .A(MontA), .B(MontB), .busy(MontBusy), .done(MontDone), .C(MontC));
// The adder and the subtractor are both started by SubStart.
Add256 #(.M(M)) add (.clk(clk), .start(SubStart), .A(AddA), .B(AddB), .busy(AddBusy), .done(AddDone), .C(AddC));
Sub256 #(.M(M)) sub (.clk(clk), .start(SubStart), .A(SubA), .B(SubB), .busy(SubBusy), .done(SubDone), .C(SubC));
// Indicators
// One-cycle delay of valid_dat: cleared asynchronously while nrst is low,
// otherwise samples valid_dat on each rising clock edge. If a testbench changes
// valid_dat with a blocking assignment at the same time as the clock edge, the
// new value may be the one sampled, making valid_delay appear to change
// together with valid_dat in the waveform.
always @(posedge clk or negedge nrst) begin
if (!nrst) begin
valid_delay <= 0;
end
else begin
valid_delay <= valid_dat;
end
end
// pkt_end: high while in state_ld once cnt_tw has reached `width/2 and cnt_dat
// has reached `width.
// NOTE(review): nonblocking assignments inside a combinational always @(*)
// block; blocking assignments are the usual style here.
always @(*) begin
if (state == state_ld && cnt_tw == `width/2 && cnt_dat == `width) begin
pkt_end <= 1;
end
else begin
pkt_end <= 0;
end
end
// Counters
// The block runs on every rising clock edge AND on any change of pkt_start
// (level-sensitive entry in the list). While pkt_start is true all four
// counters are cleared; otherwise each counter increments independently when
// its own condition holds at the moment the block is evaluated.
// NOTE(review): mixing an edge event with a plain signal in one sensitivity
// list is simulation-only behaviour and is unlikely to synthesize as written.
always @(posedge clk or pkt_start) begin
if (pkt_start) begin
cnt_dat <= 0;
cnt_tw <= 0;
cnt_calc <= 0;
cnt_out <= 0;
end
else begin
// Count accepted data words.
if (valid_dat) begin
cnt_dat <= cnt_dat + 1;
end
// Count accepted twiddle words.
if (valid_tw) begin
cnt_tw <= cnt_tw + 1;
end
// Count completed Montgomery multiplications.
if (MontDone) begin
cnt_calc <= cnt_calc + 1;
end
// Count evaluations spent in the output state.
if (state == state_out) begin
cnt_out <= cnt_out + 1;
end
end
end
// Input Buffers
// Evaluated whenever cnt_dat or MontDone changes (not on a clock edge).
// While valid_dat is high, dat_in is shifted into A_buffer when cnt_dat[0] is 0
// and into B_buffer otherwise, so consecutive words alternate between A and B.
// While MontDone is high, both buffers are shifted by one with zero fill; these
// assignments come later in the block and therefore win if both conditions hold.
// The index-0 assignment sits inside each for loop and is simply repeated on
// every iteration with the same value.
always @(cnt_dat or MontDone) begin // Note to readers: this may cause issues for on-board testing since we don't know the status of cnt_dat when it is not reset
// but it should be working for the simulation.
if (valid_dat) begin
if (cnt_dat[0] == 0) begin
for (i=1;i<`width;i=i+1) begin
A_buffer[i] <= A_buffer[i-1];
A_buffer[0] <= dat_in;
end
end
else begin
// This inner test repeats the complement of the condition above; it only
// differs from a plain else when cnt_dat[0] is X or Z.
if (cnt_dat[0] == 1) begin
for (i=1; i<`width; i=i+1) begin
B_buffer[i] <= B_buffer[i-1];
B_buffer[0] <= dat_in;
end
end
end
end
if (MontDone) begin
for (i=1;i<`width;i=i+1) begin
A_buffer[i] <= A_buffer[i-1];
A_buffer[0] <= '0;
B_buffer[i] <= B_buffer[i-1];
B_buffer[0] <= '0;
end
end
end
// Twiddle buffer: evaluated whenever cnt_tw or MontDone changes. Shifts dat_in
// in while valid_tw is high, and shifts a zero word in while MontDone is high
// (the MontDone assignments come last and take precedence).
always @(cnt_tw or MontDone) begin // Note to readers: this may cause issues for on-board testing since we don't know the status of cnt_tw when it is not reset
// but it should be working for the simulation.
if (valid_tw) begin
for (i=1; i<`width; i=i+1) begin
TW_buffer[i] <= TW_buffer[i-1];
TW_buffer[0] <= dat_in;
end
end
if (MontDone) begin
for (i=1;i<`width;i=i+1) begin
TW_buffer[i] <= TW_buffer[i-1];
TW_buffer[0] <= '0;
end
end
end
// Inputs for Adder, Subtractor and Multiplier
// The adder and subtractor both operate on the oldest A/B pair; the multiplier
// takes the subtractor result and the oldest twiddle word.
assign AddA = A_buffer[`width-1];
assign AddB = B_buffer[`width-1];
assign SubA = A_buffer[`width-1];
assign SubB = B_buffer[`width-1];
assign MontA = SubC;
assign MontB = TW_buffer[`width-1];
// Control Unit
// State machine, evaluated on rising clk, on any change of pkt_start, and on
// falling nrst. Each state is handled by its own independent if; because state
// is updated with nonblocking assignments, only the branch matching the current
// state acts in a given evaluation.
//   state_rst  -> state_ld   when pkt_start is 1
//   state_ld   -> state_sub  when pkt_end is 1 (asserts AddStart and SubStart)
//   state_sub  -> state_mont when SubDone is 1 (drops SubStart, asserts MontStart)
//   state_mont -> state_out  when MontDone is 1 and cnt_calc is all ones
//              -> state_sub  when MontDone is 1 and cnt_calc is not all ones
//              -> stays, holding MontStart high, otherwise
//   state_out  -> state_done when cnt_out is all ones
//   state_done -> state_done (terminal; only nrst leaves it)
always @(posedge clk or pkt_start or negedge nrst) begin
if (!nrst) begin
valid_out_s <= 0;
calc_done_s <= 0;
state <= state_rst;
AddStart <= 0;
SubStart <= 0;
MontStart <= 0;
end
else begin
if (state == state_rst) begin
if (pkt_start == 1) begin
state <= state_ld;
end
end
if (state == state_ld) begin
if (pkt_end == 1) begin
state <= state_sub;
// AddStart is set here and never cleared except by reset.
AddStart <= 1;
SubStart <= 1;
MontStart <= 0;
end
end
if (state == state_sub) begin
if (SubDone == 1) begin
state <= state_mont;
SubStart <= 0;
MontStart <= 1;
end
end
if (state == state_mont) begin
// '1 is the all-ones fill literal, i.e. the maximum value of cnt_calc.
if (MontDone == 1 && cnt_calc == '1) begin
state <= state_out;
SubStart <= 0;
MontStart <= 0;
valid_out_s <= 1;
end
else begin
if (MontDone == 1 && cnt_calc != '1) begin
// More operand pairs remain: start the next subtraction.
state <= state_sub;
SubStart <= 1;
MontStart <= 0;
end
else begin
state <= state_mont;
SubStart <= 0;
MontStart <= 1;
end
end
end
if (state == state_out) begin
// calc_done_s is raised when cnt_out equals 2*`width-2 and is cleared
// again below when cnt_out reaches all ones.
if (cnt_out == 2*`width-2) begin
calc_done_s <= 1;
end
if (cnt_out == '1) begin
state <= state_done;
calc_done_s <= 0;
valid_out_s <= 0;
end
else begin
state <= state_out;
end
end
if (state == state_done) begin
state <= state_done;
end
end
end
// Output Buffers
// Capture: whenever AddDone changes and is high, AddC is shifted into U_buffer;
// whenever MontDone changes and is high, MontC is shifted into V_buffer.
// NOTE(review): U_buffer and V_buffer are also written by the clocked block
// further below, i.e. each array is assigned from two always blocks. That is
// legal in simulation but is normally rejected or mis-inferred by synthesis
// tools - confirm the target flow.
always @(AddDone) begin
if (AddDone) begin
for (i=1;i<`width;i=i+1) begin
U_buffer[i] <= U_buffer[i-1];
U_buffer[0] <= AddC;
end
end
end
always @(MontDone) begin
if (MontDone) begin
for (i=1;i<`width;i=i+1) begin
V_buffer[i] <= V_buffer[i-1];
V_buffer[0] <= MontC;
end
end
end
// Drain: while valid_out_s is high, each rising clock edge shifts one buffer by
// one position with zero fill - U_buffer when cnt_out[0] is 0, V_buffer when it
// is 1 - so the next word reaches index `width-1.
always @(posedge clk) begin
if (valid_out_s == 1 && cnt_out[0] == 0) begin
for (i=1;i<`width;i=i+1) begin
U_buffer[i] <= U_buffer[i-1];
U_buffer[0] <= '0;
end
end
if (valid_out_s == 1 && cnt_out[0] == 1) begin
for (i=1;i<`width;i=i+1) begin
V_buffer[i] <= V_buffer[i-1];
V_buffer[0] <= '0;
end
end
end
// Outputs
// Output multiplexer: while valid_out_s is high, presents the oldest U word
// when cnt_out[0] is 0 and the oldest V word when it is 1; otherwise zero.
// NOTE(review): nonblocking assignments inside a combinational always @(*)
// block; blocking assignments are the usual style here.
always @(*) begin
if (valid_out_s == 1 && cnt_out[0] == 0) begin
dat_out_s <= U_buffer[`width-1];
end
else begin
if (valid_out_s == 1 && cnt_out[0] == 1) begin
dat_out_s <= V_buffer[`width-1];
end
else begin
dat_out_s <= '0;
end
end
end
// Drive the module outputs from their internal shadows.
assign dat_out = dat_out_s;
assign valid_out = valid_out_s;
assign calc_done = calc_done_s;
endmodule
信号确实被延迟了。但是,当您的 valid_dat 恰好在时钟沿上发生变化时,它会在同一个时钟沿被采样,因此 valid_delay 看起来与 valid_dat 完全相同。
您可以修改驱动 valid_dat 的 tb(测试平台)代码,使其在时钟沿之后立即驱动该引脚,而不是在时钟沿之前或恰好在时钟沿上驱动。
在附件中,您可以看到这两种情况。第一次,我在一段延时之后将 valid_dat 拉高,这一时刻恰好落在时钟上升沿上(使用的时钟周期为 10):
initial
begin
nrst = 0;
valid_dat = 0;
#10;
nrst = 1;
#15;
valid_dat = 1;
#10;
valid_dat = 0;
第二次,我在时钟沿之后才驱动它:
@(posedge clk);
valid_dat = 1;