Я делаю множитель со знаком, для этого я создал половину сумматора, полного сумматора, сумматора пульсации и затем, наконец, множитель.Код показан ниже.Как я могу сделать это быстрее, чтобы добиться лучших сроков.Моя последняя задача - заставить пихтовый фильтр работать на частоте 100 МГц.Этот фильтр использует операцию многократного умножения (используя мой множитель).Итак, можете ли вы помочь мне улучшить мой дизайн с помощью некоторой оптимизирующей техники, такой как конвейерная обработка или параллелизм или другие? ??
//half adder
entity half_adder is
port (
a, b : in std_logic ;
sum, cout : out std_logic );
end half_adder;
architecture version1 of half_adder is
begin
sum <= a xor b;
cout <= a and b;
end version1;
- полный сумматор
entity full_adder is
port(
a : in std_logic;
b : in std_logic;
cin : in std_logic;
sum : out std_logic;
cout : out std_logic
);
end full_adder;
architecture structural of full_adder is
component half_adder
port (
a, b : in std_logic ;
sum, cout : out std_logic );
end component;
signal s1, c1, c2 : std_logic ;
begin -- structural
half_adder1 : half_adder
port map (
a => a, b => b,
sum => s1, cout => c1);
half_adder2 : half_adder
port map (
a =>cin, b => s1,
sum => sum, cout => c2);
cout <= c1 or c2;
end structural ;
- суммирующий переносчик 40 бит
entity rca40bit is
generic (
width: integer := 40
);
port(
a : in std_logic_vector(width-1 downto 0);
b : in std_logic_vector(width-1 downto 0);
cin : in std_logic;
sum : out std_logic_vector(width-1 downto 0);
cout : out std_logic
);
end rca40bit;
architecture Behavioral of rca40bit is
component full_adder
port(
a : in std_logic;
b : in std_logic;
cin : in std_logic;
sum : out std_logic;
cout : out std_logic
);
end component;
signal s: std_logic_vector(width downto 0);
begin
s(0)<=cin;
FA:for i in 0 to width-1 generate
FA_i:full_adder
port map
(
a=>a(i),b=>b(i),cin=>s(i),sum=>sum(i),cout=>s(i+1)
);
end generate;
cout<=s(width);
end Behavioral;
- Множитель, я сначала рассчитал частичные продукты, а затем добавил их с помощью rca adder.
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use ieee.numeric_std.all;
entity signed_mult is
generic (
m : integer := 24; -- Multiplicand
n : integer := 16 ; -- multiplier;
bit1 :integer := 39 -- size of the adder
);
port(
a : in std_logic_vector(m - 1 downto 0) ;
b : in std_logic_vector(n - 1 downto 0) ;
prod : out std_logic_vector(bit1 downto 0)
);
end entity signed_mult;
architecture Behavioral of signed_mult is
component rca40bit
port(
a : in std_logic_vector(bit1 downto 0);
b : in std_logic_vector(bit1 downto 0);
cin : in std_logic;
sum : out std_logic_vector(bit1 downto 0);
cout : out std_logic
);
end component;
--partial products signals
signal p0 : std_logic_vector(bit1 downto 0):=(others =>'0') ; --1st
`enter code here`partial product size m-1 bits (24 bit)
signal p1: std_logic_vector (bit1 downto 0):=(others =>'0'); --2nd
partial product size m bits (25 bit)
signal p2 : std_logic_vector (bit1 downto 0):=(others =>'0'); --3rd partial
product size m+1 bits (26 bit)
signal p3 : std_logic_vector (bit1 downto 0):=(others =>'0'); --4th partial
product size m+2 bits (27 bit)
signal p4 : std_logic_vector (bit1 downto 0):=(others =>'0'); --5th
partial product size m+3 bits (28 bit)
signal p5 : std_logic_vector (bit1 downto 0):=(others =>'0'); --6th
partial product size m+4 bits (29 bit)
signal p6 : std_logic_vector (bit1 downto 0):=(others =>'0'); --7th
partial product size m+5 bits (30 bit)
signal p7 : std_logic_vector (bit1 downto 0):=(others =>'0'); --8th
partial product size m+6 bits (31 bit)
signal p8 : std_logic_vector (bit1 downto 0):=(others =>'0'); --9th
partial product size m+7 bits (32 bit)
signal p9 : std_logic_vector (bit1 downto 0):=(others =>'0'); --10th
partial product size m+8 bits (33 bit)
signal p10 : std_logic_vector (bit1 downto 0):=(others =>'0'); --11th
partial product size m+9 bits (34 bit)
signal p11 : std_logic_vector (bit1 downto 0):=(others =>'0'); --12th
partial product size m+10 bits (35 bit)
signal p12 : std_logic_vector (bit1 downto 0):=(others =>'0'); --13th
partial product size m+11 bits (36 bit)
signal p13 : std_logic_vector (bit1 downto 0):=(others =>'0'); --14th
partial product size m+12 bits (37 bit)
signal p14 : std_logic_vector (bit1 downto 0):=(others =>'0'); --15th
partial product size m+13 bits (38 bit)
signal p15 : std_logic_vector (bit1 downto 0); --16th partial product
size m+14 bits (39 bit)
signal p16 : std_logic_vector (bit1 downto 0);
signal p17 : std_logic_vector (bit1 downto 0):=(others =>'0');
signal extc0 :std_logic_vector (bit1 downto 0) ;
--constants and carry
signal zeros :std_logic_vector (bit1 downto 0) :=(others =>'0');
signal c : std_logic := '0' ;
signal c0 : std_logic :='0' ;
signal c12 : std_logic :='0' ;
signal c23 : std_logic :='0' ;
signal c34 ,c45,c56,c67,c78,c89,c910,c1011,c1112,c1213,c1314,c1415,c1516 ,c1617: std_logic := '0';
signal c15 : std_logic_vector (bit1 downto 0):=(others =>'0');
--sum only
signal sum12 : std_logic_vector (bit1 downto 0);
signal sum23 : std_logic_vector (bit1 downto 0);
signal sum34, sum45,sum56,sum67,sum78,
sum89,sum910,sum1011,sum1112,sum1213,sum1314,sum1415,sum1516,sum1617,ps0:
std_logic_vector (bit1 downto 0);
--signal f_sum: std_logic_vector (bit1 downto 0); --sum of p0 and P1
--signal tpp1 : std_logic_vector (bit1 downto 0);
begin
pp0: process is
begin
fst_partial_product :for i in 0 to m-1 loop -- 0 to 23
p0(i)<= (a(i) and b(0)) ;
if i=m-1 then
p0(i)<=not ( a(i) and b(0)); --msb=m=24
end if ;
end loop fst_partial_product ;
n2d_partial_product : for i in 0 to m-1 loop -- 24
p1(0)<='0';
p1(i+1)<=a(i) and b(1) ;
if i=m-1 then
p1(i+1)<=not (a(i) and b(1)); --msb=m=24
end if ;
end loop n2d_partial_product;
r3d_partial_product : for i in 0 to m-1 loop
p2(0)<='0';
p2(1)<='0';
p2(i+2)<=a(i) and b(2) ;
if i=m-1 then
p2(i+2)<=not( a(i) and b(2)); --msb=25
end if ;
end loop r3d_partial_product;
r4th_partial_product : for i in 0 to m-1 loop
p3(0)<='0';
p3(1)<='0';
p3(2)<='0';
p3(i+3)<=a(i) and b(3);
if i=m-1 then
p3(i+3)<=not( a(i) and b(3)); --msb=26
end if ;
end loop r4th_partial_product;
r5d_partial_product : for i in 0 to m-1 loop
p4(0)<='0';
p4(1)<='0';
p4(2)<='0';
p4(3)<='0';
p4(i+4)<=a(i) and b(4) ;
if i=m-1 then
p4(i+4)<=not( a(i) and b(4)); --msb=27
end if ;
end loop r5d_partial_product;
t6hpartial_product : for i in 0 to m-1 loop
p5(0)<='0';
p5(1)<='0';
p5(2)<='0';
p5(3)<='0';
p5(4)<='0';
p5(i+5)<=a(i) and b(5) ;
if i=m-1 then
p5(i+5)<=not( a(i) and b(5)); --msb28
end if;
end loop t6hpartial_product ;
t7h_partial_product : for i in 0 to m-1 loop
p6(0)<='0';
p6(1)<='0';
p6(2)<='0';
p6(3)<='0';
p6(4)<='0';
p6(5)<='0';
p6(i+6)<=a(i) and b(6) ;
if i=m-1 then
p6(i+6)<=not (a(i) and b(6)); --msb=29
end if;
end loop t7h_partial_product;
t8th_partial_product : for i in 0 to m-1 loop
p7(0)<='0';
p7(1)<='0';
p7(2)<='0';
p7(3)<='0';
p7(4)<='0';
p7(5)<='0';
p7(6)<='0';
p7(i+7)<=a(i) and b(7) ;
if i=m-1 then
p7(i+7)<=not( a(i) and b(7)); --msb=30
end if;
end loop t8th_partial_product ;
t9th_partial_product : for i in 0 to m-1 loop
p8(0)<='0';
p8(1)<='0';
p8(2)<='0';
p8(3)<='0';
p8(4)<='0';
p8(5)<='0';
p8(6)<='0';
p8(7)<='0';
p8(i+8)<=a(i) and b(8);
if i=m-1 then
p8(i+8)<=not( a(i) and b(8)); --msb=31
end if;
end loop t9th_partial_product;
t10th_partial_product : for i in 0 to m-1 loop
p9(0)<='0';
p9(1)<='0';
p9(2)<='0';
p9(3)<='0';
p9(4)<='0';
p9(5)<='0';
p9(6)<='0';
p9(7)<='0';
p9(8)<='0';
p9(i+9)<=a(i) and b(9) ;
if i=m-1 then
p9(i+9)<=not( a(i) and b(9)); --msb=32
end if;
end loop t10th_partial_product ;
t11th_partial_product : for i in 0 to m-1 loop
p10(0)<='0';
p10(1)<='0';
p10(2)<='0';
p10(3)<='0';
p10(4)<='0';
p10(5)<='0';
p10(6)<='0';
p10(7)<='0';
p10(8)<='0';
p10(9)<='0';
p10(i+10)<=a(i) and b(10) ;
if i=m-1 then
p10(i+10)<=not( a(i) and b(10)); --msb=31
end if;
end loop t11th_partial_product;
t12th_partial_product : for i in 0 to m-1 loop
p11(0)<='0';
p11(1)<='0';
p11(2)<='0';
p11(3)<='0';
p11(4)<='0';
p11(5)<='0';
p11(6)<='0';
p11(7)<='0';
p11(8)<='0';
p11(9)<='0';
p11(10)<='0';
p11(i+11)<=a(i) and b(11) ;
if i=m-1 then
p11(i+11)<=not( a(i) and b(11)); --msb=32
end if;
end loop t12th_partial_product;
t13th_partial_product : for i in 0 to m-1 loop
p12(0)<='0';
p12(1)<='0';
p12(2)<='0';
p12(3)<='0';
p12(4)<='0';
p12(5)<='0';
p12(6)<='0';
p12(7)<='0';
p12(8)<='0';
p12(9)<='0';
p12(10)<='0';
p12(11)<='0';
p12(i+12)<=a(i) and b(12) ;
if i=m-1 then
p12(i+12)<=not( a(i) and b(12)); --msb=32
end if;
end loop t13th_partial_product ;
t14th_partial_product : for i in 0 to m-1 loop
p13(0)<='0';
p13(1)<='0';
p13(2)<='0';
p13(3)<='0';
p13(4)<='0';
p13(5)<='0';
p13(6)<='0';
p13(7)<='0';
p13(8)<='0';
p13(9)<='0';
p13(10)<='0';
p13(11)<='0';
p13(12)<='0';
p13(i+13)<=a(i) and b(13) ;
if i=m-1 then
p13(i+13)<=not( a(i) and b(13)); --msb=32
end if;
end loop t14th_partial_product ;
t15th_partial_product : for i in 0 to m-1 loop
p14(0)<='0';
p14(1)<='0';
p14(2)<='0';
p14(3)<='0';
p14(4)<='0';
p14(5)<='0';
p14(6)<='0';
p14(7)<='0';
p14(8)<='0';
p14(9)<='0';
p14(10)<='0';
p14(11)<='0';
p14(12)<='0';
p14(13)<='0';
p14(14)<='0';
p14(i+14)<=a(i) and b(14) ;
if i=m-1 then
p14(i+14)<=not( a(i) and b(14)); --msb=32
end if;
end loop t15th_partial_product;
t16th_partial_product : for i in 0 to m-1 loop
p15(0)<='0';
p15(1)<='0';
p15(2)<='0';
p15(3)<='0';
p15(4)<='0';
p15(5)<='0';
p15(6)<='0';
p15(7)<='0';
p15(8)<='0';
p15(9)<='0';
p15(10)<='0';
p15(11)<='0';
p15(12)<='0';
p15(13)<='0';
p15(14)<='0';
p15(i+15)<=not (a(i) and b(15)) ;
if i=m-1 then
p15(i+15)<= (a(i) and b(15));
end if;
end loop t16th_partial_product ;
wait on a,b;
end process pp0;
p16(39)<='1';
p16(38 downto 0)<=p15(38 downto 0);
--sum
extc0(39 downto 0 )<=zeros(39 downto 24) & '1' & zeros(22 downto 0 ); -- just store 1 on msb of 1st pp
c15(39 downto 0 )<=zeros(39 downto 16) & '1' & zeros(14 downto 0 ) ; -- just store 1 on msb of 1st pp
--sum temp p0
tmp1st_pp:rca40bit
port map(a=>extc0 , b=>p0, cin=>c, sum=>ps0 ,cout=>c0); ---carry + p0
--sum temp p0
--ttmp1st_pp:rca40bit
--port map(a=>extc0 , b=>p0, cin=>c, sum=>ps1 ,cout=>c0); ---carry + p0
--sum 1st and 2nd row of pp
s1tnd_pp:rca40bit
port map(a=>ps0 , b=>p1, cin=>c, sum=>sum12 ,cout=>c12);
--adding sum of s12 and 3rd row of pp
t3rd_pp:rca40bit
port map(a=>sum12, b=>p2, cin=>c, sum=>sum23 ,cout=>c23);
--adding sum of s23 and 4rt row of pp
t4rh_pp:rca40bit
port map(a=>sum23, b=>p3, cin=>c, sum=>sum34 ,cout=>c34);
t5rh_pp:rca40bit
port map(a=>sum34, b=>p4, cin=>c, sum=>sum45 ,cout=>c45);
t6rh_pp:rca40bit
port map(a=>sum45, b=>p5, cin=>c, sum=>sum56 ,cout=>c56);
t7th_pp:rca40bit
port map(a=>sum56, b=>p6, cin=>c, sum=>sum67 ,cout=>c67);
t8th_pp:rca40bit
port map(a=>sum67, b=>p7, cin=>c, sum=>sum78 ,cout=>c78);
t9th_pp:rca40bit
port map(a=>sum78, b=>p8, cin=>c, sum=>sum89 ,cout=>c89);
t10th_pp:rca40bit
port map(a=>sum89, b=>p9, cin=>c, sum=>sum910 ,cout=>c910);
t11th_pp:rca40bit
port map(a=>sum910, b=>p10, cin=>c, sum=>sum1011 ,cout=>c1011);
t12th_pp:rca40bit
port map(a=>sum1011, b=>p11, cin=>c, sum=>sum1112 ,cout=>c1112);
t13th_pp:rca40bit
port map(a=>sum1112, b=>p12, cin=>c, sum=>sum1213 ,cout=>c1213);
t14th_pp:rca40bit
port map(a=>sum1213, b=>p13, cin=>c, sum=>sum1314 ,cout=>c1314);
t15th_pp:rca40bit
port map(a=>sum1314, b=>p14, cin=>c, sum=>sum1415,cout=>c1415);
carryaddwith15th_pp:rca40bit
port map(a=>sum1415, b=>p16, cin=>c, sum=>sum1516,cout=>c1617);
t16th_pp:rca40bit
port map(a=>c15, b=>sum1516, cin=>c, sum=>sum1617,cout=>c1516);
prod <=sum1617;
end Behavioral;
Конструкция фильтра
library IEEE;
USE IEEE.STD_LOGIC_1164.ALL;
USE IEEE.NUMERIC_STD.ALL;
entity your_filter is
Generic (
constant PIPELINE_DEPTH : positive := 2;
constant DATA_WIDTH : positive := 24;
constant FIR_ORDER : positive := 34;
constant COEFF_WIDTH : positive := 16
);
Port (
clk : in STD_LOGIC;
rst : in STD_LOGIC;
data_in : in STD_LOGIC_VECTOR (DATA_WIDTH - 1 downto 0);
data_out : out STD_LOGIC_VECTOR (DATA_WIDTH - 1 downto 0)
);
end your_filter;
architecture Behavioral of your_filter is
component ripple_ca is
port(
a : in std_logic_vector(DATA_WIDTH+COEFF_WIDTH-1 downto 0);
b : in std_logic_vector(DATA_WIDTH+COEFF_WIDTH-1 downto 0);
cin : in std_logic;
sum : out std_logic_vector(DATA_WIDTH+COEFF_WIDTH-1 downto 0);
cout : out std_logic
);
end component;
component signed_mult is
port(
a : in std_logic_vector(DATA_WIDTH - 1 downto 0) ;
b : in std_logic_vector(COEFF_WIDTH - 1 downto 0) ;
prod : out std_logic_vector(DATA_WIDTH+COEFF_WIDTH-1 downto 0)
);
end component;
type REG_TYPE is array (0 to FIR_ORDER-1) of signed (DATA_WIDTH+COEFF_WIDTH-1 downto 0);
type COEFF_ARRAY_TYPE is array (0 to FIR_ORDER) of signed(COEFF_WIDTH-1 downto 0);
type REG_TYPE2 is array (0 to FIR_ORDER-1) of std_logic_vector (DATA_WIDTH-1 downto 0);
type sum is array (0 to FIR_ORDER) of std_logic_vector(DATA_WIDTH+COEFF_WIDTH-1 downto 0);
signal sum1 : sum;
signal c1 : std_logic;
signal c : std_logic :='0';
type COEFF_ARRAY1 is array (0 to FIR_ORDER) of std_logic_vector(COEFF_WIDTH-1 downto 0);
signal coeff : COEFF_ARRAY1;
signal reg : REG_TYPE2;
signal next_reg : REG_TYPE2;
signal reg1 : REG_TYPE2;
type REG_TYPE1 is array (0 to FIR_ORDER-1) of std_logic_vector (DATA_WIDTH+COEFF_WIDTH-1 downto 0);
signal next_reg1 : REG_TYPE1;
signal data_out_temp : std_logic_vector(DATA_WIDTH+COEFF_WIDTH-1 downto 0);
signal tempmult : sum;
signal data_in_reg : STD_LOGIC_VECTOR (DATA_WIDTH - 1 downto 0);
signal next_data_out : STD_LOGIC_VECTOR (DATA_WIDTH - 1 downto 0);
constant coeff_array : COEFF_ARRAY_TYPE := (
"0000000101011111",
"0000000001100001",
"0000000000110011",
"1111111111011010",
"1111111101011111",
"1111111011011001",
"1111111001100101",
"1111111000100101",
"1111111000111011",
"1111111010111111",
"1111111110111001",
"0000000100100000",
"0000001011011000",
"0000010010110011",
"0000011001111010",
"0000011111110010",
"0000100011101011",
"0000100101000010",
"0000100011101011",
"0000011111110010",
"0000011001111010",
"0000010010110011",
"0000001011011000",
"0000000100100000",
"1111111110111001",
"1111111010111111",
"1111111000111011",
"1111111000100101",
"1111111001100101",
"1111111011011001",
"1111111101011111",
"1111111111011010",
"0000000000110011",
"0000000001100001",
"0000000101011111"
);
begin
--synchronous process
sync_proc : process (clk)
begin
if rising_edge(clk) then
if rst = '0' then
reg <= (others=>(others=>'0'));
data_in_reg <= (others=>'0');
data_out <= (others=>'0');
else
reg <= next_reg;
data_in_reg <= data_in;
data_out <= next_data_out;
end if;
end if;
end process;
--asynchronous process
async_proc : process (reg, data_in_reg)
variable sum : signed(DATA_WIDTH+COEFF_WIDTH-1 downto 0) := (others => '0');
begin
for i in 0 to FIR_ORDER -2 loop
next_reg(i+1) <= reg(i);
end loop;
next_reg(0) <= (data_in_reg);
-- data_out_temp <= std_logic_vector(sum + signed(data_in_reg)*coeff_array(0));--std_logic_vector(sum);--(sum);--
for i in 0 to FIR_ORDER-1 loop
coeff(i)<= std_logic_vector (coeff_array(i));
reg1(i)<= std_logic_vector (reg(i));
--next_reg1(i)<= std_logic_vector (next_reg(i));
end loop;
coeff(FIR_ORDER)<= std_logic_vector (coeff_array(FIR_ORDER));
end process;
--next_reg(i) <= reg(i+1)+(signed(data_in_reg))*coeff_array(i+1);
nextreg : for i in 0 to FIR_ORDER -1 generate
accumumlator : signed_mult
port map ( a => reg1(i), b => coeff(i+1), prod => tempmult(i) );
end generate nextreg;
sum1(0) <= (others => '0');
addition : for i in 0 to FIR_ORDER -1 generate
addition01 : ripple_ca
port map ( a => sum1(i),b => tempmult(i) , cin=>c, sum => sum1(i+1),cout => c1 );
end generate addition;
-- data_out_temp <= std_logic_vector(reg(0) + signed(data_in_reg)*coeff_array(0));--std_logic_vector(sum);--(sum);--
dataout : signed_mult
port map (
a => data_in_reg,
b => coeff(0),
prod => tempmult(FIR_ORDER)
);
addition_2 : ripple_ca
port map (a => sum1(FIR_ORDER),b => tempmult(FIR_ORDER) ,cin=>c , sum => data_out_temp ,cout => c1);
next_data_out <= data_out_temp(DATA_WIDTH+COEFF_WIDTH-1 downto COEFF_WIDTH);
end Behavioral;