Эффективный знаковый умножитель с хорошими временными характеристиками - PullRequest
0 голосов
/ 24 ноября 2018

Я разрабатываю знаковый умножитель. Для этого я создал полусумматор, полный сумматор, сумматор с последовательным переносом (ripple-carry) и, наконец, сам умножитель. Код показан ниже. Как можно ускорить схему, чтобы улучшить временные характеристики? Моя конечная цель — заставить КИХ-фильтр (FIR) работать на частоте 100 МГц. Этот фильтр многократно выполняет операцию умножения (используя мой умножитель). Можете ли вы помочь мне улучшить проект с помощью каких-либо методов оптимизации, таких как конвейеризация, распараллеливание или других?

   -- half adder
    library ieee;
    use ieee.std_logic_1164.all;

    -- One-bit half adder: adds two bits without a carry input.
    --   a, b : operand bits
    --   sum  : a xor b (low bit of the result)
    --   cout : a and b (carry out)
    entity half_adder is
    port (
        a, b      : in  std_logic;
        sum, cout : out std_logic
    );
    end half_adder;

    architecture version1 of half_adder is
    begin
        -- Purely combinational: no clock, no state.
        cout <= a and b;
        sum  <= a xor b;
    end version1;

- полный сумматор

    library ieee;
    use ieee.std_logic_1164.all;

    -- One-bit full adder built structurally from two half adders.
    --   a, b : operand bits
    --   cin  : carry in
    --   sum  : a xor b xor cin
    --   cout : carry out
    entity full_adder is
    port (
        a    : in  std_logic;
        b    : in  std_logic;
        cin  : in  std_logic;
        sum  : out std_logic;
        cout : out std_logic
    );
    end full_adder;


    architecture structural of full_adder is

        component half_adder
        port (
            a, b      : in  std_logic;
            sum, cout : out std_logic
        );
        end component;

        signal partial_sum  : std_logic;  -- a xor b, from the first half adder
        signal carry_first  : std_logic;  -- carry generated by a and b
        signal carry_second : std_logic;  -- carry generated by cin and partial_sum

    begin -- structural

        -- Stage 1: add the two operand bits.
        half_adder1 : half_adder
        port map (
            a    => a,
            b    => b,
            sum  => partial_sum,
            cout => carry_first
        );

        -- Stage 2: add the incoming carry to the partial sum.
        half_adder2 : half_adder
        port map (
            a    => cin,
            b    => partial_sum,
            sum  => sum,
            cout => carry_second
        );

        -- The two stage carries are OR-ed to form the carry out.
        cout <= carry_first or carry_second;

    end structural ;

- 40-битный сумматор с последовательным переносом (ripple-carry adder)

    library ieee;
    use ieee.std_logic_1164.all;

    -- Ripple-carry adder: `width` full adders chained through their carries.
    -- The entity name says 40 bit, but the actual width is the generic `width`
    -- (default 40).
    --   a, b : operands, width bits each
    --   cin  : carry into bit 0
    --   sum  : width-bit sum
    --   cout : carry out of the most significant full adder
    entity rca40bit is
    generic (
        width : integer := 40
    );
    port (
        a    : in  std_logic_vector(width-1 downto 0);
        b    : in  std_logic_vector(width-1 downto 0);
        cin  : in  std_logic;
        sum  : out std_logic_vector(width-1 downto 0);
        cout : out std_logic
    );
    end rca40bit;

    architecture Behavioral of rca40bit is

        component full_adder
        port (
            a    : in  std_logic;
            b    : in  std_logic;
            cin  : in  std_logic;
            sum  : out std_logic;
            cout : out std_logic
        );
        end component;

        -- carry(i) is the carry into bit i; carry(width) is the final carry out.
        signal carry : std_logic_vector(width downto 0);

    begin

        carry(0) <= cin;

        -- One full adder per bit; each stage waits for the carry of the previous
        -- one, so the combinational delay grows linearly with `width`.
        FA : for i in 0 to width-1 generate
            FA_i : full_adder
            port map (
                a    => a(i),
                b    => b(i),
                cin  => carry(i),
                sum  => sum(i),
                cout => carry(i+1)
            );
        end generate;

        cout <= carry(width);

    end Behavioral;

- Умножитель: сначала я вычисляю частичные произведения, а затем складываю их с помощью сумматоров с последовательным переносом (rca).

    library IEEE;
    use IEEE.STD_LOGIC_1164.ALL;
    use ieee.numeric_std.all;


    -- Combinational two's-complement multiplier (Baugh-Wooley array).
    --
    -- Each bit of b selects one shifted row of partial products a(i) and b(j).
    -- The cross terms that involve exactly one sign bit are inverted and a fixed
    -- correction constant is added, so that only plain (unsigned) adders are
    -- needed to obtain the signed product.
    --
    --   m    : width of a (multiplicand)
    --   n    : width of b (multiplier)
    --   bit1 : MSB index of prod and of the internal adders; the full product
    --          needs bit1 = m + n - 1 (39 for the defaults)
    --   a, b : signed operands
    --   prod : a * b, two's complement, modulo 2**(bit1 + 1)
    entity signed_mult is
    generic (
        m    : integer := 24;   -- Multiplicand width
        n    : integer := 16;   -- Multiplier width
        bit1 : integer := 39    -- MSB index of the adders / product
    );
    port (
        a    : in  std_logic_vector(m - 1 downto 0);
        b    : in  std_logic_vector(n - 1 downto 0);
        prod : out std_logic_vector(bit1 downto 0)
    );
    end entity signed_mult;

    architecture Behavioral of signed_mult is

        -- Ripple-carry adder; its `width` generic is set to bit1 + 1 below so
        -- the adder always matches the product width.
        component rca40bit
        generic (
            width : integer := 40
        );
        port (
            a    : in  std_logic_vector(width - 1 downto 0);
            b    : in  std_logic_vector(width - 1 downto 0);
            cin  : in  std_logic;
            sum  : out std_logic_vector(width - 1 downto 0);
            cout : out std_logic
        );
        end component;

        subtype word_t is std_logic_vector(bit1 downto 0);
        type word_array_t is array (natural range <>) of word_t;

        -- Baugh-Wooley correction constant:
        --   2**(m-1) + 2**(n-1) + 2**(m+n-1)   (modulo 2**(bit1+1))
        -- Computed arithmetically so that m = n (where the first two terms
        -- coincide and carry into bit m) is handled as well.
        function bw_correction return std_logic_vector is
            constant ONE : unsigned(bit1 downto 0) := to_unsigned(1, bit1 + 1);
            variable k   : unsigned(bit1 downto 0);
        begin
            k := shift_left(ONE, m - 1)
               + shift_left(ONE, n - 1)
               + shift_left(ONE, m + n - 1);
            return std_logic_vector(k);
        end function bw_correction;

        constant BW_CONST      : word_t    := bw_correction;
        constant CARRY_IN_ZERO : std_logic := '0';

        -- pp(j)  : partial-product row for b(j), already shifted left by j bits
        -- acc(j) : correction constant plus rows 0 .. j-1
        signal pp  : word_array_t(0 to n - 1);
        signal acc : word_array_t(0 to n);

    begin

        assert bit1 >= m + n - 1
            report "signed_mult: bit1 < m + n - 1, the product is truncated"
            severity warning;

        -- Build all partial-product rows.
        partial_products : process (a, b)
            variable row : word_t;
        begin
            for j in 0 to n - 1 loop
                row := (others => '0');
                for i in 0 to m - 1 loop
                    if i + j <= bit1 then
                        -- Terms with exactly one sign bit carry negative weight
                        -- and are therefore stored inverted.
                        if (i = m - 1) xor (j = n - 1) then
                            row(i + j) := not (a(i) and b(j));
                        else
                            row(i + j) := a(i) and b(j);
                        end if;
                    end if;
                end loop;
                pp(j) <= row;
            end loop;
        end process partial_products;

        -- Accumulate: start from the correction constant, add one row per adder.
        acc(0) <= BW_CONST;

        -- NOTE(review): this is still a linear chain of n ripple-carry adders,
        -- i.e. a long combinational path. Registering between rows would need a
        -- clock port (an interface change); alternatively
        -- `signed(a) * signed(b)` from numeric_std lets the synthesis tool pick
        -- its own multiplier implementation.
        add_rows : for j in 0 to n - 1 generate
            add_row_j : rca40bit
            generic map (
                width => bit1 + 1
            )
            port map (
                a    => acc(j),
                b    => pp(j),
                cin  => CARRY_IN_ZERO,
                sum  => acc(j + 1),
                cout => open          -- result is taken modulo 2**(bit1+1)
            );
        end generate add_rows;

        prod <= acc(n);

    end Behavioral;

Конструкция фильтра

library IEEE;
USE IEEE.STD_LOGIC_1164.ALL;
USE IEEE.NUMERIC_STD.ALL;

-- Direct-form FIR filter with FIR_ORDER + 1 taps.
--
-- The input sample is registered, shifted through a delay line of FIR_ORDER
-- registers, every tap is multiplied by its coefficient with `signed_mult`,
-- and the products are summed by a chain of `ripple_ca` adders. The upper
-- DATA_WIDTH bits of the sum are registered onto data_out.
--
--   clk      : clock, all registers update on the rising edge
--   rst      : synchronous reset, active LOW (registers clear while rst = '0')
--   data_in  : input sample
--   data_out : filtered sample (sum bits DATA_WIDTH+COEFF_WIDTH-1 downto COEFF_WIDTH)
--
-- The coefficient table below has exactly 35 entries, so FIR_ORDER must stay 34
-- and COEFF_WIDTH must stay 16 unless the table is changed as well.
entity your_filter is
    Generic (
        constant PIPELINE_DEPTH   : positive := 2;   -- not used by this architecture
        constant DATA_WIDTH       : positive := 24;
        constant FIR_ORDER        : positive := 34;
        constant COEFF_WIDTH      : positive := 16
    );
    Port (
        clk           : in  STD_LOGIC;
        rst           : in  STD_LOGIC;
        data_in       : in  STD_LOGIC_VECTOR (DATA_WIDTH - 1 downto 0);
        data_out      : out STD_LOGIC_VECTOR (DATA_WIDTH - 1 downto 0)
    );
end your_filter;

architecture Behavioral of your_filter is

    -- Width of one product and of the accumulator.
    constant PROD_WIDTH : positive := DATA_WIDTH + COEFF_WIDTH;

    -- NOTE(review): no entity named ripple_ca is visible in this file; it
    -- presumably has the same ports as rca40bit -- confirm it exists in the
    -- project library.
    component ripple_ca is
    port(
        a    : in  std_logic_vector(PROD_WIDTH-1 downto 0);
        b    : in  std_logic_vector(PROD_WIDTH-1 downto 0);
        cin  : in  std_logic;
        sum  : out std_logic_vector(PROD_WIDTH-1 downto 0);
        cout : out std_logic
    );
    end component;

    component signed_mult is
    port(
        a    : in  std_logic_vector(DATA_WIDTH - 1 downto 0);
        b    : in  std_logic_vector(COEFF_WIDTH - 1 downto 0);
        prod : out std_logic_vector(PROD_WIDTH-1 downto 0)
    );
    end component;

    type COEFF_ARRAY_TYPE is array (0 to FIR_ORDER) of signed(COEFF_WIDTH-1 downto 0);
    type COEFF_ARRAY1     is array (0 to FIR_ORDER) of std_logic_vector(COEFF_WIDTH-1 downto 0);
    type REG_TYPE2        is array (0 to FIR_ORDER-1) of std_logic_vector(DATA_WIDTH-1 downto 0);
    type PROD_ARRAY_TYPE  is array (0 to FIR_ORDER) of std_logic_vector(PROD_WIDTH-1 downto 0);

    -- All adders are fed a carry-in of zero.
    constant CARRY_IN_ZERO : std_logic := '0';

    signal coeff         : COEFF_ARRAY1;                               -- coefficients as std_logic_vector
    signal reg           : REG_TYPE2;                                  -- delay line (registered)
    signal next_reg      : REG_TYPE2;                                  -- delay line, next state
    signal tempmult      : PROD_ARRAY_TYPE;                            -- tap products
    signal sum1          : PROD_ARRAY_TYPE;                            -- running sums of the products
    signal data_out_temp : std_logic_vector(PROD_WIDTH-1 downto 0);    -- full-precision result
    signal data_in_reg   : STD_LOGIC_VECTOR (DATA_WIDTH - 1 downto 0); -- registered input
    signal next_data_out : STD_LOGIC_VECTOR (DATA_WIDTH - 1 downto 0); -- truncated result

    constant coeff_array : COEFF_ARRAY_TYPE := (
        "0000000101011111",
        "0000000001100001",
        "0000000000110011",
        "1111111111011010",
        "1111111101011111",
        "1111111011011001",
        "1111111001100101",
        "1111111000100101",
        "1111111000111011",
        "1111111010111111",
        "1111111110111001",
        "0000000100100000",
        "0000001011011000",
        "0000010010110011",
        "0000011001111010",
        "0000011111110010",
        "0000100011101011",
        "0000100101000010",
        "0000100011101011",
        "0000011111110010",
        "0000011001111010",
        "0000010010110011",
        "0000001011011000",
        "0000000100100000",
        "1111111110111001",
        "1111111010111111",
        "1111111000111011",
        "1111111000100101",
        "1111111001100101",
        "1111111011011001",
        "1111111101011111",
        "1111111111011010",
        "0000000000110011",
        "0000000001100001",
        "0000000101011111"
    );

begin

    -- Registers: input sample, delay line and output.
    sync_proc : process (clk)
    begin
        if rising_edge(clk) then
            if rst = '0' then
                reg         <= (others => (others => '0'));
                data_in_reg <= (others => '0');
                data_out    <= (others => '0');
            else
                reg         <= next_reg;
                data_in_reg <= data_in;
                data_out    <= next_data_out;
            end if;
        end if;
    end process;

    -- Combinational next state of the delay line: shift by one position and
    -- insert the registered input at position 0.
    async_proc : process (reg, data_in_reg)
    begin
        next_reg(0) <= data_in_reg;
        for i in 0 to FIR_ORDER - 2 loop
            next_reg(i + 1) <= reg(i);
        end loop;
    end process;

    -- Expose the constant coefficients as std_logic_vector for the port maps.
    coeff_conv : for i in 0 to FIR_ORDER generate
        coeff(i) <= std_logic_vector(coeff_array(i));
    end generate coeff_conv;

    -- Tap i of the delay line is weighted with coefficient i+1
    -- (coefficient 0 belongs to the registered input, see `dataout`).
    nextreg : for i in 0 to FIR_ORDER - 1 generate
        accumumlator : signed_mult
            port map ( a => reg(i), b => coeff(i + 1), prod => tempmult(i) );
    end generate nextreg;

    -- Sum the delay-line products one after another.
    -- The carry outputs are left open: tying every cout to one shared signal
    -- would create multiple drivers on that signal.
    -- NOTE(review): multipliers plus FIR_ORDER+1 chained adders form one long
    -- combinational path between registers; reaching a high clock rate will
    -- likely need pipeline registers or an adder tree, which changes latency.
    sum1(0) <= (others => '0');
    addition : for i in 0 to FIR_ORDER - 1 generate
        addition01 : ripple_ca
            port map ( a => sum1(i), b => tempmult(i), cin => CARRY_IN_ZERO,
                       sum => sum1(i + 1), cout => open );
    end generate addition;

    -- Product of the newest (registered) sample and coefficient 0.
    dataout : signed_mult
        port map (
            a    => data_in_reg,
            b    => coeff(0),
            prod => tempmult(FIR_ORDER)
        );

    -- Final addition yields the full-precision filter output.
    addition_2 : ripple_ca
        port map ( a => sum1(FIR_ORDER), b => tempmult(FIR_ORDER), cin => CARRY_IN_ZERO,
                   sum => data_out_temp, cout => open );

    -- Keep the upper DATA_WIDTH bits; the COEFF_WIDTH low bits are dropped.
    next_data_out <= data_out_temp(PROD_WIDTH - 1 downto COEFF_WIDTH);

end Behavioral;
...