// dkcm_bussed.v
// Constant Coefficient Multiplier (KCM)
// $Id$
// Larry Doolittle, LBNL
// llc-suite Copyright (c) 2004, The Regents of the University of
// California, through Lawrence Berkeley National Laboratory (subject
// to receipt of any required approvals from the U.S. Dept. of Energy).
// All rights reserved.
// Your use of this software is pursuant to a "BSD-style" open
// source license agreement, the text of which is in license.txt
// (md5sum a1e0e81c78f6eba050b0e96996f49fd5) that should accompany
// this file. If the license agreement is not there, or if you
// have questions about the license, please contact Berkeley Lab's
// Technology Transfer Department at TTD@lbl.gov referring to
// "llc-suite (LBNL Ref CR-1988)"
// The constant can be reloaded on the fly,
// based on Xilinx's SRL16E primitive.
// It is therefore useful only on Xilinx Spartan-II and
// Virtex chips (October 2002 list).
//
// Unlike Xilinx's CoreGen DKCM, I have split out the loading
// logic, so that it can be shared by a set of multipliers.
// My controller takes 24 clock cycles, not 16, to reload
// the constant. I have only coded a single configuration:
// 12-bit signed constant
// 12-bit signed input
// 24-bit signed output
// combinatorial (not pipelined) operation
// If anyone wants to parameterize it, start by turning my hard-coded
// srl16x16e into a parameterized module instance array, and adding
// support for that feature to Icarus Verilog.
//
// I estimate the per-multiplier LUT count at 82
// (48 SRL16E, 32 adder, 2 address decoder)
// Xilinx Foundation 4.2i says it implements it in 44 slices (88 LUT).
// The controller (dkcm_controller) is designed as a peripheral to a
// general purpose computer. If you wanted to shave the LUT count,
// you could perform this function in software instead, and tie the
// mass of SRL16E's directly to the processor data bus. I do it
// in hardware for speed and implementation-detail-hiding; besides,
// it only costs about 40 LUT.
`timescale 1ns / 1ns
// dkcm_bussed: one multiplier slice hanging off a shared coefficient-load bus.
//
// Ports
//   var      12-bit multiplicand; split into three 4-bit nibbles, each of
//            which addresses one srl16x16e bank.
//   product  24-bit sum of the three bank outputs, weighted by 1, 16 and 256.
//            Purely combinatorial from var and the bank contents.
//   clk      clock handed to the banks (and to the disabled debug trace).
//   dkcm_bus shared load bus, decoded here as
//              [20:5]  16-bit word presented to the D input of every bank
//              [4:2]   target address, compared against ident
//              [1]     load strobe for the two low banks (bank0, bank1)
//              [0]     load strobe for the top bank (bank2)
//            Bit 21 is not used inside this module.
//   ident    3-bit address of this instance; strobes are honoured only
//            when dkcm_bus[4:2] equals it.
//
// srl16x16e is defined elsewhere; presumably it is sixteen SRL16E shift
// registers side by side (see the file header) -- confirm against its source.
module dkcm_bussed(
	input [11:0] var,
	output [23:0] product,
	input clk,
	input [21:0] dkcm_bus,
	input [2:0] ident
);

// ---- load-bus decode ----
wire [15:0] coef_word = dkcm_bus[20:5];
wire [2:0]  target    = dkcm_bus[4:2];
wire        selected  = (target == ident);
wire        ce_low    = selected & dkcm_bus[1];  // enables bank0 and bank1
wire        ce_high   = selected & dkcm_bus[0];  // enables bank2

// ---- one 16-bit lookup per input nibble ----
wire [15:0] pp_lo;   // addressed by var[3:0]
wire [15:0] pp_mid;  // addressed by var[7:4]
wire [15:0] pp_hi;   // addressed by var[11:8]

srl16x16e bank0(
	.CLK(clk), .CE(ce_low), .D(coef_word),
	.A3(var[3]), .A2(var[2]), .A1(var[1]), .A0(var[0]),
	.Q(pp_lo)
);
srl16x16e bank1(
	.CLK(clk), .CE(ce_low), .D(coef_word),
	.A3(var[7]), .A2(var[6]), .A1(var[5]), .A0(var[4]),
	.Q(pp_mid)
);
srl16x16e bank2(
	.CLK(clk), .CE(ce_high), .D(coef_word),
	.A3(var[11]), .A2(var[10]), .A1(var[9]), .A0(var[8]),
	.Q(pp_hi)
);

// ---- align each lookup to its nibble weight, then add ----
// The two low terms are sign-extended from bit 15 up to 24 bits; the top
// term already reaches bit 23 once shifted left by 8.
wire [23:0] term_lo  = {{8{pp_lo[15]}}, pp_lo};
wire [23:0] term_mid = {{4{pp_mid[15]}}, pp_mid, 4'b0000};
wire [23:0] term_hi  = {pp_hi, 8'b00000000};

assign product = term_lo + term_mid + term_hi;

// ---- simulation-only trace, compiled in but switched off ----
// Change the constant condition to 1 to print the nibbles and the
// partial products 1 ns after every falling clock edge.
always @(negedge clk) begin
	if (0) begin
		#1;
		$display(" %x*K + %x*K<<4 + %x*K<<8",var[3:0],var[7:4],var[11:8]);
		$display(" %x + %x<<4 + %x<<8 = %d",pp_lo,pp_mid,pp_hi,product);
	end
end

endmodule