Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 56 additions & 1 deletion common_c.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,42 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* Copyright 2025 The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/

#ifndef COMMON_C_H
#define COMMON_C_H

Expand Down Expand Up @@ -62,6 +101,9 @@
#define CGEMM_ITCOPY cgemm_itcopy
#endif

/* CCOMM copy entry points: each macro expands to the same-named lowercase symbol. */
#define CCOMM_NCOPY     ccomm_ncopy
#define CCOMM_TCOPY     ccomm_tcopy

/* CTRMM_OU*COPY entry points, bound to plain lowercase symbols in the same way. */
#define CTRMM_OUNUCOPY  ctrmm_ounucopy
#define CTRMM_OUNNCOPY  ctrmm_ounncopy
#define CTRMM_OUTUCOPY  ctrmm_outucopy
Expand Down Expand Up @@ -125,6 +167,11 @@
/* CGEMM kernel variants visible in this hunk (R, B), bound to plain symbols. */
#define CGEMM_KERNEL_R   cgemm_kernel_r
#define CGEMM_KERNEL_B   cgemm_kernel_b

/* CCOMM kernel variants (N, L, R, B); each expands to the lowercase symbol
 * carrying the same suffix. */
#define CCOMM_KERNEL_N   ccomm_kernel_n
#define CCOMM_KERNEL_L   ccomm_kernel_l
#define CCOMM_KERNEL_R   ccomm_kernel_r
#define CCOMM_KERNEL_B   ccomm_kernel_b

/* CTRMM kernel variants; note the target symbols keep an uppercase suffix. */
#define CTRMM_KERNEL_LN  ctrmm_kernel_LN
#define CTRMM_KERNEL_LT  ctrmm_kernel_LT
#define CTRMM_KERNEL_LR  ctrmm_kernel_LR
Expand Down Expand Up @@ -320,17 +367,25 @@
/* Copy entry points resolved through the `gotoblas` pointer: each macro
 * expands to a member access of the same-named lowercase field.
 * NOTE(review): the enclosing conditional is outside this view; presumably
 * this is the runtime-dispatch branch -- confirm. */
#define CTRMM_IUTNCOPY  gotoblas -> ctrmm_iutncopy
#define CTRMM_ILNNCOPY  gotoblas -> ctrmm_ilnncopy
#define CTRMM_ILTNCOPY  gotoblas -> ctrmm_iltncopy
#define CCOMM_NCOPY     gotoblas -> ccomm_ncopy
#define CCOMM_TCOPY     gotoblas -> ccomm_tcopy

/* CTRSM_I*COPY entry points, resolved through `gotoblas` in the same way. */
#define CTRSM_IUNNCOPY  gotoblas -> ctrsm_iunncopy
#define CTRSM_IUTNCOPY  gotoblas -> ctrsm_iutncopy
#define CTRSM_ILNNCOPY  gotoblas -> ctrsm_ilnncopy
#define CTRSM_ILTNCOPY  gotoblas -> ctrsm_iltncopy

/* CGEMM beta and kernel entry points, resolved through the `gotoblas`
 * pointer. Each macro expands to a member access of the same-named lowercase
 * field; CGEMM_BETA is defined exactly once. */
#define CGEMM_BETA gotoblas -> cgemm_beta
#define CGEMM_KERNEL_N gotoblas -> cgemm_kernel_n
#define CGEMM_KERNEL_L gotoblas -> cgemm_kernel_l
#define CGEMM_KERNEL_R gotoblas -> cgemm_kernel_r
#define CGEMM_KERNEL_B gotoblas -> cgemm_kernel_b

/* CCOMM kernel variants (N, L, R, B), each a member access through the
 * `gotoblas` pointer to the same-named lowercase field. */
#define CCOMM_KERNEL_N   gotoblas -> ccomm_kernel_n
#define CCOMM_KERNEL_L   gotoblas -> ccomm_kernel_l
#define CCOMM_KERNEL_R   gotoblas -> ccomm_kernel_r
#define CCOMM_KERNEL_B   gotoblas -> ccomm_kernel_b

/* CTRMM kernel variants; the field names keep an uppercase suffix. */
#define CTRMM_KERNEL_LN  gotoblas -> ctrmm_kernel_LN
#define CTRMM_KERNEL_LT  gotoblas -> ctrmm_kernel_LT
#define CTRMM_KERNEL_LR  gotoblas -> ctrmm_kernel_LR
Expand Down
49 changes: 48 additions & 1 deletion common_d.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,42 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* Copyright 2025 The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/

#ifndef COMMON_D_H
#define COMMON_D_H

Expand Down Expand Up @@ -55,6 +94,9 @@
#define DGEMM_ITCOPY dgemm_itcopy
#endif

/* DCOMM copy entry points: each macro expands to the same-named lowercase symbol. */
#define DCOMM_NCOPY     dcomm_ncopy
#define DCOMM_TCOPY     dcomm_tcopy

/* DTRMM_OU*COPY entry points, bound to plain lowercase symbols in the same way. */
#define DTRMM_OUNUCOPY  dtrmm_ounucopy
#define DTRMM_OUNNCOPY  dtrmm_ounncopy
#define DTRMM_OUTUCOPY  dtrmm_outucopy
Expand Down Expand Up @@ -114,6 +156,7 @@
/* DGEMM beta entry point, bound directly to the lowercase symbol. */
#define DGEMM_BETA       dgemm_beta

/* DGEMM and DCOMM each expose a single kernel macro (no N/L/R/B suffix). */
#define DGEMM_KERNEL     dgemm_kernel
#define DCOMM_KERNEL     dcomm_kernel

/* DTRMM kernel variants; the target symbols keep an uppercase suffix. */
#define DTRMM_KERNEL_LN  dtrmm_kernel_LN
#define DTRMM_KERNEL_LT  dtrmm_kernel_LT
Expand Down Expand Up @@ -239,13 +282,17 @@
/* Copy entry points resolved through the `gotoblas` pointer: each macro
 * expands to a member access of the same-named lowercase field.
 * NOTE(review): the enclosing conditional is outside this view; presumably
 * this is the runtime-dispatch branch -- confirm. */
#define DTRMM_IUTNCOPY  gotoblas -> dtrmm_iutncopy
#define DTRMM_ILNNCOPY  gotoblas -> dtrmm_ilnncopy
#define DTRMM_ILTNCOPY  gotoblas -> dtrmm_iltncopy
#define DCOMM_NCOPY     gotoblas -> dcomm_ncopy
#define DCOMM_TCOPY     gotoblas -> dcomm_tcopy

/* DTRSM_I*COPY entry points, resolved through `gotoblas` in the same way. */
#define DTRSM_IUNNCOPY  gotoblas -> dtrsm_iunncopy
#define DTRSM_IUTNCOPY  gotoblas -> dtrsm_iutncopy
#define DTRSM_ILNNCOPY  gotoblas -> dtrsm_ilnncopy
#define DTRSM_ILTNCOPY  gotoblas -> dtrsm_iltncopy

/* DGEMM/DCOMM beta and kernel entry points, resolved through the `gotoblas`
 * pointer. Each macro expands to a member access of the same-named lowercase
 * field; DGEMM_BETA is defined exactly once. */
#define DGEMM_BETA gotoblas -> dgemm_beta
#define DGEMM_KERNEL gotoblas -> dgemm_kernel
#define DCOMM_KERNEL gotoblas -> dcomm_kernel

/* DTRMM kernel variants; the field names keep an uppercase suffix. */
#define DTRMM_KERNEL_LN gotoblas -> dtrmm_kernel_LN
#define DTRMM_KERNEL_LT gotoblas -> dtrmm_kernel_LT
Expand Down
21 changes: 21 additions & 0 deletions common_level3.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* Copyright 2025 The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
Expand Down Expand Up @@ -284,6 +285,8 @@ int strmm_ilnucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX
/* strmm_il* copy prototypes: (m, n, a, lda, posX, posY, b), all on float buffers. */
int strmm_ilnncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
int strmm_iltucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
int strmm_iltncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
/* scomm_* copy prototypes: same (m, n, a, lda, b) arguments but no posX/posY.
 * NOTE(review): presumably a is read and b is written -- confirm against the implementations. */
int scomm_ncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
int scomm_tcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
/* strmm_ol* copy prototypes: same parameter list as the strmm_il* group. */
int strmm_olnucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
int strmm_olnncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
int strmm_oltucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
Expand All @@ -301,6 +304,8 @@ int dtrmm_ilnucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG pos
/* dtrmm_il* copy prototypes: (m, n, a, lda, posX, posY, b), all on double buffers. */
int dtrmm_ilnncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
int dtrmm_iltucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
int dtrmm_iltncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
/* dcomm_* copy prototypes: same (m, n, a, lda, b) arguments but no posX/posY.
 * NOTE(review): presumably a is read and b is written -- confirm against the implementations. */
int dcomm_tcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b);
int dcomm_ncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b);
/* dtrmm_ol* copy prototypes: same parameter list as the dtrmm_il* group. */
int dtrmm_olnucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
int dtrmm_olnncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
int dtrmm_oltucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
Expand Down Expand Up @@ -335,6 +340,8 @@ int ctrmm_ilnucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX
/* ctrmm_il* copy prototypes: (m, n, a, lda, posX, posY, b), declared on float buffers. */
int ctrmm_ilnncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
int ctrmm_iltucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
int ctrmm_iltncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
/* ccomm_* copy prototypes: same (m, n, a, lda, b) arguments but no posX/posY.
 * NOTE(review): presumably the float buffers hold interleaved complex values -- confirm. */
int ccomm_tcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
int ccomm_ncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
/* ctrmm_ol* copy prototypes: same parameter list as the ctrmm_il* group. */
int ctrmm_olnucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
int ctrmm_olnncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
int ctrmm_oltucopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, float *b);
Expand All @@ -352,6 +359,8 @@ int ztrmm_ilnucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG pos
/* ztrmm_il* copy prototypes: (m, n, a, lda, posX, posY, b), declared on double buffers. */
int ztrmm_ilnncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
int ztrmm_iltucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
int ztrmm_iltncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
/* zcomm_* copy prototypes: same (m, n, a, lda, b) arguments but no posX/posY.
 * NOTE(review): presumably the double buffers hold interleaved complex values -- confirm. */
int zcomm_tcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b);
int zcomm_ncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b);
/* ztrmm_ol* copy prototypes: same parameter list as the ztrmm_il* group. */
int ztrmm_olnucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
int ztrmm_olnncopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
int ztrmm_oltucopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, BLASLONG posX, BLASLONG posY, double *b);
Expand Down Expand Up @@ -579,6 +588,8 @@ int bgemm_kernel(BLASLONG, BLASLONG, BLASLONG, bfloat16, bfloat16 *, bfloat16 *
/* Kernel prototypes with unnamed parameters: three BLASLONG values, one
 * scalar, three pointers and a trailing BLASLONG.
 * NOTE(review): presumably (m, n, k, alpha, a, b, c, ldc) -- confirm against the kernels. */
int sbgemm_kernel(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, bfloat16 *, float *, BLASLONG);
int sgemm_kernel(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG);
int dgemm_kernel(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);
/* scomm_kernel / dcomm_kernel share the parameter lists of sgemm_kernel / dgemm_kernel. */
int scomm_kernel(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG);
int dcomm_kernel(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);

#ifdef QUAD_PRECISION
int qgemm_kernel(BLASLONG, BLASLONG, BLASLONG, xidouble *, xidouble *, xidouble *, xdouble *, BLASLONG);
Expand Down Expand Up @@ -728,6 +739,16 @@ int cgemm3m_kernel(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float
/* gemm3m kernel prototypes: three BLASLONG values, two scalars, three
 * pointers and a trailing BLASLONG, in double and xdouble flavours. */
int zgemm3m_kernel(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
int xgemm3m_kernel(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);

/* ccomm kernel variants (_n, _l, _r, _b): three BLASLONG values, two float
 * scalars, three float pointers and a trailing BLASLONG.
 * NOTE(review): the two scalars are presumably the real and imaginary parts
 * of alpha -- confirm against the kernels. */
int ccomm_kernel_n(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
int ccomm_kernel_l(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
int ccomm_kernel_r(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
int ccomm_kernel_b(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);

/* zcomm kernel variants: identical shape to the ccomm group, on double. */
int zcomm_kernel_n(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
int zcomm_kernel_l(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
int zcomm_kernel_r(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
int zcomm_kernel_b(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);

/* shgemm driver prototypes (_nn, _nt, _tn): a blas_arg_t pointer, two
 * BLASLONG pointers, two hfloat16 pointers and a trailing BLASLONG.
 * NOTE(review): the suffix presumably encodes the transpose state of the two
 * operands -- confirm. */
int shgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, hfloat16 *, hfloat16 *, BLASLONG);
int shgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, hfloat16 *, hfloat16 *, BLASLONG);
int shgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, hfloat16 *, hfloat16 *, BLASLONG);
Expand Down
Loading
Loading