openblas_ffi 0.1.1

FFI bindings to sequential and parallel OpenBLAS.
Documentation
#
# Include user definition
#

# TO suppress recursive includes
INCLUDED = 1

ifndef TOPDIR
TOPDIR = .
endif

NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib

# Default C compiler
# - Only set if not specified on the command line or inherited from the environment.
# - CC is an implicit variable so neither '?=' or 'ifndef' can be used.
#   http://stackoverflow.com/questions/4029274/mingw-and-make-variables
# - Default value is 'cc' which is not always a valid command (e.g. MinGW).
ifeq ($(origin CC),default)
CC = gcc
# Change the default compile to clang on Mac OSX.
# http://stackoverflow.com/questions/714100/os-detecting-makefile
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Darwin)
     CC = clang
#     EXTRALIB += -Wl,-no_compact_unwind
endif
endif

# Default Fortran compiler (FC) is selected by f_check.

ifndef MAKEFILE_RULE
include $(TOPDIR)/Makefile.rule
else
include $(TOPDIR)/$(MAKEFILE_RULE)
endif

#
#  Beginning of system configuration
#

ifndef HOSTCC
HOSTCC	 = $(CC)
endif

ifdef TARGET
GETARCH_FLAGS := -DFORCE_$(TARGET)
endif

# Force fallbacks for 32bit

ifeq ($(BINARY), 32)
ifeq ($(TARGET), HASWELL)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET), SANDYBRIDGE)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET), BULLDOZER)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
ifeq ($(TARGET), PILEDRIVER)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
ifeq ($(TARGET), STEAMROLLER)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
ifeq ($(TARGET), EXCAVATOR)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
endif


#TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1.
#
ifdef TARGET_CORE
GETARCH_FLAGS := -DFORCE_$(TARGET_CORE)
endif

# Force fallbacks for 32bit

ifeq ($(BINARY), 32)
ifeq ($(TARGET_CORE), HASWELL)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET_CORE), SANDYBRIDGE)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET_CORE), BULLDOZER)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
ifeq ($(TARGET_CORE), PILEDRIVER)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
ifeq ($(TARGET_CORE), STEAMROLLER)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
ifeq ($(TARGET_CORE), EXCAVATOR)
GETARCH_FLAGS := -DFORCE_BARCELONA
endif
endif




ifdef INTERFACE64
ifneq ($(INTERFACE64), 0)
GETARCH_FLAGS	+= -DUSE64BITINT
endif
endif

ifndef GEMM_MULTITHREAD_THRESHOLD
GEMM_MULTITHREAD_THRESHOLD=4
endif
GETARCH_FLAGS	+= -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD)

ifeq ($(NO_AVX), 1)
GETARCH_FLAGS	+= -DNO_AVX
endif

ifeq ($(BINARY), 32)
GETARCH_FLAGS	+= -DNO_AVX
endif

ifeq ($(NO_AVX2), 1)
GETARCH_FLAGS	+= -DNO_AVX2
endif

ifeq ($(DEBUG), 1)
GETARCH_FLAGS	+= -g
endif

ifeq ($(QUIET_MAKE), 1)
MAKE += -s
endif

ifndef NO_PARALLEL_MAKE
NO_PARALLEL_MAKE=0
endif
GETARCH_FLAGS	+= -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE)

ifdef MAKE_NB_JOBS
GETARCH_FLAGS += -DMAKE_NB_JOBS=$(MAKE_NB_JOBS)
endif

ifeq ($(HOSTCC), loongcc)
GETARCH_FLAGS  += -static
endif

#if don't use Fortran, it will only compile CBLAS.
ifeq ($(ONLY_CBLAS), 1)
NO_LAPACK = 1
else
ONLY_CBLAS = 0
endif

# This operation is expensive, so execution should be once.
ifndef GOTOBLAS_MAKEFILE
export GOTOBLAS_MAKEFILE = 1

# Generating Makefile.conf and config.h
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all)

ifndef TARGET_CORE
include $(TOPDIR)/Makefile.conf
else
include $(TOPDIR)/Makefile_kernel.conf
endif

endif

ifndef NUM_THREADS
NUM_THREADS = $(NUM_CORES)
endif

ifeq ($(NUM_THREADS), 1)
override USE_THREAD = 0
endif

ifdef USE_THREAD
ifeq ($(USE_THREAD), 0)
SMP =
else
SMP = 1
endif
else
ifeq ($(NUM_THREAD), 1)
SMP =
else
SMP = 1
endif
endif

ifndef NEED_PIC
NEED_PIC = 1
endif

ARFLAGS	=
CPP	= $(COMPILER) -E
AR	= $(CROSS_SUFFIX)ar
AS	= $(CROSS_SUFFIX)as
LD	= $(CROSS_SUFFIX)ld
RANLIB	= $(CROSS_SUFFIX)ranlib
NM	= $(CROSS_SUFFIX)nm
DLLWRAP = $(CROSS_SUFFIX)dllwrap
OBJCOPY = $(CROSS_SUFFIX)objcopy
OBJCONV = $(CROSS_SUFFIX)objconv


# For detect fortran failed, only build BLAS.
ifeq ($(NOFORTRAN), 1)
NO_LAPACK = 1
endif

#
#  OS dependent settings
#

ifeq ($(OSNAME), Darwin)
export MACOSX_DEPLOYMENT_TARGET=10.6
MD5SUM = md5 -r
endif

ifeq ($(OSNAME), FreeBSD)
MD5SUM = md5 -r
endif

ifeq ($(OSNAME), NetBSD)
MD5SUM = md5 -n
endif

ifeq ($(OSNAME), Linux)
EXTRALIB	+= -lm
NO_EXPRECISION = 1
endif

ifeq ($(OSNAME), AIX)
EXTRALIB	+= -lm
endif

ifeq ($(OSNAME), WINNT)
NEED_PIC = 0
NO_EXPRECISION = 1

EXTRALIB        += -defaultlib:advapi32

SUFFIX  = obj
PSUFFIX = pobj
LIBSUFFIX = a

ifeq ($(C_COMPILER), CLANG)
CCOMMON_OPT	+= -DMS_ABI
endif

ifeq ($(C_COMPILER), GCC)
#Test for supporting MS_ABI
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
ifeq ($(GCCVERSIONGT4), 1)
# GCC Majar version > 4
# It is compatible with MSVC ABI.
CCOMMON_OPT	+= -DMS_ABI
endif

ifeq ($(GCCVERSIONGTEQ4), 1)
ifeq ($(GCCMINORVERSIONGTEQ7), 1)
# GCC Version >=4.7
# It is compatible with MSVC ABI.
CCOMMON_OPT	+= -DMS_ABI
endif
endif
endif

# Ensure the correct stack alignment on Win32
# http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
ifeq ($(ARCH), x86)
CCOMMON_OPT += -mincoming-stack-boundary=2
FCOMMON_OPT += -mincoming-stack-boundary=2
endif

endif

ifeq ($(OSNAME), Interix)
NEED_PIC = 0
NO_EXPRECISION = 1

INTERIX_TOOL_DIR = /opt/gcc.3.3/i586-pc-interix3/bin
endif

ifeq ($(OSNAME), CYGWIN_NT)
NEED_PIC = 0
NO_EXPRECISION = 1
endif

ifneq ($(OSNAME), WINNT)
ifneq ($(OSNAME), CYGWIN_NT)
ifneq ($(OSNAME), Interix)
ifneq ($(OSNAME), Android)
ifdef SMP
EXTRALIB   += -lpthread
endif
endif
endif
endif
endif

# ifeq logical or
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix))
OS_WINDOWS=1
endif

ifdef QUAD_PRECISION
CCOMMON_OPT	+= -DQUAD_PRECISION
NO_EXPRECISION = 1
endif

ifneq ($(ARCH), x86)
ifneq ($(ARCH), x86_64)
NO_EXPRECISION = 1
endif
endif

ifdef UTEST_CHECK
CCOMMON_OPT	+= -DUTEST_CHECK
SANITY_CHECK = 1
endif

ifdef SANITY_CHECK
CCOMMON_OPT	+= -DSANITY_CHECK -DREFNAME=$(*F)f$(BU)
endif

MAX_STACK_ALLOC ?= 2048
ifneq ($(MAX_STACK_ALLOC), 0)
CCOMMON_OPT	+= -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC)
endif

#
#  Architecture dependent settings
#

ifeq ($(ARCH), x86)
ifndef BINARY
NO_BINARY_MODE	= 1
endif

ifeq ($(CORE), generic)
NO_EXPRECISION = 1
endif

ifndef NO_EXPRECISION
ifeq ($(F_COMPILER), GFORTRAN)
# ifeq logical or. GCC or LSB
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
EXPRECISION	= 1
CCOMMON_OPT	+= -DEXPRECISION -m128bit-long-double
FCOMMON_OPT	+= -m128bit-long-double
endif
ifeq ($(C_COMPILER), CLANG)
EXPRECISION	= 1
CCOMMON_OPT	+= -DEXPRECISION
FCOMMON_OPT	+= -m128bit-long-double
endif
endif
endif
endif

ifeq ($(ARCH), x86_64)

ifeq ($(CORE), generic)
NO_EXPRECISION = 1
endif

ifndef NO_EXPRECISION
ifeq ($(F_COMPILER), GFORTRAN)
# ifeq logical or. GCC or LSB
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
EXPRECISION	= 1
CCOMMON_OPT	+= -DEXPRECISION -m128bit-long-double
FCOMMON_OPT	+= -m128bit-long-double
endif
ifeq ($(C_COMPILER), CLANG)
EXPRECISION	= 1
CCOMMON_OPT	+= -DEXPRECISION
FCOMMON_OPT	+= -m128bit-long-double
endif
endif
endif
endif

ifeq ($(C_COMPILER), INTEL)
CCOMMON_OPT    += -wd981
endif


ifeq ($(USE_OPENMP), 1)

#check
ifeq ($(USE_THREAD), 0)
$(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.)
endif

# ifeq logical or. GCC or LSB
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
CCOMMON_OPT    += -fopenmp
endif

ifeq ($(C_COMPILER), CLANG)
$(error OpenBLAS: Clang didn't support OpenMP yet.)
CCOMMON_OPT    += -fopenmp
endif

ifeq ($(C_COMPILER), INTEL)
CCOMMON_OPT    += -openmp
endif

ifeq ($(C_COMPILER), PGI)
CCOMMON_OPT    += -mp
endif

ifeq ($(C_COMPILER), OPEN64)
CCOMMON_OPT    += -mp
CEXTRALIB   += -lstdc++
endif

ifeq ($(C_COMPILER), PATHSCALE)
CCOMMON_OPT    += -mp
endif
endif


ifeq ($(DYNAMIC_ARCH), 1)
ifeq ($(ARCH), x86)
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
	       CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
endif

ifeq ($(ARCH), x86_64)
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
endif
ifneq ($(NO_AVX2), 1)
DYNAMIC_CORE += HASWELL
endif
endif

ifndef DYNAMIC_CORE
DYNAMIC_ARCH =
endif
endif

ifeq ($(ARCH), ia64)
NO_BINARY_MODE	= 1
BINARY_DEFINED	= 1

ifeq ($(F_COMPILER), GFORTRAN)
ifeq ($(C_COMPILER), GCC)
# EXPRECISION	= 1
# CCOMMON_OPT	+= -DEXPRECISION
endif
endif
endif

ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
NO_BINARY_MODE	= 1
endif

ifeq ($(ARCH), alpha)
NO_BINARY_MODE	= 1
BINARY_DEFINED	= 1
endif

ifeq ($(ARCH), arm)
NO_BINARY_MODE  = 1
BINARY_DEFINED  = 1
endif

ifeq ($(ARCH), arm64)
NO_BINARY_MODE  = 1
BINARY_DEFINED  = 1
endif




#
#  C Compiler dependent settings
#


# ifeq logical or. GCC or CLANG or LSB
# http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC CLANG LSB))
CCOMMON_OPT += -Wall
COMMON_PROF += -fno-inline
NO_UNINITIALIZED_WARN =  -Wno-uninitialized

ifeq ($(QUIET_MAKE), 1)
CCOMMON_OPT += $(NO_UNINITIALIZED_WARN) -Wno-unused
endif

ifdef NO_BINARY_MODE

ifeq ($(ARCH), $(filter $(ARCH),mips64))
ifdef BINARY64
CCOMMON_OPT += -mabi=64
else
CCOMMON_OPT += -mabi=n32
endif
BINARY_DEFINED = 1
else ifeq ($(ARCH), $(filter $(ARCH),mips))
CCOMMON_OPT += -mabi=32
BINARY_DEFINED = 1
endif

ifeq ($(CORE), LOONGSON3A)
CCOMMON_OPT += -march=mips64
FCOMMON_OPT += -march=mips64
endif

ifeq ($(CORE), LOONGSON3B)
CCOMMON_OPT += -march=mips64
FCOMMON_OPT += -march=mips64
endif

ifeq ($(CORE), P5600)
CCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
FCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
endif

ifeq ($(CORE), I6400)
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS)
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS)
endif

ifeq ($(CORE), P6600)
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS)
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS)
endif

ifeq ($(OSNAME), AIX)
BINARY_DEFINED = 1
endif

endif

ifndef BINARY_DEFINED
ifdef BINARY64
CCOMMON_OPT += -m64
else
CCOMMON_OPT += -m32
endif
endif

endif

ifeq ($(C_COMPILER), PGI)
ifdef BINARY64
CCOMMON_OPT += -tp p7-64
else
CCOMMON_OPT += -tp p7
endif
endif

ifeq ($(C_COMPILER), PATHSCALE)
ifdef BINARY64
CCOMMON_OPT += -m64
else
CCOMMON_OPT += -m32
endif
endif

#
#  Fortran Compiler dependent settings
#

ifeq ($(F_COMPILER), G77)
CCOMMON_OPT += -DF_INTERFACE_G77
FCOMMON_OPT += -Wall
ifndef NO_BINARY_MODE
ifdef BINARY64
FCOMMON_OPT += -m64
else
FCOMMON_OPT += -m32
endif
endif
endif

ifeq ($(F_COMPILER), G95)
CCOMMON_OPT += -DF_INTERFACE_G95
FCOMMON_OPT += -Wall
ifndef NO_BINARY_MODE
ifdef BINARY64
FCOMMON_OPT += -m64
else
FCOMMON_OPT += -m32
endif
endif
endif

ifeq ($(F_COMPILER), GFORTRAN)
CCOMMON_OPT += -DF_INTERFACE_GFORT
FCOMMON_OPT += -Wall
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
ifneq ($(NO_LAPACK), 1)
EXTRALIB += -lgfortran
endif
ifdef NO_BINARY_MODE
ifeq ($(ARCH), $(filter $(ARCH),mips64))
ifdef BINARY64
FCOMMON_OPT += -mabi=64
else
FCOMMON_OPT += -mabi=n32
endif
else ifeq ($(ARCH), $(filter $(ARCH),mips))
FCOMMON_OPT += -mabi=32
endif
else
ifdef BINARY64
FCOMMON_OPT += -m64
ifdef INTERFACE64
ifneq ($(INTERFACE64), 0)
FCOMMON_OPT +=  -fdefault-integer-8
endif
endif
else
FCOMMON_OPT += -m32
endif
endif
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -fopenmp
endif
endif

ifeq ($(F_COMPILER), INTEL)
CCOMMON_OPT += -DF_INTERFACE_INTEL
ifdef INTERFACE64
ifneq ($(INTERFACE64), 0)
FCOMMON_OPT += -i8
endif
endif
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -openmp
endif
endif

ifeq ($(F_COMPILER), FUJITSU)
CCOMMON_OPT += -DF_INTERFACE_FUJITSU
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -openmp
endif
endif

ifeq ($(F_COMPILER), IBM)
CCOMMON_OPT += -DF_INTERFACE_IBM
# FCOMMON_OPT	+= -qarch=440
ifdef BINARY64
FCOMMON_OPT += -q64
ifdef INTERFACE64
ifneq ($(INTERFACE64), 0)
FCOMMON_OPT += -qintsize=8
endif
endif
else
FCOMMON_OPT += -q32
endif
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -openmp
endif
endif

ifeq ($(F_COMPILER), PGI)
CCOMMON_OPT  += -DF_INTERFACE_PGI
COMMON_PROF +=  -DPGICOMPILER
ifdef BINARY64
ifdef INTERFACE64
ifneq ($(INTERFACE64), 0)
FCOMMON_OPT += -i8
endif
endif
FCOMMON_OPT += -tp p7-64
else
FCOMMON_OPT += -tp p7
endif
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -mp
endif
endif

ifeq ($(F_COMPILER), PATHSCALE)
CCOMMON_OPT  += -DF_INTERFACE_PATHSCALE
ifdef BINARY64
ifdef INTERFACE64
ifneq ($(INTERFACE64), 0)
FCOMMON_OPT += -i8
endif
endif
endif
 
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -mp
endif
endif

ifeq ($(F_COMPILER), OPEN64)
CCOMMON_OPT  += -DF_INTERFACE_OPEN64
ifdef BINARY64
ifdef INTERFACE64
ifneq ($(INTERFACE64), 0)
FCOMMON_OPT += -i8
endif
endif
endif

ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
ifndef BINARY64
FCOMMON_OPT += -n32
else
FCOMMON_OPT += -n64
endif
ifeq ($(CORE), LOONGSON3A)
FCOMMON_OPT += -loongson3 -static
endif

ifeq ($(CORE), LOONGSON3B)
FCOMMON_OPT += -loongson3 -static
endif

else
ifndef BINARY64
FCOMMON_OPT += -m32
else
FCOMMON_OPT += -m64
endif
endif

ifeq ($(USE_OPENMP), 1)
FEXTRALIB   += -lstdc++
FCOMMON_OPT += -mp
endif
endif

ifeq ($(C_COMPILER), OPEN64)

ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
ifndef BINARY64
CCOMMON_OPT += -n32
else
CCOMMON_OPT += -n64
endif
ifeq ($(CORE), LOONGSON3A)
CCOMMON_OPT += -loongson3 -static
endif

ifeq ($(CORE), LOONGSON3B)
CCOMMON_OPT += -loongson3 -static
endif

else

ifndef BINARY64
CCOMMON_OPT += -m32
else
CCOMMON_OPT += -m64
endif
endif
endif

ifeq ($(C_COMPILER), SUN)
CCOMMON_OPT  += -w
ifeq ($(ARCH), x86)
CCOMMON_OPT  += -m32
else
FCOMMON_OPT  += -m64
endif
endif

ifeq ($(F_COMPILER), SUN)
CCOMMON_OPT  += -DF_INTERFACE_SUN
ifeq ($(ARCH), x86)
FCOMMON_OPT  += -m32
else
FCOMMON_OPT  += -m64
endif
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -xopenmp=parallel
endif
endif

ifeq ($(F_COMPILER), COMPAQ)
CCOMMON_OPT  += -DF_INTERFACE_COMPAQ
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -openmp
endif
endif

ifdef BINARY64
ifdef INTERFACE64
ifneq ($(INTERFACE64), 0)
CCOMMON_OPT	+=
#-DUSE64BITINT
endif
endif
endif

ifeq ($(NEED_PIC), 1)
ifeq ($(C_COMPILER), IBM)
CCOMMON_OPT += -qpic=large
else
CCOMMON_OPT += -fPIC
endif
ifeq ($(F_COMPILER), SUN)
FCOMMON_OPT  += -pic
else
FCOMMON_OPT += -fPIC
endif
endif

ifeq ($(DYNAMIC_ARCH), 1)
CCOMMON_OPT	+= -DDYNAMIC_ARCH
endif

ifeq ($(NO_LAPACK), 1)
CCOMMON_OPT	+= -DNO_LAPACK
#Disable LAPACK C interface
NO_LAPACKE = 1
endif

ifeq ($(NO_LAPACKE), 1)
CCOMMON_OPT	+= -DNO_LAPACKE
endif

ifeq ($(NO_AVX), 1)
CCOMMON_OPT	+= -DNO_AVX
endif

ifeq ($(ARCH), x86)
CCOMMON_OPT	+= -DNO_AVX
endif

ifeq ($(NO_AVX2), 1)
CCOMMON_OPT	+= -DNO_AVX2
endif

ifdef SMP
CCOMMON_OPT	+= -DSMP_SERVER

ifeq ($(ARCH), mips64)
ifneq ($(CORE), LOONGSON3B)
USE_SIMPLE_THREADED_LEVEL3 = 1
endif
endif

ifeq ($(USE_OPENMP), 1)
# USE_SIMPLE_THREADED_LEVEL3 = 1
# NO_AFFINITY = 1
CCOMMON_OPT	+= -DUSE_OPENMP
endif

ifeq ($(BIGNUMA), 1)
CCOMMON_OPT	+= -DBIGNUMA
endif

endif

ifeq ($(NO_WARMUP), 1)
CCOMMON_OPT	+= -DNO_WARMUP
endif

ifeq ($(CONSISTENT_FPCSR), 1)
CCOMMON_OPT	+= -DCONSISTENT_FPCSR
endif

# Only for development
# CCOMMON_OPT	 += -DPARAMTEST
# CCOMMON_OPT	 += -DPREFETCHTEST
# CCOMMON_OPT	 += -DNO_SWITCHING
# USE_PAPI = 1

ifdef USE_PAPI
CCOMMON_OPT	 += -DUSE_PAPI
EXTRALIB	 += -lpapi -lperfctr
endif

ifdef DYNAMIC_THREADS
CCOMMON_OPT	 += -DDYNAMIC_THREADS
endif

CCOMMON_OPT	+= -DMAX_CPU_NUMBER=$(NUM_THREADS)

ifdef USE_SIMPLE_THREADED_LEVEL3
CCOMMON_OPT	+= -DUSE_SIMPLE_THREADED_LEVEL3
endif

ifndef SYMBOLPREFIX
SYMBOLPREFIX =
endif

ifndef SYMBOLSUFFIX
SYMBOLSUFFIX =
endif

ifndef LIBNAMESUFFIX
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)
else
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX)
endif

KERNELDIR	= $(TOPDIR)/kernel/$(ARCH)

include $(TOPDIR)/Makefile.$(ARCH)

CCOMMON_OPT	+= -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\"

ifeq ($(CORE), PPC440)
CCOMMON_OPT	+= -DALLOC_QALLOC
endif

ifeq ($(CORE), PPC440FP2)
STATIC_ALLOCATION = 1
endif

ifneq ($(OSNAME), Linux)
NO_AFFINITY = 1
endif

ifneq ($(ARCH), x86_64)
ifneq ($(ARCH), x86)
ifneq ($(CORE), LOONGSON3B)
NO_AFFINITY = 1
endif
endif
endif

ifdef NO_AFFINITY
CCOMMON_OPT	+= -DNO_AFFINITY
endif

ifdef FUNCTION_PROFILE
CCOMMON_OPT	+= -DFUNCTION_PROFILE
endif

ifdef HUGETLB_ALLOCATION
CCOMMON_OPT	+= -DALLOC_HUGETLB
endif

ifdef HUGETLBFILE_ALLOCATION
CCOMMON_OPT	+= -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=$(HUGETLBFILE_ALLOCATION)
endif

ifdef STATIC_ALLOCATION
CCOMMON_OPT	+= -DALLOC_STATIC
endif

ifdef DEVICEDRIVER_ALLOCATION
CCOMMON_OPT	+= -DALLOC_DEVICEDRIVER -DDEVICEDRIVER_NAME=\"/dev/mapper\"
endif

ifdef MIXED_MEMORY_ALLOCATION
CCOMMON_OPT	+= -DMIXED_MEMORY_ALLOCATION
endif

ifeq ($(OSNAME), SunOS)
TAR	= gtar
PATCH	= gpatch
GREP	= ggrep
AWK	= nawk
else
TAR	= tar
PATCH	= patch
GREP	= grep
AWK	= awk
endif

ifndef MD5SUM
MD5SUM	= md5sum
endif


REVISION = -r$(VERSION)
MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION)))

ifeq ($(DEBUG), 1)
COMMON_OPT += -g
endif

ifeq ($(DEBUG), 1)
FCOMMON_OPT += -g
endif

ifndef COMMON_OPT
COMMON_OPT = -O2
endif

ifndef FCOMMON_OPT
FCOMMON_OPT = -O2 -frecursive
endif



override CFLAGS     += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
override PFLAGS     += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)

override FFLAGS     += $(FCOMMON_OPT)
override FPFLAGS    += $(FCOMMON_OPT) $(COMMON_PROF)
#MAKEOVERRIDES =

#For LAPACK Fortran codes.
#Disable -fopenmp for LAPACK Fortran codes on Windows.
ifdef OS_WINDOWS
LAPACK_FFLAGS := $(filter-out -fopenmp -mp -openmp -xopenmp=parallel,$(FFLAGS))
LAPACK_FPFLAGS := $(filter-out -fopenmp -mp -openmp -xopenmp=parallel,$(FPFLAGS))
else
LAPACK_FFLAGS := $(FFLAGS)
LAPACK_FPFLAGS := $(FPFLAGS)
endif

LAPACK_CFLAGS = $(CFLAGS)
LAPACK_CFLAGS += -DHAVE_LAPACK_CONFIG_H
ifdef INTERFACE64
ifneq ($(INTERFACE64), 0)
LAPACK_CFLAGS +=  -DLAPACK_ILP64
endif
endif

ifdef OS_WINDOWS
LAPACK_CFLAGS +=  -DOPENBLAS_OS_WINDOWS
endif
ifeq ($(C_COMPILER), LSB)
LAPACK_CFLAGS +=  -DLAPACK_COMPLEX_STRUCTURE
endif

ifndef SUFFIX
SUFFIX  = o
endif

ifndef PSUFFIX
PSUFFIX = po
endif

ifndef LIBSUFFIX
LIBSUFFIX = a
endif

ifneq ($(DYNAMIC_ARCH), 1)
ifndef SMP
LIBNAME		= $(LIBPREFIX)_$(LIBCORE)$(REVISION).$(LIBSUFFIX)
LIBNAME_P	= $(LIBPREFIX)_$(LIBCORE)$(REVISION)_p.$(LIBSUFFIX)
else
LIBNAME		= $(LIBPREFIX)_$(LIBCORE)p$(REVISION).$(LIBSUFFIX)
LIBNAME_P	= $(LIBPREFIX)_$(LIBCORE)p$(REVISION)_p.$(LIBSUFFIX)
endif
else
ifndef SMP
LIBNAME		= $(LIBPREFIX)$(REVISION).$(LIBSUFFIX)
LIBNAME_P	= $(LIBPREFIX)$(REVISION)_p.$(LIBSUFFIX)
else
LIBNAME		= $(LIBPREFIX)p$(REVISION).$(LIBSUFFIX)
LIBNAME_P	= $(LIBPREFIX)p$(REVISION)_p.$(LIBSUFFIX)
endif
endif


LIBDLLNAME   = $(LIBPREFIX).dll
LIBSONAME    = $(LIBNAME:.$(LIBSUFFIX)=.so)
LIBDYNNAME   = $(LIBNAME:.$(LIBSUFFIX)=.dylib)
LIBDEFNAME   = $(LIBNAME:.$(LIBSUFFIX)=.def)
LIBEXPNAME   = $(LIBNAME:.$(LIBSUFFIX)=.exp)
LIBZIPNAME   = $(LIBNAME:.$(LIBSUFFIX)=.zip)

LIBS		= $(TOPDIR)/$(LIBNAME)
LIBS_P		= $(TOPDIR)/$(LIBNAME_P)


LIB_COMPONENTS = BLAS
ifneq ($(NO_CBLAS), 1)
LIB_COMPONENTS += CBLAS
endif

ifneq ($(NO_LAPACK), 1)
LIB_COMPONENTS += LAPACK
ifneq ($(NO_LAPACKE), 1)
LIB_COMPONENTS += LAPACKE
endif
endif

ifeq ($(ONLY_CBLAS), 1)
LIB_COMPONENTS = CBLAS
endif

export OSNAME
export ARCH
export CORE
export LIBCORE
export PGCPATH
export CONFIG
export CC
export FC
export BU
export FU
export NEED2UNDERSCORES
export USE_THREAD
export NUM_THREADS
export NUM_CORES
export SMP
export MAKEFILE_RULE
export NEED_PIC
export BINARY
export BINARY32
export BINARY64
export F_COMPILER
export C_COMPILER
export USE_OPENMP
export CROSS
export CROSS_SUFFIX
export NOFORTRAN
export NO_FBLAS
export EXTRALIB
export CEXTRALIB
export FEXTRALIB
export HAVE_SSE
export HAVE_SSE2
export HAVE_SSE3
export HAVE_SSSE3
export HAVE_SSE4_1
export HAVE_SSE4_2
export HAVE_SSE4A
export HAVE_SSE5
export HAVE_AVX
export HAVE_VFP
export HAVE_VFPV3
export HAVE_VFPV4
export HAVE_NEON
export HAVE_MSA
export MSA_FLAGS
export KERNELDIR
export FUNCTION_PROFILE
export TARGET_CORE

export SGEMM_UNROLL_M
export SGEMM_UNROLL_N
export DGEMM_UNROLL_M
export DGEMM_UNROLL_N
export QGEMM_UNROLL_M
export QGEMM_UNROLL_N
export CGEMM_UNROLL_M
export CGEMM_UNROLL_N
export ZGEMM_UNROLL_M
export ZGEMM_UNROLL_N
export XGEMM_UNROLL_M
export XGEMM_UNROLL_N
export CGEMM3M_UNROLL_M
export CGEMM3M_UNROLL_N
export ZGEMM3M_UNROLL_M
export ZGEMM3M_UNROLL_N
export XGEMM3M_UNROLL_M
export XGEMM3M_UNROLL_N


ifdef USE_CUDA
export CUDADIR
export CUCC
export CUFLAGS
export CULIB
endif

.SUFFIXES: .$(PSUFFIX) .$(SUFFIX) .f

.f.$(SUFFIX):
	$(FC) $(FFLAGS) -c $<  -o $(@F)

.f.$(PSUFFIX):
	$(FC) $(FPFLAGS) -pg -c $<  -o $(@F)


ifdef BINARY64
PATHSCALEPATH	= /opt/pathscale/lib/3.1
PGIPATH		= /opt/pgi/linux86-64/7.1-5/lib
else
PATHSCALEPATH	= /opt/pathscale/lib/3.1/32
PGIPATH		= /opt/pgi/linux86/7.1-5/lib
endif

ACMLPATH	= /opt/acml/4.3.0
ifneq ($(OSNAME), Darwin)
MKLPATH         = /opt/intel/mkl/10.2.2.025/lib
else
MKLPATH         = /Library/Frameworks/Intel_MKL.framework/Versions/10.0.1.014/lib
endif
ATLASPATH	= /opt/atlas/3.9.17/opteron
FLAMEPATH	= $(HOME)/flame/lib
ifneq ($(OSNAME), SunOS)
SUNPATH		= /opt/sunstudio12.1
else
SUNPATH		= /opt/SUNWspro
endif