From 0cc8bd8387f5987ff453b48605811ac535c3d8ab Mon Sep 17 00:00:00 2001
From: Jin Budelmann
Date: Mon, 24 Jan 2011 02:52:16 +0000
Subject: [PATCH 01/24] New branch for CUDA integration.

Tracker item 2180

git-svn-id: https://opencmiss.svn.sourceforge.net/svnroot/opencmiss/cm/branches/developers/jin@1907 61e36070-c886-4827-b7c0-ed9e4dc59cc8

From d17fe30576a2e2820929d7a182fccb3508cb1256 Mon Sep 17 00:00:00 2001
From: Jin Budelmann
Date: Mon, 24 Jan 2011 22:54:24 +0000
Subject: [PATCH 02/24] Initial commit of CUDA integration.

Makefiles are hacked together as I am pressed for time. I wouldn't
recommend anyone test this yet as makefiles have some hardcoded paths from
my computer and code is tweaked for my Tesla board.

Tracker item 2180

git-svn-id: https://opencmiss.svn.sourceforge.net/svnroot/opencmiss/cm/branches/developers/jin@1916 61e36070-c886-4827-b7c0-ed9e4dc59cc8
---
 ExampleMakefile                               |   2 +-
 Makefile                                      |  10 +-
 src/cuda_solver_routines.cu                   | 489 ++++++++++++++++++
 ...ines.c => external_dae_solver_routines.cu} |  16 +-
 src/external_dae_solver_routines.h            |  17 +-
 5 files changed, 524 insertions(+), 10 deletions(-)
 create mode 100644 src/cuda_solver_routines.cu
 rename src/{external_dae_solver_routines.c => external_dae_solver_routines.cu} (89%)

diff --git a/ExampleMakefile b/ExampleMakefile
index 8d914430..83aa6100 100644
--- a/ExampleMakefile
+++ b/ExampleMakefile
@@ -882,7 +882,7 @@ $(EXE_DIR) :
 	mkdir -p $@;

 $(EXECUTABLE) : $(OBJECTS) $(OPENCMISS_LIBRARY)
-	$(EXE_LINK) -o $@ $(OBJECTS) $(OPENCMISS_LIBRARY) $(ELFLAGS) $(EXTERNAL_LIBRARIES)
+	$(EXE_LINK) -o $@ $(OBJECTS) $(OPENCMISS_LIBRARY) $(ELFLAGS) $(EXTERNAL_LIBRARIES) -L/usr/local/cuda/lib64 -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/lib -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/common/lib/linux -L/people/vbud003/NVIDIA_GPU_Computing_SDK/shared//lib -lcudart -L/usr/local/cuda/lib64 -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/lib -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/common/lib/linux -L/people/vbud003/NVIDIA_GPU_Computing_SDK/shared//lib -lcudart -lcutil_x86_64 -lshrutil_x86_64

 $(OPENCMISS_LIBRARY) :
 	( cd $(GLOBAL_ROOT); $(MAKE) )

diff --git a/Makefile b/Makefile
index 0bc73738..d192cbe0 100644
--- a/Makefile
+++ b/Makefile
@@ -727,7 +727,7 @@ FPPFLAGS += $(EXTERNAL_INCLUDE_PATH)

 ELFLAGS += $(EXTERNAL_LIB_PATH)

-.SUFFIXES: .f90 .c
+.SUFFIXES: .f90 .c .cu

 $(OBJECT_DIR)/%.o : $(SOURCE_DIR)/%.f90
 	( cd $(OBJECT_DIR) ; $(FC) -o $@ $(FFLAGS) $(FPPFLAGS) -c $< )

 $(OBJECT_DIR)/%.o : $(SOURCE_DIR)/%.c
 	( cd $(OBJECT_DIR) ; $(CC) -o $@ $(CFLAGS) $(CPPFLAGS) -c $< )

+$(OBJECT_DIR)/%.o : $(SOURCE_DIR)/%.cu
+	( cd $(OBJECT_DIR) ; nvcc -gencode=arch=compute_10,code=\"sm_10,compute_10\" -gencode=arch=compute_20,code=\"sm_20,compute_20\" --compile -m64 --compiler-options -fno-strict-aliasing --ptxas-options=-v -I$(SOURCE_DIR) -I/usr/local/cuda/include -I/people/vbud003/NVIDIA_GPU_Computing_SDK/C/common/inc -I/people/vbud003/NVIDIA_GPU_Computing_SDK/shared//inc --use_fast_math -DUNIX -O2 -o $@ -c $< )
+
+
 ifeq ($(USEFIELDML),true)
 FIELDML_OBJECT = \
 	$(OBJECT_DIR)/fieldml_util_routines.o \
@@ -1403,8 +1407,8 @@ $(OBJECT_DIR)/equations_set_routines.o : $(SOURCE_DIR)/equations_set_routines.f9
 	$(OBJECT_DIR)/timer_f.o \
 	$(OBJECT_DIR)/types.o

-$(OBJECT_DIR)/external_dae_solver_routines.o : $(SOURCE_DIR)/external_dae_solver_routines.c \
-	$(SOURCE_DIR)/external_dae_solver_routines.h
+$(OBJECT_DIR)/external_dae_solver_routines.o : $(SOURCE_DIR)/external_dae_solver_routines.cu \
+	$(SOURCE_DIR)/external_dae_solver_routines.h \

 $(OBJECT_DIR)/field_routines.o : $(SOURCE_DIR)/field_routines.f90 \
 	$(OBJECT_DIR)/base_routines.o \

diff --git a/src/cuda_solver_routines.cu b/src/cuda_solver_routines.cu
new file mode 100644
index 00000000..afe660b4
--- /dev/null
+++ b/src/cuda_solver_routines.cu
@@ -0,0 +1,489 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <cutil_inline.h>
+
+extern __shared__ double shared_array[];
+const int algebraicCount = 25;
+const int rateStateCount = 8;
+const int constantCount = 0;
+const double FLOPSPerFunction = 193.0f;
+const int DEFAULT_TESTING_THREADS = 2000000;
+const int sharedMemoryCellModel = 0;
+const char* cellModelName = "LR R3";
+
+////////////////////////////////////////////////////////////////////////////////
+// Cell Model Device Functions
+////////////////////////////////////////////////////////////////////////////////
+__device__ void computeRates(float VOI, double* DUMMY, double* STATES, double* ALGEBRAIC, double* RATES)
+{
+  ALGEBRAIC[0] = -25.5; // Add stimulus in proper
+  ALGEBRAIC[1] = (0.32f*STATES[0]+15.0816f)/(1.0f - (expf(- 0.1f*STATES[0]-4.713f))); // 7 ops
+
+  if (STATES[0] < -40.0f) {
+    ALGEBRAIC[2] = 0.135f*(expf(((80.0f+STATES[0])/- 6.8f))); // 4 ops
+    ALGEBRAIC[3] = (( - 127140*(expf(0.2444f*STATES[0])) - 3.47400e-05*(expf(-0.04391f*STATES[0])))*(STATES[0]+37.78f))/(1.0f+(expf(0.311f*STATES[0]+24.64053))); // 14 ops
+    ALGEBRAIC[9] = 3.56f*(expf(0.079f*STATES[0]))+ 310000*(expf(0.35f*STATES[0])); // 7 ops
+    ALGEBRAIC[10] = 0.1212f*(expf(-0.01052f*STATES[0]))/(1.0f+(expf(-0.1378f*STATES[0]-5.531292f))); // 8 ops
+  } else {
+    ALGEBRAIC[2] = 0.00000;
+    ALGEBRAIC[3] = 0.00000;
+    ALGEBRAIC[9] = 1.0f/( 0.13f*(1.0f+(expf(((STATES[0]+10.66f)/- 11.1f)))));
+    ALGEBRAIC[10] = ( 0.3f*(expf(-2.53500e-07*STATES[0])))/(1.0f+(expf(-0.1f*STATES[0]-3.2f)));
+  }
+  if (STATES[0] < -100.0f) {
+    ALGEBRAIC[16] = 2.837f*(expf(0.04f*STATES[0]+3.08f) - 1.0f)/((STATES[0]+77.0f)*(expf(0.04f*STATES[0]+1.4f))); // 11 ops
+  } else {
+    ALGEBRAIC[16] = 1.0f;
+  }
+
+  ALGEBRAIC[4] = (0.095f*(expf(-0.01f*STATES[0] + 0.5f)))/(1.0f+(expf(-0.072*STATES[0] + 0.36f))); // 9 ops
+  ALGEBRAIC[5] = (0.012f*(expf(-0.008f*STATES[0]-0.224f)))/(1.0f+(expf(0.15f*STATES[0]+4.2f))); // 9 ops
+  ALGEBRAIC[6] = (0.0005f*(expf(0.083f*STATES[0]+4.15f)))/(1.0f+(expf(0.057f*STATES[0]+2.85f))); // 9 ops
+  ALGEBRAIC[7] = 23*(powf(STATES[1], 3.0f))*STATES[2]*STATES[3]*(STATES[0] - 54.794463f); // 6 ops
+  ALGEBRAIC[8] = 0.08f*(expf(-STATES[0]/11.0000)); // 3 ops
+  ALGEBRAIC[11] = (0.07f*(expf(-0.017f*STATES[0]-0.748f)))/(1.0f+(expf(0.05f*STATES[0]+2.2f))); // 9 ops
+  ALGEBRAIC[12] = (0.0065f*(expf(-0.02f*STATES[0]-0.6f)))/(1.0f+(expf(-0.2f*STATES[0]-6.0f))); // 9 ops
+  ALGEBRAIC[13] = (0.0013f*(expf(-0.06f*STATES[0]-1.2f)))/(1.0f+(expf(-0.04f*STATES[0]-0.8f))); // 9 ops
+  ALGEBRAIC[14] = 7.7f - 13.0287f*logf(STATES[4]); // 3 ops
+  ALGEBRAIC[15] = 0.09f*STATES[5]*STATES[6]*(STATES[0] - ALGEBRAIC[14]); // 4 ops
+  ALGEBRAIC[17] = 0.282f*STATES[7]*ALGEBRAIC[16]*(STATES[0] + 77.56758f); // 4 ops
+  ALGEBRAIC[18] = 1.02f/(1.0f+(expf(0.2385f*STATES[0] + 6.83967915f))); // 4 ops
+  ALGEBRAIC[19] = (0.49124f*(expf( 0.08032f *STATES[0] + 7.49939f) + expf(0.06175f*STATES[0] - 31.271255925f)))/(1.00000+expf(-0.514300*STATES[0] - 214.85137268791f)); // 13 ops
+  ALGEBRAIC[20] = ALGEBRAIC[18]/(ALGEBRAIC[18]+ALGEBRAIC[19]); // 2 ops
+  ALGEBRAIC[21] = 0.6047f*ALGEBRAIC[20]*(STATES[0] + 87.89290f); // 3 ops
+  ALGEBRAIC[22] = 1.0f/(1.0f+(exp(((7.488f - STATES[0])/5.98f)))); // 5 ops
+  ALGEBRAIC[23] =
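+  // The "N ops" notes on each ALGEBRAIC assignment are hand-counted
+  // floating-point operation tallies; FLOPSPerFunction (193 for this
+  // "LR R3" Luo-Rudy cell model) is their sum, and it feeds the GFLOPS
+  // estimate that the timing code in solve() reports.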
0.0183f*ALGEBRAIC[22]*(STATES[0] + 87.89290f); // 3 ops + ALGEBRAIC[24] = 0.03921f*STATES[0] +2.3475027f; // 3 ops + + RATES[0] = -(ALGEBRAIC[0]+ALGEBRAIC[7]+ALGEBRAIC[15]+ALGEBRAIC[17]+ALGEBRAIC[21]+ALGEBRAIC[23]+ALGEBRAIC[24]); // 7 ops + RATES[1] = ALGEBRAIC[1]*(1.00000 - STATES[1]) - ALGEBRAIC[8]*STATES[1]; // 4 ops + RATES[2] = ALGEBRAIC[2]*(1.00000 - STATES[2]) - ALGEBRAIC[9]*STATES[2]; // 4 ops + RATES[3] = ALGEBRAIC[3]*(1.00000 - STATES[3]) - ALGEBRAIC[10]*STATES[3]; // 4 ops + RATES[4] = - 0.0001f*ALGEBRAIC[15]+ 0.000007f - 0.07f*STATES[4]; // 4 ops + RATES[5] = ALGEBRAIC[4]*(1.00000 - STATES[5]) - ALGEBRAIC[11]*STATES[5]; // 4 ops + RATES[6] = ALGEBRAIC[5]*(1.00000 - STATES[6]) - ALGEBRAIC[12]*STATES[6]; // 4 ops + RATES[7] = ALGEBRAIC[6]*(1.00000 - STATES[7]) - ALGEBRAIC[13]*STATES[7]; // 4 ops +} + +//////////////////////////////////////////////////////////////////////////////// +// Cell Model Host Functions ////////// Should Not Be Needed Later ///////////// +//////////////////////////////////////////////////////////////////////////////// +void initProblem(int num_threads, double* STATES) +{ + int i; + + STATES[0] = -84.3801107371; + STATES[1] = 0.00171338077730188; + STATES[2] = 0.982660523699656; + STATES[3] = 0.989108212766685; + STATES[4] = 0.00017948816388306; + STATES[5] = 0.00302126301779861; + STATES[6] = 0.999967936476325; + STATES[7] = 0.0417603108167287; + + for (i=1; i 0 + double threadConstants[constantCount]; + + intitialiseConstants(threadConstants); +#endif + + + for (i=0; i 0 + integrator(timeSteps, stepSize, threadConstants, threadStates + threadIdx.x*rateStateCount, threadAlgebraic); +#else + integrator(timeSteps, stepSize, NULL, threadStates + threadIdx.x*rateStateCount, threadAlgebraic); +#endif + + for (i=0; i freeGPUMemory && count < 200 ) { + num_partitions++; + count++; + if (num_threads%(num_partitions*num_streams)==0) { + pagedMemorySize = sizeof(double)*rateStateCount*num_threads/num_partitions; + } + } + + // Adjust threads per blocks so that states variables fit in shared memory + count=0; + sharedMem = (size_t) (sharedMemoryIntegrator + sharedMemoryDevice + sharedMemoryCellModel) * threads_per_block * sizeof(double); + while ( sharedMem > deviceProp.sharedMemPerBlock*0.95 && count < 200 ) { + if (threads_per_block > 32 ) { + threads_per_block-=32; + } else { + fprintf(stderr, "Cannot fit variables in shared memory"); + exit(EXIT_FAILURE); + } + count++; + sharedMem = (size_t) (sharedMemoryIntegrator + sharedMemoryDevice + sharedMemoryCellModel) * threads_per_block * sizeof(double); + } + num_blocks=num_threads/threads_per_block; + if (num_threads % threads_per_block !=0) num_blocks++; // Round up calculation + +//#ifdef DEBUG + if (timing_file) { + printf("> Device name : %s\n", deviceProp.name ); + printf("> CUDA Capable SM %d.%d hardware with %d multi-processors\n", + deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); + printf("> Cell Model = %s, Integrator = %s\n", cellModelName, integratorName); + printf("> num_threads = %d, num_blocks = %d, threads_per_block = %d, num_partitions = %d, timeSteps = %d, num_streams = %d\n", + num_threads, num_blocks, threads_per_block, num_partitions, timeSteps, num_streams); + } +//#endif + + // Allocate CUDA device and host pinned memory + cutilSafeCall( cudaMallocHost((void**) &h_paged_states, pagedMemorySize) ); + cutilSafeCall( cudaMalloc((void **) &d_states, pagedMemorySize) ); + + // Create streams + streams = (cudaStream_t*) malloc(num_streams * sizeof(cudaStream_t)); + for(i = 0; i < 
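+ // One stream per slice of the pinned buffer: cudaMemcpyAsync only overlaps
+ // with kernel execution when the host memory is page-locked (allocated above
+ // with cudaMallocHost), which is what lets the partition loop below hide
+ // host/device transfer time behind computation.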
num_streams; i++) + cutilSafeCall( cudaStreamCreate(&(streams[i])) ); + + // Setup execution parameters + dim3 grid(num_blocks/num_streams/num_partitions, 1, 1); + printf("blox %d streams %d parts %d\n", num_blocks, num_streams, num_partitions); + dim3 threads(threads_per_block, 1, 1); + + if (timing_file) { + // Setup and start global timer + timer = 0; + cutCreateTimer(&timer); + cutilCheckError(cutStartTimer(timer)); + + // Test kernel speed in default stream (timing is more accurate in default stream) + memcpy(h_paged_states, h_states, pagedMemorySize/num_streams); + cutilSafeCall( cudaMemcpy(d_states, h_paged_states, pagedMemorySize/num_streams, + cudaMemcpyHostToDevice) ); + // Start kernel timer + kernel_timer = 0; + cutCreateTimer(&kernel_timer); + cutilCheckError(cutStartTimer(kernel_timer)); + // Start kernel + printf("grid.x %d threads.x %d mem %d\n", grid.x, threads.x, sharedMem); + solveSystem<<>>(timeSteps, stepSize, d_states); + checkCUDAError("Single Kernel Execution"); + cutilSafeCall( cudaThreadSynchronize() ); + // Stop kernel Timer + cutilCheckError(cutStopTimer(kernel_timer)); + cutilSafeCall( cudaMemcpy(h_paged_states, d_states, pagedMemorySize/num_streams, + cudaMemcpyDeviceToHost) ); + memcpy(h_states, h_paged_states, pagedMemorySize/num_streams); + + // Prefetch data for next partition in first stream + if (num_partitions>1) { + global_offset = rateStateCount * num_streams * grid.x * threads.x; + memcpy(h_paged_states, h_states + global_offset, pagedMemorySize/num_streams); + } + } else { + memcpy(h_paged_states, h_states, pagedMemorySize); + } + + // Queue kernel calls into streams to hide memory transfers (num_partitions sets of kernel calls in each stream) + for(i = 0; i < num_partitions; i++) { + // Asynchronously launch num_streams memcopies + for(j = 0; j < num_streams; j++) { + if (timing_file==NULL || !(i==0 && j==0)) { + local_offset = j * rateStateCount * grid.x * threads.x ; + cutilSafeCall( cudaMemcpyAsync(d_states + local_offset, h_paged_states + local_offset, + (size_t)pagedMemorySize/num_streams, + cudaMemcpyHostToDevice, streams[j]) ); + } + } + // Execute the kernel + // Asynchronously launch num_streams kernels, each operating on its own portion of data + for(j = 0; j < num_streams; j++) { + if (timing_file==NULL || !(i==0 && j==0)) { + local_offset = j * grid.x * threads.x ; + solveSystem<<>>(timeSteps, stepSize, + d_states + rateStateCount*local_offset); + } + } + + // Asynchronoously launch num_streams memcopies + for(j = 0; j < num_streams; j++){ + if (timing_file==NULL || !(i==0 && j==0)) { + local_offset = j * rateStateCount * grid.x * threads.x ; + cutilSafeCall( cudaMemcpyAsync(h_paged_states + local_offset, d_states + local_offset, + pagedMemorySize/num_streams, + cudaMemcpyDeviceToHost, streams[j]) ); + } + } + + // Execute memcpys in and out of paged memory when CUDA calls in the streams have finished + for(j = 0; j < num_streams; j++){ + if (timing_file==NULL || !(i==0 && j==0)) { + cudaStreamSynchronize(streams[j]); + + local_offset = j * rateStateCount * grid.x * threads.x ; + global_offset = i * num_streams * grid.x * threads.x; + + memcpy(h_states + rateStateCount * global_offset + local_offset, h_paged_states + local_offset, + pagedMemorySize/num_streams); + + if (i < num_partitions - 1) { + global_offset = (i + 1) * num_streams * grid.x * threads.x; + memcpy(h_paged_states + local_offset, h_states + rateStateCount * global_offset + local_offset, + pagedMemorySize/num_streams); + } + } + } + } + + if (timing_file) { + // Stop 
global timer + cutilCheckError(cutStopTimer(timer)); + + // Calculate timing statistics + dSeconds = cutGetTimerValue(timer)/1000.0; + nFLOPS = (FLOPSPerTimeStep*rateStateCount + FLOPSPerFunction*FunctionEvals)*timeSteps*num_threads; + gflops = 1.0e-9 * nFLOPS/dSeconds; + + kernel_dSeconds = cutGetTimerValue(kernel_timer)/1000.0; + kernel_nFLOPS = (FLOPSPerTimeStep*rateStateCount + FLOPSPerFunction*FunctionEvals)*timeSteps*num_threads/num_streams/num_partitions; + kernel_gflops = 1.0e-9 * kernel_nFLOPS/kernel_dSeconds; + + // Store Stats + fprintf(timing_file,"%s\t%s\t%d\t%d\t%d\t%d\t%d\t%f\t%f\t%f\t%f\t%f\n", cellModelName, integratorName, num_threads, num_blocks, + threads_per_block, num_partitions, num_streams, dSeconds, gflops, kernel_dSeconds, + kernel_gflops, gflops/kernel_gflops*100); + } + + // Deallocate Memory and Release Threads + for(i = 0; i < num_streams; i++) + cutilSafeCall( cudaStreamDestroy(streams[i]) ); + cutilSafeCall( cudaFree(d_states) ); + cutilSafeCall( cudaFreeHost(h_paged_states) ); + cudaThreadExit(); +} + + +//////////////////////////////////////////////////////////////////////////////// +// Auxilliary Testing Functions +//////////////////////////////////////////////////////////////////////////////// + +void solveProblem(unsigned int timeSteps, unsigned int num_threads, unsigned int threads_per_block, + unsigned int num_partitions, unsigned int num_streams, FILE *timing_file, FILE *results_file) +{ + unsigned int i, j; + float startTime = 0.0f; + float endTime = 0.2f; + + double* h_states = NULL; + + h_states = (double *) safeMalloc(sizeof(double)*rateStateCount*num_threads); + + initProblem(num_threads, h_states); + + solve(h_states, startTime, endTime, timeSteps, num_threads, threads_per_block, num_partitions, num_streams, timing_file); + + if (results_file) { + fprintf(results_file,"\n\n"); + + for (i=0; i #include "external_dae_solver_routines.h" +#include "cuda_solver_routines.cu" /* Type definitions */ /* Function Definitions */ - +extern "C" +{ void SolverDAEExternalIntegrate(const int NumberOfDofs, const double StartTime, const double EndTime, @@ -81,8 +83,12 @@ void SolverDAEExternalIntegrate(const int NumberOfDofs, int *err) { - printf("Hello World!\n"); - -} + unsigned int timesteps = 10; + + printf("start %f end %f int step %f dofs %d", StartTime, EndTime, InitialStep[0], NumberOfDofs); + solve(StateData, StartTime, EndTime, timesteps, NumberOfDofs, 1024, 10, 2, NULL); + +} +} diff --git a/src/external_dae_solver_routines.h b/src/external_dae_solver_routines.h index fa00d6c5..7f8682a3 100644 --- a/src/external_dae_solver_routines.h +++ b/src/external_dae_solver_routines.h @@ -42,4 +42,19 @@ * the terms of any one of the MPL, the GPL or the LGPL. * */ - +extern "C" +{ +void SolverDAEExternalIntegrate(const int NumberOfDofs, + const double StartTime, + const double EndTime, + double *InitialStep, + const int OnlyOneModelIndex, + int *ModelsData, + int NumberOfState, + double *StateData, + int NumberOfParameters, + double *ParametersData, + int NumberOfIntermediate, + double *IntermediateData, + int *err); +} From 5d014f354cdddcd36c826e5016dd11e454ef5ebf Mon Sep 17 00:00:00 2001 From: Jin Budelmann Date: Mon, 24 Jan 2011 23:33:12 +0000 Subject: [PATCH 03/24] Updated cellml to opencmiss field mapping calls. 
Tracker item 2180 git-svn-id: https://opencmiss.svn.sourceforge.net/svnroot/opencmiss/cm/branches/developers/jin@1918 61e36070-c886-4827-b7c0-ed9e4dc59cc8 From 7557b3f573aebe9b8884f82bcc34365eb96a4887 Mon Sep 17 00:00:00 2001 From: Jin Budelmann Date: Tue, 1 Feb 2011 22:02:33 +0000 Subject: [PATCH 04/24] CMGUI heart mesh files added. Tracker item 2180 git-svn-id: https://opencmiss.svn.sourceforge.net/svnroot/opencmiss/cm/branches/developers/jin@1988 61e36070-c886-4827-b7c0-ed9e4dc59cc8 From 5707beb20dd51dd81a03e9c3dbb8aca98569f647 Mon Sep 17 00:00:00 2001 From: Jin Budelmann Date: Thu, 3 Feb 2011 01:33:59 +0000 Subject: [PATCH 05/24] Implemented VTK reading in MonodomainCUDA example. Getting Segment violations when accesssing converted C pointers. No Tracker Item git-svn-id: https://opencmiss.svn.sourceforge.net/svnroot/opencmiss/cm/branches/developers/jin@2001 61e36070-c886-4827-b7c0-ed9e4dc59cc8 --- Makefile | 11 +++++++--- src/field_IO_routines.f90 | 16 +++++++------- src/opencmiss.f90 | 45 ++++++++++++++++++++++++++++++++++++--- src/vtk_import_routines.c | 20 ++++++++--------- 4 files changed, 68 insertions(+), 24 deletions(-) diff --git a/Makefile b/Makefile index f07f6f16..39e29084 100644 --- a/Makefile +++ b/Makefile @@ -855,6 +855,7 @@ OBJECTS = $(OBJECT_DIR)/advection_diffusion_equation_routines.o \ $(OBJECT_DIR)/trees.o \ $(OBJECT_DIR)/types.o \ $(OBJECT_DIR)/util_array.o \ + $(OBJECT_DIR)/vtk_import_routines.o \ $(FIELDML_OBJECT) ifeq ($(OPERATING_SYSTEM),linux)# Linux @@ -1416,8 +1417,8 @@ $(OBJECT_DIR)/equations_set_routines.o : $(SOURCE_DIR)/equations_set_routines.f9 $(OBJECT_DIR)/timer_f.o \ $(OBJECT_DIR)/types.o -$(OBJECT_DIR)/external_dae_solver_routines.o : $(SOURCE_DIR)/external_dae_solver_routines.c \ - $(SOURCE_DIR)/external_dae_solver_routines.h \ +$(OBJECT_DIR)/external_dae_solver_routines.o : $(SOURCE_DIR)/external_dae_solver_routines.c \ + $(SOURCE_DIR)/external_dae_solver_routines.h $(OBJECT_DIR)/field_routines.o : $(SOURCE_DIR)/field_routines.f90 \ $(OBJECT_DIR)/base_routines.o \ @@ -1455,7 +1456,8 @@ $(OBJECT_DIR)/field_IO_routines.o : $(SOURCE_DIR)/field_IO_routines.f90 \ $(OBJECT_DIR)/node_routines.o \ $(OBJECT_DIR)/region_routines.o \ $(OBJECT_DIR)/strings.o \ - $(OBJECT_DIR)/types.o + $(OBJECT_DIR)/types.o \ + $(OBJECT_DIR)/vtk_import_routines.o $(OBJECT_DIR)/finite_elasticity_routines.o : $(SOURCE_DIR)/finite_elasticity_routines.f90 \ $(OBJECT_DIR)/base_routines.o \ @@ -2126,6 +2128,9 @@ $(OBJECT_DIR)/util_array.o : $(SOURCE_DIR)/util_array.f90 \ $(OBJECT_DIR)/base_routines.o \ $(OBJECT_DIR)/types.o +$(OBJECT_DIR)/vtk_import_routines.o : $(SOURCE_DIR)/vtk_import_routines.c \ + $(SOURCE_DIR)/vtk_import_routines.h + # ---------------------------------------------------------------------------- # # clean and clobber for removing objects and executable. 
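The segmentation violations mentioned in the patch 05 message are the usual symptom of handing an allocated array back to Fortran through a plain double* argument: the malloc result assigned to readVTK's local points parameter is lost when the function returns, so the Fortran TYPE(C_PTR) never receives a valid address. Patch 06 below fixes the Fortran declarations; the C side correspondingly needs a pointer-to-pointer. A minimal sketch of the convention, using a hypothetical readPoints helper rather than the real readVTK signature:

#include <stdlib.h>

/* Output-pointer convention for a Fortran TYPE(C_PTR), INTENT(OUT) argument:
   allocate in C, write the address through the double**, and let the Fortran
   caller wrap it with C_F_POINTER once the element count is known. */
void readPoints(const char *filePath, double **points, int *numPoints)
{
    *numPoints = 8; /* stands in for the count parsed from the VTK header */
    *points = (double *) malloc(sizeof(double) * (*numPoints) * 3);
}

On the Fortran side the returned address then becomes an array pointer with CALL C_F_POINTER(points, pointArray, [numPoints*3]).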
diff --git a/src/field_IO_routines.f90 b/src/field_IO_routines.f90 index a08b6a73..6c93bc45 100644 --- a/src/field_IO_routines.f90 +++ b/src/field_IO_routines.f90 @@ -353,16 +353,16 @@ FUNCTION FieldExport_DerivativeIndices( handle, componentNumber, fieldType, vari INTEGER(C_INT) :: FieldExport_DerivativeIndices END FUNCTION FieldExport_DerivativeIndices - SUBROUTINE ReadVTK(filePath, points, numPoints, cells, numCells) & + SUBROUTINE READ_VTK(filePath, points, numPoints, cells, numCells) & & BIND(C, NAME="readVTK") USE TYPES USE ISO_C_BINDING - CHARACTER(C_CHAR), INTENT(IN) :: filePath(*) - REAL(C_DOUBLE), INTENT(OUT) :: points(*) - INTEGER(C_INT), INTENT(OUT) :: numPoints(*) - INTEGER(C_INT), INTENT(OUT) :: cells(*) - INTEGER(C_INT), INTENT(OUT) :: numCells(*) - END SUBROUTINE ReadVTK + CHARACTER(LEN=1,KIND=C_CHAR), INTENT(IN) :: filePath(*) + TYPE(C_PTR), INTENT(OUT):: points(*) + INTEGER(C_INT), INTENT(OUT) :: numPoints + TYPE(C_PTR), INTENT(OUT) :: cells(*) + INTEGER(C_INT), INTENT(OUT) :: numCells + END SUBROUTINE READ_VTK END INTERFACE @@ -391,7 +391,7 @@ END SUBROUTINE ReadVTK MODULE PROCEDURE CHECKED_DEALLOCATE_BASIS END INTERFACE !CHECKED_DEALLOCATE - PUBLIC :: FIELD_IO_FIELDS_IMPORT, FIELD_IO_NODES_EXPORT, FIELD_IO_ELEMENTS_EXPORT + PUBLIC :: FIELD_IO_FIELDS_IMPORT, FIELD_IO_NODES_EXPORT, FIELD_IO_ELEMENTS_EXPORT, READ_VTK CONTAINS diff --git a/src/opencmiss.f90 b/src/opencmiss.f90 index 9b4d3b3a..6d69081f 100644 --- a/src/opencmiss.f90 +++ b/src/opencmiss.f90 @@ -3406,7 +3406,7 @@ MODULE OPENCMISS MODULE PROCEDURE CMISSFieldIONodesExportVSVSObj END INTERFACE !CMISSFieldIONodesExport - PUBLIC CMISSFieldIOElementsExport,CMISSFieldIONodesExport,CMISSFieldIOFieldsImport + PUBLIC CMISSFieldIOElementsExport,CMISSFieldIONodesExport,CMISSFieldIOFieldsImport, CMISSReadVTK !!================================================================================================================================== !! @@ -29086,7 +29086,7 @@ SUBROUTINE CMISSFieldIONodesExportVSVSObj(Fields,FileName,Method,Err) END SUBROUTINE CMISSFieldIONodesExportVSVSObj - ! + ! !================================================================================================================================ ! @@ -29127,9 +29127,48 @@ SUBROUTINE CMISSFieldIOFieldsImport(FileName, Method, Region, Mesh, MeshUserNumb CALL EXITS("CMISSFieldIOFieldsImport") CALL CMISS_HANDLE_ERROR(Err,ERROR) RETURN - END SUBROUTINE CMISSFieldIOFieldsImport + ! + !================================================================================================================================ + ! 
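+ !The points and cells arguments come back as raw C pointers allocated by
+ !the C reader; the caller converts them with C_F_POINTER once numPoints
+ !and numCells are known.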
+ + !> Import mesh from vtk file + SUBROUTINE CMISSReadVTK(filePath, points, numPoints, cells, numCells, Err) + !Argument variables + USE ISO_C_BINDING + CHARACTER(LEN=*), INTENT(IN) :: filePath + TYPE(C_PTR), INTENT(OUT) :: points(*) + INTEGER(C_INT), INTENT(OUT) :: numPoints + TYPE(C_PTR), INTENT(OUT) :: cells(*) + INTEGER(C_INT), INTENT(OUT) :: numCells + INTEGER(INTG), INTENT(OUT) :: Err !LEN_TRIM(filePath)) THEN + LENGTH=LEN_TRIM(filePath) + ELSE + LENGTH=SIZE(Cstring,1)-1 + ENDIF + DO i=1,LENGTH + Cstring(i)=filePath(i:i) + ENDDO !i + !Null terminate the string + Cstring(LENGTH+1)=C_NULL_CHAR + + CALL READ_VTK(Cstring, points, numPoints, cells, numCells) + + CALL EXITS("CMISSReadVTK") + RETURN +999 CALL ERRORS("CMISSReadVTK",Err,ERROR) + CALL EXITS("CMISSReadVTK") + CALL CMISS_HANDLE_ERROR(Err,ERROR) + RETURN + END SUBROUTINE CMISSReadVTK !!================================================================================================================================== !! diff --git a/src/vtk_import_routines.c b/src/vtk_import_routines.c index c454c006..c0be415e 100644 --- a/src/vtk_import_routines.c +++ b/src/vtk_import_routines.c @@ -34,37 +34,37 @@ void readVTK(const char* filePath, double* points, int* numPoints, int* cells, i fgets(buffer, 100, file); fgets(buffer, 100, file); - FSCANF(file, "%*s %d %*s\n", &numPoints); + FSCANF(file, "%*s %d %*s\n", numPoints); - printf("Number of Points = %d\n", numPoints); + printf("Number of Points = %d\n", (*numPoints)); - points = (double *) malloc(sizeof(double)*numPoints*3); + points = (double *) malloc(sizeof(double)*(*numPoints)*3); - while (count Date: Thu, 3 Feb 2011 02:21:46 +0000 Subject: [PATCH 06/24] Bug fixes. git-svn-id: https://opencmiss.svn.sourceforge.net/svnroot/opencmiss/cm/branches/developers/jin@2002 61e36070-c886-4827-b7c0-ed9e4dc59cc8 --- src/field_IO_routines.f90 | 4 ++-- src/opencmiss.f90 | 4 ++-- src/vtk_import_routines.c | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/field_IO_routines.f90 b/src/field_IO_routines.f90 index 6c93bc45..f603c763 100644 --- a/src/field_IO_routines.f90 +++ b/src/field_IO_routines.f90 @@ -358,9 +358,9 @@ SUBROUTINE READ_VTK(filePath, points, numPoints, cells, numCells) & USE TYPES USE ISO_C_BINDING CHARACTER(LEN=1,KIND=C_CHAR), INTENT(IN) :: filePath(*) - TYPE(C_PTR), INTENT(OUT):: points(*) + TYPE(C_PTR), INTENT(OUT):: points INTEGER(C_INT), INTENT(OUT) :: numPoints - TYPE(C_PTR), INTENT(OUT) :: cells(*) + TYPE(C_PTR), INTENT(OUT) :: cells INTEGER(C_INT), INTENT(OUT) :: numCells END SUBROUTINE READ_VTK diff --git a/src/opencmiss.f90 b/src/opencmiss.f90 index 6d69081f..3ef43a10 100644 --- a/src/opencmiss.f90 +++ b/src/opencmiss.f90 @@ -29137,9 +29137,9 @@ SUBROUTINE CMISSReadVTK(filePath, points, numPoints, cells, numCells, Err) !Argument variables USE ISO_C_BINDING CHARACTER(LEN=*), INTENT(IN) :: filePath - TYPE(C_PTR), INTENT(OUT) :: points(*) + TYPE(C_PTR), INTENT(OUT) :: points INTEGER(C_INT), INTENT(OUT) :: numPoints - TYPE(C_PTR), INTENT(OUT) :: cells(*) + TYPE(C_PTR), INTENT(OUT) :: cells INTEGER(C_INT), INTENT(OUT) :: numCells INTEGER(INTG), INTENT(OUT) :: Err ! Date: Thu, 3 Feb 2011 03:20:48 +0000 Subject: [PATCH 07/24] VTK importing properly into example. 
No Tracker Item git-svn-id: https://opencmiss.svn.sourceforge.net/svnroot/opencmiss/cm/branches/developers/jin@2003 61e36070-c886-4827-b7c0-ed9e4dc59cc8 From 61394ccd7f75bcc442f3b59a46e188b7fe2efa6b Mon Sep 17 00:00:00 2001 From: Jin Budelmann Date: Mon, 7 Feb 2011 02:30:06 +0000 Subject: [PATCH 08/24] Changes made to Monodomain files. Tracker item 2180 git-svn-id: https://opencmiss.svn.sourceforge.net/svnroot/opencmiss/cm/branches/developers/jin@2008 61e36070-c886-4827-b7c0-ed9e4dc59cc8 From e1d55156e4a8c8ed3c164a95ae6ff26f9ab47448 Mon Sep 17 00:00:00 2001 From: Jin Budelmann Date: Wed, 9 Feb 2011 01:26:14 +0000 Subject: [PATCH 09/24] Updated CUDA example, changed to fitzhugh nagumo for simplicity. See Tracker Item 2180 git-svn-id: https://opencmiss.svn.sourceforge.net/svnroot/opencmiss/cm/branches/developers/jin@2021 61e36070-c886-4827-b7c0-ed9e4dc59cc8 --- ExampleMakefile | 3 +- Makefile | 9 +- src/Darcy_equations_routines.f90 | 82 ++++----- src/Poisson_equations_routines.f90 | 62 ++++--- src/advection_diffusion_equation_routines.f90 | 119 +++++++++++- src/cmiss_c.c | 18 +- src/control_loop_routines.f90 | 58 +++--- src/cuda_solver_routines.cu | 169 ++++++++++-------- src/external_dae_solver_routines.c | 94 ---------- src/external_dae_solver_routines.cu | 6 +- src/external_dae_solver_routines.h | 6 +- src/field_IO_routines.f90 | 70 +++++--- src/finite_elasticity_routines.f90 | 159 ++++++++-------- src/fluid_mechanics_IO_routines.f90 | 4 + src/solver_routines.f90 | 1 + 15 files changed, 461 insertions(+), 399 deletions(-) delete mode 100644 src/external_dae_solver_routines.c diff --git a/ExampleMakefile b/ExampleMakefile index 41e0ea90..a67943b2 100644 --- a/ExampleMakefile +++ b/ExampleMakefile @@ -887,8 +887,7 @@ $(EXE_DIR) : mkdir -p $@; $(EXECUTABLE) : $(OBJECTS) $(OPENCMISS_LIBRARY) - $(EXE_LINK) -o $@ $(OBJECTS) $(OPENCMISS_LIBRARY) $(ELFLAGS) $(EXTERNAL_LIBRARIES) -#-L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/lib -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/common/lib/linux -L/people/vbud003/NVIDIA_GPU_Computing_SDK/shared//lib -lcudart -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/lib -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/common/lib/linux -L/people/vbud003/NVIDIA_GPU_Computing_SDK/shared//lib -lcudart -lcutil_x86_64 -lshrutil_x86_64 + $(EXE_LINK) -o $@ $(OBJECTS) $(OPENCMISS_LIBRARY) $(ELFLAGS) $(EXTERNAL_LIBRARIES) -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/lib -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/common/lib/linux -L/people/vbud003/NVIDIA_GPU_Computing_SDK/shared//lib -lcudart -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/lib -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/common/lib/linux -L/people/vbud003/NVIDIA_GPU_Computing_SDK/shared//lib -lcudart -lcutil_x86_64 -lshrutil_x86_64 $(OPENCMISS_LIBRARY) : ( cd $(GLOBAL_ROOT); $(MAKE) ) diff --git a/Makefile b/Makefile index 39e29084..457353ce 100644 --- a/Makefile +++ b/Makefile @@ -732,8 +732,7 @@ FPPFLAGS += $(EXTERNAL_INCLUDE_PATH) ELFLAGS += $(EXTERNAL_LIB_PATH) -.SUFFIXES: .f90 .c -#.cu +.SUFFIXES: .f90 .c .cu $(OBJECT_DIR)/%.o : $(SOURCE_DIR)/%.f90 ( cd $(OBJECT_DIR) ; $(FC) -o $@ $(FFLAGS) $(FPPFLAGS) -c $< ) @@ -741,8 +740,8 @@ $(OBJECT_DIR)/%.o : $(SOURCE_DIR)/%.f90 $(OBJECT_DIR)/%.o : $(SOURCE_DIR)/%.c ( cd $(OBJECT_DIR) ; $(CC) -o $@ $(CFLAGS) $(CPPFLAGS) -c $< ) -#(OBJECT_DIR)/%.o : $(SOURCE_DIR)/%.cu -# ( cd $(OBJECT_DIR) ; nvcc -gencode=arch=compute_10,code=\"sm_10,compute_10\" -gencode=arch=compute_20,code=\"sm_20,compute_20\" --compile -m64 --compiler-options -fno-strict-aliasing --ptxas-options=-v 
-I$(SOURCE_DIR) -I/usr/local/cuda/include -I/people/vbud003/NVIDIA_GPU_Computing_SDK/C/common/inc -I/people/vbud003/NVIDIA_GPU_Computing_SDK/shared//inc --use_fast_math -DUNIX -O2 -o $@ -c $< ) +$(OBJECT_DIR)/%.o : $(SOURCE_DIR)/%.cu + ( cd $(OBJECT_DIR) ; nvcc -gencode=arch=compute_10,code=\"sm_10,compute_10\" -gencode=arch=compute_20,code=\"sm_20,compute_20\" --compile -m64 --compiler-options -fno-strict-aliasing --ptxas-options=-v -I$(SOURCE_DIR) -I/usr/local/cuda/include -I/people/vbud003/NVIDIA_GPU_Computing_SDK/C/common/inc -I/people/vbud003/NVIDIA_GPU_Computing_SDK/shared//inc --use_fast_math -DUNIX -O2 -o $@ -c $< ) ifeq ($(USEFIELDML),true) @@ -1417,7 +1416,7 @@ $(OBJECT_DIR)/equations_set_routines.o : $(SOURCE_DIR)/equations_set_routines.f9 $(OBJECT_DIR)/timer_f.o \ $(OBJECT_DIR)/types.o -$(OBJECT_DIR)/external_dae_solver_routines.o : $(SOURCE_DIR)/external_dae_solver_routines.c \ +$(OBJECT_DIR)/external_dae_solver_routines.o : $(SOURCE_DIR)/external_dae_solver_routines.cu \ $(SOURCE_DIR)/external_dae_solver_routines.h $(OBJECT_DIR)/field_routines.o : $(SOURCE_DIR)/field_routines.f90 \ diff --git a/src/Darcy_equations_routines.f90 b/src/Darcy_equations_routines.f90 index e5174dee..e2f939e8 100644 --- a/src/Darcy_equations_routines.f90 +++ b/src/Darcy_equations_routines.f90 @@ -3041,7 +3041,7 @@ SUBROUTINE DARCY_EQUATION_FINITE_ELEMENT_CALCULATE(EQUATIONS_SET,ELEMENT_NUMBER, IF(EQUATIONS_SET%SUBTYPE==EQUATIONS_SET_INCOMPRESSIBLE_ELASTICITY_DRIVEN_DARCY_SUBTYPE .OR. & & EQUATIONS_SET%SUBTYPE==EQUATIONS_SET_INCOMPRESSIBLE_ELAST_MULTI_COMP_DARCY_SUBTYPE) THEN - C_PARAM=EQUATIONS%INTERPOLATION%SOURCE_INTERP_POINT(FIELD_U_VARIABLE_TYPE)%PTR%VALUES(4, NO_PART_DERIV) + C_PARAM=EQUATIONS%INTERPOLATION%SOURCE_INTERP_POINT(FIELD_U_VARIABLE_TYPE)%PTR%VALUES(mh, NO_PART_DERIV) !IF(ABS(C_PARAM)>1.0E-08) WRITE(*,*)'C_PARAM = ',C_PARAM @@ -5375,26 +5375,27 @@ SUBROUTINE DARCY_EQUATION_POST_SOLVE_OUTPUT_DATA(CONTROL_LOOP,SOLVER,ERR,ERROR,* !Subiteration intermediate solutions / iterates output: - IF(CONTROL_LOOP%PARENT_LOOP%LOOP_TYPE==PROBLEM_CONTROL_WHILE_LOOP_TYPE) THEN !subiteration exists - IF(CURRENT_LOOP_ITERATION<10) THEN - IF(SUBITERATION_NUMBER<10) THEN - WRITE(OUTPUT_FILE,'("T_00",I0,"_SB_0",I0,"_C",I0)') CURRENT_LOOP_ITERATION,SUBITERATION_NUMBER, & - & equations_set_idx - ELSE IF(SUBITERATION_NUMBER<100) THEN - WRITE(OUTPUT_FILE,'("T_00",I0,"_SB_",I0,"_C",I0)') CURRENT_LOOP_ITERATION,SUBITERATION_NUMBER, & - & equations_set_idx - END IF - FILE=OUTPUT_FILE - METHOD="FORTRAN" - EXPORT_FIELD=.TRUE. - IF(EXPORT_FIELD) THEN - CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,"Darcy export subiterates ...",ERR,ERROR,*999) - CALL FLUID_MECHANICS_IO_WRITE_CMGUI(EQUATIONS_SET%REGION,EQUATIONS_SET%GLOBAL_NUMBER,FILE, & - & ERR,ERROR,*999) - CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,OUTPUT_FILE,ERR,ERROR,*999) - ENDIF - ENDIF - ENDIF +! IF(CONTROL_LOOP%PARENT_LOOP%LOOP_TYPE==PROBLEM_CONTROL_WHILE_LOOP_TYPE) THEN !subiteration exists +! IF(CURRENT_LOOP_ITERATION<10) THEN +! IF(SUBITERATION_NUMBER<10) THEN +! WRITE(OUTPUT_FILE,'("T_00",I0,"_SB_0",I0,"_C",I0)') CURRENT_LOOP_ITERATION,SUBITERATION_NUMBER, & +! & equations_set_idx +! ELSE IF(SUBITERATION_NUMBER<100) THEN +! WRITE(OUTPUT_FILE,'("T_00",I0,"_SB_",I0,"_C",I0)') CURRENT_LOOP_ITERATION,SUBITERATION_NUMBER, & +! & equations_set_idx +! END IF +! FILE=OUTPUT_FILE +! METHOD="FORTRAN" +! EXPORT_FIELD=.TRUE. +! IF(EXPORT_FIELD) THEN +! CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,"Darcy export subiterates ...",ERR,ERROR,*999) +! 
CALL FLUID_MECHANICS_IO_WRITE_CMGUI(EQUATIONS_SET%REGION,EQUATIONS_SET%GLOBAL_NUMBER,FILE, & +! & ERR,ERROR,*999) +! CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,OUTPUT_FILE,ERR,ERROR,*999) +! ENDIF +! ENDIF +! ENDIF + ELSE !for single compartment (i.e. standary Darcy flow) equations sets CONTROL_TIME_LOOP=>CONTROL_LOOP DO loop_idx=1,CONTROL_LOOP%CONTROL_LOOP_LEVEL @@ -5445,25 +5446,26 @@ SUBROUTINE DARCY_EQUATION_POST_SOLVE_OUTPUT_DATA(CONTROL_LOOP,SOLVER,ERR,ERROR,* ENDIF - !Subiteration intermediate solutions / iterates output: - IF(CONTROL_LOOP%PARENT_LOOP%LOOP_TYPE==PROBLEM_CONTROL_WHILE_LOOP_TYPE) THEN !subiteration exists - IF(CURRENT_LOOP_ITERATION<10) THEN - IF(SUBITERATION_NUMBER<10) THEN - WRITE(OUTPUT_FILE,'("T_00",I0,"_SUB_000",I0)') CURRENT_LOOP_ITERATION,SUBITERATION_NUMBER - ELSE IF(SUBITERATION_NUMBER<100) THEN - WRITE(OUTPUT_FILE,'("T_00",I0,"_SUB_00",I0)') CURRENT_LOOP_ITERATION,SUBITERATION_NUMBER - END IF - FILE=OUTPUT_FILE - METHOD="FORTRAN" - EXPORT_FIELD=.TRUE. - IF(EXPORT_FIELD) THEN - CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,"Darcy export subiterates ...",ERR,ERROR,*999) - CALL FLUID_MECHANICS_IO_WRITE_CMGUI(EQUATIONS_SET%REGION,EQUATIONS_SET%GLOBAL_NUMBER,FILE, & - & ERR,ERROR,*999) - CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,OUTPUT_FILE,ERR,ERROR,*999) - ENDIF - ENDIF - ENDIF +! !Subiteration intermediate solutions / iterates output: +! IF(CONTROL_LOOP%PARENT_LOOP%LOOP_TYPE==PROBLEM_CONTROL_WHILE_LOOP_TYPE) THEN !subiteration exists +! IF(CURRENT_LOOP_ITERATION<10) THEN +! IF(SUBITERATION_NUMBER<10) THEN +! WRITE(OUTPUT_FILE,'("T_00",I0,"_SUB_000",I0)') CURRENT_LOOP_ITERATION,SUBITERATION_NUMBER +! ELSE IF(SUBITERATION_NUMBER<100) THEN +! WRITE(OUTPUT_FILE,'("T_00",I0,"_SUB_00",I0)') CURRENT_LOOP_ITERATION,SUBITERATION_NUMBER +! END IF +! FILE=OUTPUT_FILE +! METHOD="FORTRAN" +! EXPORT_FIELD=.TRUE. +! IF(EXPORT_FIELD) THEN +! CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,"Darcy export subiterates ...",ERR,ERROR,*999) +! CALL FLUID_MECHANICS_IO_WRITE_CMGUI(EQUATIONS_SET%REGION,EQUATIONS_SET%GLOBAL_NUMBER,FILE, & +! & ERR,ERROR,*999) +! CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,OUTPUT_FILE,ERR,ERROR,*999) +! ENDIF +! ENDIF +! 
ENDIF + ENDIF ENDIF ENDDO diff --git a/src/Poisson_equations_routines.f90 b/src/Poisson_equations_routines.f90 index 55fef7ce..71aa1e30 100644 --- a/src/Poisson_equations_routines.f90 +++ b/src/Poisson_equations_routines.f90 @@ -1165,7 +1165,7 @@ SUBROUTINE POISSON_EQUATION_EQUATIONS_SET_SETUP(EQUATIONS_SET,EQUATIONS_SET_SETU CASE(EQUATIONS_SET_LINEAR_SOURCE_POISSON_SUBTYPE) CALL POISSON_EQUATION_EQUATIONS_SET_LINEAR_SOURCE_SETUP(EQUATIONS_SET,EQUATIONS_SET_SETUP,ERR,ERROR,*999) CASE(EQUATIONS_SET_LINEAR_PRESSURE_POISSON_SUBTYPE,EQUATIONS_SET_NONLINEAR_PRESSURE_POISSON_SUBTYPE, & - & EQUATIONS_SET_FITTED_PRESSURE_POISSON_SUBTYPE) + & EQUATIONS_SET_FITTED_PRESSURE_POISSON_SUBTYPE,EQUATIONS_SET_ALE_PRESSURE_POISSON_SUBTYPE) CALL POISSON_EQUATION_EQUATIONS_SET_PRESSURE_POISSON_SETUP(EQUATIONS_SET,EQUATIONS_SET_SETUP,ERR,ERROR,*999) CASE(EQUATIONS_SET_QUADRATIC_SOURCE_POISSON_SUBTYPE) CALL POISSON_EQUATION_EQUATIONS_SET_NONLINEAR_SOURCE_SETUP(EQUATIONS_SET,EQUATIONS_SET_SETUP,ERR,ERROR,*999) @@ -1225,7 +1225,8 @@ SUBROUTINE POISSON_EQUATION_EQUATIONS_SET_SOLUTION_METHOD_SET(EQUATIONS_SET,SOLU CALL FLAG_ERROR(LOCAL_ERROR,ERR,ERROR,*999) END SELECT CASE(EQUATIONS_SET_LINEAR_SOURCE_POISSON_SUBTYPE,EQUATIONS_SET_LINEAR_PRESSURE_POISSON_SUBTYPE, & - & EQUATIONS_SET_NONLINEAR_PRESSURE_POISSON_SUBTYPE,EQUATIONS_SET_FITTED_PRESSURE_POISSON_SUBTYPE) + & EQUATIONS_SET_NONLINEAR_PRESSURE_POISSON_SUBTYPE,EQUATIONS_SET_FITTED_PRESSURE_POISSON_SUBTYPE, & + & EQUATIONS_SET_ALE_PRESSURE_POISSON_SUBTYPE) SELECT CASE(SOLUTION_METHOD) CASE(EQUATIONS_SET_FEM_SOLUTION_METHOD) EQUATIONS_SET%SOLUTION_METHOD=EQUATIONS_SET_FEM_SOLUTION_METHOD @@ -1330,6 +1331,10 @@ SUBROUTINE POISSON_EQUATION_EQUATIONS_SET_SUBTYPE_SET(EQUATIONS_SET,EQUATIONS_SE EQUATIONS_SET%CLASS=EQUATIONS_SET_CLASSICAL_FIELD_CLASS EQUATIONS_SET%TYPE=EQUATIONS_SET_POISSON_EQUATION_TYPE EQUATIONS_SET%SUBTYPE=EQUATIONS_SET_NONLINEAR_PRESSURE_POISSON_SUBTYPE + CASE(EQUATIONS_SET_ALE_PRESSURE_POISSON_SUBTYPE) + EQUATIONS_SET%CLASS=EQUATIONS_SET_CLASSICAL_FIELD_CLASS + EQUATIONS_SET%TYPE=EQUATIONS_SET_POISSON_EQUATION_TYPE + EQUATIONS_SET%SUBTYPE=EQUATIONS_SET_ALE_PRESSURE_POISSON_SUBTYPE CASE(EQUATIONS_SET_FITTED_PRESSURE_POISSON_SUBTYPE) EQUATIONS_SET%CLASS=EQUATIONS_SET_CLASSICAL_FIELD_CLASS EQUATIONS_SET%TYPE=EQUATIONS_SET_POISSON_EQUATION_TYPE @@ -1395,6 +1400,7 @@ SUBROUTINE POISSON_EQUATION_EQUATIONS_SET_PRESSURE_POISSON_SETUP(EQUATIONS_SET,E IF(ASSOCIATED(EQUATIONS_SET)) THEN IF(EQUATIONS_SET%SUBTYPE==EQUATIONS_SET_LINEAR_PRESSURE_POISSON_SUBTYPE.OR. & & EQUATIONS_SET%SUBTYPE==EQUATIONS_SET_NONLINEAR_PRESSURE_POISSON_SUBTYPE.OR. & + & EQUATIONS_SET%SUBTYPE==EQUATIONS_SET_ALE_PRESSURE_POISSON_SUBTYPE.OR. 
& & EQUATIONS_SET%SUBTYPE==EQUATIONS_SET_FITTED_PRESSURE_POISSON_SUBTYPE) THEN SELECT CASE(EQUATIONS_SET_SETUP%SETUP_TYPE) CASE(EQUATIONS_SET_SETUP_INITIAL_TYPE) @@ -1649,7 +1655,7 @@ SUBROUTINE POISSON_EQUATION_EQUATIONS_SET_PRESSURE_POISSON_SETUP(EQUATIONS_SET,E CASE(EQUATIONS_SET_SETUP_SOURCE_TYPE) SELECT CASE(EQUATIONS_SET%SUBTYPE) CASE(EQUATIONS_SET_LINEAR_PRESSURE_POISSON_SUBTYPE,EQUATIONS_SET_NONLINEAR_PRESSURE_POISSON_SUBTYPE, & - & EQUATIONS_SET_FITTED_PRESSURE_POISSON_SUBTYPE) + & EQUATIONS_SET_FITTED_PRESSURE_POISSON_SUBTYPE,EQUATIONS_SET_ALE_PRESSURE_POISSON_SUBTYPE) SELECT CASE(EQUATIONS_SET_SETUP%ACTION_TYPE) !Set start action CASE(EQUATIONS_SET_SETUP_START_ACTION) @@ -2107,19 +2113,19 @@ SUBROUTINE POISSON_PRE_SOLVE_UPDATE_PPE_MESH(CONTROL_LOOP,SOLVER,ERR,ERROR,*) CALL ENTERS("POISSON_PRE_SOLVE_UPDATE_PPE_MESH",ERR,ERROR,*999) IF(ASSOCIATED(CONTROL_LOOP)) THEN - CALL CONTROL_LOOP_CURRENT_TIMES_GET(CONTROL_LOOP,CURRENT_TIME,TIME_INCREMENT,ERR,ERROR,*999) + CALL CONTROL_LOOP_CURRENT_TIMES_GET(CONTROL_LOOP%PARENT_LOOP,CURRENT_TIME,TIME_INCREMENT,ERR,ERROR,*999) NULLIFY(SOLVER_LAPLACE) NULLIFY(SOLVER_ALE_PPE) IF(ASSOCIATED(SOLVER)) THEN - IF(ASSOCIATED(CONTROL_LOOP%PROBLEM)) THEN - SELECT CASE(CONTROL_LOOP%PROBLEM%SUBTYPE) - CASE(EQUATIONS_SET_LINEAR_PRESSURE_POISSON_SUBTYPE) + IF(ASSOCIATED(CONTROL_LOOP%PARENT_LOOP%PROBLEM)) THEN + SELECT CASE(CONTROL_LOOP%PARENT_LOOP%PROBLEM%SUBTYPE) + CASE(PROBLEM_LINEAR_PRESSURE_POISSON_SUBTYPE) ! do nothing ??? - CASE(EQUATIONS_SET_NONLINEAR_PRESSURE_POISSON_SUBTYPE) + CASE(PROBLEM_NONLINEAR_PRESSURE_POISSON_SUBTYPE) ! do nothing ??? - CASE(EQUATIONS_SET_ALE_PRESSURE_POISSON_SUBTYPE) + CASE(PROBLEM_ALE_PRESSURE_POISSON_SUBTYPE) !Update mesh within the dynamic solver - IF(SOLVER%SOLVE_TYPE==SOLVER_DYNAMIC_TYPE) THEN + IF(SOLVER%SOLVE_TYPE==SOLVER_LINEAR_TYPE) THEN !Get the independent field for the ALE PPE problem CALL SOLVERS_SOLVER_GET(SOLVER%SOLVERS,1,SOLVER_ALE_PPE,ERR,ERROR,*999) SOLVER_EQUATIONS_ALE_PPE=>SOLVER_ALE_PPE%SOLVER_EQUATIONS @@ -2143,7 +2149,8 @@ SUBROUTINE POISSON_PRE_SOLVE_UPDATE_PPE_MESH(CONTROL_LOOP,SOLVER,ERR,ERROR,*) CALL FIELD_PARAMETER_SET_DATA_GET(EQUATIONS_SET_ALE_PPE%INDEPENDENT%INDEPENDENT_FIELD, & & FIELD_U_VARIABLE_TYPE,FIELD_MESH_DISPLACEMENT_SET_TYPE,MESH_DISPLACEMENT_VALUES,ERR,ERROR,*999) CALL FLUID_MECHANICS_IO_READ_DATA(SOLVER_LINEAR_TYPE,MESH_DISPLACEMENT_VALUES, & - & NUMBER_OF_DIMENSIONS_ALE_PPE,INPUT_TYPE,INPUT_OPTION,CONTROL_LOOP%TIME_LOOP%ITERATION_NUMBER,1.0_DP) + & NUMBER_OF_DIMENSIONS_ALE_PPE,INPUT_TYPE,INPUT_OPTION,CONTROL_LOOP%PARENT_LOOP%TIME_LOOP% & + & ITERATION_NUMBER,1.0_DP) CALL FIELD_PARAMETER_SET_UPDATE_START(EQUATIONS_SET_ALE_PPE%INDEPENDENT%INDEPENDENT_FIELD, & & FIELD_U_VARIABLE_TYPE,FIELD_MESH_DISPLACEMENT_SET_TYPE,ERR,ERROR,*999) CALL FIELD_PARAMETER_SET_UPDATE_FINISH(EQUATIONS_SET_ALE_PPE%INDEPENDENT%INDEPENDENT_FIELD, & @@ -2200,7 +2207,7 @@ SUBROUTINE POISSON_PRE_SOLVE_UPDATE_PPE_MESH(CONTROL_LOOP,SOLVER,ERR,ERROR,*) CALL FIELD_PARAMETER_SET_UPDATE_FINISH(EQUATIONS_SET_ALE_PPE%GEOMETRY%GEOMETRIC_FIELD, & & FIELD_U_VARIABLE_TYPE,FIELD_VALUES_SET_TYPE,ERR,ERROR,*999) !Now use displacement values to calculate velocity values - TIME_INCREMENT=CONTROL_LOOP%TIME_LOOP%TIME_INCREMENT + TIME_INCREMENT=CONTROL_LOOP%PARENT_LOOP%TIME_LOOP%TIME_INCREMENT ALPHA=1.0_DP/TIME_INCREMENT CALL FIELD_PARAMETER_SETS_COPY(INDEPENDENT_FIELD_ALE_PPE,FIELD_U_VARIABLE_TYPE, & & FIELD_MESH_DISPLACEMENT_SET_TYPE,FIELD_MESH_VELOCITY_SET_TYPE,ALPHA,ERR,ERROR,*999) @@ -3724,7 +3731,7 @@ SUBROUTINE 
POISSON_EQUATION_PROBLEM_SETUP(PROBLEM,PROBLEM_SETUP,ERR,ERROR,*) CASE(PROBLEM_LINEAR_SOURCE_POISSON_SUBTYPE) CALL POISSON_EQUATION_PROBLEM_LINEAR_SOURCE_SETUP(PROBLEM,PROBLEM_SETUP,ERR,ERROR,*999) CASE(PROBLEM_LINEAR_PRESSURE_POISSON_SUBTYPE,PROBLEM_NONLINEAR_PRESSURE_POISSON_SUBTYPE, & - & PROBLEM_FITTED_PRESSURE_POISSON_SUBTYPE) + & PROBLEM_FITTED_PRESSURE_POISSON_SUBTYPE,PROBLEM_ALE_PRESSURE_POISSON_SUBTYPE) CALL POISSON_EQUATION_PROBLEM_PRESSURE_POISSON_SETUP(PROBLEM,PROBLEM_SETUP,ERR,ERROR,*999) CASE(PROBLEM_NONLINEAR_SOURCE_POISSON_SUBTYPE) CALL POISSON_EQUATION_PROBLEM_NONLINEAR_SOURCE_SETUP(PROBLEM,PROBLEM_SETUP,ERR,ERROR,*999) @@ -3760,7 +3767,7 @@ SUBROUTINE POISSON_EQUATION_FINITE_ELEMENT_CALCULATE(EQUATIONS_SET,ELEMENT_NUMBE INTEGER(INTG) FIELD_VAR_TYPE,ng,mh,mhs,mi,ms,nh,nhs,ni,ns,I,J,K,L,H,element_node_identity !,k1sum,k2sum REAL(DP) :: RWG,SUM,PGMSI(3),PGNSI(3),SUM2,DXI_DX(3,3),DELTA_T,DXI_DX_DX(3,3) REAL(DP) :: U_VALUE(3),U_DERIV(3,3),RHO_PARAM,MU_PARAM,U_OLD(3),U_SECOND(3,3,3),X(3),B(3),P_DERIV(3),DIFF_COEFF,W_VALUE(3) - TYPE(BASIS_TYPE), POINTER :: DEPENDENT_BASIS,GEOMETRIC_BASIS,SOURCE_BASIS + TYPE(BASIS_TYPE), POINTER :: DEPENDENT_BASIS,GEOMETRIC_BASIS,SOURCE_BASIS,INDEPENDENT_BASIS TYPE(EQUATIONS_TYPE), POINTER :: EQUATIONS TYPE(EQUATIONS_MAPPING_TYPE), POINTER :: EQUATIONS_MAPPING TYPE(EQUATIONS_MAPPING_LINEAR_TYPE), POINTER :: LINEAR_MAPPING @@ -3769,7 +3776,7 @@ SUBROUTINE POISSON_EQUATION_FINITE_ELEMENT_CALCULATE(EQUATIONS_SET,ELEMENT_NUMBE TYPE(EQUATIONS_MATRICES_RHS_TYPE), POINTER :: RHS_VECTOR TYPE(EQUATIONS_MATRICES_SOURCE_TYPE), POINTER :: SOURCE_VECTOR TYPE(EQUATIONS_MATRIX_TYPE), POINTER :: EQUATIONS_MATRIX - TYPE(FIELD_TYPE), POINTER :: DEPENDENT_FIELD,GEOMETRIC_FIELD,SOURCE_FIELD,MATERIALS_FIELD + TYPE(FIELD_TYPE), POINTER :: DEPENDENT_FIELD,GEOMETRIC_FIELD,SOURCE_FIELD,MATERIALS_FIELD,INDEPENDENT_FIELD TYPE(FIELD_VARIABLE_TYPE), POINTER :: FIELD_VARIABLE TYPE(QUADRATURE_SCHEME_TYPE), POINTER :: QUADRATURE_SCHEME TYPE(VARYING_STRING) :: LOCAL_ERROR @@ -3903,6 +3910,14 @@ SUBROUTINE POISSON_EQUATION_FINITE_ELEMENT_CALCULATE(EQUATIONS_SET,ELEMENT_NUMBE & MATERIALS_INTERP_PARAMETERS(FIELD_U_VARIABLE_TYPE)%PTR,ERR,ERROR,*999) CALL FIELD_INTERPOLATION_PARAMETERS_ELEMENT_GET(FIELD_VALUES_SET_TYPE,ELEMENT_NUMBER,EQUATIONS%INTERPOLATION% & & DEPENDENT_INTERP_PARAMETERS(FIELD_VAR_TYPE)%PTR,ERR,ERROR,*999) + IF(EQUATIONS_SET%SUBTYPE==EQUATIONS_SET_ALE_PRESSURE_POISSON_SUBTYPE) THEN + INDEPENDENT_FIELD=>EQUATIONS%INTERPOLATION%INDEPENDENT_FIELD + INDEPENDENT_BASIS=>INDEPENDENT_FIELD%DECOMPOSITION%DOMAIN(INDEPENDENT_FIELD% & + & DECOMPOSITION%MESH_COMPONENT_NUMBER)%PTR% & + & TOPOLOGY%ELEMENTS%ELEMENTS(ELEMENT_NUMBER)%BASIS + CALL FIELD_INTERPOLATION_PARAMETERS_ELEMENT_GET(FIELD_MESH_VELOCITY_SET_TYPE,ELEMENT_NUMBER,EQUATIONS%INTERPOLATION% & + & INDEPENDENT_INTERP_PARAMETERS(FIELD_VAR_TYPE)%PTR,ERR,ERROR,*999) + ENDIF !\todo: Check the influence of DIFF_COEFF DIFF_COEFF=1.0_DP @@ -3940,7 +3955,7 @@ SUBROUTINE POISSON_EQUATION_FINITE_ELEMENT_CALCULATE(EQUATIONS_SET,ELEMENT_NUMBE & INDEPENDENT_INTERP_POINT(FIELD_U_VARIABLE_TYPE)%PTR,ERR,ERROR,*999) W_VALUE(1)=EQUATIONS%INTERPOLATION%INDEPENDENT_INTERP_POINT(FIELD_U_VARIABLE_TYPE)%PTR%VALUES(1,NO_PART_DERIV) W_VALUE(2)=EQUATIONS%INTERPOLATION%INDEPENDENT_INTERP_POINT(FIELD_U_VARIABLE_TYPE)%PTR%VALUES(2,NO_PART_DERIV) - IF(FIELD_VARIABLE%NUMBER_OF_COMPONENTS==4) THEN + IF(INDEPENDENT_BASIS%NUMBER_OF_XI==3) THEN W_VALUE(3)=EQUATIONS%INTERPOLATION%INDEPENDENT_INTERP_POINT(FIELD_U_VARIABLE_TYPE)%PTR%VALUES(3,NO_PART_DERIV) 
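!Only a three-dimensional independent-field basis carries a third mesh-velocity component, hence the NUMBER_OF_XI test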
END IF ELSE @@ -4700,6 +4715,10 @@ SUBROUTINE POISSON_EQUATION_PROBLEM_SUBTYPE_SET(PROBLEM,PROBLEM_SUBTYPE,ERR,ERRO PROBLEM%CLASS=PROBLEM_CLASSICAL_FIELD_CLASS PROBLEM%TYPE=PROBLEM_POISSON_EQUATION_TYPE PROBLEM%SUBTYPE=PROBLEM_LINEAR_PRESSURE_POISSON_SUBTYPE + CASE(PROBLEM_ALE_PRESSURE_POISSON_SUBTYPE) + PROBLEM%CLASS=PROBLEM_CLASSICAL_FIELD_CLASS + PROBLEM%TYPE=PROBLEM_POISSON_EQUATION_TYPE + PROBLEM%SUBTYPE=PROBLEM_ALE_PRESSURE_POISSON_SUBTYPE CASE(PROBLEM_FITTED_PRESSURE_POISSON_SUBTYPE) PROBLEM%CLASS=PROBLEM_CLASSICAL_FIELD_CLASS PROBLEM%TYPE=PROBLEM_POISSON_EQUATION_TYPE @@ -5001,7 +5020,8 @@ SUBROUTINE POISSON_EQUATION_PROBLEM_PRESSURE_POISSON_SETUP(PROBLEM,PROBLEM_SETUP NULLIFY(SOLVERS) IF(ASSOCIATED(PROBLEM)) THEN IF(PROBLEM%SUBTYPE==PROBLEM_NONLINEAR_PRESSURE_POISSON_SUBTYPE.OR.PROBLEM%SUBTYPE== & - & PROBLEM_LINEAR_PRESSURE_POISSON_SUBTYPE) THEN + & PROBLEM_LINEAR_PRESSURE_POISSON_SUBTYPE.OR.PROBLEM%SUBTYPE== & + & PROBLEM_ALE_PRESSURE_POISSON_SUBTYPE) THEN SELECT CASE(PROBLEM_SETUP%SETUP_TYPE) CASE(PROBLEM_SETUP_INITIAL_TYPE) SELECT CASE(PROBLEM_SETUP%ACTION_TYPE) @@ -5393,7 +5413,8 @@ SUBROUTINE POISSON_POST_SOLVE(CONTROL_LOOP,SOLVER,ERR,ERROR,*) ! do nothing CASE(PROBLEM_NONLINEAR_SOURCE_POISSON_SUBTYPE) ! do nothing - CASE(PROBLEM_LINEAR_PRESSURE_POISSON_SUBTYPE,PROBLEM_NONLINEAR_PRESSURE_POISSON_SUBTYPE) + CASE(PROBLEM_LINEAR_PRESSURE_POISSON_SUBTYPE,PROBLEM_NONLINEAR_PRESSURE_POISSON_SUBTYPE, & + & PROBLEM_ALE_PRESSURE_POISSON_SUBTYPE) CALL POISSON_POST_SOLVE_OUTPUT_DATA(CONTROL_LOOP,SOLVER,ERR,ERROR,*999) CASE(PROBLEM_FITTED_PRESSURE_POISSON_SUBTYPE) !Post solve for the linear solver @@ -5500,7 +5521,7 @@ SUBROUTINE POISSON_PRE_SOLVE(CONTROL_LOOP,SOLVER,ERR,ERROR,*) !First update source field for PPE calculation CALL POISSON_PRE_SOLVE_UPDATE_PPE_SOURCE(CONTROL_LOOP,SOLVER,ERR,ERROR,*999) !Read in the lable information - CALL POISSON_PRE_SOLVE_UPDATE_INPUT_DATA(CONTROL_LOOP,SOLVER,ERR,ERROR,*999) +! CALL POISSON_PRE_SOLVE_UPDATE_INPUT_DATA(CONTROL_LOOP,SOLVER,ERR,ERROR,*999) CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,"Solving PPE problem... ",ERR,ERROR,*999) ELSE CALL FLAG_ERROR("Solver global number not associated for PPE problem.",ERR,ERROR,*999) @@ -5811,7 +5832,8 @@ SUBROUTINE POISSON_POST_SOLVE_OUTPUT_DATA(CONTROL_LOOP,SOLVER,ERR,ERROR,*) ! do nothing CASE(PROBLEM_NONLINEAR_SOURCE_POISSON_SUBTYPE) ! do nothing - CASE(PROBLEM_LINEAR_PRESSURE_POISSON_SUBTYPE,PROBLEM_NONLINEAR_PRESSURE_POISSON_SUBTYPE) + CASE(PROBLEM_LINEAR_PRESSURE_POISSON_SUBTYPE,PROBLEM_NONLINEAR_PRESSURE_POISSON_SUBTYPE, & + & PROBLEM_ALE_PRESSURE_POISSON_SUBTYPE) IF(CONTROL_LOOP%WHILE_LOOP%ITERATION_NUMBER==CONTROL_LOOP%WHILE_LOOP%MAXIMUM_NUMBER_OF_ITERATIONS)THEN CONTROL_TIME_LOOP=>CONTROL_LOOP%PARENT_LOOP CALL CONTROL_LOOP_CURRENT_TIMES_GET(CONTROL_TIME_LOOP,CURRENT_TIME,TIME_INCREMENT,ERR,ERROR,*999) diff --git a/src/advection_diffusion_equation_routines.f90 b/src/advection_diffusion_equation_routines.f90 index 6511ace4..9e3833f0 100644 --- a/src/advection_diffusion_equation_routines.f90 +++ b/src/advection_diffusion_equation_routines.f90 @@ -2254,9 +2254,11 @@ SUBROUTINE ADVECTION_DIFFUSION_EQUATION_FINITE_ELEMENT_CALCULATE(EQUATIONS_SET,E INTEGER(INTG), INTENT(OUT) :: ERR !EQUATIONS%EQUATIONS_MAPPING + IF(EQUATIONS_SET%SUBTYPE==EQUATIONS_SET_MULTI_COMP_TRANSPORT_ADVEC_DIFF_SUBTYPE .OR. 
& + & EQUATIONS_SET%SUBTYPE==EQUATIONS_SET_MULTI_COMP_TRANSPORT_ADVEC_DIFF_SUPG_SUBTYPE) THEN + EQUATIONS_SET_FIELD=>EQUATIONS_SET%EQUATIONS_SET_FIELD%EQUATIONS_SET_FIELD_FIELD + CALL FIELD_PARAMETER_SET_DATA_GET(EQUATIONS_SET_FIELD,FIELD_U_VARIABLE_TYPE, & + & FIELD_VALUES_SET_TYPE,EQUATIONS_SET_FIELD_DATA,ERR,ERROR,*999) + my_compartment = EQUATIONS_SET_FIELD_DATA(1) + Ncompartments = EQUATIONS_SET_FIELD_DATA(2) + LINEAR_MATRICES=>EQUATIONS_MATRICES%LINEAR_MATRICES + LINEAR_MAPPING=>EQUATIONS_MAPPING%LINEAR_MAPPING + num_var_count=0 + DO imatrix = 1,Ncompartments + IF(imatrix/=my_compartment)THEN + num_var_count=num_var_count+1 + COUPLING_MATRICES(num_var_count)%PTR=>LINEAR_MATRICES%MATRICES(num_var_count)%PTR + FIELD_VARIABLES(num_var_count)%PTR=>LINEAR_MAPPING%EQUATIONS_MATRIX_TO_VAR_MAPS(num_var_count)%VARIABLE + FIELD_VAR_TYPES(num_var_count)=FIELD_VARIABLES(num_var_count)%PTR%VARIABLE_TYPE + COUPLING_MATRICES(num_var_count)%PTR%ELEMENT_MATRIX%MATRIX=0.0_DP + ENDIF + END DO + ENDIF IF(EQUATIONS_SET%SUBTYPE==EQUATIONS_SET_NO_SOURCE_ADVECTION_DIFFUSION_SUBTYPE .OR. & & EQUATIONS_SET%SUBTYPE==EQUATIONS_SET_CONSTANT_SOURCE_ADVECTION_DIFFUSION_SUBTYPE .OR. & & EQUATIONS_SET%SUBTYPE==EQUATIONS_SET_LINEAR_SOURCE_ADVECTION_DIFFUSION_SUBTYPE .OR. & @@ -2481,6 +2507,8 @@ SUBROUTINE ADVECTION_DIFFUSION_EQUATION_FINITE_ELEMENT_CALCULATE(EQUATIONS_SET,E DIFFUSION_DEPENDENT_PREVIOUS_INTERPOLATED_POINT=> & & EQUATIONS%INTERPOLATION%DEPENDENT_INTERP_POINT(FIELD_V_VARIABLE_TYPE)%PTR ENDIF + + !Select whether using standard Galerkin scheme, or the stabilised streamwise-upwinding Petrov-Galerkin scheme SELECT CASE(EQUATIONS_SET%SUBTYPE) CASE(EQUATIONS_SET_NO_SOURCE_ADVECTION_DIFFUSION_SUBTYPE,EQUATIONS_SET_CONSTANT_SOURCE_ADVECTION_DIFFUSION_SUBTYPE, & @@ -2571,8 +2599,11 @@ SUBROUTINE ADVECTION_DIFFUSION_EQUATION_FINITE_ELEMENT_CALCULATE(EQUATIONS_SET,E ! A_PARAM is the material parameter that multiplies the linear source u ELSEIF(EQUATIONS_SET%SUBTYPE==EQUATIONS_SET_MULTI_COMP_TRANSPORT_ADVEC_DIFF_SUBTYPE) THEN !for multi-compartment model must include additional terms into the - - + COUPLING_PARAM=EQUATIONS%INTERPOLATION%MATERIALS_INTERP_POINT(FIELD_V_VARIABLE_TYPE)%PTR% & + & VALUES(my_compartment,NO_PART_DERIV) + STIFFNESS_MATRIX%ELEMENT_MATRIX%MATRIX(mhs,nhs)=STIFFNESS_MATRIX%ELEMENT_MATRIX%MATRIX(mhs,nhs)+ & + & SUM*RWG + QUADRATURE_SCHEME%GAUSS_BASIS_FNS(ms,NO_PART_DERIV,ng)* & + & QUADRATURE_SCHEME%GAUSS_BASIS_FNS(ns,NO_PART_DERIV,ng)*RWG*COUPLING_PARAM ENDIF ENDIF IF(EQUATIONS_SET%SUBTYPE==EQUATIONS_SET_NO_SOURCE_ADVECTION_DIFFUSION_SUBTYPE .OR. 
& @@ -2632,6 +2663,82 @@ SUBROUTINE ADVECTION_DIFFUSION_EQUATION_FINITE_ELEMENT_CALCULATE(EQUATIONS_SET,E ENDIF ENDIF IF(UPDATE_RHS_VECTOR) RHS_VECTOR%ELEMENT_VECTOR%VECTOR(mhs)=0.0_DP + + IF(EQUATIONS_SET%SUBTYPE==EQUATIONS_SET_MULTI_COMP_TRANSPORT_ADVEC_DIFF_SUBTYPE) THEN + !Calculate the coupling matrices + + !Loop over element rows + mhs=0 + DO mh=1,FIELD_VARIABLE%NUMBER_OF_COMPONENTS !field_variable is the variable associated with the equations set under consideration + + MESH_COMPONENT_1 = FIELD_VARIABLE%COMPONENTS(mh)%MESH_COMPONENT_NUMBER + DEPENDENT_BASIS_1 => DEPENDENT_FIELD%DECOMPOSITION%DOMAIN(MESH_COMPONENT_1)%PTR% & + & TOPOLOGY%ELEMENTS%ELEMENTS(ELEMENT_NUMBER)%BASIS + QUADRATURE_SCHEME_1 => DEPENDENT_BASIS_1%QUADRATURE% & + & QUADRATURE_SCHEME_MAP(BASIS_DEFAULT_QUADRATURE_SCHEME)%PTR + RWG = EQUATIONS%INTERPOLATION%GEOMETRIC_INTERP_POINT_METRICS(FIELD_U_VARIABLE_TYPE)%PTR%JACOBIAN * & + & QUADRATURE_SCHEME_1%GAUSS_WEIGHTS(ng) + + DO ms=1,DEPENDENT_BASIS_1%NUMBER_OF_ELEMENT_PARAMETERS + mhs=mhs+1 + + num_var_count=0 + DO imatrix = 1,Ncompartments + IF(imatrix/=my_compartment)THEN + num_var_count=num_var_count+1 + +!need to test for the case where imatrix==mycompartment +!the coupling terms then needs to be added into the stiffness matrix + IF(COUPLING_MATRICES(num_var_count)%PTR%UPDATE_MATRIX) THEN + +! !Loop over element columns + nhs=0 +! ! DO nh=1,FIELD_VARIABLE%NUMBER_OF_COMPONENTS + DO nh=1,FIELD_VARIABLES(num_var_count)%PTR%NUMBER_OF_COMPONENTS + + MESH_COMPONENT_2 = FIELD_VARIABLE%COMPONENTS(nh)%MESH_COMPONENT_NUMBER + DEPENDENT_BASIS_2 => DEPENDENT_FIELD%DECOMPOSITION%DOMAIN(MESH_COMPONENT_2)%PTR% & + & TOPOLOGY%ELEMENTS%ELEMENTS(ELEMENT_NUMBER)%BASIS + !--- We cannot use two different quadrature schemes here !!! + QUADRATURE_SCHEME_2 => DEPENDENT_BASIS_2%QUADRATURE% & + & QUADRATURE_SCHEME_MAP(BASIS_DEFAULT_QUADRATURE_SCHEME)%PTR + !RWG = EQUATIONS%INTERPOLATION%GEOMETRIC_INTERP_POINT_METRICS%JACOBIAN * & + ! & QUADRATURE_SCHEME_2%GAUSS_WEIGHTS(ng) + + DO ns=1,DEPENDENT_BASIS_2%NUMBER_OF_ELEMENT_PARAMETERS + nhs=nhs+1 + +! !------------------------------------------------------------------------------------------------------------- +! !concentration test function, concentration trial function +! !For now, this is only a dummy implementation - this still has to be properly set up. +! 
IF(mh==nh.AND.nh>FATAL ERROR: Bus error occured.\n"); + fprintf(stderr,">>FATAL ERROR: Bus error occurred.\n"); } break; #endif /* defined (SIGBUS) */ #if defined (SIGEMT) case SIGEMT: { - fprintf(stderr,">>FATAL ERROR: EMT occured.\n"); + fprintf(stderr,">>FATAL ERROR: EMT occurred.\n"); } break; #endif /* defined (SIGEMT) */ case SIGFPE: { - fprintf(stderr,">>FATAL ERROR: Floating point execption occured.\n"); + fprintf(stderr,">>FATAL ERROR: Floating point execption occurred.\n"); } break; case SIGILL: { - fprintf(stderr,">>FATAL ERROR: Illegal instruction occured.\n"); + fprintf(stderr,">>FATAL ERROR: Illegal instruction occurred.\n"); } break; case SIGINT: { - fprintf(stderr,">>FATAL ERROR: Interrupt occured.\n"); + fprintf(stderr,">>FATAL ERROR: Interrupt occurred.\n"); } break; case SIGABRT: { - fprintf(stderr,">>FATAL ERROR: Abort occured.\n"); + fprintf(stderr,">>FATAL ERROR: Abort occurred.\n"); } break; case SIGSEGV: { - fprintf(stderr,">>FATAL ERROR: Segment violation occured.\n"); + fprintf(stderr,">>FATAL ERROR: Segment violation occurred.\n"); } break; #if defined (SIGTRAP) case SIGTRAP: { - fprintf(stderr,">>FATAL ERROR: Trace trap occured.\n"); + fprintf(stderr,">>FATAL ERROR: Trace trap occurred.\n"); } break; #endif /* defined (SIGTRAP) */ default: { - fprintf(stderr,">>FATAL ERROR: Unknown signal %d occured.\n",code); + fprintf(stderr,">>FATAL ERROR: Unknown signal %d occurred.\n",code); } break; } #endif diff --git a/src/control_loop_routines.f90 b/src/control_loop_routines.f90 index bcaf96b8..45800823 100644 --- a/src/control_loop_routines.f90 +++ b/src/control_loop_routines.f90 @@ -1400,43 +1400,39 @@ SUBROUTINE CONTROL_LOOP_TIMES_SET(CONTROL_LOOP,START_TIME,STOP_TIME,TIME_INCREME CALL ENTERS("CONTROL_LOOP_TIMES_SET",ERR,ERROR,*999) IF(ASSOCIATED(CONTROL_LOOP)) THEN - IF(CONTROL_LOOP%CONTROL_LOOP_FINISHED) THEN - CALL FLAG_ERROR("Control loop has been finished.",ERR,ERROR,*999) - ELSE - IF(CONTROL_LOOP%LOOP_TYPE==PROBLEM_CONTROL_TIME_LOOP_TYPE) THEN - TIME_LOOP=>CONTROL_LOOP%TIME_LOOP - IF(ASSOCIATED(TIME_LOOP)) THEN - IF(ABS(TIME_INCREMENT)<=ZERO_TOLERANCE) THEN - LOCAL_ERROR="The specified time increment of "//TRIM(NUMBER_TO_VSTRING(TIME_INCREMENT,"*",ERR,ERROR))// & - & " is invalid. The time increment must not be zero." - CALL FLAG_ERROR(LOCAL_ERROR,ERR,ERROR,*999) + IF(CONTROL_LOOP%LOOP_TYPE==PROBLEM_CONTROL_TIME_LOOP_TYPE) THEN + TIME_LOOP=>CONTROL_LOOP%TIME_LOOP + IF(ASSOCIATED(TIME_LOOP)) THEN + IF(ABS(TIME_INCREMENT)<=ZERO_TOLERANCE) THEN + LOCAL_ERROR="The specified time increment of "//TRIM(NUMBER_TO_VSTRING(TIME_INCREMENT,"*",ERR,ERROR))// & + & " is invalid. The time increment must not be zero." + CALL FLAG_ERROR(LOCAL_ERROR,ERR,ERROR,*999) + ELSE + IF(TIME_INCREMENT>0.0_DP) THEN + IF(STOP_TIME<=START_TIME) THEN + LOCAL_ERROR="The specified stop time of "//TRIM(NUMBER_TO_VSTRING(STOP_TIME,"*",ERR,ERROR))// & + & " is incompatiable with a specified start time of "//TRIM(NUMBER_TO_VSTRING(START_TIME,"*",ERR,ERROR))// & + & ". For a positive time increment the stop time must be > than the start time." + CALL FLAG_ERROR(LOCAL_ERROR,ERR,ERROR,*999) + ENDIF ELSE - IF(TIME_INCREMENT>0.0_DP) THEN - IF(STOP_TIME<=START_TIME) THEN - LOCAL_ERROR="The specified stop time of "//TRIM(NUMBER_TO_VSTRING(STOP_TIME,"*",ERR,ERROR))// & - & " is incompatiable with a specified start time of "//TRIM(NUMBER_TO_VSTRING(START_TIME,"*",ERR,ERROR))// & - & ". For a positive time increment the stop time must be > than the start time." 
- CALL FLAG_ERROR(LOCAL_ERROR,ERR,ERROR,*999) - ENDIF - ELSE - IF(START_TIME<=STOP_TIME) THEN - LOCAL_ERROR="The specified start time of "//TRIM(NUMBER_TO_VSTRING(START_TIME,"*",ERR,ERROR))// & - & " is incompatiable with a specified stop time of "//TRIM(NUMBER_TO_VSTRING(STOP_TIME,"*",ERR,ERROR))// & - & ". For a negative time increment the stop time must be < than the start time." - CALL FLAG_ERROR(LOCAL_ERROR,ERR,ERROR,*999) - ENDIF + IF(START_TIME<=STOP_TIME) THEN + LOCAL_ERROR="The specified start time of "//TRIM(NUMBER_TO_VSTRING(START_TIME,"*",ERR,ERROR))// & + & " is incompatiable with a specified stop time of "//TRIM(NUMBER_TO_VSTRING(STOP_TIME,"*",ERR,ERROR))// & + & ". For a negative time increment the stop time must be < than the start time." + CALL FLAG_ERROR(LOCAL_ERROR,ERR,ERROR,*999) ENDIF ENDIF - TIME_LOOP%START_TIME=START_TIME - TIME_LOOP%STOP_TIME=STOP_TIME - TIME_LOOP%TIME_INCREMENT=TIME_INCREMENT - ELSE - CALL FLAG_ERROR("Control loop time loop is not associated.",ERR,ERROR,*999) ENDIF + TIME_LOOP%START_TIME=START_TIME + TIME_LOOP%STOP_TIME=STOP_TIME + TIME_LOOP%TIME_INCREMENT=TIME_INCREMENT ELSE - CALL FLAG_ERROR("The specified control loop is not a time control loop.",ERR,ERROR,*999) + CALL FLAG_ERROR("Control loop time loop is not associated.",ERR,ERROR,*999) ENDIF - ENDIF + ELSE + CALL FLAG_ERROR("The specified control loop is not a time control loop.",ERR,ERROR,*999) + ENDIF ELSE CALL FLAG_ERROR("Control loop is not associated.",ERR,ERROR,*999) ENDIF diff --git a/src/cuda_solver_routines.cu b/src/cuda_solver_routines.cu index afe660b4..97c6e6d3 100644 --- a/src/cuda_solver_routines.cu +++ b/src/cuda_solver_routines.cu @@ -3,89 +3,111 @@ #include +//extern __shared__ double shared_array[]; +//const int algebraicCount = 25; +//const int rateStateCount = 8; +//const int constantCount = 0; +//const double FLOPSPerFunction = 193.0f; +//const int DEFAULT_TESTING_THREADS = 2000000; +//const int sharedMemoryCellModel = 0; +//const char* cellModelName = "LR R3"; +// +////////////////////////////////////////////////////////////////////////////////// +//// Cell Model Device Functions +////////////////////////////////////////////////////////////////////////////////// +//__device__ void computeRates(float VOI, double* DUMMY, double* STATES, double* ALGEBRAIC, double* RATES) +//{ +// ALGEBRAIC[0] = -25.5; // Add stimulus in proper +// ALGEBRAIC[1] = (0.32f*STATES[0]+15.0816f)/(1.0f - (expf(- 0.1f*STATES[0]-4.713f))); // 7 ops +// +// if (STATES[0] < -40.0f) { +// ALGEBRAIC[2] = 0.135f*(expf(((80.0f+STATES[0])/- 6.8f))); // 4 ops +// ALGEBRAIC[3] = (( - 127140*(expf(0.2444f*STATES[0])) - 3.47400e-05*(expf(-0.04391f*STATES[0])))*(STATES[0]+37.78f))/(1.0f+(expf(0.311f*STATES[0]+24.64053))); // 14 ops +// ALGEBRAIC[9] = 3.56f*(expf(0.079f*STATES[0]))+ 310000*(expf(0.35f*STATES[0])); // 7 ops +// ALGEBRAIC[10] = 0.1212f*(expf(-0.01052f*STATES[0]))/(1.0f+(expf(-0.1378f*STATES[0]-5.531292f))); // 8 ops +// } else { +// ALGEBRAIC[2] = 0.00000; +// ALGEBRAIC[3] = 0.00000; +// ALGEBRAIC[9] = 1.0f/( 0.13f*(1.0f+(expf(((STATES[0]+10.66f)/- 11.1f))))); +// ALGEBRAIC[10] = ( 0.3f*(expf(-2.53500e-07*STATES[0])))/(1.0f+(expf(-0.1f*STATES[0]-3.2f))); +// } +// if (STATES[0] < -100.0f) { +// ALGEBRAIC[16] = 2.837f*(expf(0.04f*STATES[0]+3.08f) - 1.0f)/((STATES[0]+77.0f)*(expf(0.04f*STATES[0]+1.4f))); // 11 ops +// } else { +// ALGEBRAIC[16] = 1.0f; +// } +// +// ALGEBRAIC[4] = (0.095f*(expf(-0.01f*STATES[0] + 0.5f)))/(1.0f+(expf(-0.072*STATES[0] + 0.36f))); // 9 ops +// ALGEBRAIC[5] = 
(0.012f*(expf(-0.008f*STATES[0]-0.224f)))/(1.0f+(expf(0.15f*STATES[0]+4.2f))); // 9 ops +// ALGEBRAIC[6] = (0.0005f*(expf(0.083f*STATES[0]+4.15f)))/(1.0f+(expf(0.057f*STATES[0]+2.85f))); // 9 ops +// ALGEBRAIC[7] = 23*(powf(STATES[1], 3.0f))*STATES[2]*STATES[3]*(STATES[0] - 54.794463f); // 6 ops +// ALGEBRAIC[8] = 0.08f*(expf(-STATES[0]/11.0000)); // 3 ops +// ALGEBRAIC[11] = (0.07f*(expf(-0.017f*STATES[0]-0.748f)))/(1.0f+(expf(0.05f*STATES[0]+2.2f))); // 9 ops +// ALGEBRAIC[12] = (0.0065f*(expf(-0.02f*STATES[0]-0.6f)))/(1.0f+(expf(-0.2f*STATES[0]-6.0f))); // 9 ops +// ALGEBRAIC[13] = (0.0013f*(expf(-0.06f*STATES[0]-1.2f)))/(1.0f+(expf(-0.04f*STATES[0]-0.8f))); // 9 ops +// ALGEBRAIC[14] = 7.7f - 13.0287f*logf(STATES[4]); // 3 ops +// ALGEBRAIC[15] = 0.09f*STATES[5]*STATES[6]*(STATES[0] - ALGEBRAIC[14]); // 4 ops +// ALGEBRAIC[17] = 0.282f*STATES[7]*ALGEBRAIC[16]*(STATES[0] + 77.56758f); // 4 ops +// ALGEBRAIC[18] = 1.02f/(1.0f+(expf(0.2385f*STATES[0] + 6.83967915f))); // 4 ops +// ALGEBRAIC[19] = (0.49124f*(expf( 0.08032f *STATES[0] + 7.49939f) + expf(0.06175f*STATES[0] - 31.271255925f)))/(1.00000+expf(-0.514300*STATES[0] - 214.85137268791f)); // 13 ops +// ALGEBRAIC[20] = ALGEBRAIC[18]/(ALGEBRAIC[18]+ALGEBRAIC[19]); // 2 ops +// ALGEBRAIC[21] = 0.6047f*ALGEBRAIC[20]*(STATES[0] + 87.89290f); // 3 ops +// ALGEBRAIC[22] = 1.0f/(1.0f+(exp(((7.488f - STATES[0])/5.98f)))); // 5 ops +// ALGEBRAIC[23] = 0.0183f*ALGEBRAIC[22]*(STATES[0] + 87.89290f); // 3 ops +// ALGEBRAIC[24] = 0.03921f*STATES[0] +2.3475027f; // 3 ops +// +// RATES[0] = -(ALGEBRAIC[0]+ALGEBRAIC[7]+ALGEBRAIC[15]+ALGEBRAIC[17]+ALGEBRAIC[21]+ALGEBRAIC[23]+ALGEBRAIC[24]); // 7 ops +// RATES[1] = ALGEBRAIC[1]*(1.00000 - STATES[1]) - ALGEBRAIC[8]*STATES[1]; // 4 ops +// RATES[2] = ALGEBRAIC[2]*(1.00000 - STATES[2]) - ALGEBRAIC[9]*STATES[2]; // 4 ops +// RATES[3] = ALGEBRAIC[3]*(1.00000 - STATES[3]) - ALGEBRAIC[10]*STATES[3]; // 4 ops +// RATES[4] = - 0.0001f*ALGEBRAIC[15]+ 0.000007f - 0.07f*STATES[4]; // 4 ops +// RATES[5] = ALGEBRAIC[4]*(1.00000 - STATES[5]) - ALGEBRAIC[11]*STATES[5]; // 4 ops +// RATES[6] = ALGEBRAIC[5]*(1.00000 - STATES[6]) - ALGEBRAIC[12]*STATES[6]; // 4 ops +// RATES[7] = ALGEBRAIC[6]*(1.00000 - STATES[7]) - ALGEBRAIC[13]*STATES[7]; // 4 ops +//} + extern __shared__ double shared_array[]; -const int algebraicCount = 25; -const int rateStateCount = 8; +const int rateStateCount = 2; const int constantCount = 0; -const double FLOPSPerFunction = 193.0f; -const int DEFAULT_TESTING_THREADS = 2000000; +const int algebraicCount = 1; +const double FLOPSPerFunction = 9.0f; +const int DEFAULT_TESTING_THREADS = 20000000; const int sharedMemoryCellModel = 0; -const char* cellModelName = "LR R3"; +const char* cellModelName = "FN R1"; //////////////////////////////////////////////////////////////////////////////// // Cell Model Device Functions //////////////////////////////////////////////////////////////////////////////// -__device__ void computeRates(float VOI, double* DUMMY, double* STATES, double* ALGEBRAIC, double* RATES) +__device__ void computeRates(double time, double* constants, double* states, double* algebraic, double* rates) { - ALGEBRAIC[0] = -25.5; // Add stimulus in proper - ALGEBRAIC[1] = (0.32f*STATES[0]+15.0816f)/(1.0f - (expf(- 0.1f*STATES[0]-4.713f))); // 7 ops - - if (STATES[0] < -40.0f) { - ALGEBRAIC[2] = 0.135f*(expf(((80.0f+STATES[0])/- 6.8f))); // 4 ops - ALGEBRAIC[3] = (( - 127140*(expf(0.2444f*STATES[0])) - 
3.47400e-05*(expf(-0.04391f*STATES[0])))*(STATES[0]+37.78f))/(1.0f+(expf(0.311f*STATES[0]+24.64053))); // 14 ops - ALGEBRAIC[9] = 3.56f*(expf(0.079f*STATES[0]))+ 310000*(expf(0.35f*STATES[0])); // 7 ops - ALGEBRAIC[10] = 0.1212f*(expf(-0.01052f*STATES[0]))/(1.0f+(expf(-0.1378f*STATES[0]-5.531292f))); // 8 ops - } else { - ALGEBRAIC[2] = 0.00000; - ALGEBRAIC[3] = 0.00000; - ALGEBRAIC[9] = 1.0f/( 0.13f*(1.0f+(expf(((STATES[0]+10.66f)/- 11.1f))))); - ALGEBRAIC[10] = ( 0.3f*(expf(-2.53500e-07*STATES[0])))/(1.0f+(expf(-0.1f*STATES[0]-3.2f))); - } - if (STATES[0] < -100.0f) { - ALGEBRAIC[16] = 2.837f*(expf(0.04f*STATES[0]+3.08f) - 1.0f)/((STATES[0]+77.0f)*(expf(0.04f*STATES[0]+1.4f))); // 11 ops - } else { - ALGEBRAIC[16] = 1.0f; - } - - ALGEBRAIC[4] = (0.095f*(expf(-0.01f*STATES[0] + 0.5f)))/(1.0f+(expf(-0.072*STATES[0] + 0.36f))); // 9 ops - ALGEBRAIC[5] = (0.012f*(expf(-0.008f*STATES[0]-0.224f)))/(1.0f+(expf(0.15f*STATES[0]+4.2f))); // 9 ops - ALGEBRAIC[6] = (0.0005f*(expf(0.083f*STATES[0]+4.15f)))/(1.0f+(expf(0.057f*STATES[0]+2.85f))); // 9 ops - ALGEBRAIC[7] = 23*(powf(STATES[1], 3.0f))*STATES[2]*STATES[3]*(STATES[0] - 54.794463f); // 6 ops - ALGEBRAIC[8] = 0.08f*(expf(-STATES[0]/11.0000)); // 3 ops - ALGEBRAIC[11] = (0.07f*(expf(-0.017f*STATES[0]-0.748f)))/(1.0f+(expf(0.05f*STATES[0]+2.2f))); // 9 ops - ALGEBRAIC[12] = (0.0065f*(expf(-0.02f*STATES[0]-0.6f)))/(1.0f+(expf(-0.2f*STATES[0]-6.0f))); // 9 ops - ALGEBRAIC[13] = (0.0013f*(expf(-0.06f*STATES[0]-1.2f)))/(1.0f+(expf(-0.04f*STATES[0]-0.8f))); // 9 ops - ALGEBRAIC[14] = 7.7f - 13.0287f*logf(STATES[4]); // 3 ops - ALGEBRAIC[15] = 0.09f*STATES[5]*STATES[6]*(STATES[0] - ALGEBRAIC[14]); // 4 ops - ALGEBRAIC[17] = 0.282f*STATES[7]*ALGEBRAIC[16]*(STATES[0] + 77.56758f); // 4 ops - ALGEBRAIC[18] = 1.02f/(1.0f+(expf(0.2385f*STATES[0] + 6.83967915f))); // 4 ops - ALGEBRAIC[19] = (0.49124f*(expf( 0.08032f *STATES[0] + 7.49939f) + expf(0.06175f*STATES[0] - 31.271255925f)))/(1.00000+expf(-0.514300*STATES[0] - 214.85137268791f)); // 13 ops - ALGEBRAIC[20] = ALGEBRAIC[18]/(ALGEBRAIC[18]+ALGEBRAIC[19]); // 2 ops - ALGEBRAIC[21] = 0.6047f*ALGEBRAIC[20]*(STATES[0] + 87.89290f); // 3 ops - ALGEBRAIC[22] = 1.0f/(1.0f+(exp(((7.488f - STATES[0])/5.98f)))); // 5 ops - ALGEBRAIC[23] = 0.0183f*ALGEBRAIC[22]*(STATES[0] + 87.89290f); // 3 ops - ALGEBRAIC[24] = 0.03921f*STATES[0] +2.3475027f; // 3 ops - - RATES[0] = -(ALGEBRAIC[0]+ALGEBRAIC[7]+ALGEBRAIC[15]+ALGEBRAIC[17]+ALGEBRAIC[21]+ALGEBRAIC[23]+ALGEBRAIC[24]); // 7 ops - RATES[1] = ALGEBRAIC[1]*(1.00000 - STATES[1]) - ALGEBRAIC[8]*STATES[1]; // 4 ops - RATES[2] = ALGEBRAIC[2]*(1.00000 - STATES[2]) - ALGEBRAIC[9]*STATES[2]; // 4 ops - RATES[3] = ALGEBRAIC[3]*(1.00000 - STATES[3]) - ALGEBRAIC[10]*STATES[3]; // 4 ops - RATES[4] = - 0.0001f*ALGEBRAIC[15]+ 0.000007f - 0.07f*STATES[4]; // 4 ops - RATES[5] = ALGEBRAIC[4]*(1.00000 - STATES[5]) - ALGEBRAIC[11]*STATES[5]; // 4 ops - RATES[6] = ALGEBRAIC[5]*(1.00000 - STATES[6]) - ALGEBRAIC[12]*STATES[6]; // 4 ops - RATES[7] = ALGEBRAIC[6]*(1.00000 - STATES[7]) - ALGEBRAIC[13]*STATES[7]; // 4 ops + rates[1] = 0.005f*(states[0] - 3.0f*states[1]); + rates[0] = ((states[0]*(states[0] - -0.08f)*(1.0f - states[0]) - states[1])+ algebraic[0]); } -//////////////////////////////////////////////////////////////////////////////// -// Cell Model Host Functions ////////// Should Not Be Needed Later ///////////// -//////////////////////////////////////////////////////////////////////////////// -void initProblem(int num_threads, double* STATES) -{ - int i; - - STATES[0] = 
-84.3801107371; - STATES[1] = 0.00171338077730188; - STATES[2] = 0.982660523699656; - STATES[3] = 0.989108212766685; - STATES[4] = 0.00017948816388306; - STATES[5] = 0.00302126301779861; - STATES[6] = 0.999967936476325; - STATES[7] = 0.0417603108167287; - - for (i=1; i>>(timeSteps, stepSize, d_states); checkCUDAError("Single Kernel Execution"); cutilSafeCall( cudaThreadSynchronize() ); // Stop kernel Timer - cutilCheckError(cutStopTimer(kernel_timer)); + cutilCheckError(cutStopTimer(kernel_timer)); cutilSafeCall( cudaMemcpy(h_paged_states, d_states, pagedMemorySize/num_streams, cudaMemcpyDeviceToHost) ); memcpy(h_states, h_paged_states, pagedMemorySize/num_streams); diff --git a/src/external_dae_solver_routines.c b/src/external_dae_solver_routines.c deleted file mode 100644 index f365f7fa..00000000 --- a/src/external_dae_solver_routines.c +++ /dev/null @@ -1,94 +0,0 @@ -/* \file - * $Id: external_dae_solver_routines.c 1836 2010-12-20 17:25:14Z chrispbradley $ - * \author Chris Bradley - * \brief This file provides the routines for solving differential-algebraic equations with an external solver. - *. - * \section LICENSE - * - * Version: MPL 1.1/GPL 2.0/LGPL 2.1 - * - * The contents of this file are subject to the Mozilla Public License - * Version 1.1 (the "License"); you may not use this file except in - * compliance with the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" - * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the - * License for the specific language governing rights and limitations - * under the License. - * - * The Original Code is OpenCMISS - * - * The Initial Developer of the Original Code is University of Auckland, - * Auckland, New Zealand, the University of Oxford, Oxford, United - * Kingdom and King's College, London, United Kingdom. Portions created - * by the University of Auckland, the University of Oxford and King's - * College, London are Copyright (C) 2007-2010 by the University of - * Auckland, the University of Oxford and King's College, London. - * All Rights Reserved. - * - * Contributor(s): - * - * Alternatively, the contents of this file may be used under the terms of - * either the GNU General Public License Version 2 or later (the "GPL"), or - * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), - * in which case the provisions of the GPL or the LGPL are applicable instead - * of those above. If you wish to allow use of your version of this file only - * under the terms of either the GPL or the LGPL, and not to allow others to - * use your version of this file under the terms of the MPL, indicate your - * decision by deleting the provisions above and replace them with the notice - * and other provisions required by the GPL or the LGPL. If you do not delete - * the provisions above, a recipient may use your version of this file under - * the terms of any one of the MPL, the GPL or the LGPL. - * - */ - -/* -File: external_dae_solver_routines.c -=================== - -This file provides provides the routines for solving differential-algebraic equations with an external solver. - -Functions included: - -SolverDAEExternalIntegrate Solves the differential-algebraic equation. 
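The kernel body behind the solveSystem launch above is not part of this patch series, so for orientation only: a minimal sketch of what the FN R1 path amounts to, assuming a forward Euler integrator, one cell per thread, and per-cell contiguous state storage. fnRates and solveEuler are illustrative names, not routines in this tree.

    #include <cuda_runtime.h>

    // FitzHugh-Nagumo rates as in the FN R1 computeRates above; the
    // stimulus stands in for algebraic[0].
    __device__ void fnRates(const double *states, double stimulus, double *rates)
    {
        rates[1] = 0.005 * (states[0] - 3.0 * states[1]);
        rates[0] = states[0] * (states[0] + 0.08) * (1.0 - states[0]) - states[1] + stimulus;
    }

    // Assumed integrator: explicit (forward) Euler, one cell per thread,
    // states stored contiguously per cell (layout is an assumption).
    __global__ void solveEuler(int timeSteps, double stepSize, double *states)
    {
        const int nStates = 2;                    // rateStateCount for FN R1
        int cell = blockIdx.x * blockDim.x + threadIdx.x;
        double s[2], r[2];
        s[0] = states[cell * nStates + 0];
        s[1] = states[cell * nStates + 1];
        for (int step = 0; step < timeSteps; ++step) {
            fnRates(s, 0.0, r);                   // no stimulus in this sketch
            s[0] += stepSize * r[0];
            s[1] += stepSize * r[1];
        }
        states[cell * nStates + 0] = s[0];
        states[cell * nStates + 1] = s[1];
    }

    // Host side, matching the launch configuration built above:
    //   dim3 grid(blocksPerDomain, 1, 1), threads(threads_per_block, 1, 1);
    //   solveEuler<<<grid, threads, sharedMem>>>(timeSteps, stepSize, d_states);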
- -*/ - -/* Included files */ - -#include -#include - -#include "external_dae_solver_routines.h" -//#include "cuda_solver_routines.cu" - -/* Type definitions */ - -/* Function Definitions -extern "C" -{*/ -void SolverDAEExternalIntegrate(const int NumberOfDofs, - const double StartTime, - const double EndTime, - double *InitialStep, - const int OnlyOneModelIndex, - int *ModelsData, - int NumberOfState, - double *StateData, - int NumberOfParameters, - double *ParametersData, - int NumberOfIntermediate, - double *IntermediateData, - int *err) -{ - - //unsigned int timesteps = 10; - - //printf("start %f end %f int step %f dofs %d", StartTime, EndTime, InitialStep[0], NumberOfDofs); - - //solve(StateData, StartTime, EndTime, timesteps, NumberOfDofs, 1024, 10, 2, NULL); - -} -//} - diff --git a/src/external_dae_solver_routines.cu b/src/external_dae_solver_routines.cu index cd76e408..5479e992 100644 --- a/src/external_dae_solver_routines.cu +++ b/src/external_dae_solver_routines.cu @@ -83,11 +83,9 @@ void SolverDAEExternalIntegrate(const int NumberOfDofs, int *err) { - unsigned int timesteps = 10; + printf("start %f end %f steps %d\n", StartTime, EndTime, (int)((EndTime-StartTime)/InitialStep[0])); - printf("start %f end %f int step %f dofs %d", StartTime, EndTime, InitialStep[0], NumberOfDofs); - - solve(StateData, StartTime, EndTime, timesteps, NumberOfDofs, 1024, 10, 2, NULL); + solve(StateData, StartTime, EndTime, (int)((EndTime-StartTime)/InitialStep[0]), NumberOfDofs, 1024, 10, 2, NULL); } } diff --git a/src/external_dae_solver_routines.h b/src/external_dae_solver_routines.h index ac39b779..d09ea0c9 100644 --- a/src/external_dae_solver_routines.h +++ b/src/external_dae_solver_routines.h @@ -43,8 +43,8 @@ * */ -//extern "C" -//{ +extern "C" +{ void SolverDAEExternalIntegrate(const int NumberOfDofs, const double StartTime, const double EndTime, @@ -58,4 +58,4 @@ void SolverDAEExternalIntegrate(const int NumberOfDofs, int NumberOfIntermediate, double *IntermediateData, int *err); -//} +} diff --git a/src/field_IO_routines.f90 b/src/field_IO_routines.f90 index f603c763..c5eeade7 100644 --- a/src/field_IO_routines.f90 +++ b/src/field_IO_routines.f90 @@ -353,16 +353,16 @@ FUNCTION FieldExport_DerivativeIndices( handle, componentNumber, fieldType, vari INTEGER(C_INT) :: FieldExport_DerivativeIndices END FUNCTION FieldExport_DerivativeIndices - SUBROUTINE READ_VTK(filePath, points, numPoints, cells, numCells) & - & BIND(C, NAME="readVTK") - USE TYPES - USE ISO_C_BINDING - CHARACTER(LEN=1,KIND=C_CHAR), INTENT(IN) :: filePath(*) - TYPE(C_PTR), INTENT(OUT):: points - INTEGER(C_INT), INTENT(OUT) :: numPoints - TYPE(C_PTR), INTENT(OUT) :: cells - INTEGER(C_INT), INTENT(OUT) :: numCells - END SUBROUTINE READ_VTK + SUBROUTINE READ_VTK(filePath, points, numPoints, cells, numCells) & + & BIND(C, NAME="readVTK") + USE TYPES + USE ISO_C_BINDING + CHARACTER(LEN=1,KIND=C_CHAR), INTENT(IN) :: filePath(*) + TYPE(C_PTR), INTENT(OUT):: points + INTEGER(C_INT), INTENT(OUT) :: numPoints + TYPE(C_PTR), INTENT(OUT) :: cells + INTEGER(C_INT), INTENT(OUT) :: numCells + END SUBROUTINE READ_VTK END INTERFACE @@ -1955,7 +1955,6 @@ SUBROUTINE FIELD_IO_IMPORT_GLOBAL_MESH(NAME, REGION, MESH, MESH_USER_NUMBER, MAS ENDIF DO idx_comp=1, NUMBER_OF_MESH_COMPONENTS - NULLIFY(ELEMENTS_PTR(idx_comp)%PTR) CALL MESH_TOPOLOGY_ELEMENTS_CREATE_START(MESH,idx_comp,BASIS_FUNCTIONS%BASES(1)%PTR,ELEMENTS_PTR(idx_comp)%PTR, & & ERR,ERROR,*999) ENDDO @@ -5224,14 +5223,15 @@ SUBROUTINE FIELD_IO_EXPORT_NODES_INTO_LOCAL_FILE(NODAL_INFO_SET, NAME, 
my_comput TYPE(VARYING_STRING) :: FILE_NAME !the prefix name of file. TYPE(FIELD_VARIABLE_COMPONENT_TYPE), POINTER :: COMPONENT !the prefix name of file. TYPE(DOMAIN_NODES_TYPE), POINTER :: DOMAIN_NODES ! domain nodes - INTEGER(INTG) :: local_number, global_number, sessionHandle, paddingCount, DERIVATIVE_INDEXES(PART_DERIV_S4_S4_S4) + INTEGER(INTG) :: local_number, global_number, sessionHandle, paddingCount,DERIVATIVE_INDEXES(PART_DERIV_S4_S4_S4) INTEGER(INTG), ALLOCATABLE :: paddingInfo(:) INTEGER(INTG) :: nn, comp_idx, dev_idx, NUM_OF_NODAL_DEV, MAX_NUM_OF_NODAL_DERIVATIVES, total_nodal_values + INTEGER(INTG), POINTER :: GEOMETRIC_PARAMETERS_INTG(:) LOGICAL :: FOUND REAL(C_DOUBLE), ALLOCATABLE, TARGET :: NODAL_BUFFER(:), TOTAL_NODAL_BUFFER(:) - REAL(DP), POINTER :: GEOMETRIC_PARAMETERS(:) + REAL(DP), POINTER :: GEOMETRIC_PARAMETERS_DP(:) !INTEGER(INTG), POINTER :: GEOMETRIC_PARAMETERS_INTG(:) - REAL(DP) :: padding(1) + REAL(DP) :: padding(1),VALUE padding(1) = 1.23456789 @@ -5334,16 +5334,18 @@ SUBROUTINE FIELD_IO_EXPORT_NODES_INTO_LOCAL_FILE(NODAL_INFO_SET, NAME, my_comput ENDDO !paddingCount -! SELECT CASE(COMPONENT%FIELD_VARIABLE%DATA_TYPE) -! CASE(FIELD_INTG_TYPE) -! NULLIFY(GEOMETRIC_PARAMETERS_INTG) -! CALL FIELD_PARAMETER_SET_DATA_GET(COMPONENT%FIELD_VARIABLE%FIELD,FIELD_U_VARIABLE_TYPE, & -! & FIELD_VALUES_SET_TYPE,GEOMETRIC_PARAMETERS_INTG,ERR,ERROR,*999) -! CASE(FIELD_DP_TYPE) - NULLIFY(GEOMETRIC_PARAMETERS) - CALL FIELD_PARAMETER_SET_DATA_GET(COMPONENT%FIELD_VARIABLE%FIELD,FIELD_U_VARIABLE_TYPE, & - & FIELD_VALUES_SET_TYPE,GEOMETRIC_PARAMETERS,ERR,ERROR,*999) -! END SELECT + SELECT CASE(COMPONENT%FIELD_VARIABLE%DATA_TYPE) + CASE(FIELD_INTG_TYPE) + NULLIFY(GEOMETRIC_PARAMETERS_INTG) + CALL FIELD_PARAMETER_SET_DATA_GET(COMPONENT%FIELD_VARIABLE%FIELD,FIELD_U_VARIABLE_TYPE, & + & FIELD_VALUES_SET_TYPE,GEOMETRIC_PARAMETERS_INTG,ERR,ERROR,*999) + CASE(FIELD_DP_TYPE) + NULLIFY(GEOMETRIC_PARAMETERS_DP) + CALL FIELD_PARAMETER_SET_DATA_GET(COMPONENT%FIELD_VARIABLE%FIELD,FIELD_U_VARIABLE_TYPE, & + & FIELD_VALUES_SET_TYPE,GEOMETRIC_PARAMETERS_DP,ERR,ERROR,*999) + CASE DEFAULT + CALL FLAG_ERROR("Not implemented.",ERR,ERROR,*999) + END SELECT !get the nodal partial derivatives NUM_OF_NODAL_DEV=DOMAIN_NODES%NODES(local_number)%NUMBER_OF_DERIVATIVES @@ -5362,8 +5364,17 @@ SUBROUTINE FIELD_IO_EXPORT_NODES_INTO_LOCAL_FILE(NODAL_INFO_SET, NAME, my_comput NUM_OF_NODAL_DEV = NUM_OF_NODAL_DEV + 1 - NODAL_BUFFER( NUM_OF_NODAL_DEV ) = GEOMETRIC_PARAMETERS( COMPONENT%PARAM_TO_DOF_MAP% & - & NODE_PARAM2DOF_MAP( DERIVATIVE_INDEXES( dev_idx ), local_number ) ) + SELECT CASE(COMPONENT%FIELD_VARIABLE%DATA_TYPE) + CASE(FIELD_INTG_TYPE) + VALUE=REAL(GEOMETRIC_PARAMETERS_INTG( COMPONENT%PARAM_TO_DOF_MAP%NODE_PARAM2DOF_MAP( & + & DERIVATIVE_INDEXES( dev_idx ), local_number ) ),DP) + CASE(FIELD_DP_TYPE) + VALUE=GEOMETRIC_PARAMETERS_DP( COMPONENT%PARAM_TO_DOF_MAP%NODE_PARAM2DOF_MAP( & + & DERIVATIVE_INDEXES( dev_idx ), local_number ) ) + CASE DEFAULT + CALL FLAG_ERROR("Not implemented.",ERR,ERROR,*999) + END SELECT + NODAL_BUFFER( NUM_OF_NODAL_DEV ) = VALUE ENDDO !dev_idx CALL GROW_ARRAY( TOTAL_NODAL_BUFFER, NUM_OF_NODAL_DEV, "Insufficient memory during I/O", ERR, ERROR, *999 ) @@ -5412,7 +5423,8 @@ SUBROUTINE FIELD_IO_EXPORT_NODES_INTO_LOCAL_FILE(NODAL_INFO_SET, NAME, my_comput !release the temporary memory CALL CHECKED_DEALLOCATE( NODAL_BUFFER ) CALL CHECKED_DEALLOCATE( TOTAL_NODAL_BUFFER ) - IF(ASSOCIATED(GEOMETRIC_PARAMETERS)) NULLIFY(GEOMETRIC_PARAMETERS) + IF(ASSOCIATED(GEOMETRIC_PARAMETERS_DP)) 
NULLIFY(GEOMETRIC_PARAMETERS_DP) + IF(ASSOCIATED(GEOMETRIC_PARAMETERS_INTG)) NULLIFY(GEOMETRIC_PARAMETERS_INTG) CALL EXITS("FIELD_IO_EXPORT_NODES_INTO_LOCAL_FILE") RETURN @@ -6066,4 +6078,8 @@ SUBROUTINE FIELD_IO_ELEMENTS_EXPORT(FIELDS, FILE_NAME, METHOD,ERR,ERROR,*) RETURN 1 END SUBROUTINE FIELD_IO_ELEMENTS_EXPORT + ! + !================================================================================================================================ + ! + END MODULE FIELD_IO_ROUTINES diff --git a/src/finite_elasticity_routines.f90 b/src/finite_elasticity_routines.f90 index 96502604..de20d74a 100644 --- a/src/finite_elasticity_routines.f90 +++ b/src/finite_elasticity_routines.f90 @@ -2997,28 +2997,29 @@ SUBROUTINE FINITE_ELASTICITY_EQUATIONS_SET_SETUP(EQUATIONS_SET,EQUATIONS_SET_SET !Check the user specified field CALL FIELD_TYPE_CHECK(EQUATIONS_SET_SETUP%FIELD,FIELD_GENERAL_TYPE,ERR,ERROR,*999) CALL FIELD_DEPENDENT_TYPE_CHECK(EQUATIONS_SET_SETUP%FIELD,FIELD_INDEPENDENT_TYPE,ERR,ERROR,*999) + !Question:Better to leave it up for the user to play around? CALL FIELD_NUMBER_OF_VARIABLES_CHECK(EQUATIONS_SET_SETUP%FIELD,2,ERR,ERROR,*999) CALL FIELD_VARIABLE_TYPES_CHECK(EQUATIONS_SET_SETUP%FIELD,(/FIELD_U_VARIABLE_TYPE,FIELD_DELUDELN_VARIABLE_TYPE/), & - & ERR,ERROR,*999) + & ERR,ERROR,*999) CALL FIELD_DIMENSION_CHECK(EQUATIONS_SET_SETUP%FIELD,FIELD_U_VARIABLE_TYPE,FIELD_VECTOR_DIMENSION_TYPE,ERR,ERROR,*999) CALL FIELD_DIMENSION_CHECK(EQUATIONS_SET_SETUP%FIELD,FIELD_DELUDELN_VARIABLE_TYPE,FIELD_VECTOR_DIMENSION_TYPE, & - & ERR,ERROR,*999) + & ERR,ERROR,*999) CALL FIELD_DATA_TYPE_CHECK(EQUATIONS_SET_SETUP%FIELD,FIELD_U_VARIABLE_TYPE,FIELD_DP_TYPE,ERR,ERROR,*999) CALL FIELD_DATA_TYPE_CHECK(EQUATIONS_SET_SETUP%FIELD,FIELD_DELUDELN_VARIABLE_TYPE,FIELD_DP_TYPE,ERR,ERROR,*999) CALL FIELD_NUMBER_OF_COMPONENTS_GET(EQUATIONS_SET%GEOMETRY%GEOMETRIC_FIELD,FIELD_U_VARIABLE_TYPE, & - & NUMBER_OF_DIMENSIONS,ERR,ERROR,*999) + & NUMBER_OF_DIMENSIONS,ERR,ERROR,*999) CALL FIELD_NUMBER_OF_COMPONENTS_CHECK(EQUATIONS_SET_SETUP%FIELD,FIELD_U_VARIABLE_TYPE,NUMBER_OF_COMPONENTS, & - & ERR,ERROR,*999) + & ERR,ERROR,*999) CALL FIELD_NUMBER_OF_COMPONENTS_CHECK(EQUATIONS_SET_SETUP%FIELD,FIELD_DELUDELN_VARIABLE_TYPE,NUMBER_OF_COMPONENTS, & - & ERR,ERROR,*999) + & ERR,ERROR,*999) SELECT CASE(EQUATIONS_SET%SOLUTION_METHOD) CASE(EQUATIONS_SET_FEM_SOLUTION_METHOD) - DO component_idx=1,NUMBER_OF_DIMENSIONS - CALL FIELD_COMPONENT_INTERPOLATION_CHECK(EQUATIONS_SET_SETUP%FIELD,FIELD_U_VARIABLE_TYPE,component_idx, & - & FIELD_NODE_BASED_INTERPOLATION,ERR,ERROR,*999) - CALL FIELD_COMPONENT_INTERPOLATION_CHECK(EQUATIONS_SET_SETUP%FIELD,FIELD_DELUDELN_VARIABLE_TYPE,component_idx, & - & FIELD_NODE_BASED_INTERPOLATION,ERR,ERROR,*999) - ENDDO !component_idx + !DO component_idx=1,NUMBER_OF_DIMENSIONS + ! CALL FIELD_COMPONENT_INTERPOLATION_CHECK(EQUATIONS_SET_SETUP%FIELD,FIELD_U_VARIABLE_TYPE,component_idx, & + ! & FIELD_NODE_BASED_INTERPOLATION,ERR,ERROR,*999) + ! CALL FIELD_COMPONENT_INTERPOLATION_CHECK(EQUATIONS_SET_SETUP%FIELD,FIELD_DELUDELN_VARIABLE_TYPE,component_idx, & + ! & FIELD_NODE_BASED_INTERPOLATION,ERR,ERROR,*999) + !ENDDO !component_idx !kmith :09.06.09 - Hydrostatic pressure could be node-based as well ! 
CALL FIELD_COMPONENT_INTERPOLATION_CHECK(EQUATIONS_SET_SETUP%FIELD,FIELD_U_VARIABLE_TYPE,4, & @@ -3756,7 +3757,8 @@ SUBROUTINE FINITE_ELASTICITY_POST_SOLVE_OUTPUT_DATA(CONTROL_LOOP,SOLVER,ERR,ERRO TYPE(SOLVER_EQUATIONS_TYPE), POINTER :: SOLVER_EQUATIONS !SOLVER_MAPPING%EQUATIONS_SETS(equations_set_idx)%PTR - IF(EQUATIONS_SET%TYPE==EQUATIONS_SET_FINITE_ELASTICITY_TYPE)THEN - CONTROL_TIME_LOOP=>CONTROL_LOOP - DO loop_idx=1,CONTROL_LOOP%CONTROL_LOOP_LEVEL - IF(CONTROL_TIME_LOOP%LOOP_TYPE==PROBLEM_CONTROL_TIME_LOOP_TYPE) THEN - EXIT - ENDIF - IF (ASSOCIATED(CONTROL_LOOP%PARENT_LOOP)) THEN - CONTROL_TIME_LOOP=>CONTROL_TIME_LOOP%PARENT_LOOP - ELSE - CALL FLAG_ERROR("Could not find a time control loop.",ERR,ERROR,*999) - ENDIF - ENDDO - - CURRENT_LOOP_ITERATION=CONTROL_TIME_LOOP%TIME_LOOP%ITERATION_NUMBER - OUTPUT_ITERATION_NUMBER=CONTROL_TIME_LOOP%TIME_LOOP%OUTPUT_NUMBER - - IF(OUTPUT_ITERATION_NUMBER/=0) THEN - IF(CONTROL_TIME_LOOP%TIME_LOOP%CURRENT_TIME<=CONTROL_TIME_LOOP%TIME_LOOP%STOP_TIME) THEN - IF(CURRENT_LOOP_ITERATION<10) THEN - WRITE(OUTPUT_FILE,'("S_TIMESTP_000",I0)') CURRENT_LOOP_ITERATION - ELSE IF(CURRENT_LOOP_ITERATION<100) THEN - WRITE(OUTPUT_FILE,'("S_TIMESTP_00",I0)') CURRENT_LOOP_ITERATION - ELSE IF(CURRENT_LOOP_ITERATION<1000) THEN - WRITE(OUTPUT_FILE,'("S_TIMESTP_0",I0)') CURRENT_LOOP_ITERATION - ELSE IF(CURRENT_LOOP_ITERATION<10000) THEN - WRITE(OUTPUT_FILE,'("S_TIMESTP_",I0)') CURRENT_LOOP_ITERATION - END IF - FILE=OUTPUT_FILE - ! FILE="TRANSIENT_OUTPUT" - METHOD="FORTRAN" - EXPORT_FIELD=.TRUE. - IF(EXPORT_FIELD) THEN - IF(MOD(CURRENT_LOOP_ITERATION,OUTPUT_ITERATION_NUMBER)==0) THEN - IF(SOLVER%OUTPUT_TYPE>=SOLVER_PROGRESS_OUTPUT) THEN - CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,"Finite Elasticity export fields ...",ERR,ERROR,*999) - CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,OUTPUT_FILE,ERR,ERROR,*999) - ENDIF - CALL FLUID_MECHANICS_IO_WRITE_CMGUI(EQUATIONS_SET%REGION,EQUATIONS_SET%GLOBAL_NUMBER,FILE, & - & ERR,ERROR,*999) - IF(SOLVER%OUTPUT_TYPE>=SOLVER_PROGRESS_OUTPUT) THEN - CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,"Finite Elasticity all fields exported ...",ERR,ERROR,*999) + IF(EQUATIONS_SET%TYPE==EQUATIONS_SET_FINITE_ELASTICITY_TYPE) THEN + TIME_LOOP=>CONTROL_LOOP !Initialise time loop (load increment loop on first) + !Move up to find outer time loop + DO loop_idx=1,CONTROL_LOOP%CONTROL_LOOP_LEVEL-1 + IF(ASSOCIATED(TIME_LOOP%PARENT_LOOP)) THEN + TIME_LOOP=>TIME_LOOP%PARENT_LOOP + ELSE + CALL FLAG_ERROR("Could not find a time control loop.",ERR,ERROR,*999) + ENDIF + ENDDO + WHILE_LOOP=>TIME_LOOP%SUB_LOOPS(1)%PTR + CURRENT_LOOP_ITERATION=TIME_LOOP%TIME_LOOP%ITERATION_NUMBER + OUTPUT_ITERATION_NUMBER=TIME_LOOP%TIME_LOOP%OUTPUT_NUMBER + + IF(WHILE_LOOP%WHILE_LOOP%CONTINUE_LOOP) THEN + IF(TIME_LOOP%TIME_LOOP%CURRENT_TIME<=TIME_LOOP%TIME_LOOP%STOP_TIME) THEN + WRITE(OUTPUT_FILE,'("S_TIMESTP_",I4.4)') CURRENT_LOOP_ITERATION + FILE=OUTPUT_FILE + METHOD="FORTRAN" + EXPORT_FIELD=.TRUE. 
+ IF(EXPORT_FIELD) THEN + IF(MOD(CURRENT_LOOP_ITERATION,OUTPUT_ITERATION_NUMBER)==0) THEN + IF(SOLVER%OUTPUT_TYPE>=SOLVER_PROGRESS_OUTPUT) THEN + CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,"Finite Elasticity export fields ...",ERR,ERROR,*999) + CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,OUTPUT_FILE,ERR,ERROR,*999) + ENDIF + CALL FLUID_MECHANICS_IO_WRITE_CMGUI(EQUATIONS_SET%REGION,EQUATIONS_SET%GLOBAL_NUMBER,FILE, & + & ERR,ERROR,*999) + IF(SOLVER%OUTPUT_TYPE>=SOLVER_PROGRESS_OUTPUT) THEN + CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,"Finite Elasticity all fields exported ...",ERR,ERROR,*999) + ENDIF + CALL WRITE_STRING(DIAGNOSTIC_OUTPUT_TYPE,OUTPUT_FILE,ERR,ERROR,*999) ENDIF - CALL WRITE_STRING(DIAGNOSTIC_OUTPUT_TYPE,OUTPUT_FILE,ERR,ERROR,*999) - ENDIF - ENDIF - ENDIF - ENDIF - - !Subiteration intermediate solutions / iterates output: -! IF(CONTROL_LOOP%PARENT_LOOP%LOOP_TYPE==PROBLEM_CONTROL_WHILE_LOOP_TYPE) THEN !subiteration exists -! SUBITERATION_NUMBER=CONTROL_LOOP%PARENT_LOOP%WHILE_LOOP%ITERATION_NUMBER -! IF(CURRENT_LOOP_ITERATION<10) THEN -! IF(SUBITERATION_NUMBER<10) THEN -! WRITE(OUTPUT_FILE,'("S_00",I0,"_SUB_000",I0)') CURRENT_LOOP_ITERATION,SUBITERATION_NUMBER -! ELSE IF(SUBITERATION_NUMBER<100) THEN -! WRITE(OUTPUT_FILE,'("S_00",I0,"_SUB_00",I0)') CURRENT_LOOP_ITERATION,SUBITERATION_NUMBER -! END IF -! FILE=OUTPUT_FILE -! METHOD="FORTRAN" -! EXPORT_FIELD=.TRUE. -! IF(EXPORT_FIELD) THEN -! CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,"Finite Elasticity export subiterates ...",ERR,ERROR,*999) -! CALL FLUID_MECHANICS_IO_WRITE_CMGUI(EQUATIONS_SET%REGION,EQUATIONS_SET%GLOBAL_NUMBER,FILE, & -! & ERR,ERROR,*999) -! CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,OUTPUT_FILE,ERR,ERROR,*999) -! CALL WRITE_STRING(DIAGNOSTIC_OUTPUT_TYPE,OUTPUT_FILE,ERR,ERROR,*999) + ENDIF + ENDIF !stop_time + ENDIF !continue_loop + +! !Subiteration intermediate solutions / iterates output: +! IF(CONTROL_LOOP%PARENT_LOOP%LOOP_TYPE==PROBLEM_CONTROL_WHILE_LOOP_TYPE) THEN !subiteration exists +! SUBITERATION_NUMBER=CONTROL_LOOP%PARENT_LOOP%WHILE_LOOP%ITERATION_NUMBER +! IF(CURRENT_LOOP_ITERATION<10) THEN +! IF(SUBITERATION_NUMBER<10) THEN +! WRITE(OUTPUT_FILE,'("S_00",I0,"_SUB_000",I0)') CURRENT_LOOP_ITERATION,SUBITERATION_NUMBER +! ELSE IF(SUBITERATION_NUMBER<100) THEN +! WRITE(OUTPUT_FILE,'("S_00",I0,"_SUB_00",I0)') CURRENT_LOOP_ITERATION,SUBITERATION_NUMBER +! END IF +! FILE=OUTPUT_FILE +! METHOD="FORTRAN" +! EXPORT_FIELD=.TRUE. +! IF(EXPORT_FIELD) THEN +! CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,"Finite Elasticity export subiterates ...",ERR,ERROR,*999) +! CALL FLUID_MECHANICS_IO_WRITE_CMGUI(EQUATIONS_SET%REGION,EQUATIONS_SET%GLOBAL_NUMBER,FILE, & +! & ERR,ERROR,*999) +! CALL WRITE_STRING(GENERAL_OUTPUT_TYPE,OUTPUT_FILE,ERR,ERROR,*999) +! CALL WRITE_STRING(DIAGNOSTIC_OUTPUT_TYPE,OUTPUT_FILE,ERR,ERROR,*999) +! ENDIF ! ENDIF ! ENDIF -! ENDIF - ENDIF - ENDDO - ENDIF - ENDIF + ENDIF !EQUATIONS_SET_FINITE_ELASTICITY_TYPE + ENDDO !equations_set_idx + ENDIF !Solver_mapping + ENDIF !Solver_equations CASE DEFAULT LOCAL_ERROR="Problem subtype "//TRIM(NUMBER_TO_VSTRING(CONTROL_LOOP%PROBLEM%SUBTYPE,"*",ERR,ERROR))// & & " is not valid for a finite elasticity problem class." 
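The rewritten output block above collapses the cascaded width tests into a single zero-padded edit descriptor and gates exporting on the output interval. The same naming and gating logic, transcribed loosely to C for illustration (writeStepFile is hypothetical, not a routine in this tree):

    #include <stdio.h>

    /* The I4.4 edit descriptor in WRITE(OUTPUT_FILE,'("S_TIMESTP_",I4.4)')
     * maps to %04d, and export fires only when the iteration divides the
     * output interval; an interval of 0 is treated as "no output", as the
     * old OUTPUT_ITERATION_NUMBER/=0 guard did. */
    static void writeStepFile(int iteration, int outputInterval)
    {
        char name[32];
        if (outputInterval != 0 && iteration % outputInterval == 0) {
            snprintf(name, sizeof name, "S_TIMESTP_%04d", iteration);
            printf("Finite Elasticity export fields ... %s\n", name);
        }
    }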
@@ -4401,9 +4392,9 @@ SUBROUTINE FINITE_ELASTICITY_PRE_SOLVE_UPDATE_BOUNDARY_CONDITIONS(CONTROL_LOOP,S CALL FIELD_PARAMETER_SET_DATA_RESTORE(DEPENDENT_FIELD,FIELD_DELUDELN_VARIABLE_TYPE, & & FIELD_PRESSURE_VALUES_SET_TYPE,DUMMY_VALUES1,ERR,ERROR,*999) ENDIF + CALL FIELD_PARAMETER_SET_DATA_RESTORE(DEPENDENT_FIELD,FIELD_DELUDELN_VARIABLE_TYPE, & + & FIELD_PRESSURE_VALUES_SET_TYPE,CURRENT_PRESSURE_VALUES,ERR,ERROR,*999) ENDIF !Pressure_condition_used - CALL FIELD_PARAMETER_SET_DATA_RESTORE(DEPENDENT_FIELD,FIELD_DELUDELN_VARIABLE_TYPE, & - & FIELD_PRESSURE_VALUES_SET_TYPE,CURRENT_PRESSURE_VALUES,ERR,ERROR,*999) ELSE CALL FLAG_ERROR("Boundary condition variable is not associated.",ERR,ERROR,*999) END IF diff --git a/src/fluid_mechanics_IO_routines.f90 b/src/fluid_mechanics_IO_routines.f90 index e984a23c..53776ef2 100644 --- a/src/fluid_mechanics_IO_routines.f90 +++ b/src/fluid_mechanics_IO_routines.f90 @@ -3712,6 +3712,10 @@ SUBROUTINE FLUID_MECHANICS_IO_READ_DATA(SOLVER_TYPE,INPUT_VALUES,NUMBER_OF_DIMEN WRITE(INPUT_FILE,'("./input/motion/DISPLACEMENT_",I0,".dat")') TIME_STEP ENDIF OPEN(UNIT=TIME_STEP, FILE=INPUT_FILE,STATUS='unknown') + READ(TIME_STEP,*) CHECK + IF(CHECK/=ENDI) THEN + STOP 'Error during data input - probably wrong Lagrangian/Hermite input file!' + ENDIF DO I=1,ENDI READ(TIME_STEP,*) INPUT_VALUES(I) ENDDO diff --git a/src/solver_routines.f90 b/src/solver_routines.f90 index 2b41a3a3..2d5912a5 100644 --- a/src/solver_routines.f90 +++ b/src/solver_routines.f90 @@ -2951,6 +2951,7 @@ SUBROUTINE SOLVER_DAE_EXTERNAL_SOLVE(EXTERNAL_SOLVER,ERR,ERROR,*) ENDIF ELSE NULLIFY(INTERMEDIATE_DATA) + NULLIFY(INTERMEDIATE_FIELD) ENDIF !Call the external solver to integrate these CellML equations From 80d84675e9480b545af17f82f8c244818f90e483 Mon Sep 17 00:00:00 2001 From: Jin Budelmann Date: Mon, 18 Apr 2011 01:00:15 +0000 Subject: [PATCH 10/24] Fixed issues with recent changes to field variables. 
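The field-variable changes this patch tracks show up in the hunks below: the nonlinear mapping's single RESIDUAL_VARIABLE (and the matrices' single JACOBIAN) became arrays, and single-residual call sites now take the first entry. Schematically, in C with illustrative types only (Fortran indexes the first entry as 1 where C uses 0):

    /* Illustrative shape of the single-member -> array refactor. */
    typedef struct FieldVariable FieldVariable;   /* opaque here */

    typedef struct {
        int             numberOfResiduals;
        FieldVariable **residualVariables;        /* was: FieldVariable *residualVariable */
    } NonlinearMapping;

    /* Existing single-residual code paths now read the first entry. */
    static FieldVariable *firstResidual(const NonlinearMapping *map)
    {
        return (map && map->numberOfResiduals > 0) ? map->residualVariables[0] : 0;
    }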
No relevant Tracker Item git-svn-id: https://opencmiss.svn.sourceforge.net/svnroot/opencmiss/cm/branches/developers/jin@2122 61e36070-c886-4827-b7c0-ed9e4dc59cc8 --- src/cuda_solver_routines.cu | 20 ++++++++++---------- src/diffusion_equation_routines.f90 | 14 +++++++------- src/equations_set_routines.f90 | 2 +- src/field_routines.f90 | 8 ++++++++ src/solver_routines.f90 | 2 +- 5 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/cuda_solver_routines.cu b/src/cuda_solver_routines.cu index 8d34faaa..3d98036c 100644 --- a/src/cuda_solver_routines.cu +++ b/src/cuda_solver_routines.cu @@ -423,9 +423,8 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, dim3 grid(blocksPerDomain, 1, 1); dim3 threads(threads_per_block, 1, 1); - //#ifdef DEBUG - if (timing_file) { + printf("> Device name : %s\n", deviceProp.name ); printf("> CUDA Capable SM %d.%d hardware with %d multi-processors\n", deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); @@ -436,6 +435,7 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, printf("grid.x %d threads.x %d sharedMem %d\n", grid.x, threads.x, sharedMem); printf("Spills %d %d %d\n", spill[0], spill[1], spill[2]); + if (timing_file) { // Setup and start global timer timer = 0; cutCreateTimer(&timer); @@ -476,11 +476,11 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, if (timing_file==NULL || !(i==0 && j==0) || (domainIndex+1)*grid.x<=num_blocks) { local_offset = j * rateStateCount * grid.x * threads.x ; if (i == num_partitions - 1 && j == spill[0] && (spill[1]!=0 || spill[2]!=0)) { - //printf("last async in %d, size %d\n", domainIndex, lastStreamMemorySize); + printf("last async in %d, size %d\n", domainIndex, lastStreamMemorySize); cutilSafeCall( cudaMemcpyAsync(d_states + local_offset, h_paged_states + local_offset, lastStreamMemorySize, cudaMemcpyHostToDevice, streams[j]) ); } else { - //printf("normal async in %d, size %d\n", domainIndex, pagedMemorySize/num_streams); + printf("normal async in %d, size %d\n", domainIndex, pagedMemorySize/num_streams); cutilSafeCall( cudaMemcpyAsync(d_states + local_offset, h_paged_states + local_offset, pagedMemorySize/num_streams, cudaMemcpyHostToDevice, streams[j]) ); } @@ -509,11 +509,11 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, if (timing_file==NULL || !(i==0 && j==0) || (domainIndex+1)*grid.x<=num_blocks) { local_offset = j * rateStateCount * grid.x * threads.x ; if (i == num_partitions - 1 && j == spill[0] && (spill[1]!=0 || spill[2]!=0) ) { - //printf("last async out %d, size %d\n", domainIndex, lastStreamMemorySize); + printf("last async out %d, size %d\n", domainIndex, lastStreamMemorySize); cutilSafeCall( cudaMemcpyAsync(h_paged_states + local_offset, d_states + local_offset, lastStreamMemorySize, cudaMemcpyDeviceToHost, streams[j]) ); } else { - //printf("normal async out %d, size %d\n", domainIndex, pagedMemorySize/num_streams); + printf("normal async out %d, size %d\n", domainIndex, pagedMemorySize/num_streams); cutilSafeCall( cudaMemcpyAsync(h_paged_states + local_offset, d_states + local_offset, pagedMemorySize/num_streams, cudaMemcpyDeviceToHost, streams[j]) ); } @@ -530,23 +530,23 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, global_offset = i * num_streams * grid.x * threads.x; if (i == num_partitions - 1 && j == spill[0] && (spill[1]!=0 || spill[2]!=0)) { - //printf("last memcpy out %d\n", domainIndex); + 
printf("last memcpy out %d\n", domainIndex); memcpy(h_states + rateStateCount * global_offset + local_offset, h_paged_states + local_offset, lastStreamMemorySize); } else { - //printf("normal memcpy out %d\n", domainIndex); + printf("normal memcpy out %d\n", domainIndex); memcpy(h_states + rateStateCount * global_offset + local_offset, h_paged_states + local_offset, pagedMemorySize/num_streams); } global_offset = (i + 1) * num_streams * grid.x * threads.x; if (i == num_partitions - 2 && j == spill[0] && (spill[1]!=0 || spill[2]!=0)) { - //printf("last memcpy in %d\n", domainIndex); + printf("last memcpy in %d\n", domainIndex); lastStreamMemorySize = sizeof(double)*rateStateCount*(spill[1]*threads_per_block+spill[2]); memcpy(h_paged_states + local_offset, h_states + rateStateCount * global_offset + local_offset, lastStreamMemorySize); } else if (i < num_partitions - 2 || j < spill[0] || (i == num_partitions - 2 && j == spill[0])) { - //printf("normal memcpy in %d\n", domainIndex); + printf("normal memcpy in %d\n", domainIndex); memcpy(h_paged_states + local_offset, h_states + rateStateCount * global_offset + local_offset, pagedMemorySize/num_streams); } diff --git a/src/diffusion_equation_routines.f90 b/src/diffusion_equation_routines.f90 index 6bf7641e..6cb29525 100644 --- a/src/diffusion_equation_routines.f90 +++ b/src/diffusion_equation_routines.f90 @@ -3012,7 +3012,7 @@ SUBROUTINE DIFFUSION_EQUATION_EQUATIONS_SET_NONLINEAR_SETUP(EQUATIONS_SET,EQUATI CALL EQUATIONS_MAPPING_DYNAMIC_MATRICES_SET(EQUATIONS_MAPPING,.TRUE.,.TRUE.,ERR,ERROR,*999) CALL EQUATIONS_MAPPING_DYNAMIC_VARIABLE_TYPE_SET(EQUATIONS_MAPPING,FIELD_U_VARIABLE_TYPE,ERR,ERROR,*999) CALL EQUATIONS_MAPPING_RHS_VARIABLE_TYPE_SET(EQUATIONS_MAPPING,FIELD_DELUDELN_VARIABLE_TYPE,ERR,ERROR,*999) - CALL EQUATIONS_MAPPING_RESIDUAL_VARIABLE_TYPE_SET(EQUATIONS_MAPPING,FIELD_U_VARIABLE_TYPE,ERR,ERROR,*999) + CALL EQUATIONS_MAPPING_RESIDUAL_VARIABLE_TYPES_SET(EQUATIONS_MAPPING,[FIELD_U_VARIABLE_TYPE],ERR,ERROR,*999) CALL EQUATIONS_MAPPING_CREATE_FINISH(EQUATIONS_MAPPING,ERR,ERROR,*999) !Create the equations matrices CALL EQUATIONS_MATRICES_CREATE_START(EQUATIONS,EQUATIONS_MATRICES,ERR,ERROR,*999) @@ -5458,7 +5458,7 @@ SUBROUTINE DIFFUSION_EQUATION_FINITE_ELEMENT_JACOBIAN_EVALUATE(EQUATIONS_SET,ELE CASE(EQUATIONS_SET_QUADRATIC_SOURCE_DIFFUSION_SUBTYPE) EQUATIONS_MATRICES=>EQUATIONS%EQUATIONS_MATRICES NONLINEAR_MATRICES=>EQUATIONS_MATRICES%NONLINEAR_MATRICES - JACOBIAN_MATRIX=>NONLINEAR_MATRICES%JACOBIAN + JACOBIAN_MATRIX=>NONLINEAR_MATRICES%JACOBIANS(1)%PTR IF(JACOBIAN_MATRIX%UPDATE_JACOBIAN) THEN !Store all these in equations matrices/somewhere else????? 
DEPENDENT_FIELD=>EQUATIONS%INTERPOLATION%DEPENDENT_FIELD @@ -5466,7 +5466,7 @@ SUBROUTINE DIFFUSION_EQUATION_FINITE_ELEMENT_JACOBIAN_EVALUATE(EQUATIONS_SET,ELE MATERIALS_FIELD=>EQUATIONS%INTERPOLATION%MATERIALS_FIELD EQUATIONS_MAPPING=>EQUATIONS%EQUATIONS_MAPPING NONLINEAR_MAPPING=>EQUATIONS_MAPPING%NONLINEAR_MAPPING - DEPENDENT_VARIABLE=>NONLINEAR_MAPPING%RESIDUAL_VARIABLE + DEPENDENT_VARIABLE=>NONLINEAR_MAPPING%RESIDUAL_VARIABLES(1)%PTR FIELD_VAR_TYPE=DEPENDENT_VARIABLE%VARIABLE_TYPE GEOMETRIC_VARIABLE=>GEOMETRIC_FIELD%VARIABLE_TYPE_MAP(FIELD_U_VARIABLE_TYPE)%PTR DEPENDENT_BASIS=>DEPENDENT_FIELD%DECOMPOSITION%DOMAIN(DEPENDENT_FIELD%DECOMPOSITION%MESH_COMPONENT_NUMBER)%PTR% & @@ -5520,7 +5520,7 @@ SUBROUTINE DIFFUSION_EQUATION_FINITE_ELEMENT_JACOBIAN_EVALUATE(EQUATIONS_SET,ELE CASE(EQUATIONS_SET_EXPONENTIAL_SOURCE_DIFFUSION_SUBTYPE) EQUATIONS_MATRICES=>EQUATIONS%EQUATIONS_MATRICES NONLINEAR_MATRICES=>EQUATIONS_MATRICES%NONLINEAR_MATRICES - JACOBIAN_MATRIX=>NONLINEAR_MATRICES%JACOBIAN + JACOBIAN_MATRIX=>NONLINEAR_MATRICES%JACOBIANS(1)%PTR IF(JACOBIAN_MATRIX%UPDATE_JACOBIAN) THEN !Store all these in equations matrices/somewhere else????? DEPENDENT_FIELD=>EQUATIONS%INTERPOLATION%DEPENDENT_FIELD @@ -5528,7 +5528,7 @@ SUBROUTINE DIFFUSION_EQUATION_FINITE_ELEMENT_JACOBIAN_EVALUATE(EQUATIONS_SET,ELE MATERIALS_FIELD=>EQUATIONS%INTERPOLATION%MATERIALS_FIELD EQUATIONS_MAPPING=>EQUATIONS%EQUATIONS_MAPPING NONLINEAR_MAPPING=>EQUATIONS_MAPPING%NONLINEAR_MAPPING - DEPENDENT_VARIABLE=>NONLINEAR_MAPPING%RESIDUAL_VARIABLE + DEPENDENT_VARIABLE=>NONLINEAR_MAPPING%RESIDUAL_VARIABLES(1)%PTR FIELD_VAR_TYPE=DEPENDENT_VARIABLE%VARIABLE_TYPE GEOMETRIC_VARIABLE=>GEOMETRIC_FIELD%VARIABLE_TYPE_MAP(FIELD_U_VARIABLE_TYPE)%PTR DEPENDENT_BASIS=>DEPENDENT_FIELD%DECOMPOSITION%DOMAIN(DEPENDENT_FIELD%DECOMPOSITION%MESH_COMPONENT_NUMBER)%PTR% & @@ -5669,7 +5669,7 @@ SUBROUTINE DIFFUSION_EQUATION_FINITE_ELEMENT_RESIDUAL_EVALUATE(EQUATIONS_SET,ELE EQUATIONS_MAPPING=>EQUATIONS%EQUATIONS_MAPPING DYNAMIC_MAPPING=>EQUATIONS_MAPPING%DYNAMIC_MAPPING NONLINEAR_MAPPING=>EQUATIONS_MAPPING%NONLINEAR_MAPPING - DEPENDENT_VARIABLE=>NONLINEAR_MAPPING%RESIDUAL_VARIABLE + DEPENDENT_VARIABLE=>NONLINEAR_MAPPING%RESIDUAL_VARIABLES(1)%PTR FIELD_VAR_TYPE=DEPENDENT_VARIABLE%VARIABLE_TYPE GEOMETRIC_VARIABLE=>GEOMETRIC_FIELD%VARIABLE_TYPE_MAP(FIELD_U_VARIABLE_TYPE)%PTR DEPENDENT_BASIS=>DEPENDENT_FIELD%DECOMPOSITION%DOMAIN(DEPENDENT_FIELD%DECOMPOSITION%MESH_COMPONENT_NUMBER)%PTR% & @@ -5790,7 +5790,7 @@ SUBROUTINE DIFFUSION_EQUATION_FINITE_ELEMENT_RESIDUAL_EVALUATE(EQUATIONS_SET,ELE EQUATIONS_MAPPING=>EQUATIONS%EQUATIONS_MAPPING DYNAMIC_MAPPING=>EQUATIONS_MAPPING%DYNAMIC_MAPPING NONLINEAR_MAPPING=>EQUATIONS_MAPPING%NONLINEAR_MAPPING - DEPENDENT_VARIABLE=>NONLINEAR_MAPPING%RESIDUAL_VARIABLE + DEPENDENT_VARIABLE=>NONLINEAR_MAPPING%RESIDUAL_VARIABLES(1)%PTR FIELD_VAR_TYPE=DEPENDENT_VARIABLE%VARIABLE_TYPE GEOMETRIC_VARIABLE=>GEOMETRIC_FIELD%VARIABLE_TYPE_MAP(FIELD_U_VARIABLE_TYPE)%PTR DEPENDENT_BASIS=>DEPENDENT_FIELD%DECOMPOSITION%DOMAIN(DEPENDENT_FIELD%DECOMPOSITION%MESH_COMPONENT_NUMBER)%PTR% & diff --git a/src/equations_set_routines.f90 b/src/equations_set_routines.f90 index b4012b27..781dda06 100644 --- a/src/equations_set_routines.f90 +++ b/src/equations_set_routines.f90 @@ -4942,7 +4942,7 @@ SUBROUTINE EQUATIONS_SET_RESIDUAL_EVALUATE(EQUATIONS_SET,ERR,ERROR,*) IF(ASSOCIATED(EQUATIONS_MAPPING)) THEN NONLINEAR_MAPPING=>EQUATIONS_MAPPING%NONLINEAR_MAPPING IF(ASSOCIATED(NONLINEAR_MAPPING)) THEN - RESIDUAL_VARIABLE=>NONLINEAR_MAPPING%RESIDUAL_VARIABLE + 
RESIDUAL_VARIABLE=>NONLINEAR_MAPPING%RESIDUAL_VARIABLES(1)%PTR IF(ASSOCIATED(RESIDUAL_VARIABLE)) THEN RESIDUAL_PARAMETER_SET=>RESIDUAL_VARIABLE%PARAMETER_SETS%SET_TYPE(FIELD_RESIDUAL_SET_TYPE)%PTR IF(ASSOCIATED(RESIDUAL_PARAMETER_SET)) THEN diff --git a/src/field_routines.f90 b/src/field_routines.f90 index f77a4100..16a48dcd 100644 --- a/src/field_routines.f90 +++ b/src/field_routines.f90 @@ -20463,6 +20463,14 @@ SUBROUTINE FIELD_PARAMETER_SET_UPDATE_NODE_DP(FIELD,VARIABLE_TYPE,FIELD_SET_TYPE !Only checks if version number is greater than zero IF(DERIVATIVE_NUMBER>0.AND.DERIVATIVE_NUMBER<=DOMAIN_NODES%NODES(DOMAIN_LOCAL_NODE_NUMBER)% & NUMBER_OF_DERIVATIVES.AND.VERSION_NUMBER>0) THEN + WRITE (*,*) SIZE(FIELD_VARIABLE%COMPONENTS, 1) + WRITE (*,*) SIZE(FIELD_VARIABLE%COMPONENTS(COMPONENT_NUMBER)%PARAM_TO_DOF_MAP% & + & NODE_PARAM2DOF_MAP%NODES, 1) + WRITE (*,*) SIZE(FIELD_VARIABLE%COMPONENTS(COMPONENT_NUMBER)%PARAM_TO_DOF_MAP% & + & NODE_PARAM2DOF_MAP%NODES(DOMAIN_LOCAL_NODE_NUMBER)%DERIVATIVES, 1) + WRITE (*,*) SIZE(FIELD_VARIABLE%COMPONENTS(COMPONENT_NUMBER)%PARAM_TO_DOF_MAP% & + & NODE_PARAM2DOF_MAP%NODES(DOMAIN_LOCAL_NODE_NUMBER)%DERIVATIVES(DERIVATIVE_NUMBER)% & + & VERSIONS, 1) dof_idx=FIELD_VARIABLE%COMPONENTS(COMPONENT_NUMBER)%PARAM_TO_DOF_MAP% & & NODE_PARAM2DOF_MAP%NODES(DOMAIN_LOCAL_NODE_NUMBER)%DERIVATIVES(DERIVATIVE_NUMBER)% & & VERSIONS(VERSION_NUMBER) diff --git a/src/solver_routines.f90 b/src/solver_routines.f90 index e1702a92..5c6a3624 100644 --- a/src/solver_routines.f90 +++ b/src/solver_routines.f90 @@ -15318,7 +15318,7 @@ SUBROUTINE SOLVER_VARIABLES_DYNAMIC_FIELD_PREVIOUS_VALUES_UPDATE(SOLVER,ERR,ERRO IF(ASSOCIATED(EQUATIONS_MAPPING)) THEN NONLINEAR_MAPPING=>EQUATIONS_MAPPING%NONLINEAR_MAPPING IF(ASSOCIATED(NONLINEAR_MAPPING)) THEN - RESIDUAL_VARIABLE=>NONLINEAR_MAPPING%RESIDUAL_VARIABLE + RESIDUAL_VARIABLE=>NONLINEAR_MAPPING%RESIDUAL_VARIABLES(1)%PTR IF(ASSOCIATED(RESIDUAL_VARIABLE)) THEN CALL FIELD_PARAMETER_SETS_COPY(RESIDUAL_VARIABLE%FIELD,RESIDUAL_VARIABLE%VARIABLE_TYPE, & & FIELD_RESIDUAL_SET_TYPE,FIELD_PREVIOUS_RESIDUAL_SET_TYPE,1.0_DP,ERR,ERROR,*999) From b6c36290fa547752bd20c6fdc8af9e2df87fef9b Mon Sep 17 00:00:00 2001 From: Jin Budelmann Date: Tue, 19 Apr 2011 02:14:33 +0000 Subject: [PATCH 11/24] Further optimisations and fixes to partitioning algorithm. Added FHN model definition. 
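The partitioning fixes below stop rounding num_blocks up for a partial block and recompute the leftover directly from the block count. Stripped of the GPU-memory fitting loop, the bookkeeping is plain integer division; a condensed sketch, assuming num_partitions and blocksPerDomain stay at least 1 and with the patch's float ceil() calls shown as integer round-ups:

    /* spill[0]: leftover full domains, spill[1]: leftover blocks within a
     * domain, spill[2]: threads beyond the last full block. */
    static void partitionBlocks(unsigned numThreads, unsigned threadsPerBlock,
                                unsigned numStreams, unsigned *numPartitions,
                                unsigned *blocksPerDomain, unsigned spill[3])
    {
        unsigned numBlocks = numThreads / threadsPerBlock;
        spill[2] = numThreads % threadsPerBlock;       /* no round-up of numBlocks */
        *blocksPerDomain = numBlocks / (numStreams * *numPartitions);
        unsigned remaining = numBlocks % (numStreams * *numPartitions);
        if (remaining > numStreams * *blocksPerDomain) {
            /* grow the partition count to absorb the overflow */
            unsigned extraDomains = (remaining + *blocksPerDomain - 1) / *blocksPerDomain;
            *numPartitions += (extraDomains + numStreams - 1) / numStreams;
        }
        if (numBlocks / (numStreams * *numPartitions) == 0)
            --*numPartitions;                          /* correction added below */
        remaining = numBlocks % *numPartitions;
        spill[0] = remaining / *blocksPerDomain;
        spill[1] = remaining % *blocksPerDomain;
    }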
See Tracker Item 2180 git-svn-id: https://opencmiss.svn.sourceforge.net/svnroot/opencmiss/cm/branches/developers/jin@2123 61e36070-c886-4827-b7c0-ed9e4dc59cc8 --- src/cuda_solver_routines.cu | 57 +++++++++++++++++++++---------------- src/field_routines.f90 | 8 ------ 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/src/cuda_solver_routines.cu b/src/cuda_solver_routines.cu index 3d98036c..b2bb747d 100644 --- a/src/cuda_solver_routines.cu +++ b/src/cuda_solver_routines.cu @@ -275,7 +275,7 @@ void domainDecomposition(unsigned int* threads_per_domain, unsigned int* spill, } (*num_blocks)=num_threads/(*threads_per_block); spill[2]=num_threads % (*threads_per_block); - if (spill[2] !=0) (*num_blocks)++; // Round up calculation + //if (spill[2] !=0) (*num_blocks)++; // Round up calculation (*blocksPerDomain) = (*num_blocks)/(num_streams*(*num_partitions)); if ((*blocksPerDomain) == 0) { @@ -291,9 +291,14 @@ void domainDecomposition(unsigned int* threads_per_domain, unsigned int* spill, } else if (remainingBlocks > num_streams*(*blocksPerDomain)) { numberRemainingDomains = ceil((float)remainingBlocks/(*blocksPerDomain)); (*num_partitions) += ceil((float)numberRemainingDomains/num_streams); - remainingBlocks = ((spill[2]!=0) ? (*num_blocks)-1 : (*num_blocks))%(num_streams*((*num_partitions)-1)); } + if ((*num_blocks)/(num_streams*(*num_partitions))==0) { + (*num_partitions)--; + } + + remainingBlocks = (*num_blocks)%(*num_partitions); + // Adjust number of partitions so that data fits in GPU memory count = 0; cutilSafeCall( cudaMemGetInfo(&freeGPUMemory, &test) ); @@ -307,8 +312,8 @@ void domainDecomposition(unsigned int* threads_per_domain, unsigned int* spill, } else if (remainingBlocks > num_streams*(*blocksPerDomain)) { numberRemainingDomains = ceil((float)remainingBlocks/(*blocksPerDomain)); (*num_partitions) += ceil((float)numberRemainingDomains/num_streams); - remainingBlocks = ((spill[2]!=0) ? 
(*num_blocks)-1 : (*num_blocks))%(num_streams*((*num_partitions)-1)); } + remainingBlocks = (*num_blocks)%(*num_partitions); } else { fprintf(stderr, "Cannot fit variables in GPU device memory\nReduce threads per block and try again"); exit(EXIT_FAILURE); @@ -327,7 +332,6 @@ void domainDecomposition(unsigned int* threads_per_domain, unsigned int* spill, spill[0] = remainingBlocks/(*blocksPerDomain); spill[1] = remainingBlocks%(*blocksPerDomain); - // if (renainingBlocks != 0) { // blocksLastStream = // } @@ -375,6 +379,7 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, unsigned int kernel_timer = 0; unsigned int domainIndex = 0; unsigned int blocksPerDomain = 0; + unsigned int lastFullDomain = 0; unsigned int threads_per_domain = 0; unsigned int spill[3] = { 0 }; @@ -410,6 +415,9 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, domainDecomposition(&threads_per_domain, spill, num_threads, num_streams, &pagedMemorySize, &threads_per_block, &num_partitions, &blocksPerDomain, &num_blocks, &sharedMem); + + lastFullDomain = num_blocks/blocksPerDomain; + // Allocate CUDA device and host pinned memory cutilSafeCall( cudaMallocHost((void**) &h_paged_states, pagedMemorySize) ); cutilSafeCall( cudaMalloc((void **) &d_states, pagedMemorySize) ); @@ -424,7 +432,7 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, dim3 threads(threads_per_block, 1, 1); //#ifdef DEBUG - + if (timing_file) { printf("> Device name : %s\n", deviceProp.name ); printf("> CUDA Capable SM %d.%d hardware with %d multi-processors\n", deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); @@ -435,7 +443,6 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, printf("grid.x %d threads.x %d sharedMem %d\n", grid.x, threads.x, sharedMem); printf("Spills %d %d %d\n", spill[0], spill[1], spill[2]); - if (timing_file) { // Setup and start global timer timer = 0; cutCreateTimer(&timer); @@ -469,18 +476,18 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, } // Queue kernel calls into streams to hide memory transfers (num_partitions sets of kernel calls in each stream) - for(i = 0; i < num_partitions; i++) { + for(i = 0; i < num_partitions+1; i++) { // Asynchronously launch num_streams memcopies for(j = 0; j < num_streams; j++) { domainIndex = j + i*num_streams; - if (timing_file==NULL || !(i==0 && j==0) || (domainIndex+1)*grid.x<=num_blocks) { + if (domainIndex <= lastFullDomain && (timing_file==NULL || !(i==0 && j==0))) { local_offset = j * rateStateCount * grid.x * threads.x ; - if (i == num_partitions - 1 && j == spill[0] && (spill[1]!=0 || spill[2]!=0)) { - printf("last async in %d, size %d\n", domainIndex, lastStreamMemorySize); + if (domainIndex == lastFullDomain && (spill[1]!=0 || spill[2]!=0)) { + //printf("last async in %d, size %d\n", domainIndex, lastStreamMemorySize); cutilSafeCall( cudaMemcpyAsync(d_states + local_offset, h_paged_states + local_offset, lastStreamMemorySize, cudaMemcpyHostToDevice, streams[j]) ); } else { - printf("normal async in %d, size %d\n", domainIndex, pagedMemorySize/num_streams); + //printf("normal async in %d, size %d\n", domainIndex, pagedMemorySize/num_streams); cutilSafeCall( cudaMemcpyAsync(d_states + local_offset, h_paged_states + local_offset, pagedMemorySize/num_streams, cudaMemcpyHostToDevice, streams[j]) ); } @@ -490,9 +497,9 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, // 
Asynchronously launch num_streams kernels, each operating on its own portion of data for(j = 0; j < num_streams; j++) { domainIndex = j + i*num_streams; - if (timing_file==NULL || !(i==0 && j==0) || (domainIndex+1)*grid.x<=num_blocks) { + if (domainIndex <= lastFullDomain && (timing_file==NULL || !(i==0 && j==0))) { local_offset = j * grid.x * threads.x ; - if (i == num_partitions - 1 && j == spill[0] && (spill[1]!=0 || spill[2]!=0)) { + if (domainIndex == lastFullDomain && (spill[1]!=0 || spill[2]!=0)) { grid.x = spill[1]+1; solveSystem<<>>(timeSteps, stepSize, d_states + rateStateCount*local_offset); @@ -506,14 +513,14 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, // Asynchronoously launch num_streams memcopies for(j = 0; j < num_streams; j++){ domainIndex = j + i*num_streams; - if (timing_file==NULL || !(i==0 && j==0) || (domainIndex+1)*grid.x<=num_blocks) { + if (domainIndex <= lastFullDomain && (timing_file==NULL || !(i==0 && j==0))) { local_offset = j * rateStateCount * grid.x * threads.x ; - if (i == num_partitions - 1 && j == spill[0] && (spill[1]!=0 || spill[2]!=0) ) { - printf("last async out %d, size %d\n", domainIndex, lastStreamMemorySize); + if (domainIndex == lastFullDomain && (spill[1]!=0 || spill[2]!=0)) { + //printf("last async out %d, size %d\n", domainIndex, lastStreamMemorySize); cutilSafeCall( cudaMemcpyAsync(h_paged_states + local_offset, d_states + local_offset, lastStreamMemorySize, cudaMemcpyDeviceToHost, streams[j]) ); } else { - printf("normal async out %d, size %d\n", domainIndex, pagedMemorySize/num_streams); + //printf("normal async out %d, size %d\n", domainIndex, pagedMemorySize/num_streams); cutilSafeCall( cudaMemcpyAsync(h_paged_states + local_offset, d_states + local_offset, pagedMemorySize/num_streams, cudaMemcpyDeviceToHost, streams[j]) ); } @@ -523,30 +530,30 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, // Execute memcpys in and out of paged memory when CUDA calls in the streams have finished for(j = 0; j < num_streams; j++){ domainIndex = j + i*num_streams; - if (timing_file==NULL || !(i==0 && j==0) || (domainIndex+1)*grid.x<=num_blocks) { + if (domainIndex <= lastFullDomain && (timing_file==NULL || !(i==0 && j==0))) { cudaStreamSynchronize(streams[j]); local_offset = j * rateStateCount * grid.x * threads.x ; global_offset = i * num_streams * grid.x * threads.x; - if (i == num_partitions - 1 && j == spill[0] && (spill[1]!=0 || spill[2]!=0)) { - printf("last memcpy out %d\n", domainIndex); + if (domainIndex == lastFullDomain && (spill[1]!=0 || spill[2]!=0)) { + //printf("last memcpy out %d\n", domainIndex); memcpy(h_states + rateStateCount * global_offset + local_offset, h_paged_states + local_offset, lastStreamMemorySize); } else { - printf("normal memcpy out %d\n", domainIndex); + //printf("normal memcpy out %d\n", domainIndex); memcpy(h_states + rateStateCount * global_offset + local_offset, h_paged_states + local_offset, pagedMemorySize/num_streams); } global_offset = (i + 1) * num_streams * grid.x * threads.x; - if (i == num_partitions - 2 && j == spill[0] && (spill[1]!=0 || spill[2]!=0)) { - printf("last memcpy in %d\n", domainIndex); + if (domainIndex == lastFullDomain - num_streams && (spill[1]!=0 || spill[2]!=0)) { + //printf("last memcpy in %d\n", domainIndex); lastStreamMemorySize = sizeof(double)*rateStateCount*(spill[1]*threads_per_block+spill[2]); memcpy(h_paged_states + local_offset, h_states + rateStateCount * global_offset + local_offset, lastStreamMemorySize); 
- } else if (i < num_partitions - 2 || j < spill[0] || (i == num_partitions - 2 && j == spill[0])) { - printf("normal memcpy in %d\n", domainIndex); + } else if (domainIndex < lastFullDomain - num_streams) { + //printf("normal memcpy in %d\n", domainIndex); memcpy(h_paged_states + local_offset, h_states + rateStateCount * global_offset + local_offset, pagedMemorySize/num_streams); } diff --git a/src/field_routines.f90 b/src/field_routines.f90 index 16a48dcd..f77a4100 100644 --- a/src/field_routines.f90 +++ b/src/field_routines.f90 @@ -20463,14 +20463,6 @@ SUBROUTINE FIELD_PARAMETER_SET_UPDATE_NODE_DP(FIELD,VARIABLE_TYPE,FIELD_SET_TYPE !Only checks if version number is greater than zero IF(DERIVATIVE_NUMBER>0.AND.DERIVATIVE_NUMBER<=DOMAIN_NODES%NODES(DOMAIN_LOCAL_NODE_NUMBER)% & NUMBER_OF_DERIVATIVES.AND.VERSION_NUMBER>0) THEN - WRITE (*,*) SIZE(FIELD_VARIABLE%COMPONENTS, 1) - WRITE (*,*) SIZE(FIELD_VARIABLE%COMPONENTS(COMPONENT_NUMBER)%PARAM_TO_DOF_MAP% & - & NODE_PARAM2DOF_MAP%NODES, 1) - WRITE (*,*) SIZE(FIELD_VARIABLE%COMPONENTS(COMPONENT_NUMBER)%PARAM_TO_DOF_MAP% & - & NODE_PARAM2DOF_MAP%NODES(DOMAIN_LOCAL_NODE_NUMBER)%DERIVATIVES, 1) - WRITE (*,*) SIZE(FIELD_VARIABLE%COMPONENTS(COMPONENT_NUMBER)%PARAM_TO_DOF_MAP% & - & NODE_PARAM2DOF_MAP%NODES(DOMAIN_LOCAL_NODE_NUMBER)%DERIVATIVES(DERIVATIVE_NUMBER)% & - & VERSIONS, 1) dof_idx=FIELD_VARIABLE%COMPONENTS(COMPONENT_NUMBER)%PARAM_TO_DOF_MAP% & & NODE_PARAM2DOF_MAP%NODES(DOMAIN_LOCAL_NODE_NUMBER)%DERIVATIVES(DERIVATIVE_NUMBER)% & & VERSIONS(VERSION_NUMBER) From fb3328a9924215b6a16c6e88d9ef91cad92fb693 Mon Sep 17 00:00:00 2001 From: Jin Budelmann Date: Wed, 20 Apr 2011 00:17:58 +0000 Subject: [PATCH 12/24] Added per-time step solver timing data dump and total solve timing recording in example code. See tracker item 2180 git-svn-id: https://opencmiss.svn.sourceforge.net/svnroot/opencmiss/cm/branches/developers/jin@2124 61e36070-c886-4827-b7c0-ed9e4dc59cc8 --- src/cuda_solver_routines.cu | 18 +++++++++--------- src/external_dae_solver_routines.cu | 23 +++++++++++++++++++---- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/cuda_solver_routines.cu b/src/cuda_solver_routines.cu index b2bb747d..a39c9076 100644 --- a/src/cuda_solver_routines.cu +++ b/src/cuda_solver_routines.cu @@ -433,15 +433,15 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, //#ifdef DEBUG if (timing_file) { - printf("> Device name : %s\n", deviceProp.name ); - printf("> CUDA Capable SM %d.%d hardware with %d multi-processors\n", - deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); - printf("> Cell Model = %s, Integrator = %s\n", cellModelName, integratorName); - printf("> num_threads = %d, num_blocks = %d, threads_per_block = %d, num_partitions = %d, timeSteps = %d, num_streams = %d\n", - num_threads, num_blocks, threads_per_block, num_partitions, timeSteps, num_streams); - //#endif - printf("grid.x %d threads.x %d sharedMem %d\n", grid.x, threads.x, sharedMem); - printf("Spills %d %d %d\n", spill[0], spill[1], spill[2]); +// printf("> Device name : %s\n", deviceProp.name ); +// printf("> CUDA Capable SM %d.%d hardware with %d multi-processors\n", +// deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount); +// printf("> Cell Model = %s, Integrator = %s\n", cellModelName, integratorName); +// printf("> num_threads = %d, num_blocks = %d, threads_per_block = %d, num_partitions = %d, timeSteps = %d, num_streams = %d\n", +// num_threads, num_blocks, threads_per_block, num_partitions, timeSteps, 
num_streams); +// //#endif +// printf("grid.x %d threads.x %d sharedMem %d\n", grid.x, threads.x, sharedMem); +// printf("Spills %d %d %d\n", spill[0], spill[1], spill[2]); // Setup and start global timer timer = 0; diff --git a/src/external_dae_solver_routines.cu b/src/external_dae_solver_routines.cu index 449e90c1..172af625 100644 --- a/src/external_dae_solver_routines.cu +++ b/src/external_dae_solver_routines.cu @@ -56,7 +56,6 @@ SolverDAEExternalIntegrate Solves the differential-algebraic equation. */ /* Included files */ - #include #include @@ -87,14 +86,30 @@ void SolverDAEExternalIntegrate(const int NumberOfDofs, { FILE* timing_file = NULL; - //timing_file = fopen("performance_data.txt","w"); + char *filename = NULL; + + asprintf(&filename,"Results/MonodomainExample-CUDAON-%d-%d-%d-%d.txt",(NumberOfDofs/101) - 1,ThreadsPerBlock,NumberOfPartitions,NumberOfStreams); + timing_file = fopen(filename, "rt"); + + if (!timing_file) { + timing_file = fopen(filename, "wt"); + fprintf(timing_file,"Cell Model\tIntegrator\tNumber of Threads\tNumber of Blocks\tThreads Per Block\tNumber of Partitions\tNumber of Streams\tTotal Computational Time(s)\tTotal GFLOPS\tSingle Kernel Computational Time(s)\tKernel GFLOPS\tDevice Utilisation\n"); + } else { + fclose(timing_file); + timing_file = fopen(filename, "at"); + if (!timing_file) { + fprintf(stderr, "Timing file could not be opened or created."); + exit(EXIT_FAILURE); + } + } //printf("start %f end %f steps %d\n", StartTime, EndTime, (int)((EndTime-StartTime)/InitialStep[0])); // timeSteps = (int)ceil(((EndTime-StartTime)/InitialStep[0])); - solve(StateData, StartTime, EndTime, InitialStep[0], NumberOfDofs, ThreadsPerBlock, NumberOfPartitions, NumberOfStreams, timing_file); + solve(StateData, StartTime, EndTime, InitialStep[0], NumberOfDofs, ThreadsPerBlock, NumberOfPartitions, NumberOfStreams, NULL);// timing_file); - //if (timing_file != NULL ) fclose(timing_file); + if (timing_file != NULL ) fclose(timing_file); + free(filename); } } From 7c52a10da2af56e2b2dfd4e5c641b5a1da902512 Mon Sep 17 00:00:00 2001 From: Jin Budelmann Date: Wed, 20 Apr 2011 05:35:33 +0000 Subject: [PATCH 13/24] Changed some WRITEs to WRITE_STRING_VALUE to clean up output. Added a hack DAE solver timer using COMMON variables in SOLVER_DAE_SOLVE. Temporarily removed single-domain timing as it was producing NaNs for some reason.
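[Editor's note] The timing-file logic added in the hunk above follows a probe-then-open pattern: open the file for reading to learn whether it already exists, write the tab-separated column header only on first creation, and append on every later run. A minimal self-contained sketch of that pattern, assuming nothing beyond standard C (the helper name is the editor's, not the patch's):

#include <stdio.h>

/* Open a per-run results file: create it and emit the column header on
 * first use, otherwise append. Returns NULL on failure. */
static FILE *openTimingFile(const char *filename, const char *header)
{
    FILE *f = fopen(filename, "rt");   /* probe: does the file exist yet? */
    if (!f) {
        f = fopen(filename, "wt");     /* first run: create and write header */
        if (f) fputs(header, f);
    } else {
        fclose(f);
        f = fopen(filename, "at");     /* later runs: append data rows */
    }
    return f;
}

Note that fopen(filename, "at") would create a missing file by itself, so the read probe exists only to decide whether the header still needs to be written.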
See tracker item 2180 git-svn-id: https://opencmiss.svn.sourceforge.net/svnroot/opencmiss/cm/branches/developers/jin@2125 61e36070-c886-4827-b7c0-ed9e4dc59cc8 --- src/cmiss_cellml.f90 | 32 +++++++++---- src/cuda_solver_routines.cu | 74 +++++++++++++++-------------- src/external_dae_solver_routines.cu | 6 +-- src/solver_routines.f90 | 11 +++++ 4 files changed, 75 insertions(+), 48 deletions(-) diff --git a/src/cmiss_cellml.f90 b/src/cmiss_cellml.f90 index 36d6adb7..91989d98 100644 --- a/src/cmiss_cellml.f90 +++ b/src/cmiss_cellml.f90 @@ -439,7 +439,9 @@ SUBROUTINE CELLML_CREATE_FINISH(CELLML,ERR,ERROR,*) !Check that we have set up the models IF(CELLML%NUMBER_OF_MODELS>0) THEN DO model_idx=1,CELLML%NUMBER_OF_MODELS - write(*,*) 'model_idx = ',model_idx + IF(DIAGNOSTICS1) THEN + CALL WRITE_STRING_VALUE(DIAGNOSTIC_OUTPUT_TYPE,'model_idx = ',model_idx,ERR,ERROR,*999) + ENDIF CELLML_MODEL => CELLML%MODELS(model_idx)%PTR !CALL CELLML_MODEL_DEFINITION_SET_SAVE_TEMP_FILES(CELLML_MODEL%PTR,1) ERROR_CODE = CELLML_MODEL_DEFINITION_INSTANTIATE(CELLML_MODEL%PTR) @@ -2719,8 +2721,11 @@ SUBROUTINE CELLML_STATE_FIELD_CREATE_FINISH(CELLML,ERR,ERROR,*) & TRIM(NUMBER_TO_VSTRING(state_component_idx,"*",ERR,ERROR))//"." CALL FLAG_ERROR(LOCAL_ERROR,ERR,ERROR,*999) ENDIF - WRITE(*,*) '(single model) Initial value for state variable: ',state_component_idx,'; type: ',& - & CELLML_VARIABLE_TYPE,'; value = ',INITIAL_VALUE + IF(DIAGNOSTICS1) THEN + CALL WRITE_STRING_TWO_VALUE(DIAGNOSTIC_OUTPUT_TYPE,'(single model) Initial value for state variable: ', & + & state_component_idx,'; type: ',CELLML_VARIABLE_TYPE,ERR,ERROR,*999) + CALL WRITE_STRING_VALUE(DIAGNOSTIC_OUTPUT_TYPE,'value = ',INITIAL_VALUE,ERR,ERROR,*999) + ENDIF CALL FIELD_COMPONENT_VALUES_INITIALISE(CELLML%STATE_FIELD%STATE_FIELD,FIELD_U_VARIABLE_TYPE, & & FIELD_VALUES_SET_TYPE,state_component_idx,INITIAL_VALUE,ERR,ERROR,*999) ENDDO !state_component_idx @@ -2750,8 +2755,11 @@ SUBROUTINE CELLML_STATE_FIELD_CREATE_FINISH(CELLML,ERR,ERROR,*) & TRIM(NUMBER_TO_VSTRING(state_component_idx,"*",ERR,ERROR))//"." CALL FLAG_ERROR(LOCAL_ERROR,ERR,ERROR,*999) ENDIF - WRITE(*,*) '(multiple models) Initial value for state variable: ',state_component_idx,'; type: ',& - & CELLML_VARIABLE_TYPE,'; value = ',INITIAL_VALUE + IF(DIAGNOSTICS1) THEN + CALL WRITE_STRING_TWO_VALUE(DIAGNOSTIC_OUTPUT_TYPE,'(multiple models) Initial value for state variable: '& + &,state_component_idx,'; type: ', CELLML_VARIABLE_TYPE,ERR,ERROR,*999) + CALL WRITE_STRING_VALUE(DIAGNOSTIC_OUTPUT_TYPE,'value = ',INITIAL_VALUE,ERR,ERROR,*999) + ENDIF !\todo make diagnostic output CALL CELLML_FIELD_VARIABLE_SOURCE_DOF_SET_CONSTANT(STATE_VARIABLE,FIELD_VALUES_SET_TYPE,source_dof_idx, & & state_component_idx,INITIAL_VALUE,ERR,ERROR,*999) ENDDO !state_component_idx @@ -3593,8 +3601,11 @@ SUBROUTINE CELLML_PARAMETERS_FIELD_CREATE_FINISH(CELLML,ERR,ERROR,*) & TRIM(NUMBER_TO_VSTRING(parameter_component_idx,"*",ERR,ERROR))//"." 
CALL FLAG_ERROR(LOCAL_ERROR,ERR,ERROR,*999) ENDIF - WRITE(*,*) '(single model) Initial value for parameter variable: ',parameter_component_idx,'; type: ',& - & CELLML_VARIABLE_TYPE,'; value = ',INITIAL_VALUE + IF(DIAGNOSTICS1) THEN + CALL WRITE_STRING_TWO_VALUE(DIAGNOSTIC_OUTPUT_TYPE,'(single model) Initial value for parameter variable: ', & + & parameter_component_idx,'; type: ', CELLML_VARIABLE_TYPE,ERR,ERROR,*999) + CALL WRITE_STRING_VALUE(DIAGNOSTIC_OUTPUT_TYPE,'value = ',INITIAL_VALUE,ERR,ERROR,*999) + ENDIF CALL FIELD_COMPONENT_VALUES_INITIALISE(CELLML%PARAMETERS_FIELD%PARAMETERS_FIELD,FIELD_U_VARIABLE_TYPE, & & FIELD_VALUES_SET_TYPE,parameter_component_idx,INITIAL_VALUE,ERR,ERROR,*999) ENDDO !parameter_component_idx @@ -3624,8 +3635,11 @@ SUBROUTINE CELLML_PARAMETERS_FIELD_CREATE_FINISH(CELLML,ERR,ERROR,*) & TRIM(NUMBER_TO_VSTRING(parameter_component_idx,"*",ERR,ERROR))//"." CALL FLAG_ERROR(LOCAL_ERROR,ERR,ERROR,*999) ENDIF - WRITE(*,*) '(multiple models) Initial value for parameter variable: ',parameter_component_idx,'; type: ',& - & CELLML_VARIABLE_TYPE,'; value = ',INITIAL_VALUE + IF(DIAGNOSTICS1) THEN + CALL WRITE_STRING_TWO_VALUE(DIAGNOSTIC_OUTPUT_TYPE,'(multiple models) Initial value for parameter variable: '& + &, parameter_component_idx,'; type: ', CELLML_VARIABLE_TYPE,ERR,ERROR,*999) + CALL WRITE_STRING_VALUE(DIAGNOSTIC_OUTPUT_TYPE,'value = ',INITIAL_VALUE,ERR,ERROR,*999) + ENDIF !\todo make diagnostic output CALL CELLML_FIELD_VARIABLE_SOURCE_DOF_SET_CONSTANT(PARAMETERS_VARIABLE,FIELD_VALUES_SET_TYPE, & & source_dof_idx, parameter_component_idx,INITIAL_VALUE,ERR,ERROR,*999) ENDDO !parameter_component_idx diff --git a/src/cuda_solver_routines.cu b/src/cuda_solver_routines.cu index a39c9076..dcf65120 100644 --- a/src/cuda_solver_routines.cu +++ b/src/cuda_solver_routines.cu @@ -448,39 +448,39 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, cutCreateTimer(&timer); cutilCheckError(cutStartTimer(timer)); - // Test kernel speed in default stream (timing is more accurate in default stream) - memcpy(h_paged_states, h_states, pagedMemorySize/num_streams); - cutilSafeCall( cudaMemcpy(d_states, h_paged_states, pagedMemorySize/num_streams, - cudaMemcpyHostToDevice) ); - // Start kernel timer - kernel_timer = 0; - cutCreateTimer(&kernel_timer); - cutilCheckError(cutStartTimer(kernel_timer)); - // Start kernel - solveSystem<<>>(timeSteps, stepSize, d_states); - checkCUDAError("Single Kernel Execution"); - cutilSafeCall( cudaThreadSynchronize() ); - // Stop kernel Timer - cutilCheckError(cutStopTimer(kernel_timer)); - cutilSafeCall( cudaMemcpy(h_paged_states, d_states, pagedMemorySize/num_streams, - cudaMemcpyDeviceToHost) ); - memcpy(h_states, h_paged_states, pagedMemorySize/num_streams); +// // Test kernel speed in default stream (timing is more accurate in default stream) +// memcpy(h_paged_states, h_states, pagedMemorySize/num_streams); +// cutilSafeCall( cudaMemcpy(d_states, h_paged_states, pagedMemorySize/num_streams, +// cudaMemcpyHostToDevice) ); +// // Start kernel timer +// kernel_timer = 0; +// cutCreateTimer(&kernel_timer); +// cutilCheckError(cutStartTimer(kernel_timer)); +// // Start kernel +// solveSystem<<>>(timeSteps, stepSize, d_states); +// checkCUDAError("Single Kernel Execution"); +// cutilSafeCall( cudaThreadSynchronize() ); +// // Stop kernel Timer +// cutilCheckError(cutStopTimer(kernel_timer)); +// cutilSafeCall( cudaMemcpy(h_paged_states, d_states, pagedMemorySize/num_streams, +// cudaMemcpyDeviceToHost) ); +// memcpy(h_states,
h_paged_states, pagedMemorySize/num_streams); // Prefetch data for next partition in first stream - if (num_partitions>1) { - global_offset = rateStateCount * num_streams * grid.x * threads.x; - memcpy(h_paged_states, h_states + global_offset, pagedMemorySize/num_streams); - } - } else { +// if (num_partitions>1) { +// global_offset = rateStateCount * num_streams * grid.x * threads.x; +// memcpy(h_paged_states, h_states + global_offset, pagedMemorySize/num_streams); +// } +// } else { memcpy(h_paged_states, h_states, pagedMemorySize); } // Queue kernel calls into streams to hide memory transfers (num_partitions sets of kernel calls in each stream) for(i = 0; i < num_partitions+1; i++) { // Asynchronously launch num_streams memcopies - for(j = 0; j < num_streams; j++) { + for(j = 0; j < num_streams; j++) { //((timing_file!=NULL && i==0) ? 1 : 0); j < num_streams; j++){ domainIndex = j + i*num_streams; - if (domainIndex <= lastFullDomain && (timing_file==NULL || !(i==0 && j==0))) { + if (domainIndex <= lastFullDomain) { local_offset = j * rateStateCount * grid.x * threads.x ; if (domainIndex == lastFullDomain && (spill[1]!=0 || spill[2]!=0)) { //printf("last async in %d, size %d\n", domainIndex, lastStreamMemorySize); @@ -495,9 +495,9 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, } // Execute the kernel // Asynchronously launch num_streams kernels, each operating on its own portion of data - for(j = 0; j < num_streams; j++) { + for(j = 0; j < num_streams; j++) { //((timing_file!=NULL && i==0) ? 1 : 0); j < num_streams; j++){ domainIndex = j + i*num_streams; - if (domainIndex <= lastFullDomain && (timing_file==NULL || !(i==0 && j==0))) { + if (domainIndex <= lastFullDomain) { local_offset = j * grid.x * threads.x ; if (domainIndex == lastFullDomain && (spill[1]!=0 || spill[2]!=0)) { grid.x = spill[1]+1; @@ -511,9 +511,9 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, } // Asynchronoously launch num_streams memcopies - for(j = 0; j < num_streams; j++){ + for(j = 0; j < num_streams; j++) { //((timing_file!=NULL && i==0) ? 1 : 0); j < num_streams; j++){ domainIndex = j + i*num_streams; - if (domainIndex <= lastFullDomain && (timing_file==NULL || !(i==0 && j==0))) { + if (domainIndex <= lastFullDomain) { local_offset = j * rateStateCount * grid.x * threads.x ; if (domainIndex == lastFullDomain && (spill[1]!=0 || spill[2]!=0)) { //printf("last async out %d, size %d\n", domainIndex, lastStreamMemorySize); @@ -528,9 +528,9 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, } // Execute memcpys in and out of paged memory when CUDA calls in the streams have finished - for(j = 0; j < num_streams; j++){ + for(j = 0; j < num_streams; j++) { //((timing_file!=NULL && i==0) ? 
1 : 0); j < num_streams; j++){ domainIndex = j + i*num_streams; - if (domainIndex <= lastFullDomain && (timing_file==NULL || !(i==0 && j==0))) { + if (domainIndex <= lastFullDomain) { cudaStreamSynchronize(streams[j]); local_offset = j * rateStateCount * grid.x * threads.x ; @@ -570,14 +570,16 @@ void solve(double* h_states, double startTime, double endTime, double stepSize, nFLOPS = (FLOPSPerTimeStep*rateStateCount + FLOPSPerFunction*FunctionEvals)*timeSteps*num_threads; gflops = 1.0e-9 * nFLOPS/dSeconds; - kernel_dSeconds = cutGetTimerValue(kernel_timer)/1000.0; - kernel_nFLOPS = (FLOPSPerTimeStep*rateStateCount + FLOPSPerFunction*FunctionEvals)*timeSteps*num_threads/num_streams/num_partitions; - kernel_gflops = 1.0e-9 * kernel_nFLOPS/kernel_dSeconds; +// kernel_dSeconds = cutGetTimerValue(kernel_timer)/1000.0; +// kernel_nFLOPS = (FLOPSPerTimeStep*rateStateCount + FLOPSPerFunction*FunctionEvals)*timeSteps*num_threads/num_streams/num_partitions; +// kernel_gflops = 1.0e-9 * kernel_nFLOPS/kernel_dSeconds; // Store Stats - fprintf(timing_file,"%s\t%s\t%d\t%d\t%d\t%d\t%d\t%f\t%f\t%f\t%f\t%f\n", cellModelName, integratorName, num_threads, num_blocks, - threads_per_block, num_partitions, num_streams, dSeconds, gflops, kernel_dSeconds, - kernel_gflops, gflops/kernel_gflops*100); + fprintf(timing_file,"%s\t%s\t%d\t%d\t%d\t%d\t%d\t%f\t%f\n", cellModelName, integratorName, num_threads, num_blocks, + threads_per_block, num_partitions, num_streams, dSeconds, gflops); +// fprintf(timing_file,"%s\t%s\t%d\t%d\t%d\t%d\t%d\t%f\t%f\t%f\t%f\t%f\n", cellModelName, integratorName, num_threads, num_blocks, +// threads_per_block, num_partitions, num_streams, dSeconds, gflops, kernel_dSeconds, +// kernel_gflops, gflops/kernel_gflops*100); } // Deallocate Memory and Release Threads diff --git a/src/external_dae_solver_routines.cu b/src/external_dae_solver_routines.cu index 172af625..12acb39f 100644 --- a/src/external_dae_solver_routines.cu +++ b/src/external_dae_solver_routines.cu @@ -84,7 +84,6 @@ void SolverDAEExternalIntegrate(const int NumberOfDofs, double *IntermediateData, int *err) { - FILE* timing_file = NULL; char *filename = NULL; @@ -93,7 +92,8 @@ void SolverDAEExternalIntegrate(const int NumberOfDofs, if (!timing_file) { timing_file = fopen(filename, "wt"); - fprintf(timing_file,"Cell Model\tIntegrator\tNumber of Threads\tNumber of Blocks\tThreads Per Block\tNumber of Partitions\tNumber of Streams\tTotal Computational Time(s)\tTotal GFLOPS\tSingle Kernel Computational Time(s)\tKernel GFLOPS\tDevice Utilisation\n"); + fprintf(timing_file,"Cell Model\tIntegrator\tNumber of Threads\tNumber of Blocks\tThreads Per Block\tNumber of Partitions\tNumber of Streams\tTotal Computational Time(s)\tTotal GFLOPS\n"); +// fprintf(timing_file,"Cell Model\tIntegrator\tNumber of Threads\tNumber of Blocks\tThreads Per Block\tNumber of Partitions\tNumber of Streams\tTotal Computational Time(s)\tTotal GFLOPS\tSingle Kernel Computational Time(s)\tKernel GFLOPS\tDevice Utilisation\n"); } else { fclose(timing_file); timing_file = fopen(filename, "at"); @@ -106,7 +106,7 @@ void SolverDAEExternalIntegrate(const int NumberOfDofs, //printf("start %f end %f steps %d\n", StartTime, EndTime, (int)((EndTime-StartTime)/InitialStep[0])); // timeSteps = (int)ceil(((EndTime-StartTime)/InitialStep[0])); - solve(StateData, StartTime, EndTime, InitialStep[0], NumberOfDofs, ThreadsPerBlock, NumberOfPartitions, NumberOfStreams, NULL);// timing_file); + solve(StateData, StartTime, EndTime, InitialStep[0], NumberOfDofs, ThreadsPerBlock,
NumberOfPartitions, NumberOfStreams, timing_file); if (timing_file != NULL ) fclose(timing_file); free(filename); diff --git a/src/solver_routines.f90 b/src/solver_routines.f90 index 5c6a3624..94787d30 100644 --- a/src/solver_routines.f90 +++ b/src/solver_routines.f90 @@ -3254,6 +3254,13 @@ SUBROUTINE SOLVER_DAE_SOLVE(DAE_SOLVER,ERR,ERROR,*) TYPE(SOLVER_TYPE), POINTER :: SOLVER TYPE(VARYING_STRING) :: LOCAL_ERROR + + ! TEMP ADDED BY JIN + REAL TEMP, ODE_TIME + REAL TIMEARRAY(2) + COMMON ODE_TIME + TEMP = DTIME(TIMEARRAY) + CALL ENTERS("SOLVER_DAE_SOLVE",ERR,ERROR,*999) IF(ASSOCIATED(DAE_SOLVER)) THEN @@ -3320,6 +3327,10 @@ SUBROUTINE SOLVER_DAE_SOLVE(DAE_SOLVER,ERR,ERROR,*) ELSE CALL FLAG_ERROR("Differential-algebraic equation solver is not associated.",ERR,ERROR,*999) ENDIF + + + ! TEMP CODE ADDED BY JIN + ODE_TIME = ODE_TIME + DTIME(TIMEARRAY) CALL EXITS("SOLVER_DAE_SOLVE") RETURN From 102334e161defb758c2da4b0e696c2ace27bd03f Mon Sep 17 00:00:00 2001 From: Chris Bradley Date: Tue, 14 Jun 2011 22:33:44 +0000 Subject: [PATCH 14/24] CUDA changes. git-svn-id: https://opencmiss.svn.sourceforge.net/svnroot/opencmiss/cm/branches/developers/jin@2186 61e36070-c886-4827-b7c0-ed9e4dc59cc8 --- ExampleMakefile | 50 +++++++++++++++++++++++++++++++++++++++++++++++-- Makefile | 4 ++-- 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/ExampleMakefile b/ExampleMakefile index 1fbd5fb7..377a68b1 100644 --- a/ExampleMakefile +++ b/ExampleMakefile @@ -85,6 +85,10 @@ ifndef USEFIELDML USEFIELDML := false endif +ifndef USECUDA + USECUDA := false +endif + ifeq ($(MPI),mpich2) MPI := mpich2 else @@ -122,6 +126,17 @@ ifeq ($(MPIPROF),true) endif endif +ifeq ($(USECUDA),true) + ifndef CUDA_ROOT + $(error CUDA_ROOT is not defined) + endif + CUDA_DIR := ${CUDA_ROOT} + ifndef CUDA_SDK_ROOT + $(error CUDA_SDK_ROOT is not defined) + endif + CUDA_SDK_DIR := ${CUDA_SDK_ROOT} +endif + #---------------------------------------------------------------------------------------------------------------------------------- BASE_EXAMPLE_NAME = $(EXAMPLE_NAME)Example @@ -839,6 +854,32 @@ else endif endif +#CUDA +CUDA_LIBRARIES =# +CUDA_LIB_PATH =# +CUDA_SDK_LIBRARIES =# +CUDA_SDK_LIB_PATH =# +ifeq ($(USECUDA),true) + ifeq ($(OPERATING_SYSTEM),linux)# Linux + CUDA_LIBRARIES += -lcudart + ifeq ($(API),32) + CUDA_LIB_PATH += $(addprefix -L, $(CUDA_DIR)/lib ) + CUDA_SDK_LIBRARIES += -lcutil -lshrutil + else + CUDA_LIB_PATH += $(addprefix -L, $(CUDA_DIR)/lib64 ) + CUDA_SDK_LIBRARIES += -lcutil_x86_64 -lshrutil_x86_64 + endif + CUDA_SDK_LIB_PATH += $(addprefix -L, $(CUDA_SDK_DIR)/C/common/lib/linux ) + CUDA_SDK_LIB_PATH += $(addprefix -L, $(CUDA_SDK_DIR)/shared/lib/linux ) + else + ifeq ($(OPERATING_SYSTEM),aix)# AIX + $(error CUDA not implemented for AIX) + else# windows + $(error CUDA not implemented for Windows) + endif + endif +endif + OPENCMISS_LIBRARY = $(LIBRARY) OPENCMISS_INCLUDE_PATH = $(addprefix -I, $(INC_DIR) ) @@ -857,6 +898,11 @@ ifeq ($(USEFIELDML),true) EXTERNAL_INCLUDE_PATH += $(FIELDML_INCLUDE_PATH) endif +ifeq ($(USECUDA),true) + EXTERNAL_LIBRARIES += $(CUDA_SDK_LIBRARIES) $(CUDA_LIBRARIES) + EXTERNAL_LIB_PATH += $(CUDA_SDK_LIB_PATH) $(CUDA_LIB_PATH) +endif + EXTERNAL_LIBRARIES += $(strip $(TAO_LIBRARIES) $(PETSC_LIBRARIES) $(SUNDIALS_LIBRARIES) $(HYPRE_LIBRARIES) $(SUPERLU_LIBRARIES) $(PASTIX_LIBRARIES) $(SCOTCH_LIBRARIES) $(MUMPS_LIBRARIES) $(SCALAPACK_LIBRARIES) $(BLACS_LIBRARIES) $(PARMETIS_LIBRARIES) $(TAU_LIBRARIES) $(MPI_LIBRARIES) $(BLAS_LIBRARIES)) EXTERNAL_LIB_PATH += $(strip $(TAO_LIB_PATH) 
$(PETSC_LIB_PATH) $(SUNDIALS_LIB_PATH) $(HYPRE_LIB_PATH) $(SUPERLU_LIB_PATH) $(PASTIX_LIB_PATH) $(SCOTCH_LIB_PATH) $(MUMPS_LIB_PATH) $(SCALAPACK_LIB_PATH) $(BLACS_LIB_PATH) $(PARMETIS_LIB_PATH) $(TAU_LIB_PATH) $(MPI_LIB_PATH) $(BLAS_LIB_PATH)) EXTERNAL_INCLUDE_PATH += $(strip $(MPI_INCLUDE_PATH) ) @@ -892,12 +938,12 @@ $(EXE_DIR) : mkdir -p $@; $(EXECUTABLE) : $(OBJECTS) $(OPENCMISS_LIBRARY) - $(EXE_LINK) -o $@ $(OBJECTS) $(OPENCMISS_LIBRARY) $(ELFLAGS) $(EXTERNAL_LIBRARIES) -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/lib -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/common/lib/linux -L/people/vbud003/NVIDIA_GPU_Computing_SDK/shared//lib -lcudart -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/lib -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/common/lib/linux -L/people/vbud003/NVIDIA_GPU_Computing_SDK/shared//lib -L/usr/local/cuda/lib64/ -lcudart -lcutil_x86_64 -lshrutil_x86_64 + $(EXE_LINK) -o $@ $(OBJECTS) $(OPENCMISS_LIBRARY) $(ELFLAGS) $(EXTERNAL_LIBRARIES) +# $(EXE_LINK) -o $@ $(OBJECTS) $(OPENCMISS_LIBRARY) $(ELFLAGS) $(EXTERNAL_LIBRARIES) -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/lib -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/common/lib/linux -L/people/vbud003/NVIDIA_GPU_Computing_SDK/shared//lib -lcudart -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/lib -L/people/vbud003/NVIDIA_GPU_Computing_SDK/C/common/lib/linux -L/people/vbud003/NVIDIA_GPU_Computing_SDK/shared//lib -L/usr/local/cuda/lib64/ -lcudart -lcutil_x86_64 -lshrutil_x86_64 $(OPENCMISS_LIBRARY) : ( cd $(GLOBAL_ROOT); $(MAKE) ) - # Place the list of dependencies for the objects here. # # ---------------------------------------------------------------------------- diff --git a/Makefile b/Makefile index 1e458d82..57a24484 100644 --- a/Makefile +++ b/Makefile @@ -741,10 +741,10 @@ $(OBJECT_DIR)/%.o : $(SOURCE_DIR)/%.c ( cd $(OBJECT_DIR) ; $(CC) -o $@ $(CFLAGS) $(CPPFLAGS) -c $< ) $(OBJECT_DIR)/%.o : $(SOURCE_DIR)/%.cu - ( cd $(OBJECT_DIR) ; nvcc -gencode=arch=compute_20,code=\"sm_20,compute_20\" --compile -m64 --compiler-options -fno-strict-aliasing --ptxas-options=-v -I$(SOURCE_DIR) -I/usr/local/cuda/include -I/people/vbud003/NVIDIA_GPU_Computing_SDK/C/common/inc -I/people/vbud003/NVIDIA_GPU_Computing_SDK/shared//inc --use_fast_math -DUNIX -O2 -o $@ -c $< ) + ( cd $(OBJECT_DIR) ; nvcc -g -gencode=arch=compute_20,code=\"sm_20,compute_20\" --compile -m64 --compiler-options -fno-strict-aliasing --ptxas-options=-v -I$(SOURCE_DIR) -I${CUDA_ROOT}/include -I${CUDA_SDK_ROOT}/C/common/inc -I${CUDA_SDK_ROOT}/shared/inc --use_fast_math -DUNIX -O2 -o $@ -c $< ) +# ( cd $(OBJECT_DIR) ; nvcc -g -gencode=arch=compute_20,code=\"sm_20,compute_20\" --compile -m64 --compiler-options -fno-strict-aliasing --ptxas-options=-v -I$(SOURCE_DIR) -I/usr/local/cuda/include -I/people/vbud003/NVIDIA_GPU_Computing_SDK/C/common/inc -I/people/vbud003/NVIDIA_GPU_Computing_SDK/shared//inc --use_fast_math -DUNIX -O2 -o $@ -c $< ) #-gencode=arch=compute_10,code=\"sm_10,compute_10\" -gencode=arch=compute_20,code=\"sm_20,compute_20\" --compile -m64 --compiler-options -fno-strict-aliasing --ptxas-options=-v -I$(SOURCE_DIR) -I/usr/local/cuda/include -I/people/vbud003/NVIDIA_GPU_Computing_SDK/C/common/inc -I/people/vbud003/NVIDIA_GPU_Computing_SDK/shared//inc --use_fast_math -DUNIX -O2 -o $@ -c $< ) - ifeq ($(USEFIELDML),true) FIELDML_OBJECT = \ $(OBJECT_DIR)/fieldml_util_routines.o \ From 9584a6336d254f1ee975674cf921148b21e5fe9d Mon Sep 17 00:00:00 2001 From: Chris Bradley Date: Tue, 18 Oct 2011 09:04:47 +1300 Subject: [PATCH 15/24] Initialise pointers bug fix. 
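[Editor's note] This patch and the two that follow it (patches 16 and 18) apply the same class of fix: Fortran POINTER locals are undefined on entry to a routine, so code that later branches on their association status must NULLIFY them first. A C analogue of the bug, for illustration only (not code from the repository):

#include <stddef.h>

/* An uninitialised pointer holds an indeterminate value, so testing it is
 * undefined behaviour; initialising to NULL (NULLIFY in Fortran) makes the
 * "not yet associated" state well defined and testable. */
void analyticAnalysisSketch(void)
{
    double *numericalValues = NULL;   /* cf. NULLIFY(NUMERICAL_VALUES) */
    double *analyticValues  = NULL;   /* cf. NULLIFY(ANALYTIC_VALUES) */

    if (numericalValues == NULL && analyticValues == NULL) {
        /* safe: both pointers are in a defined, testable state */
    }
}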
--- src/analytic_analysis_routines.f90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/analytic_analysis_routines.f90 b/src/analytic_analysis_routines.f90 index a894fc8f..b6bc4575 100644 --- a/src/analytic_analysis_routines.f90 +++ b/src/analytic_analysis_routines.f90 @@ -158,6 +158,8 @@ SUBROUTINE ANALYTIC_ANALYSIS_OUTPUT(FIELD,FILENAME,ERR,ERROR,*) LOCAL_STRING="Field "//TRIM(NUMBER_TO_VSTRING(FIELD%USER_NUMBER,"*",ERR,ERROR))//" : "//FIELD%LABEL IF(ERR/=0) GOTO 999 CALL WRITE_STRING(OUTPUT_ID,LOCAL_STRING,ERR,ERROR,*999) + NULLIFY(NUMERICAL_VALUES) + NULLIFY(ANALYTIC_VALUES) !Loop over the variables DO var_idx=1,FIELD%NUMBER_OF_VARIABLES variable_type=FIELD%VARIABLES(var_idx)%VARIABLE_TYPE From 5175b077c514a36f56fdf0d0c270a154de08c0f2 Mon Sep 17 00:00:00 2001 From: Chris Bradley Date: Tue, 18 Oct 2011 14:47:13 +1300 Subject: [PATCH 16/24] More uninitialised pointers. --- src/equations_set_routines.f90 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/equations_set_routines.f90 b/src/equations_set_routines.f90 index 1f12324e..5a6466f1 100644 --- a/src/equations_set_routines.f90 +++ b/src/equations_set_routines.f90 @@ -1838,6 +1838,10 @@ SUBROUTINE EQUATIONS_SET_BACKSUBSTITUTE(EQUATIONS_SET,BOUNDARY_CONDITIONS,ERR,ER TYPE(FIELD_VARIABLE_TYPE), POINTER :: DEPENDENT_VARIABLE,RHS_VARIABLE TYPE(VARYING_STRING) :: LOCAL_ERROR + NULLIFY(DEPENDENT_PARAMETERS) + NULLIFY(EQUATIONS_MATRIX_DATA) + NULLIFY(SOURCE_VECTOR_DATA) + CALL ENTERS("EQUATIONS_SET_BACKSUBSTITUTE",ERR,ERROR,*999) IF(ASSOCIATED(EQUATIONS_SET)) THEN From 3fcb2e1856a634c6019fbcc5a3e2caa34d2b9059 Mon Sep 17 00:00:00 2001 From: Chris Bradley Date: Tue, 18 Oct 2011 14:47:38 +1300 Subject: [PATCH 17/24] OpenMPI include path corrections. --- ExampleMakefile | 1 + Makefile | 24 +++++++++++++++--------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/ExampleMakefile b/ExampleMakefile index 7f2ded5f..f18c0557 100644 --- a/ExampleMakefile +++ b/ExampleMakefile @@ -872,6 +872,7 @@ ifeq ($(OPERATING_SYSTEM),linux)# Linux else ifeq ($(MPI),openmpi) MPI_INCLUDE_PATH += $(addprefix -I, $(EXTERNAL_CM_DIR)/include/ ) + MPI_INCLUDE_PATH += $(addprefix -I, $(EXTERNAL_CM_DIR)/lib/ ) MPI_LIBRARIES += -lmpi_f90 -lmpi_f77 -lmpi -libverbs -lnsl -lutil -ldl -lnsl -lutil MPI_LIB_PATH += $(addprefix -L, $(EXTERNAL_CM_DIR)/lib/ ) else diff --git a/Makefile b/Makefile index 3319190c..cb178798 100644 --- a/Makefile +++ b/Makefile @@ -708,18 +708,24 @@ endif MPI_INCLUDE_PATH =# ifeq ($(OPERATING_SYSTEM),linux)# Linux ifeq ($(MPI),mpich2) + MPI_INCLUDE_PATH += $(addprefix -I, $(EXTERNAL_CM_DIR)/include/ ) else - ifeq ($(MPI),intel) - ifeq ($(MPIPROF),true) - MPI_INCLUDE_PATH += $(addprefix -I, $(VT_ROOT)/include/ ) - endif - ifeq ($(ABI),64) - MPI_INCLUDE_PATH += $(addprefix -I, $(I_MPI_ROOT)/include64/ ) + ifeq ($(MPI),openmpi) + MPI_INCLUDE_PATH += $(addprefix -I, $(EXTERNAL_CM_DIR)/lib/ ) + MPI_INCLUDE_PATH += $(addprefix -I, $(EXTERNAL_CM_DIR)/include/ ) + else + ifeq ($(MPI),intel) + ifeq ($(MPIPROF),true) + MPI_INCLUDE_PATH += $(addprefix -I, $(VT_ROOT)/include/ ) + endif + ifeq ($(ABI),64) + MPI_INCLUDE_PATH += $(addprefix -I, $(I_MPI_ROOT)/include64/ ) + else + MPI_INCLUDE_PATH += $(addprefix -I, $(I_MPI_ROOT)/include/ ) + endif else - MPI_INCLUDE_PATH += $(addprefix -I, $(I_MPI_ROOT)/include/ ) + MPI_INCLUDE_PATH += $(addprefix -I, $(EXTERNAL_CM_DIR)/lib/ ) endif - else - MPI_INCLUDE_PATH += $(addprefix -I, $(EXTERNAL_CM_DIR)/lib/ ) endif endif else From 24e3f1fdf6cbfca2769e9ecd6fddf9b1ded34ba5 Mon Sep 17 00:00:00 2001 
From: Chris Bradley Date: Tue, 18 Oct 2011 14:50:48 +1300 Subject: [PATCH 18/24] Uninitialised pointer. --- src/finite_elasticity_routines.f90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/finite_elasticity_routines.f90 b/src/finite_elasticity_routines.f90 index 402c758f..9a9d9bc1 100644 --- a/src/finite_elasticity_routines.f90 +++ b/src/finite_elasticity_routines.f90 @@ -150,6 +150,8 @@ SUBROUTINE FINITE_ELASTICITY_ANALYTIC_CALCULATE(EQUATIONS_SET,BOUNDARY_CONDITION LOGICAL :: X_FIXED,Y_FIXED,NODE_EXISTS, X_OKAY,Y_OKAY TYPE(VARYING_STRING) :: LOCAL_ERROR + NULLIFY(GEOMETRIC_PARAMETERS) + CALL ENTERS("FINITE_ELASTICITY_ANALYTIC_CALCULATE",ERR,ERROR,*999) MY_COMPUTATIONAL_NODE_NUMBER=COMPUTATIONAL_NODE_NUMBER_GET(ERR,ERROR) From e72f935e3e4c08db41cfc901f189f506e43cecfd Mon Sep 17 00:00:00 2001 From: Chris Bradley Date: Tue, 22 Nov 2011 17:57:53 +1300 Subject: [PATCH 19/24] Moving MPI to its own repository. --- ExampleMakefile | 39 +++++++++++++++++++++++++++------------ Makefile | 24 ++++++++++++++++++++---- 2 files changed, 47 insertions(+), 16 deletions(-) diff --git a/ExampleMakefile b/ExampleMakefile index f18c0557..d204b055 100644 --- a/ExampleMakefile +++ b/ExampleMakefile @@ -58,6 +58,9 @@ endif ifndef EXTERNAL_CM_ROOT EXTERNAL_CM_ROOT := ${OPENCMISSEXTRAS_ROOT}/cm/external endif +ifndef GLOBAL_MPI_ROOT + GLOBAL_MPI_ROOT := ${OPENCMISSEXTRAS_ROOT}/mpi +endif ifndef EXTERNAL_CELLML_ROOT EXTERNAL_CELLML_ROOT := ${OPENCMISSEXTRAS_ROOT}/cellml endif @@ -159,6 +162,9 @@ ifeq ($(OPERATING_SYSTEM),linux)# Linux ifndef EXTERNAL_CM_DIR EXTERNAL_CM_DIR := $(EXTERNAL_CM_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(MPI)/$(COMPILER)_$(COMPILER_VERSION) endif + ifndef MPI_DIR + MPI_DIR := $(GLOBAL_MPI_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(MPI)/$(COMPILER)_$(COMPILER_VERSION) + endif EXTERNAL_CELLML_DIR := $(EXTERNAL_CELLML_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(COMPILER)_$(COMPILER_VERSION) EXTERNAL_FIELDML_DIR := $(EXTERNAL_FIELDML_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(COMPILER)_$(COMPILER_VERSION) EXTERNAL_COMMON_DIR := $(EXTERNAL_COMMON_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(COMPILER)_$(COMPILER_VERSION) @@ -171,6 +177,9 @@ ifeq ($(OPERATING_SYSTEM),linux)# Linux ifndef EXTERNAL_CM_DIR EXTERNAL_CM_DIR := $(EXTERNAL_CM_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(MPI)/$(COMPILER) endif + ifndef MPI_DIR + MPI_DIR := $(GLOBAL_MPI_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(MPI)/$(COMPILER) + endif EXTERNAL_CELLML_DIR := $(EXTERNAL_CELLML_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(COMPILER) EXTERNAL_FIELDML_DIR := $(EXTERNAL_FIELDML_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(COMPILER) EXTERNAL_COMMON_DIR := $(EXTERNAL_COMMON_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(COMPILER) @@ -185,6 +194,9 @@ else ifndef EXTERNAL_CM_DIR EXTERNAL_CM_DIR := $(EXTERNAL_CM_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(MPI)/$(COMPILER) endif + ifndef MPI_DIR + MPI_DIR := $(GLOBAL_MPI_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(MPI)/$(COMPILER) + endif EXTERNAL_CELLML_DIR := $(EXTERNAL_CELLML_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(COMPILER) EXTERNAL_FIELDML_DIR := $(EXTERNAL_FIELDML_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(COMPILER) EXTERNAL_COMMON_DIR := $(EXTERNAL_COMMON_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX) @@ -192,6 +204,9 @@ else ifndef EXTERNAL_CM_DIR EXTERNAL_CM_DIR := $(EXTERNAL_CM_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX) 
endif + ifndef MPI_DIR + MPI_DIR := $(GLOBAL_MPI_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX) + endif EXTERNAL_CELLML_DIR := $(EXTERNAL_CELLML_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX) EXTERNAL_FIELDML_DIR := $(EXTERNAL_FIELDML_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX) EXTERNAL_COMMON_DIR := $(EXTERNAL_COMMON_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX) @@ -846,14 +861,14 @@ ifeq ($(OPERATING_SYSTEM),linux)# Linux ifeq ($(MPIPROF),true) MPI_LIBRARIES += -lmpe_f2cmpi -llmpe -lmpe endif - MPL_LIB = $(call searchdirs, $(EXTERNAL_CM_DIR)/lib/, libmpl* ) + MPL_LIB = $(call searchdirs, $(MPI_DIR)/lib/, libmpl* ) MPI_LIBRARIES += -lmpich ifneq (,$(MPL_LIB)) MPI_LIBRARIES += -lmpl endif MPI_LIBRARIES += -lpthread -lrt - MPI_LIB_PATH += $(addprefix -L, $(EXTERNAL_CM_DIR)/lib/ ) - MPI_INCLUDE_PATH += $(addprefix -I, $(EXTERNAL_CM_DIR)/include/ ) + MPI_LIB_PATH += $(addprefix -L, $(MPI_DIR)/lib/ ) + MPI_INCLUDE_PATH += $(addprefix -I, $(MPI_DIR)/include/ ) else ifeq ($(MPI),intel) ifeq ($(MPIPROF),true) @@ -871,19 +886,19 @@ ifeq ($(OPERATING_SYSTEM),linux)# Linux endif else ifeq ($(MPI),openmpi) - MPI_INCLUDE_PATH += $(addprefix -I, $(EXTERNAL_CM_DIR)/include/ ) - MPI_INCLUDE_PATH += $(addprefix -I, $(EXTERNAL_CM_DIR)/lib/ ) + MPI_INCLUDE_PATH += $(addprefix -I, $(MPI_DIR)/include/ ) + MPI_INCLUDE_PATH += $(addprefix -I, $(MPI_DIR)/lib/ ) MPI_LIBRARIES += -lmpi_f90 -lmpi_f77 -lmpi -libverbs -lnsl -lutil -ldl -lnsl -lutil - MPI_LIB_PATH += $(addprefix -L, $(EXTERNAL_CM_DIR)/lib/ ) + MPI_LIB_PATH += $(addprefix -L, $(MPI_DIR)/lib/ ) else ifeq ($(MPI),mvapich2) - MPI_INCLUDE_PATH += $(addprefix -I, $(EXTERNAL_CM_DIR)/include/ ) + MPI_INCLUDE_PATH += $(addprefix -I, $(MPI_DIR)/include/ ) MPI_LIBRARIES += -lmpich -lmpichf90 -lpthread -lrt -luuid -lrt - MPI_LIB_PATH += $(addprefix -L, $(EXTERNAL_CM_DIR)/lib/ ) + MPI_LIB_PATH += $(addprefix -L, $(MPI_DIR)/lib/ ) else - MPI_INCLUDE_PATH += $(addprefix -I, $(EXTERNAL_CM_DIR)/lib/ ) + MPI_INCLUDE_PATH += $(addprefix -I, $(MPI_DIR)/lib/ ) MPI_LIBRARIES += -lmpi_f90 -lmpi_f77 -lmpi -lopen-rte -lopen-pal -ldl -lnsl -lutil - MPI_LIB_PATH += $(addprefix -L, $(EXTERNAL_CM_DIR)/lib/ ) + MPI_LIB_PATH += $(addprefix -L, $(MPI_DIR)/lib/ ) endif endif endif @@ -892,11 +907,11 @@ else ifeq ($(OPERATING_SYSTEM),aix)# AIX MPI_LIBRARIES = -lmpi MPI_LIB_PATH += $(addprefix -L, /usr/lpp/ppe.poe/lib/ ) - MPI_INCLUDE_PATH += $(addprefix -I, $(EXTERNAL_CM_DIR)/lib/ ) + MPI_INCLUDE_PATH += $(addprefix -I, $(MPI_DIR)/include/ ) else# windows MPI_LIBRARIES = -lmpichf90 -lmpich -lpthread -lrt MPI_LIB_PATH += $(addprefix -L, /home/users/local/lib/ ) - MPI_INCLUDE_PATH += $(addprefix -I, $(EXTERNAL_CM_DIR)/lib/ ) + MPI_INCLUDE_PATH += $(addprefix -I, $(MPI_DIR)/include/ ) endif endif diff --git a/Makefile b/Makefile index cb178798..92114c2a 100644 --- a/Makefile +++ b/Makefile @@ -61,6 +61,10 @@ endif GLOBAL_CM_ROOT = $(CURDIR) +ifndef GLOBAL_MPI_ROOT + GLOBAL_MPI_ROOT := ${OPENCMISSEXTRAS_ROOT}/mpi +endif + ifndef GLOBAL_CELLML_ROOT GLOBAL_CELLML_ROOT := ${OPENCMISS_ROOT}/cellml endif @@ -148,6 +152,9 @@ ifeq ($(OPERATING_SYSTEM),linux)# Linux ifndef EXTERNAL_CM_DIR EXTERNAL_CM_DIR := $(EXTERNAL_CM_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(MPI)/$(COMPILER)_$(COMPILER_VERSION) endif + ifndef MPI_DIR + MPI_DIR := $(GLOBAL_MPI_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(MPI)/$(COMPILER)_$(COMPILER_VERSION) + endif OBJECT_DIR := 
$(GLOBAL_CM_ROOT)/object/$(LIB_ARCH_DIR)$(MT_SUFFIX)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(MPI)/$(COMPILER)_$(COMPILER_VERSION) INC_DIR := $(GLOBAL_CM_ROOT)/include/$(BIN_ARCH_DIR)/$(MPI)/$(COMPILER)_$(COMPILER_VERSION) LIB_DIR := $(GLOBAL_CM_ROOT)/lib/$(BIN_ARCH_DIR)/$(MPI)/$(COMPILER)_$(COMPILER_VERSION) @@ -155,6 +162,9 @@ ifeq ($(OPERATING_SYSTEM),linux)# Linux ifndef EXTERNAL_CM_DIR EXTERNAL_CM_DIR := $(EXTERNAL_CM_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(MPI)/$(COMPILER) endif + ifndef MPI_DIR + MPI_DIR := $(GLOBAL_MPI_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(MPI)/$(COMPILER) + endif OBJECT_DIR := $(GLOBAL_CM_ROOT)/object/$(LIB_ARCH_DIR)$(MT_SUFFIX)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(MPI)/$(COMPILER) INC_DIR := $(GLOBAL_CM_ROOT)/include/$(BIN_ARCH_DIR)/$(MPI)/$(COMPILER) LIB_DIR := $(GLOBAL_CM_ROOT)/lib/$(BIN_ARCH_DIR)/$(MPI)/$(COMPILER) @@ -164,10 +174,16 @@ else ifndef EXTERNAL_CM_DIR EXTERNAL_CM_DIR := $(EXTERNAL_CM_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(MPI)/$(COMPILER) endif + ifndef MPI_DIR + MPI_DIR := $(GLOBAL_MPI_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(MPI)/$(COMPILER) + endif else# windows ifndef EXTERNAL_CM_DIR EXTERNAL_CM_DIR := $(EXTERNAL_CM_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX) endif + ifndef MPI_DIR + MPI_DIR := $(GLOBAL_MPI_ROOT)/$(LIB_ARCH_DIR)$(DEBUG_SUFFIX)$(PROF_SUFFIX) + endif endif OBJECT_DIR := $(GLOBAL_CM_ROOT)/object/$(LIB_ARCH_DIR)$(MT_SUFFIX)$(DEBUG_SUFFIX)$(PROF_SUFFIX)/$(MPI)/$(COMPILER) INC_DIR := $(GLOBAL_CM_ROOT)/include/$(BIN_ARCH_DIR)/$(MPI)/$(COMPILER) @@ -708,11 +724,11 @@ endif MPI_INCLUDE_PATH =# ifeq ($(OPERATING_SYSTEM),linux)# Linux ifeq ($(MPI),mpich2) - MPI_INCLUDE_PATH += $(addprefix -I, $(EXTERNAL_CM_DIR)/include/ ) + MPI_INCLUDE_PATH += $(addprefix -I, $(MPI_DIR)/include/ ) else ifeq ($(MPI),openmpi) - MPI_INCLUDE_PATH += $(addprefix -I, $(EXTERNAL_CM_DIR)/lib/ ) - MPI_INCLUDE_PATH += $(addprefix -I, $(EXTERNAL_CM_DIR)/include/ ) + MPI_INCLUDE_PATH += $(addprefix -I, $(MPI_DIR)/lib/ ) + MPI_INCLUDE_PATH += $(addprefix -I, $(MPI_DIR)/include/ ) else ifeq ($(MPI),intel) ifeq ($(MPIPROF),true) @@ -724,7 +740,7 @@ ifeq ($(OPERATING_SYSTEM),linux)# Linux MPI_INCLUDE_PATH += $(addprefix -I, $(I_MPI_ROOT)/include/ ) endif else - MPI_INCLUDE_PATH += $(addprefix -I, $(EXTERNAL_CM_DIR)/lib/ ) + MPI_INCLUDE_PATH += $(addprefix -I, $(MPI_DIR)/include/ ) endif endif endif From a0ceb30204a990360ae0c47a5f93a38f4a7dbefb Mon Sep 17 00:00:00 2001 From: Chris Bradley Date: Mon, 28 Nov 2011 10:48:54 +1300 Subject: [PATCH 20/24] Remove nodal from FieldML input parameters. --- src/fieldml_input_routines.f90 | 66 +++++++++++++++++++++++++++- src/opencmiss.f90 | 80 +++++++++++++++++----------------- 2 files changed, 104 insertions(+), 42 deletions(-) diff --git a/src/fieldml_input_routines.f90 b/src/fieldml_input_routines.f90 index 674588ec..a796dd2e 100644 --- a/src/fieldml_input_routines.f90 +++ b/src/fieldml_input_routines.f90 @@ -73,14 +73,14 @@ MODULE FIELDML_INPUT_ROUTINES PUBLIC :: FIELDML_INPUT_INITIALISE_FROM_FILE, FIELDML_INPUT_MESH_CREATE_START, & & FIELDML_INPUT_COORDINATE_SYSTEM_CREATE_START, FIELDML_INPUT_BASIS_CREATE_START, FIELDML_INPUT_CREATE_MESH_COMPONENT, & - & FIELDML_INPUT_FIELD_CREATE_START, FIELDML_INPUT_FIELD_NODAL_PARAMETERS_UPDATE, FIELDML_INPUT_NODES_CREATE_START + & FIELDML_INPUT_FIELD_CREATE_START, FIELDML_INPUT_FIELD_PARAMETERS_UPDATE, FIELDML_INPUT_NODES_CREATE_START CONTAINS ! 
!================================================================================================================================ ! - +uu SUBROUTINE FIELDML_ASSERT_IS_IN( FIELDML_INFO, ERR, ERROR, * ) !Argument variables TYPE(FIELDML_IO_TYPE), INTENT(IN) :: FIELDML_INFO !Inputs from a FieldML file the parameters for a field variable parameter set. + SUBROUTINE FIELDML_INPUT_FIELD_PARAMETERS_UPDATE(FIELDML_INFO, EVALUATOR_NAME, FIELD, VARIABLE_TYPE, SET_TYPE, & + & ERR, ERROR, * ) + !Argument variables + TYPE(FIELDML_IO_TYPE), INTENT(INOUT) :: FIELDML_INFO !1) THEN + CALL FIELD_COMPONENT_INTERPOLATION_GET(FIELD,VARIABLE_TYPE,1,INTERPOLATION_TYPE,ERR,ERROR,*999) + CALL FIELD_COMPONENT_MESH_COMPONENT_GET(FIELD,VARIABLE_TYPE,1,MESH_COMPONENT1,ERR,ERROR,*999) + IS_ALL_NODAL_INTERPOLATION=INTERPOLATION_TYPE==FIELD_NODE_BASED_INTERPOLATION + IS_SAME_MESH_COMPONENTS=.TRUE. + DO component_idx=2,NUMBER_OF_COMPONENTS + CALL FIELD_COMPONENT_INTERPOLATION_GET(FIELD,VARIABLE_TYPE,component_idx,INTERPOLATION_TYPE,ERR,ERROR,*999) + CALL FIELD_COMPONENT_MESH_COMPONENT_GET(FIELD,VARIABLE_TYPE,component_idx,MESH_COMPONENT2,ERR,ERROR,*999) + IS_ALL_NODAL_INTERPOLATION=IS_ALL_NODAL_INTERPOLATION.AND.INTERPOLATION_TYPE==FIELD_NODE_BASED_INTERPOLATION + IS_SAME_MESH_COMPONENTS=IS_SAME_MESH_COMPONENTS.AND.MESH_COMPONENT2==MESH_COMPONENT1 + ENDDO !component_idx + IF(IS_ALL_NODAL_INTERPOLATION) THEN + IF(IS_ALL_MESH_COMPONENTS) THEN + CALL FIELDML_INPUT_FIELD_NODAL_PARAMETERS_UPDATE(FIELDML_INFO,EVALUATOR_NAME,FIELD,VARIABLE_TYPE,SET_TYPE, & + & ERR,ERROR,*999) + ELSE + CALL FLAG_ERROR( & + & "FieldML input parameters only implemented for fields where all components have the same mesh component.", & + & ERR,ERROR,*999) + ENDIF + ELSE + CALL FLAG_ERROR("FieldML input parameters only implemented for fields where all components are nodally interpolated.", & + & ERR,ERROR,*999) + ENDIF + ELSE + CALL FLAG_ERROR("Field does not have any components.",ERR,ERROR,*999) + ENDIF + ELSE + CALL FLAG_ERROR("Field is not associated.",ERR,ERROR,*999) + ENDIF + + CALL EXITS("FIELDML_INPUT_FIELD_PARAMETERS_UPDATE") + RETURN +999 CALL ERRORS("FIELDML_INPUT_FIELD_PARAMETERS_UPDATE",ERR,ERROR) + CALL EXITS("FIELDML_INPUT_FIELD_PARAMETERS_UPDATE") + RETURN 1 + END SUBROUTINE FIELDML_INPUT_FIELD_PARAMETERS_UPDATE + + ! + !================================================================================================================================ + ! + !>Update the given field's nodal parameters using the given parameter evaluator. SUBROUTINE FIELDML_INPUT_FIELD_NODAL_PARAMETERS_UPDATE( FIELDML_INFO, EVALUATOR_NAME, FIELD, VARIABLE_TYPE, SET_TYPE, & & ERR, ERROR, * ) diff --git a/src/opencmiss.f90 b/src/opencmiss.f90 index 639bea7e..cf16b2cd 100644 --- a/src/opencmiss.f90 +++ b/src/opencmiss.f90 @@ -5739,13 +5739,13 @@ MODULE OPENCMISS MODULE PROCEDURE CMISSFieldMLInputFieldCreateStartNumberC END INTERFACE CMISSFieldMLInputFieldCreateStart - !> Updates the given field's nodal dofs using the given parameter evaluator. - INTERFACE CMISSFieldMLInputFieldNodalParametersUpdate - MODULE PROCEDURE CMISSFieldMLInputFieldNodalParametersUpdateObjVS - MODULE PROCEDURE CMISSFieldMLInputFieldNodalParametersUpdateNumberVS - MODULE PROCEDURE CMISSFieldMLInputFieldNodalParametersUpdateObjC - MODULE PROCEDURE CMISSFieldMLInputFieldNodalParametersUpdateNumberC - END INTERFACE CMISSFieldMLInputFieldNodalParametersUpdate + !> Updates the given field's dofs using the given parameter evaluator. 
+ INTERFACE CMISSFieldMLInputFieldParametersUpdate + MODULE PROCEDURE CMISSFieldMLInputFieldParametersUpdateObjVS + MODULE PROCEDURE CMISSFieldMLInputFieldParametersUpdateNumberVS + MODULE PROCEDURE CMISSFieldMLInputFieldParametersUpdateObjC + MODULE PROCEDURE CMISSFieldMLInputFieldParametersUpdateNumberC + END INTERFACE CMISSFieldMLInputFieldParametersUpdate !> Creates a basis using the given FieldML evaluator. INTERFACE CMISSFieldMLInputBasisCreateStart @@ -5788,7 +5788,7 @@ MODULE OPENCMISS PUBLIC :: CMISSFieldMLInputCreateFromFile, CMISSFieldMLInputMeshCreateStart, & & CMISSFieldMLInputCoordinateSystemCreateStart, CMISSFieldMLInputCreateMeshComponent, & & CMISSFieldMLInputFieldCreateStart, CMISSFieldMLInputBasisCreateStart, CMISSFieldMLInputNodesCreateStart, & - & CMISSFieldMLInputFieldNodalParametersUpdate + & CMISSFieldMLInputFieldParametersUpdate PUBLIC :: CMISSFieldMLIOTypeFinalise, CMISSFieldMLIOTypeInitialise, CMISSFieldMLIOGetSession @@ -47414,8 +47414,8 @@ END SUBROUTINE CMISSFieldMLInputFieldCreateStartNumberC !================================================================================================================================ ! - !> Update the nodal parameters of the given field, using the given FieldML evaluator. - SUBROUTINE CMISSFieldMLInputFieldNodalParametersUpdateObjVS( fieldml, field, evaluatorName, variableType, & + !> Update the DOF parameters of the given field, using the given FieldML evaluator. + SUBROUTINE CMISSFieldMLInputFieldParametersUpdateObjVS( fieldml, field, evaluatorName, variableType, & & setType, err ) !Arguments TYPE(CMISSFieldMLIOType), INTENT(INOUT) :: FIELDML !< The FieldML context containing the evaluator to use. @@ -47425,26 +47425,26 @@ SUBROUTINE CMISSFieldMLInputFieldNodalParametersUpdateObjVS( fieldml, field, eva INTEGER(INTG), INTENT(IN) :: setType ! Update the nodal parameters of field with the given user number, using the given FieldML evaluator. - SUBROUTINE CMISSFieldMLInputFieldNodalParametersUpdateNumberVS( fieldml, regionNumber, fieldNumber, & + !> Update the DOF parameters of field with the given user number, using the given FieldML evaluator. + SUBROUTINE CMISSFieldMLInputFieldParametersUpdateNumberVS( fieldml, regionNumber, fieldNumber, & & evaluatorName, variableType, setType, err ) !Arguments TYPE(CMISSFieldMLIOType), INTENT(INOUT) :: FIELDML !< The FieldML context containing the evaluator to use. 
@@ -47459,29 +47459,29 @@ SUBROUTINE CMISSFieldMLInputFieldNodalParametersUpdateNumberVS( fieldml, regionN TYPE(REGION_TYPE), POINTER :: region TYPE(FIELD_TYPE), POINTER :: field - CALL ENTERS("CMISSFieldMLInputFieldNodalParametersUpdateNumberVS",Err,ERROR,*999) + CALL ENTERS("CMISSFieldMLInputFieldParametersUpdateNumberVS",Err,ERROR,*999) CALL REGION_USER_NUMBER_TO_REGION( regionNumber, region, err, error, *999 ) CALL FIELD_USER_NUMBER_TO_FIELD( fieldNumber, region, field, err, error, *999 ) - CALL FIELDML_INPUT_FIELD_NODAL_PARAMETERS_UPDATE( fieldml%fieldmlInfo, evaluatorName, field, variableType, setType, & + CALL FIELDML_INPUT_FIELD_PARAMETERS_UPDATE( fieldml%fieldmlInfo, evaluatorName, field, variableType, setType, & & err, error, *999 ) - CALL EXITS("CMISSFieldMLInputFieldNodalParametersUpdateNumberVS") + CALL EXITS("CMISSFieldMLInputFieldParametersUpdateNumberVS") RETURN -999 CALL ERRORS("CMISSFieldMLInputFieldNodalParametersUpdateNumberVS",Err,ERROR) - CALL EXITS("CMISSFieldMLInputFieldNodalParametersUpdateNumberVS") +999 CALL ERRORS("CMISSFieldMLInputFieldParametersUpdateNumberVS",Err,ERROR) + CALL EXITS("CMISSFieldMLInputFieldParametersUpdateNumberVS") CALL CMISS_HANDLE_ERROR(Err,ERROR) RETURN - END SUBROUTINE CMISSFieldMLInputFieldNodalParametersUpdateNumberVS + END SUBROUTINE CMISSFieldMLInputFieldParametersUpdateNumberVS ! !================================================================================================================================ ! - !> Update the nodal parameters of the given field, using the given FieldML evaluator. - SUBROUTINE CMISSFieldMLInputFieldNodalParametersUpdateObjC( fieldml, field, evaluatorName, & + !> Update the DOF parameters of the given field, using the given FieldML evaluator. + SUBROUTINE CMISSFieldMLInputFieldParametersUpdateObjC( fieldml, field, evaluatorName, & & variableType, setType, err ) !Arguments TYPE(CMISSFieldMLIOType), INTENT(INOUT) :: FIELDML !< The FieldML context containing the evaluator to use. @@ -47491,26 +47491,26 @@ SUBROUTINE CMISSFieldMLInputFieldNodalParametersUpdateObjC( fieldml, field, eval INTEGER(INTG), INTENT(IN) :: setType ! Update the nodal parameters of field with the given user number, using the given FieldML evaluator. - SUBROUTINE CMISSFieldMLInputFieldNodalParametersUpdateNumberC( fieldml, regionNumber, fieldNumber, & + !> Update the DOF parameters of field with the given user number, using the given FieldML evaluator. + SUBROUTINE CMISSFieldMLInputFieldParametersUpdateNumberC( fieldml, regionNumber, fieldNumber, & & evaluatorName, variableType, setType, err ) !Arguments TYPE(CMISSFieldMLIOType), INTENT(INOUT) :: FIELDML !< The FieldML context containing the evaluator to use. 
@@ -47525,22 +47525,22 @@ SUBROUTINE CMISSFieldMLInputFieldNodalParametersUpdateNumberC( fieldml, regionNu TYPE(REGION_TYPE), POINTER :: region TYPE(FIELD_TYPE), POINTER :: field - CALL ENTERS("CMISSFieldMLInputFieldNodalParametersUpdateNumberC",Err,ERROR,*999) + CALL ENTERS("CMISSFieldMLInputFieldParametersUpdateNumberC",Err,ERROR,*999) CALL REGION_USER_NUMBER_TO_REGION( regionNumber, region, err, error, *999 ) CALL FIELD_USER_NUMBER_TO_FIELD( fieldNumber, region, field, err, error, *999 ) - CALL FIELDML_INPUT_FIELD_NODAL_PARAMETERS_UPDATE( fieldml%fieldmlInfo, var_str(evaluatorName), field, variableType, & + CALL FIELDML_INPUT_FIELD_PARAMETERS_UPDATE( fieldml%fieldmlInfo, var_str(evaluatorName), field, variableType, & & setType, err, error, *999 ) - CALL EXITS("CMISSFieldMLInputFieldNodalParametersUpdateNumberC") + CALL EXITS("CMISSFieldMLInputFieldParametersUpdateNumberC") RETURN -999 CALL ERRORS("CMISSFieldMLInputFieldNodalParametersUpdateNumberC",Err,ERROR) - CALL EXITS("CMISSFieldMLInputFieldNodalParametersUpdateNumberC") +999 CALL ERRORS("CMISSFieldMLInputFieldParametersUpdateNumberC",Err,ERROR) + CALL EXITS("CMISSFieldMLInputFieldParametersUpdateNumberC") CALL CMISS_HANDLE_ERROR(Err,ERROR) RETURN - END SUBROUTINE CMISSFieldMLInputFieldNodalParametersUpdateNumberC + END SUBROUTINE CMISSFieldMLInputFieldParametersUpdateNumberC ! !================================================================================================================================ From e6cfb557139322835a55ce2cb08a6e8481b1c1ba Mon Sep 17 00:00:00 2001 From: Chris Bradley Date: Mon, 28 Nov 2011 11:31:26 +1300 Subject: [PATCH 21/24] Fix typo. --- src/fieldml_input_routines.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fieldml_input_routines.f90 b/src/fieldml_input_routines.f90 index a796dd2e..669a7eef 100644 --- a/src/fieldml_input_routines.f90 +++ b/src/fieldml_input_routines.f90 @@ -80,7 +80,7 @@ MODULE FIELDML_INPUT_ROUTINES ! !================================================================================================================================ ! -uu + SUBROUTINE FIELDML_ASSERT_IS_IN( FIELDML_INFO, ERR, ERROR, * ) !Argument variables TYPE(FIELDML_IO_TYPE), INTENT(IN) :: FIELDML_INFO ! Date: Mon, 28 Nov 2011 11:46:17 +1300 Subject: [PATCH 22/24] Fix more typos. --- src/fieldml_input_routines.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/fieldml_input_routines.f90 b/src/fieldml_input_routines.f90 index 669a7eef..548cd0a7 100644 --- a/src/fieldml_input_routines.f90 +++ b/src/fieldml_input_routines.f90 @@ -1151,7 +1151,7 @@ SUBROUTINE FIELDML_INPUT_FIELD_PARAMETERS_UPDATE(FIELDML_INFO, EVALUATOR_NAME, F INTEGER(INTG), INTENT(OUT) :: ERR ! Date: Fri, 2 Dec 2011 17:34:07 +1300 Subject: [PATCH 23/24] Error message for fopen problems. --- src/external_dae_solver_routines.cu | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/external_dae_solver_routines.cu b/src/external_dae_solver_routines.cu index 766838ef..4bb67595 100644 --- a/src/external_dae_solver_routines.cu +++ b/src/external_dae_solver_routines.cu @@ -57,6 +57,8 @@ SolverDAEExternalIntegrate Solves the differential-algebraic equation. 
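/* [Editor's note: illustration only, not part of the patch. The change below
 * adds the OS-level cause to the previously bare failure message. A minimal
 * sketch of the errno-reporting pattern it introduces; the filename argument
 * is the editor's addition, the patch itself prints only the strerror() text: */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void reportFopenFailureAndExit(const char *filename)
{
    /* strerror(errno) describes why the preceding fopen() failed, e.g.
     * "No such file or directory" when the Results/ directory is absent. */
    fprintf(stderr, "File error: %s: %s\n", filename, strerror(errno));
    fprintf(stderr, "Timing file could not be opened or created.\n");
    exit(EXIT_FAILURE);
}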
/* Included files */ #include #include +#include + +#include "external_dae_solver_routines.h" #include "cuda_solver_routines.cu" @@ -91,12 +93,18 @@ void SolverDAEExternalIntegrate(const int NumberOfDofs, if (!timing_file) { timing_file = fopen(filename, "wt"); + if (!timing_file) { + fprintf(stderr,"File error: %s\n",strerror(errno)); + fprintf(stderr, "Timing file could not be opened or created."); + exit(EXIT_FAILURE); + } fprintf(timing_file,"Cell Model\tIntegrator\tNumber of Threads\tNumber of Blocks\tThreads Per Block\tNumber of Partitions\tNumber of Streams\tTotal Computational Time(s)\tTotal GFLOPS\n"); // fprintf(timing_file,"Cell Model\tIntegrator\tNumber of Threads\tNumber of Blocks\tThreads Per Block\tNumber of Partitions\tNumber of Streams\tTotal Computational Time(s)\tTotal GFLOPS\tSingle Kernel Computational Time(s)\tKernel GFLOPS\tDevice Utilisation\n"); } else { - fclose(timing_file); + fclose(timing_file); timing_file = fopen(filename, "at"); if (!timing_file) { + fprintf(stderr,"File error: %s\n",strerror(errno)); fprintf(stderr, "Timing file could not be opened or created."); exit(EXIT_FAILURE); } From 4a03384cf931338d17c4bf11748529680c416d17 Mon Sep 17 00:00:00 2001 From: Chris Bradley Date: Sun, 4 Dec 2011 11:09:46 +1300 Subject: [PATCH 24/24] Tidy ups --- src/cuda_solver_routines.cu | 803 +++++++++++++++++++----------------- utils/MakefileCommon.inc | 2 +- 2 files changed, 430 insertions(+), 375 deletions(-) diff --git a/src/cuda_solver_routines.cu b/src/cuda_solver_routines.cu index dcf65120..5d174a90 100644 --- a/src/cuda_solver_routines.cu +++ b/src/cuda_solver_routines.cu @@ -1,3 +1,58 @@ +/* \file + * \author Jin Budelmann + * \brief This file provides the routines for solving differential-algebraic equations with an external solver. + * + * \section LICENSE + * + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the + * License for the specific language governing rights and limitations + * under the License. + * + * The Original Code is OpenCMISS + * + * The Initial Developer of the Original Code is University of Auckland, + * Auckland, New Zealand, the University of Oxford, Oxford, United + * Kingdom and King's College, London, United Kingdom. Portions created + * by the University of Auckland, the University of Oxford and King's + * College, London are Copyright (C) 2007-2010 by the University of + * Auckland, the University of Oxford and King's College, London. + * All Rights Reserved. + * + * Contributor(s): Chris Bradley + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above.
If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + */ + +/* +File: cuda_solver_routines.cu +============================= + +This file provides the routines for CUDA acceleration with an external solver. + +Functions included: + + +*/ + #include #include @@ -260,334 +315,334 @@ void domainDecomposition(unsigned int* threads_per_domain, unsigned int* spill, (*threads_per_block) = (*threads_per_block) > deviceProp.maxThreadsPerBlock ? deviceProp.maxThreadsPerBlock : (*threads_per_block); - // Adjust threads per blocks so that states variables fit in shared memory - count=0; - (*sharedMem) = (size_t) (sharedMemoryIntegrator + sharedMemoryDevice + sharedMemoryCellModel) * (*threads_per_block) * sizeof(double); - while ( (*sharedMem) > deviceProp.sharedMemPerBlock*0.5 && count < 200 ) { - if ((*threads_per_block) > 32 ) { - (*threads_per_block)-=32; - } else { - fprintf(stderr, "Cannot fit variables in shared memory"); - exit(EXIT_FAILURE); - } - count++; - (*sharedMem) = (size_t) (sharedMemoryIntegrator + sharedMemoryDevice + sharedMemoryCellModel) * (*threads_per_block) * sizeof(double); - } - (*num_blocks)=num_threads/(*threads_per_block); - spill[2]=num_threads % (*threads_per_block); - //if (spill[2] !=0) (*num_blocks)++; // Round up calculation - - (*blocksPerDomain) = (*num_blocks)/(num_streams*(*num_partitions)); - if ((*blocksPerDomain) == 0) { - fprintf(stderr, "Too many streams and partitions for your problem size\nReduce these and try again"); - exit(EXIT_FAILURE); - } else if ((*blocksPerDomain) > deviceProp.maxGridSize[0]) { - fprintf(stderr, "Too many blocks allocated to each domain for your problem size\nIncrease number of partitions and try again"); - exit(EXIT_FAILURE); - } + // Adjust threads per blocks so that states variables fit in shared memory + count=0; + (*sharedMem) = (size_t) (sharedMemoryIntegrator + sharedMemoryDevice + sharedMemoryCellModel) * (*threads_per_block) * sizeof(double); + while ( (*sharedMem) > deviceProp.sharedMemPerBlock*0.5 && count < 200 ) { + if ((*threads_per_block) > 32 ) { + (*threads_per_block)-=32; + } else { + fprintf(stderr, "Cannot fit variables in shared memory"); + exit(EXIT_FAILURE); + } + count++; + (*sharedMem) = (size_t) (sharedMemoryIntegrator + sharedMemoryDevice + sharedMemoryCellModel) * (*threads_per_block) * sizeof(double); + } + (*num_blocks)=num_threads/(*threads_per_block); + spill[2]=num_threads % (*threads_per_block); + //if (spill[2] !=0) (*num_blocks)++; // Round up calculation + + (*blocksPerDomain) = (*num_blocks)/(num_streams*(*num_partitions)); + if ((*blocksPerDomain) == 0) { + fprintf(stderr, "Too many streams and partitions for your problem size\nReduce these and try again"); + exit(EXIT_FAILURE); + } else if ((*blocksPerDomain) > deviceProp.maxGridSize[0]) { + fprintf(stderr, "Too many blocks allocated to each domain for your problem size\nIncrease number of partitions and try again"); + exit(EXIT_FAILURE); + } + remainingBlocks = (*num_blocks)%(num_streams*(*num_partitions));
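/* [Editor's note: illustration only, not part of the patch. The block being
 * re-indented here is the heart of domainDecomposition(); a compact sketch of
 * its core arithmetic, using the variable meanings from the surrounding code: */
static void decompositionSketch(unsigned int num_threads, unsigned int num_streams,
                                unsigned int num_partitions, unsigned int threads_per_block)
{
    unsigned int num_blocks      = num_threads / threads_per_block;   /* whole blocks */
    unsigned int spillThreads    = num_threads % threads_per_block;   /* becomes spill[2] */
    unsigned int blocksPerDomain = num_blocks / (num_streams * num_partitions);
    unsigned int remainingBlocks = num_blocks % (num_streams * num_partitions);
    /* The real routine grows num_partitions until remainingBlocks fits into
     * extra domains (spill[0] full extra domains plus spill[1] blocks in one
     * partial domain), shrinks threads_per_block in warp-sized 32-thread steps
     * until the per-block state variables fit in shared memory, and shrinks
     * blocksPerDomain until each domain's state data fits in 90% of free
     * device memory. */
    (void)spillThreads; (void)blocksPerDomain; (void)remainingBlocks;
}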
num_streams*(*blocksPerDomain)) { + (*num_partitions)++; + } else if (remainingBlocks > num_streams*(*blocksPerDomain)) { + numberRemainingDomains = ceil((float)remainingBlocks/(*blocksPerDomain)); + (*num_partitions) += ceil((float)numberRemainingDomains/num_streams); + } + + if ((*num_blocks)/(num_streams*(*num_partitions))==0) { + (*num_partitions)--; + } + + remainingBlocks = (*num_blocks)%(*num_partitions); + + // Adjust number of partitions so that data fits in GPU memory + count = 0; + cutilSafeCall( cudaMemGetInfo(&freeGPUMemory, &test) ); + *pagedMemorySize = sizeof(double)*rateStateCount*(*blocksPerDomain)*num_streams*(*threads_per_block); + while ( *pagedMemorySize > freeGPUMemory*0.9 && count < 200 ) { + if ((*blocksPerDomain) > 1 ) { + (*num_partitions) = (*num_blocks)/num_streams/--(*blocksPerDomain); remainingBlocks = (*num_blocks)%(num_streams*(*num_partitions)); - if (remainingBlocks > 0 && remainingBlocks <= num_streams*(*blocksPerDomain)) { - (*num_partitions)++; + if (remainingBlocks != 0) { + (*num_partitions)++; } else if (remainingBlocks > num_streams*(*blocksPerDomain)) { - numberRemainingDomains = ceil((float)remainingBlocks/(*blocksPerDomain)); - (*num_partitions) += ceil((float)numberRemainingDomains/num_streams); + numberRemainingDomains = ceil((float)remainingBlocks/(*blocksPerDomain)); + (*num_partitions) += ceil((float)numberRemainingDomains/num_streams); } - - if ((*num_blocks)/(num_streams*(*num_partitions))==0) { - (*num_partitions)--; - } - remainingBlocks = (*num_blocks)%(*num_partitions); - - // Adjust number of partitions so that data fits in GPU memory - count = 0; - cutilSafeCall( cudaMemGetInfo(&freeGPUMemory, &test) ); - *pagedMemorySize = sizeof(double)*rateStateCount*(*blocksPerDomain)*num_streams*(*threads_per_block); - while ( *pagedMemorySize > freeGPUMemory*0.9 && count < 200 ) { - if ((*blocksPerDomain) > 1 ) { - (*num_partitions) = (*num_blocks)/num_streams/--(*blocksPerDomain); - remainingBlocks = (*num_blocks)%(num_streams*(*num_partitions)); - if (remainingBlocks != 0) { - (*num_partitions)++; - } else if (remainingBlocks > num_streams*(*blocksPerDomain)) { - numberRemainingDomains = ceil((float)remainingBlocks/(*blocksPerDomain)); - (*num_partitions) += ceil((float)numberRemainingDomains/num_streams); - } - remainingBlocks = (*num_blocks)%(*num_partitions); - } else { - fprintf(stderr, "Cannot fit variables in GPU device memory\nReduce threads per block and try again"); - exit(EXIT_FAILURE); - } - count++; - - *pagedMemorySize = sizeof(double)*rateStateCount*(*blocksPerDomain)*num_streams*(*threads_per_block); - } - - if (*pagedMemorySize > freeGPUMemory*0.9) { - fprintf(stderr, "Cannot fit variables in GPU device memory\nReduce threads per block and try again"); - exit(EXIT_FAILURE); - } - - (*threads_per_domain) = (*threads_per_block)*(*blocksPerDomain); - spill[0] = remainingBlocks/(*blocksPerDomain); - spill[1] = remainingBlocks%(*blocksPerDomain); - -// if (renainingBlocks != 0) { -// blocksLastStream = -// } -// remainingBlocks = num_threads%(num_blocks*(*num_partitions)) -// for (i=0; i<(*num_partitions); i++) { -// if (i == (*num_partitions) && remainingBlocks != 0) { -// -// for (j=0; j freeGPUMemory*0.9) { + fprintf(stderr, "Cannot fit variables in GPU device memory\nReduce threads per block and try again"); + exit(EXIT_FAILURE); + } + + (*threads_per_domain) = (*threads_per_block)*(*blocksPerDomain); + spill[0] = remainingBlocks/(*blocksPerDomain); + spill[1] = remainingBlocks%(*blocksPerDomain); + + // if (renainingBlocks != 
+	// if (remainingBlocks != 0) {
+	// 	blocksLastStream =
+	// }
+	// remainingBlocks = num_threads%(num_blocks*(*num_partitions))
+	// for (i=0; i<(*num_partitions); i++) {
+	// 	if (i == (*num_partitions) && remainingBlocks != 0) {
+	//
+	// 		for (j=0; j
+	// printf("> Device name : %s\n", deviceProp.name );
+	// printf("> CUDA Capable SM %d.%d hardware with %d multi-processors\n",
+	//        deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount);
+	// printf("> Cell Model = %s, Integrator = %s\n", cellModelName, integratorName);
+	// printf("> num_threads = %d, num_blocks = %d, threads_per_block = %d, num_partitions = %d, timeSteps = %d, num_streams = %d\n",
+	//        num_threads, num_blocks, threads_per_block, num_partitions, timeSteps, num_streams);
+	// //#endif
+	// printf("grid.x %d threads.x %d sharedMem %d\n", grid.x, threads.x, sharedMem);
+	// printf("Spills %d %d %d\n", spill[0], spill[1], spill[2]);
+
+	// Setup and start global timer
+	timer = 0;
+	cutCreateTimer(&timer);
+	cutilCheckError(cutStartTimer(timer));
+
+	// // Test kernel speed in default stream (timing is more accurate in the default stream)
+	// memcpy(h_paged_states, h_states, pagedMemorySize/num_streams);
+	// cutilSafeCall( cudaMemcpy(d_states, h_paged_states, pagedMemorySize/num_streams,
+	//                cudaMemcpyHostToDevice) );
+	// // Start kernel timer
+	// kernel_timer = 0;
+	// cutCreateTimer(&kernel_timer);
+	// cutilCheckError(cutStartTimer(kernel_timer));
+	// // Start kernel
+	// solveSystem<<<grid, threads, sharedMem>>>(timeSteps, stepSize, d_states);
+	// checkCUDAError("Single Kernel Execution");
+	// cutilSafeCall( cudaThreadSynchronize() );
+	// // Stop kernel timer
+	// cutilCheckError(cutStopTimer(kernel_timer));
+	// cutilSafeCall( cudaMemcpy(h_paged_states, d_states, pagedMemorySize/num_streams,
+	//                cudaMemcpyDeviceToHost) );
+	// memcpy(h_states, h_paged_states, pagedMemorySize/num_streams);
+
+	// Prefetch data for next partition in first stream
+	// if (num_partitions>1) {
+	// 	global_offset = rateStateCount * num_streams * grid.x * threads.x;
+	// 	memcpy(h_paged_states, h_states + global_offset, pagedMemorySize/num_streams);
+	// }
+	// } else {
+	memcpy(h_paged_states, h_states, pagedMemorySize);
+	}
+
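One prerequisite the staging above leans on quietly: h_paged_states must be page-locked (allocated with cudaMallocHost) for the cudaMemcpyAsync calls in the pipeline below to overlap with kernel execution; async copies from ordinary pageable memory silently fall back to synchronous behaviour. A minimal sketch of that setup, with illustrative names and sizes rather than the patch's:

#include <cuda_runtime.h>

#define NUM_STREAMS 4

int main(void)
{
    const size_t bytes = 1 << 20;              /* illustrative slice size */
    double *hPinned = NULL, *dStates = NULL;
    cudaStream_t streams[NUM_STREAMS];

    /* Page-locked host buffer: required for truly asynchronous H2D/D2H copies */
    cudaMallocHost((void **)&hPinned, bytes);
    cudaMalloc((void **)&dStates, bytes);

    for (int i = 0; i < NUM_STREAMS; i++)
        cudaStreamCreate(&streams[i]);

    /* ... queue cudaMemcpyAsync / kernel / cudaMemcpyAsync per stream ... */

    for (int i = 0; i < NUM_STREAMS; i++)
        cudaStreamDestroy(streams[i]);
    cudaFree(dStates);
    cudaFreeHost(hPinned);
    return 0;
}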
+	// Queue kernel calls into streams to hide memory transfers (num_partitions sets of kernel calls in each stream)
+	for(i = 0; i < num_partitions+1; i++) {
+		// Asynchronously launch num_streams memcopies
+		for(j = 0; j < num_streams; j++) { //((timing_file!=NULL && i==0) ? 1 : 0); j < num_streams; j++){
+			domainIndex = j + i*num_streams;
+			if (domainIndex <= lastFullDomain) {
+				local_offset = j * rateStateCount * grid.x * threads.x ;
+				if (domainIndex == lastFullDomain && (spill[1]!=0 || spill[2]!=0)) {
+					//printf("last async in %d, size %d\n", domainIndex, lastStreamMemorySize);
+					cutilSafeCall( cudaMemcpyAsync(d_states + local_offset, h_paged_states + local_offset,
+					               lastStreamMemorySize, cudaMemcpyHostToDevice, streams[j]) );
+				} else {
+					//printf("normal async in %d, size %d\n", domainIndex, pagedMemorySize/num_streams);
+					cutilSafeCall( cudaMemcpyAsync(d_states + local_offset, h_paged_states + local_offset,
+					               pagedMemorySize/num_streams, cudaMemcpyHostToDevice, streams[j]) );
 				}
-
-	// Set appropriate device flags and print relevant information
-	cutilSafeCall( cudaSetDevice( cuda_device ) );
-	cutilSafeCall( cudaSetDeviceFlags(cudaDeviceBlockingSync) );
-	cudaDeviceProp deviceProp;
-	cutilSafeCall( cudaGetDeviceProperties(&deviceProp, cuda_device) );
-	if( (1 == deviceProp.major) && (deviceProp.minor < 1))
-		printf("%s does not have compute capability 1.1 or later\n", deviceProp.name);
-
-	domainDecomposition(&threads_per_domain, spill, num_threads, num_streams, &pagedMemorySize, &threads_per_block,
-	                    &num_partitions, &blocksPerDomain, &num_blocks, &sharedMem);
-
-	lastFullDomain = num_blocks/blocksPerDomain;
-
-	// Allocate CUDA device and host pinned memory
-	cutilSafeCall( cudaMallocHost((void**) &h_paged_states, pagedMemorySize) );
-	cutilSafeCall( cudaMalloc((void **) &d_states, pagedMemorySize) );
-
-	// Create streams
-	streams = (cudaStream_t*) malloc(num_streams * sizeof(cudaStream_t));
-	for(i = 0; i < num_streams; i++)
-		cutilSafeCall( cudaStreamCreate(&(streams[i])) );
-
-	// Setup execution parameters
-	dim3 grid(blocksPerDomain, 1, 1);
-	dim3 threads(threads_per_block, 1, 1);
-
-	//#ifdef DEBUG
-	if (timing_file) {
-//	printf("> Device name : %s\n", deviceProp.name );
-//	printf("> CUDA Capable SM %d.%d hardware with %d multi-processors\n",
-//	       deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount);
-//	printf("> Cell Model = %s, Integrator = %s\n", cellModelName, integratorName);
-//	printf("> num_threads = %d, num_blocks = %d, threads_per_block = %d, num_partitions = %d, timeSteps = %d, num_streams = %d\n",
-//	       num_threads, num_blocks, threads_per_block, num_partitions, timeSteps, num_streams);
-//	//#endif
-//	printf("grid.x %d threads.x %d sharedMem %d\n", grid.x, threads.x, sharedMem);
-//	printf("Spills %d %d %d\n", spill[0], spill[1], spill[2]);
-
-	// Setup and start global timer
-	timer = 0;
-	cutCreateTimer(&timer);
-	cutilCheckError(cutStartTimer(timer));
-
-//	// Test kernel speed in default stream (timing is more accurate in default stream)
-//	memcpy(h_paged_states, h_states, pagedMemorySize/num_streams);
-//	cutilSafeCall( cudaMemcpy(d_states, h_paged_states, pagedMemorySize/num_streams,
-//	               cudaMemcpyHostToDevice) );
-//	// Start kernel timer
-//	kernel_timer = 0;
-//	cutCreateTimer(&kernel_timer);
-//	cutilCheckError(cutStartTimer(kernel_timer));
-//	// Start kernel
-//	solveSystem<<<grid, threads, sharedMem>>>(timeSteps, stepSize, d_states);
-//	checkCUDAError("Single Kernel Execution");
-//	cutilSafeCall( cudaThreadSynchronize() );
-//	// Stop kernel timer
-//	cutilCheckError(cutStopTimer(kernel_timer));
-//	cutilSafeCall( cudaMemcpy(h_paged_states, d_states, pagedMemorySize/num_streams,
-//	               cudaMemcpyDeviceToHost) );
-//	memcpy(h_states, h_paged_states, pagedMemorySize/num_streams);
-	// Prefetch data for next partition in first stream
-//	if (num_partitions>1) {
-//		global_offset = rateStateCount * num_streams * grid.x * threads.x;
-//		memcpy(h_paged_states, h_states + global_offset, pagedMemorySize/num_streams);
-//	}
-//	} else {
-	memcpy(h_paged_states, h_states, pagedMemorySize);
+			}
+		}
+		// Execute the kernel
+		// Asynchronously launch num_streams kernels, each operating on its own portion of data
+		for(j = 0; j < num_streams; j++) { //((timing_file!=NULL && i==0) ? 1 : 0); j < num_streams; j++){
+			domainIndex = j + i*num_streams;
+			if (domainIndex <= lastFullDomain) {
+				local_offset = j * grid.x * threads.x ;
+				if (domainIndex == lastFullDomain && (spill[1]!=0 || spill[2]!=0)) {
+					grid.x = spill[1]+1;
+					solveSystem<<<grid, threads, sharedMem, streams[j]>>>(timeSteps, stepSize,
+					            d_states + rateStateCount*local_offset);
+				} else {
+					solveSystem<<<grid, threads, sharedMem, streams[j]>>>(timeSteps, stepSize,
+					            d_states + rateStateCount*local_offset);
 				}
-
-	// Queue kernel calls into streams to hide memory transfers (num_partitions sets of kernel calls in each stream)
-	for(i = 0; i < num_partitions+1; i++) {
-		// Asynchronously launch num_streams memcopies
-		for(j = 0; j < num_streams; j++) { //((timing_file!=NULL && i==0) ? 1 : 0); j < num_streams; j++){
-			domainIndex = j + i*num_streams;
-			if (domainIndex <= lastFullDomain) {
-				local_offset = j * rateStateCount * grid.x * threads.x ;
-				if (domainIndex == lastFullDomain && (spill[1]!=0 || spill[2]!=0)) {
-					//printf("last async in %d, size %d\n", domainIndex, lastStreamMemorySize);
-					cutilSafeCall( cudaMemcpyAsync(d_states + local_offset, h_paged_states + local_offset,
-					               lastStreamMemorySize, cudaMemcpyHostToDevice, streams[j]) );
-				} else {
-					//printf("normal async in %d, size %d\n", domainIndex, pagedMemorySize/num_streams);
-					cutilSafeCall( cudaMemcpyAsync(d_states + local_offset, h_paged_states + local_offset,
-					               pagedMemorySize/num_streams, cudaMemcpyHostToDevice, streams[j]) );
-				}
-			}
-		}
-		// Execute the kernel
-		// Asynchronously launch num_streams kernels, each operating on its own portion of data
-		for(j = 0; j < num_streams; j++) { //((timing_file!=NULL && i==0) ? 1 : 0); j < num_streams; j++){
-			domainIndex = j + i*num_streams;
-			if (domainIndex <= lastFullDomain) {
-				local_offset = j * grid.x * threads.x ;
-				if (domainIndex == lastFullDomain && (spill[1]!=0 || spill[2]!=0)) {
-					grid.x = spill[1]+1;
-					solveSystem<<<grid, threads, sharedMem, streams[j]>>>(timeSteps, stepSize,
-					            d_states + rateStateCount*local_offset);
-				} else {
-					solveSystem<<<grid, threads, sharedMem, streams[j]>>>(timeSteps, stepSize,
-					            d_states + rateStateCount*local_offset);
-				}
-			}
-		}
-
-		// Asynchronously launch num_streams memcopies
-		for(j = 0; j < num_streams; j++) { //((timing_file!=NULL && i==0) ? 1 : 0); j < num_streams; j++){
-			domainIndex = j + i*num_streams;
-			if (domainIndex <= lastFullDomain) {
-				local_offset = j * rateStateCount * grid.x * threads.x ;
-				if (domainIndex == lastFullDomain && (spill[1]!=0 || spill[2]!=0)) {
-					//printf("last async out %d, size %d\n", domainIndex, lastStreamMemorySize);
-					cutilSafeCall( cudaMemcpyAsync(h_paged_states + local_offset, d_states + local_offset,
-					               lastStreamMemorySize, cudaMemcpyDeviceToHost, streams[j]) );
-				} else {
-					//printf("normal async out %d, size %d\n", domainIndex, pagedMemorySize/num_streams);
-					cutilSafeCall( cudaMemcpyAsync(h_paged_states + local_offset, d_states + local_offset,
-					               pagedMemorySize/num_streams, cudaMemcpyDeviceToHost, streams[j]) );
-				}
-			}
-		}
-
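The fourth launch-configuration argument is what ties each kernel to its stream, so the launch queues behind that stream's pending host-to-device copy rather than the default stream. A compilable sketch of the pattern, where dummyKernel and launchPerStream are illustrative stand-ins for the patch's solveSystem and its calling loop:

#include <cuda_runtime.h>

/* Stand-in for the patch's solveSystem kernel. */
__global__ void dummyKernel(unsigned int timeSteps, double stepSize, double *states)
{
    /* integrate states[blockIdx.x*blockDim.x + threadIdx.x] here ... */
}

/* Launch one kernel per stream, each on its own slice of dStates. */
void launchPerStream(int numStreams, cudaStream_t *streams, double *dStates,
                     size_t sliceElems, dim3 grid, dim3 threads, size_t sharedMem,
                     unsigned int timeSteps, double stepSize)
{
    for (int j = 0; j < numStreams; j++) {
        /* 4th <<<>>> argument: queue behind this stream's pending H2D copy */
        dummyKernel<<<grid, threads, sharedMem, streams[j]>>>(
            timeSteps, stepSize, dStates + j * sliceElems);
    }
}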
-		// Execute memcpys in and out of paged memory when CUDA calls in the streams have finished
-		for(j = 0; j < num_streams; j++) { //((timing_file!=NULL && i==0) ? 1 : 0); j < num_streams; j++){
-			domainIndex = j + i*num_streams;
-			if (domainIndex <= lastFullDomain) {
-				cudaStreamSynchronize(streams[j]);
-
-				local_offset = j * rateStateCount * grid.x * threads.x ;
-				global_offset = i * num_streams * grid.x * threads.x;
-
-				if (domainIndex == lastFullDomain && (spill[1]!=0 || spill[2]!=0)) {
-					//printf("last memcpy out %d\n", domainIndex);
-					memcpy(h_states + rateStateCount * global_offset + local_offset, h_paged_states + local_offset,
-					       lastStreamMemorySize);
-				} else {
-					//printf("normal memcpy out %d\n", domainIndex);
-					memcpy(h_states + rateStateCount * global_offset + local_offset, h_paged_states + local_offset,
-					       pagedMemorySize/num_streams);
-				}
-
-				global_offset = (i + 1) * num_streams * grid.x * threads.x;
-				if (domainIndex == lastFullDomain - num_streams && (spill[1]!=0 || spill[2]!=0)) {
-					//printf("last memcpy in %d\n", domainIndex);
-					lastStreamMemorySize = sizeof(double)*rateStateCount*(spill[1]*threads_per_block+spill[2]);
-					memcpy(h_paged_states + local_offset, h_states + rateStateCount * global_offset + local_offset,
-					       lastStreamMemorySize);
-				} else if (domainIndex < lastFullDomain - num_streams) {
-					//printf("normal memcpy in %d\n", domainIndex);
-					memcpy(h_paged_states + local_offset, h_states + rateStateCount * global_offset + local_offset,
-					       pagedMemorySize/num_streams);
-				}
-			}
-		}
+			}
+		}
+
+		// Asynchronously launch num_streams memcopies
+		for(j = 0; j < num_streams; j++) { //((timing_file!=NULL && i==0) ? 1 : 0); j < num_streams; j++){
+			domainIndex = j + i*num_streams;
+			if (domainIndex <= lastFullDomain) {
+				local_offset = j * rateStateCount * grid.x * threads.x ;
+				if (domainIndex == lastFullDomain && (spill[1]!=0 || spill[2]!=0)) {
+					//printf("last async out %d, size %d\n", domainIndex, lastStreamMemorySize);
+					cutilSafeCall( cudaMemcpyAsync(h_paged_states + local_offset, d_states + local_offset,
+					               lastStreamMemorySize, cudaMemcpyDeviceToHost, streams[j]) );
+				} else {
+					//printf("normal async out %d, size %d\n", domainIndex, pagedMemorySize/num_streams);
+					cutilSafeCall( cudaMemcpyAsync(h_paged_states + local_offset, d_states + local_offset,
+					               pagedMemorySize/num_streams, cudaMemcpyDeviceToHost, streams[j]) );
 				}
-
-	if (timing_file) {
-		// Stop global timer
-		cutilCheckError(cutStopTimer(timer));
-
-		// Calculate timing statistics
-		dSeconds = cutGetTimerValue(timer)/1000.0;
-		nFLOPS = (FLOPSPerTimeStep*rateStateCount + FLOPSPerFunction*FunctionEvals)*timeSteps*num_threads;
-		gflops = 1.0e-9 * nFLOPS/dSeconds;
-
-//		kernel_dSeconds = cutGetTimerValue(kernel_timer)/1000.0;
-//		kernel_nFLOPS = (FLOPSPerTimeStep*rateStateCount + FLOPSPerFunction*FunctionEvals)*timeSteps*num_threads/num_streams/num_partitions;
-//		kernel_gflops = 1.0e-9 * kernel_nFLOPS/kernel_dSeconds;
-
-		// Store Stats
-		fprintf(timing_file,"%s\t%s\t%d\t%d\t%d\t%d\t%d\t%f\t%f\n", cellModelName, integratorName, num_threads, num_blocks,
-		        threads_per_block, num_partitions, num_streams, dSeconds, gflops);
-//		fprintf(timing_file,"%s\t%s\t%d\t%d\t%d\t%d\t%d\t%f\t%f\t%f\t%f\t%f\n", cellModelName, integratorName, num_threads, num_blocks,
-//		        threads_per_block, num_partitions, num_streams, dSeconds, gflops, kernel_dSeconds,
-//		        kernel_gflops, gflops/kernel_gflops*100);
+			}
+		}
+
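Each pass of the loop that follows drains one stream and refills the same pinned slice with the next partition's input, which is what lets a fixed-size staging buffer serve an arbitrarily large problem. A minimal sketch of that drain/refill step under assumed names (drainAndRefill is illustrative, not from the patch):

#include <string.h>
#include <cuda_runtime.h>

/* Wait for a stream's D2H copy, unload its results from the pinned staging
 * buffer, then stage the next partition's input into the same slice. */
static void drainAndRefill(cudaStream_t stream, char *hStates, char *hPinned,
                           size_t sliceBytes, size_t outOffset, size_t inOffset,
                           int hasNextPartition)
{
    cudaStreamSynchronize(stream);                       /* copies + kernel done */
    memcpy(hStates + outOffset, hPinned, sliceBytes);    /* results out          */
    if (hasNextPartition)
        memcpy(hPinned, hStates + inOffset, sliceBytes); /* next input in        */
}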
+		// Execute memcpys in and out of paged memory when CUDA calls in the streams have finished
+		for(j = 0; j < num_streams; j++) { //((timing_file!=NULL && i==0) ? 1 : 0); j < num_streams; j++){
+			domainIndex = j + i*num_streams;
+			if (domainIndex <= lastFullDomain) {
+				cudaStreamSynchronize(streams[j]);
+
+				local_offset = j * rateStateCount * grid.x * threads.x ;
+				global_offset = i * num_streams * grid.x * threads.x;
+
+				if (domainIndex == lastFullDomain && (spill[1]!=0 || spill[2]!=0)) {
+					//printf("last memcpy out %d\n", domainIndex);
+					memcpy(h_states + rateStateCount * global_offset + local_offset, h_paged_states + local_offset,
+					       lastStreamMemorySize);
+				} else {
+					//printf("normal memcpy out %d\n", domainIndex);
+					memcpy(h_states + rateStateCount * global_offset + local_offset, h_paged_states + local_offset,
+					       pagedMemorySize/num_streams);
 				}
-
-	// Deallocate Memory and Release Threads
-	for(i = 0; i < num_streams; i++)
-		cutilSafeCall( cudaStreamDestroy(streams[i]) );
-	cutilSafeCall( cudaFree(d_states) );
-	cutilSafeCall( cudaFreeHost(h_paged_states) );
-	cudaThreadExit();
+
+				global_offset = (i + 1) * num_streams * grid.x * threads.x;
+				if (domainIndex == lastFullDomain - num_streams && (spill[1]!=0 || spill[2]!=0)) {
+					//printf("last memcpy in %d\n", domainIndex);
+					lastStreamMemorySize = sizeof(double)*rateStateCount*(spill[1]*threads_per_block+spill[2]);
+					memcpy(h_paged_states + local_offset, h_states + rateStateCount * global_offset + local_offset,
+					       lastStreamMemorySize);
+				} else if (domainIndex < lastFullDomain - num_streams) {
+					//printf("normal memcpy in %d\n", domainIndex);
+					memcpy(h_paged_states + local_offset, h_states + rateStateCount * global_offset + local_offset,
+					       pagedMemorySize/num_streams);
+				}
+			}
+		}
+	}
+
+	if (timing_file) {
+		// Stop global timer
+		cutilCheckError(cutStopTimer(timer));
+
+		// Calculate timing statistics
+		dSeconds = cutGetTimerValue(timer)/1000.0;
+		nFLOPS = (FLOPSPerTimeStep*rateStateCount + FLOPSPerFunction*FunctionEvals)*timeSteps*num_threads;
+		gflops = 1.0e-9 * nFLOPS/dSeconds;
+
+		// kernel_dSeconds = cutGetTimerValue(kernel_timer)/1000.0;
+		// kernel_nFLOPS = (FLOPSPerTimeStep*rateStateCount + FLOPSPerFunction*FunctionEvals)*timeSteps*num_threads/num_streams/num_partitions;
+		// kernel_gflops = 1.0e-9 * kernel_nFLOPS/kernel_dSeconds;
+
+		// Store Stats
+		fprintf(timing_file,"%s\t%s\t%d\t%d\t%d\t%d\t%d\t%f\t%f\n", cellModelName, integratorName, num_threads, num_blocks,
+		        threads_per_block, num_partitions, num_streams, dSeconds, gflops);
+		// fprintf(timing_file,"%s\t%s\t%d\t%d\t%d\t%d\t%d\t%f\t%f\t%f\t%f\t%f\n", cellModelName, integratorName, num_threads, num_blocks,
+		//         threads_per_block, num_partitions, num_streams, dSeconds, gflops, kernel_dSeconds,
+		//         kernel_gflops, gflops/kernel_gflops*100);
+	}
+
+	// Deallocate Memory and Release Threads
+	for(i = 0; i < num_streams; i++)
+		cutilSafeCall( cudaStreamDestroy(streams[i]) );
+	cutilSafeCall( cudaFree(d_states) );
+	cutilSafeCall( cudaFreeHost(h_paged_states) );
+	cudaThreadExit();
 }
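The cut* timing calls above come from the NVIDIA SDK's cutil utilities; the same statistic can be gathered with plain CUDA events, as in the sketch below. The arithmetic mirrors the patch's FLOP accounting; gflopsFromEvents is an illustrative name, not part of the patch.

#include <cuda_runtime.h>

/* GFLOP/s from two recorded CUDA events, using the patch's FLOP model:
 * nFLOPS = (FLOPSPerTimeStep*rateStateCount + FLOPSPerFunction*FunctionEvals)
 *          * timeSteps * num_threads                                         */
double gflopsFromEvents(cudaEvent_t start, cudaEvent_t stop,
                        double flopsPerTimeStep, double rateStateCount,
                        double flopsPerFunction, double functionEvals,
                        double timeSteps, double numThreads)
{
    float ms = 0.0f;
    cudaEventSynchronize(stop);                 /* make sure 'stop' has fired */
    cudaEventElapsedTime(&ms, start, stop);     /* elapsed time, milliseconds */
    double seconds = ms / 1000.0;
    double nFLOPS = (flopsPerTimeStep * rateStateCount
                   + flopsPerFunction * functionEvals) * timeSteps * numThreads;
    return 1.0e-9 * nFLOPS / seconds;
}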
@@ -596,71 +651,71 @@ void solve(double* h_states, double startTime, double endTime, double stepSize,
 ////////////////////////////////////////////////////////////////////////////////
 void solveProblem(unsigned int timeSteps, unsigned int num_threads, unsigned int threads_per_block,
-                  unsigned int num_partitions, unsigned int num_streams, FILE *timing_file, FILE *results_file)
+                  unsigned int num_partitions, unsigned int num_streams, FILE *timing_file, FILE *results_file)
 {
-	unsigned int i, j;
-	float startTime = 0.0f;
-	float endTime = 0.2f;
-
-	double* h_states = NULL;
-
-	h_states = (double *) safeMalloc(sizeof(double)*rateStateCount*num_threads);
-
-	initProblem(num_threads, h_states);
-
-	solve(h_states, startTime, endTime, timeSteps, num_threads, threads_per_block, num_partitions, num_streams, timing_file);
-
-	if (results_file) {
-		fprintf(results_file,"\n\n");
-
-		for (i=0; i