00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef AMROC_TIMING_H
00010 #define AMROC_TIMING_H
00011
#include <cassert>
#include <cstdio>
#include <iostream>
#include <vector>
00021
00022 #ifdef DAGH_NO_MPI
00023
00024 #include <sys/time.h>
00025 #include <time.h>
00026 inline static double MPI_Wtime() {
00027 struct timeval t;
00028 gettimeofday(&t,0);
00029 return (double)t.tv_sec + ((double)t.tv_usec)*0.000001;
00030 }
00031
00032 #endif
00033
00034 #include "CommServer.h"
00035
00036 class Timing;
00037 extern Timing timing;
00038
00039 #ifdef TIMING_AMR
00040
00041 #define START_WATCH timing.start();
00042 #define END_WATCH(what) timing.stop(Timing::what);
00043
00044 #define START_INTERMEDIATE_WATCH timing.start();
00045 #define END_INTERMEDIATE_WATCH END_WATCH(INTERMEDIATE)
00046
00047 #define END_WATCH_INITIALIZATION END_WATCH(INITIALIZATION)
00048 #define END_WATCH_FLAGGING END_WATCH(FLAGGING)
00049 #define END_WATCH_RECOMPOSING_WHOLE END_WATCH(RECOMPOSING_WHOLE)
00050 #define END_WATCH_RECOMPOSING_INTERPOLATION END_WATCH(RECOMPOSING_INTERPOLATION)
00051 #define END_WATCH_BOUNDARIES_WHOLE END_WATCH(BOUNDARIES_WHOLE)
00052 #define END_WATCH_BOUNDARIES_SYNC END_WATCH(BOUNDARIES_SYNC)
00053 #define END_WATCH_BOUNDARIES_INTERPOLATION END_WATCH(BOUNDARIES_INTERPOLATION)
00054 #define END_WATCH_BOUNDARIES_EXTERNAL END_WATCH(BOUNDARIES_EXTERNAL)
00055 #define END_WATCH_INTEGRATION(which) timing.stop((which==1)?Timing::INTEGRATION_ESTIMATE:(which==2)?Timing::INTEGRATION_SHADOW:Timing::INTEGRATION_MAIN);
00056 #define END_WATCH_SOURCE_INTEGRATION END_WATCH(SOURCE_INTEGRATION)
00057 #define END_WATCH_FIXUP_WHOLE END_WATCH(FIXUP_WHOLE)
00058 #define END_WATCH_FIXUP_SYNC END_WATCH(FIXUP_SYNC)
00059 #define END_WATCH_CLUSTERING END_WATCH(CLUSTERING)
00060 #define END_WATCH_OUPUT END_WATCH(OUTPUT)
00061 #define END_WATCH_EXAMINE END_WATCH(EXAMINE1)
00062
00063 #define START_WATCH_WHOLE timing.start_timing();
00064 #define END_WATCH_WHOLE timing.stop(Timing::MISC);
00065
00066 #define COUNT(what) timing.count(Timing::what);
00067 #define COUNT_FREQ(what,freq) timing.count(Timing::what,freq);
00068 #else
00069
00070 #define START_WATCH_WHOLE
00071 #define START_WATCH
00072 #define END_WATCH(what)
00073
00074 #define START_INTERMEDIATE_WATCH
00075 #define END_INTERMEDIATE_WATCH
00076 #define END_WATCH_INITIALIZATION
00077 #define END_WATCH_FLAGGING
00078 #define END_WATCH_RECOMPOSING_WHOLE
00079 #define END_WATCH_RECOMPOSING_INTERPOLATION
00080 #define END_WATCH_BOUNDARIES_WHOLE
00081 #define END_WATCH_BOUNDARIES_SYNC
00082 #define END_WATCH_BOUNDARIES_INTERPOLATION
00083 #define END_WATCH_BOUNDARIES_EXTERNAL
00084 #define END_WATCH_INTEGRATION(which)
00085 #define END_WATCH_SOURCE_INTEGRATION
00086 #define END_WATCH_FIXUP_WHOLE
00087 #define END_WATCH_FIXUP_SYNC
00088 #define END_WATCH_CLUSTERING
00089 #define END_WATCH_OUPUT
00090 #define END_WATCH_EXAMINE
00091 #define END_WATCH_WHOLE
00092 #define COUNT(what)
00093 #define COUNT_FREQ(what,freq)
00094
00095 #endif
00096
00104 class Timing {
00105 public:
00109 enum TimingAccounts {
00110 WHOLE,
00111
00112 MISC,
00113
00114
00115 INITIALIZATION, FLAGGING, CLUSTERING, OUTPUT, INTERMEDIATE,
00116 EXAMINE1, EXAMINE2, EXAMINE3, EXAMINE4,
00117
00118 PARTITION_INIT, PARTITION_CALC,
00119
00120 RECOMPOSING_WHOLE, RECOMPOSING_INTERPOLATION, RECOMPOSING_DATASYNC,
00121
00122 BOUNDARIES_WHOLE, BOUNDARIES_SYNC, BOUNDARIES_INTERPOLATION,
00123 BOUNDARIES_EXTERNAL,
00124
00125 INTEGRATION_MAIN, INTEGRATION_ESTIMATE,
00126 INTEGRATION_SHADOW, SOURCE_INTEGRATION,
00127
00128 EX_INIT, EX_SSEND, EX_SRECV,
00129 GFSYNC_GETINFO, GFSYNC_CHECKREADY,
00130 GFSYNC_IWAIT, GFSYNC_ITEST, GFSYNC_OWAIT, GFSYNC_OTEST,
00131
00132 GFSYNC_READGHOSTS, GFSYNC_READGHOSTS_SELF, GFSYNC_WRITEGHOSTS,
00133 GFSYNC_READDATA, GFSYNC_WRITEDATA,
00134
00135 UPDATE_PATCH,
00136
00137 FIXUP_WHOLE, FIXUP_CORRECTION, FIXUP_SYNC,
00138
00139 GFM_FINDING_CELLS, GFM_GEOMETRY, GFM_TRANSFORM,
00140 GFM_EXTRAPOLATION, GFM_SETBNDRY,
00141 GFM_AUXILIARY_VALUES, GFM_SETBNDRY_WHOLE,
00142
00143 LS_SET_WHOLE, LS_SYNC, LS_CPT_TRANSFORM, LS_CPT_FLOODFILL,
00144
00145 ELC_RECEIVEBOUNDARY, ELC_SENDPRESSURE,
00146
00147 FLUID_CPL_RECEIVE_OVERHEAD, FLUID_CPL_SEND_OVERHEAD, FLUID_CPL_INTERPOLATE,
00148 FLUID_CPL_VELOCITY_SEARCH, FLUID_CPL_PRESSURE_CALCULATE, FLUID_CPL_ELC_GEOMETRY,
00149
00150
00151 _ACMAX };
00152
00156 enum CountingAccounts {
00157 GF_SYNC, GF_PATCHITERATOR,
00158
00159 CS_TEST, CS_WAIT,
00160
00161
00162 _CCMAX };
00163
00164 public:
00165
00166
00167
00172 inline void count(enum CountingAccounts account, int i=1)
00173 { counts[account] += i; }
00174
00175
00176
00177
00180 inline void start_timing()
00181 { tos = ×tack[0]; ntos = 0; *tos = 0.0; start_time = MPI_Wtime(); }
00182
00185 inline void start()
00186 { add_time(); tos++; ntos++; assert (ntos<MAXRECURSIONS); *tos = 0.0; }
00187
00191 inline void stop(enum TimingAccounts account)
00192 { add_time(); times[account] += *tos; calls[account]++; tos--; ntos--; assert (ntos>=-1); }
00193
00194 protected:
00195 inline void add_time() {
00196 double end_time = MPI_Wtime();
00197 *tos += end_time - start_time; start_time = end_time;
00198 }
00199
00200 public:
00201
00202
00203
00206 static void collect(MPI_Comm Comm) {
00207 #ifdef TIMING_AMR
00208 timing.collect_timing(Comm);
00209 #endif
00210 }
00211
00212 static void print(std::ostream& os) {
00213 #ifdef TIMING_AMR
00214 timing.print_local_times = false;
00215 timing.print_timing(os);
00216 #endif
00217 }
00218
00219 static void print_local(std::ostream& os) {
00220 #ifdef TIMING_AMR
00221 timing.print_local_times = true;
00222 timing.print_timing(os);
00223 #endif
00224 }
00225
00228 void collect_timing(MPI_Comm Comm, enum TimingAccounts ac,
00229 double* stat_times) {
00230 #ifdef DAGH_NO_MPI
00231 stat_times[0] = times[ac];
00232 stat_times[1] = times[ac];
00233 stat_times[2] = times[ac];
00234 #else
00235 MPI_Reduce(×[ac], &stat_times[0], 1, MPI_DOUBLE,
00236 MPI_MAX, 0, Comm);
00237 MPI_Reduce(×[ac], &stat_times[1], 1, MPI_DOUBLE,
00238 MPI_MIN, 0, Comm);
00239 MPI_Reduce(×[ac], &stat_times[2], 1, MPI_DOUBLE,
00240 MPI_SUM, 0, Comm);
00241
00242 int procs;
00243 MPI_Comm_size(Comm, &procs);
00244 stat_times[2] /= procs;
00245 #endif
00246 }
00247
00250 void collect_timing(MPI_Comm Comm) {
00251
00252 times[WHOLE] = calcsum(times, MISC, _ACMAX-1, 0.0);
00253 calls[WHOLE] = calcsum(calls, MISC, _ACMAX-1, (unsigned)0);
00254
00255
00256 stat_times_max.resize(_ACMAX);
00257 stat_times_min.resize(_ACMAX);
00258 stat_times_avg.resize(_ACMAX);
00259 #ifdef DAGH_NO_MPI
00260 stat_times_max = times;
00261 stat_times_min = times;
00262 stat_times_avg = times;
00263 #else
00264 MPI_Reduce(×[0], &stat_times_max[0], _ACMAX, MPI_DOUBLE,
00265 MPI_MAX, 0, Comm);
00266 MPI_Reduce(×[0], &stat_times_min[0], _ACMAX, MPI_DOUBLE,
00267 MPI_MIN, 0, Comm);
00268 MPI_Reduce(×[0], &stat_times_avg[0], _ACMAX, MPI_DOUBLE,
00269 MPI_SUM, 0, Comm);
00270
00271 int procs;
00272 MPI_Comm_size(Comm, &procs);
00273 for (int i=0; i<_ACMAX; i++)
00274 stat_times_avg[i] /= procs;
00275 #endif
00276 }
00277
00281 void print_timing(std::ostream& os) {
00282 cl(os,"CommServer - MPI_Waitsome ", CS_WAIT);
00283 cl(os," MPI_Testsome ", CS_TEST,true);
00284 cl(os,"GridFunction - Sync ", GF_SYNC);
00285 cl(os," PatchIterator ", GF_PATCHITERATOR,true);
00286 pl(os,"Initialization ", INITIALIZATION);
00287 pl(os,"Integration - Main ", INTEGRATION_MAIN);
00288 pl(os," Estimation ", INTEGRATION_ESTIMATE,true);
00289 pl(os," Coarsen ", INTEGRATION_SHADOW,true);
00290 pl(os," Source ", SOURCE_INTEGRATION,true);
00291 pl(os,"Fixup - Overhead ", FIXUP_WHOLE,true);
00292 pl(os," Correction ", FIXUP_CORRECTION,true);
00293 pl(os," Syncing ", FIXUP_SYNC,true);
00294 pl(os,"Boundary Val. - Overhead ", BOUNDARIES_WHOLE);
00295 pl(os," Syncing ", BOUNDARIES_SYNC);
00296 pl(os," Interpolation", BOUNDARIES_INTERPOLATION);
00297 pl(os," Physical ", BOUNDARIES_EXTERNAL);
00298 pl(os,"Regridding - Flagging ", FLAGGING);
00299 pl(os," Clustering ", CLUSTERING);
00300 pl(os,"Partition - Init ", PARTITION_INIT);
00301 pl(os," Calculate ", PARTITION_CALC);
00302 pl(os,"Recomposition - Overhead ", RECOMPOSING_WHOLE);
00303 pl(os," Interpolation", RECOMPOSING_INTERPOLATION);
00304 pl(os," Data syncing ", RECOMPOSING_DATASYNC);
00305 pl(os,"ExchangeServ. - Init ", EX_INIT,true);
00306 pl(os," start sends ", EX_SSEND,true);
00307 pl(os," start recvs ", EX_SRECV,true);
00308 pl(os,"GFSync - gather depend. info ", GFSYNC_GETINFO,true);
00309 pl(os," check dependencies ", GFSYNC_CHECKREADY,true);
00310 pl(os," instant wait for msg ", GFSYNC_IWAIT,true);
00311 pl(os," instant test for msg ", GFSYNC_ITEST,true);
00312 pl(os," overlapped wait for msg ", GFSYNC_OWAIT,true);
00313 pl(os," overlapped test for msg ", GFSYNC_OTEST,true);
00314 pl(os," read ghosts ", GFSYNC_READGHOSTS,true);
00315 pl(os," read ghosts self ", GFSYNC_READGHOSTS_SELF,true);
00316 pl(os," write ghosts ", GFSYNC_WRITEGHOSTS,true);
00317 pl(os," read data ", GFSYNC_READDATA,true);
00318 pl(os," AMRSolver update patch ", UPDATE_PATCH,true);
00319 pl(os,"Ghost-fluid values - Overhead", GFM_SETBNDRY_WHOLE,true);
00320 pl(os," Finding internal cells ", GFM_FINDING_CELLS,true);
00321 pl(os," Calculating geometry info", GFM_GEOMETRY,true);
00322 pl(os," Calc. extrapolated values", GFM_TRANSFORM,true);
00323 pl(os," Extra- / Interpolation ", GFM_EXTRAPOLATION,true);
00324 pl(os," Setting auxiliary values ", GFM_AUXILIARY_VALUES,true);
00325 pl(os," Applying bndry values ", GFM_SETBNDRY,true);
00326 pl(os,"Level sets - Overhead ", LS_SET_WHOLE,true);
00327 pl(os," CPT - point_transform() ", LS_CPT_TRANSFORM,true);
00328 pl(os," CPT - flood_fill() ", LS_CPT_FLOODFILL,true);
00329 pl(os," Sync level set functions ", LS_SYNC,true);
00330 pl(os,"ELC - receive_mesh() ", ELC_RECEIVEBOUNDARY,true);
00331 pl(os," send_pressure() ", ELC_SENDPRESSURE,true);
00332 pl(os,"Coupling Send - Overhead ", FLUID_CPL_SEND_OVERHEAD,true);
00333 pl(os," pressure calculation() ", FLUID_CPL_PRESSURE_CALCULATE,true);
00334 pl(os," pressure interpolation() ", FLUID_CPL_INTERPOLATE,true);
00335 pl(os," ELC: face normal¢ers ", FLUID_CPL_ELC_GEOMETRY,true);
00336 pl(os,"Coupling Receive - Overhead ", FLUID_CPL_RECEIVE_OVERHEAD,true);
00337 pl(os," velocity search&constr. ", FLUID_CPL_VELOCITY_SEARCH,true);
00338 pl(os,"Output ", OUTPUT);
00339 pl(os,"Examined in detail 1 ", EXAMINE1,true);
00340 pl(os,"Examined in detail 2 ", EXAMINE2,true);
00341 pl(os,"Examined in detail 3 ", EXAMINE3,true);
00342 pl(os,"Examined in detail 4 ", EXAMINE4,true);
00343 pl(os,"Misc ", MISC);
00344 pl(os,"Whole time ", WHOLE);
00345 }
00346
00347 protected:
00351 template<typename T>
00352 inline T calcsum(typename std::vector<T> &data,
00353 int start, int end, T sum) {
00354 for (int i=start; i<=end; i++) sum += data[i];
00355 return sum;
00356 }
00357
00360 inline void pl(std::ostream& OS, char *name,
00361 enum TimingAccounts ac, bool opt=false) {
00362 char str[500];
00363 if (print_local_times) {
00364 if (opt && times[ac]==0.) return;
00365 std::sprintf(str, " %s : %4.3fs (%3.2f%%) calls: %4ix",
00366 name,
00367 times[ac], (times[WHOLE]>0. ? 100.0*times[ac]/times[WHOLE] : 0.),
00368 calls[ac]
00369 );
00370 } else {
00371 if (opt && stat_times_avg[ac]==0.) return;
00372 std::sprintf(str, " %s : %4.3fs (%3.2f%%) balance: %2.3f diff: %4.2fs (%9.2fs - %4.2fs)",
00373 name,
00374 stat_times_avg[ac], (stat_times_avg[WHOLE]>0. ?
00375 100.0*stat_times_avg[ac]/stat_times_avg[WHOLE] : 0.),
00376 (stat_times_avg[ac]>0. ? stat_times_max[ac] / stat_times_avg[ac] : 1.),
00377 stat_times_max[ac] - stat_times_min[ac],
00378 stat_times_max[ac], stat_times_min[ac]
00379 );
00380 }
00381 OS << str << std::endl;
00382 }
00383
00384 inline void cl(std::ostream& OS, char *name, enum CountingAccounts ac, bool opt=false) {
00385 if (opt && counts[ac]==0) return;
00386 OS << " " << name << " : " << counts[ac] << "x"
00387 << std::endl;
00388 }
00389
00390 public:
00391 Timing() {
00392 times.resize(_ACMAX, 0.0);
00393 calls.resize(_ACMAX, 0);
00394 counts.resize(_CCMAX, 0);
00395 timestack.resize(MAXRECURSIONS);
00396 }
00397
00398 protected:
00399 std::vector<double> times;
00400 std::vector<unsigned int> calls;
00401 std::vector<int> counts;
00402
00403 enum {MAXRECURSIONS=300};
00404 std::vector<double> timestack;
00405 double *tos;
00406 int ntos;
00407
00409 bool print_local_times;
00410 std::vector<double> stat_times_max;
00411 std::vector<double> stat_times_min;
00412 std::vector<double> stat_times_avg;
00413
00414 double start_time;
00415 };
00416
00417 #endif