]> git.pld-linux.org Git - packages/postgresql.git/blob - postgresql-pgcluster-1.7.0rc7.patch
- version 8.3.0 (merge from POSTGRESQL_8_3)
[packages/postgresql.git] / postgresql-pgcluster-1.7.0rc7.patch
1 diff -aruN postgresql-8.2.4/GNUmakefile.in pgcluster-1.7.0rc7/GNUmakefile.in
2 --- postgresql-8.2.4/GNUmakefile.in     2006-08-18 21:58:05.000000000 +0200
3 +++ pgcluster-1.7.0rc7/GNUmakefile.in   2007-02-18 22:52:16.000000000 +0100
4 @@ -63,13 +63,13 @@
5  
6  ##########################################################################
7  
8 -distdir        := postgresql-$(VERSION)
9 +distdir        := pgcluster-$(PGCLUSTER_VERSION)
10  dummy  := =install=
11 -garbage := =*  "#"*  ."#"*  *~*  *.orig  *.rej  core  postgresql-*
12 +garbage := =*  "#"*  ."#"*  *~*  *.orig  *.rej  core  pgcluster-*
13  
14  dist: $(distdir).tar.gz 
15  ifeq ($(split-dist), yes)
16 -dist: postgresql-base-$(VERSION).tar.gz postgresql-docs-$(VERSION).tar.gz postgresql-opt-$(VERSION).tar.gz postgresql-test-$(VERSION).tar.gz 
17 +dist: pgcluster-base-$(PGCLUSTER_VERSION).tar.gz pgcluster-docs-$(PGCLUSTER_VERSION).tar.gz pgcluster-opt-$(PGCLUSTER_VERSION).tar.gz pgcluster-test-$(PGCLUSTER_VERSION).tar.gz 
18  endif
19  dist:
20         -rm -rf $(distdir)
21 @@ -81,19 +81,19 @@
22         src/tools src/tutorial \
23         $(addprefix src/pl/, plperl plpython tcl)
24  
25 -docs_files := doc/postgres.tar.gz doc/src doc/TODO.detail
26 +docs_files := doc/pgcluster.tar.gz doc/src doc/TODO.detail
27  
28 -postgresql-base-$(VERSION).tar: distdir
29 +pgcluster-base-$(PGCLUSTER_VERSION).tar: distdir
30         $(TAR) -c $(addprefix --exclude $(distdir)/, $(docs_files) $(opt_files) src/test) \
31           -f $@ $(distdir)
32  
33 -postgresql-docs-$(VERSION).tar: distdir
34 +pgcluster-docs-$(PGCLUSTER_VERSION).tar: distdir
35         $(TAR) cf $@ $(addprefix $(distdir)/, $(docs_files))
36  
37 -postgresql-opt-$(VERSION).tar: distdir
38 +pgcluster-opt-$(PGCLUSTER_VERSION).tar: distdir
39         $(TAR) cf $@ $(addprefix $(distdir)/, $(opt_files))
40  
41 -postgresql-test-$(VERSION).tar: distdir
42 +pgcluster-test-$(PGCLUSTER_VERSION).tar: distdir
43         $(TAR) cf $@ $(distdir)/src/test
44  
45  distdir:
46 diff -aruN postgresql-8.2.4/INSTALL_PGCLUSTER pgcluster-1.7.0rc7/INSTALL_PGCLUSTER
47 --- postgresql-8.2.4/INSTALL_PGCLUSTER  1970-01-01 01:00:00.000000000 +0100
48 +++ pgcluster-1.7.0rc7/INSTALL_PGCLUSTER        2007-02-19 00:59:13.000000000 +0100
49 @@ -0,0 +1,392 @@
50 +PGCluster Installation Instructions
51 +
52 +=============================================================
53 +1. Installation
54 +=============================================================
55 +
56 +1-1. Install Cluster DB Server, Replication Server & Load Balancer
57 +----------------------------------------------------------------
58 +$ cd $source_dir 
59 +$ ./configure
60 +$ gmake
61 +$ su
62 +# gmake install
63 +# chown -R postgres /usr/local/pgsql
64 +----------------------------------------------------------------
65 +
66 +=============================================================
67 +2. Initialize DB
68 +=============================================================
69 +$ su
70 +# adduser postgres
71 +# mkdir /usr/local/pgsql/data
72 +# chown postgres /usr/local/pgsql/data
73 +# su - postgres
74 +$ /usr/local/pgsql/bin/initdb -D /usr/local/pgsql/data
75 +
76 +
77 +=============================================================
78 +3. Configuration
79 +=============================================================
80 +(EX.System Composition)
81 +
82 +                       |
83 +                ((Load Balance Server))
84 +                ( hostname: lb.pgcluster.org)
85 +                ( receive port:5432   )
86 +                ( recovery port:6001  )
87 +                       |
88 +----------+-------------+------------+----------
89 +               |                       |
90 + ((  Cluster DB 1      ))      ((  Cluster DB 2        ))
91 + ( hostname:c1.pgcluster.org)  ( hostname:c2.pgcluster.org)
92 + ( receive port: 5432 )        ( receive port:5432  )
93 + ( recovery port:7001 )        ( recovery port:7002 )
94 +                 |                       |
95 +----------+-------------+------------+----------
96 +                       |
97 +                ((Replication Server))
98 +                ( hostname:pgr.pgcluster.org)
99 +                ( receive port:8001   )
100 +                ( recovery port:8101  )
101 +
102 +
103 +3-1. Load Balance Server
104 +
105 +The setup file of the load balance server is copied from the sample file and edited.
106 +(the sample file is installed in '/usr/local/pgsql/share' by default)
107 +----------------------------------------------------------------
108 +$cd /usr/local/pgsql/share
109 +$cp pglb.conf.sample pglb.conf
110 +----------------------------------------------------------------
111 +
112 +In the case of the above system composition example,
113 +the setup example of pglb.conf file is as the following 
114 +
115 +#============================================================
116 +#                Load Balance Server configuration file
117 +#-------------------------------------------------------------
118 +# file: pglb.conf
119 +#-------------------------------------------------------------
120 +# This file controls:
121 +#         o which hosts are db cluster server
122 +#         o which port  use connect to db cluster server
123 +#         o how many connections are allowed on each DB server
124 +#============================================================
125 +#-------------------------------------------------------------
126 +# set cluster DB server information
127 +#              o Host_Name :           hostname
128 +#              o Port :                Connection for postmaster
129 +#              o Max_Connect :         Maximum number of connections to postmaster
130 +#-------------------------------------------------------------
131 +<Cluster_Server_Info>
132 +       <Host_Name>             c1.pgcluster.org        </Host_Name>
133 +       <Port>                  5432                    </Port>
134 +       <Max_Connect>           32                      </Max_Connect>
135 +</Cluster_Server_Info>
136 +<Cluster_Server_Info>
137 +       <Host_Name>             c2.pgcluster.org        </Host_Name>
138 +       <Port>                  5432                    </Port>
139 +       <Max_Connect>           32                      </Max_Connect>
140 +</Cluster_Server_Info>
141 +#-------------------------------------------------------------
142 +# set Load Balance server information
143 +#              o Host_Name :                   The host name of this load balance server.
144 +#                                                -- please write a host name by FQDN or IP address.
145 +#              o Backend_Socket_Dir :          Unix domain socket path for the backend
146 +#              o Receive_Port :                Connection from client
147 +#              o Recovery_Port :               Connection for recovery process
148 +#              o Max_Cluster_Num :             Maximum number of cluster DB servers
149 +#              o Use_Connection_Pooling :      Use connection pool [yes/no] 
150 +#              o Lifecheck_Timeout :           Timeout of the lifecheck response
151 +#              o Lifecheck_Interval :          Interval time of the lifecheck
152 +#                              (range 1s - 1h)
153 +#                              10s    -- 10 seconds
154 +#                              10min  -- 10 minutes
155 +#                              1h     -- 1 hours
156 +#-------------------------------------------------------------
157 +<Host_Name>                    lb.pgcluster.org        </Host_Name>
158 +<Backend_Socket_Dir>           /tmp                    </Backend_Socket_Dir>
159 +<Receive_Port>                 5432                    </Receive_Port>
160 +<Recovery_Port>                6001                    </Recovery_Port>
161 +<Max_Cluster_Num>              128                     </Max_Cluster_Num>
162 +<Use_Connection_Pooling>       no                      </Use_Connection_Pooling>
163 +<LifeCheck_Timeout>            3s                      </LifeCheck_Timeout>
164 +<LifeCheck_Interval>           15s                     </LifeCheck_Interval>
165 +#-------------------------------------------------------------
166 +# A setup of a log files 
167 +#
168 +#              o File_Name :   Log file name with full path
169 +#              o File_Size :   Maximum size of each log files
170 +#                              Please specify in a number and unit(K or M)
171 +#                                10   -- 10 Byte
172 +#                                10K  -- 10 KByte
173 +#                                10M  -- 10 MByte
174 +#              o Rotate :      Rotation times
175 +#                              If specified 0, old versions are removed.
176 +#-------------------------------------------------------------
177 +<Log_File_Info>
178 +       <File_Name>             /tmp/pglb.log           </File_Name>
179 +       <File_Size>             1M                      </File_Size>
180 +       <Rotate>                3                       </Rotate>
181 +</Log_File_Info>
182 +
183 +3-2. Cluster DB Server
184 +
185 +The Cluster DB server needs two configuration files to be edited
186 +('pg_hba.conf' and 'cluster.conf').
187 +These files are created under the $PG_DATA directory after 'initdb'.
188 +
189 +A. pg_hba.conf
190 +Permission to connect to the DB via IP connections is needed for this system.
191 +
192 +B. cluster.conf
193 +In the case of the above system composition example,
194 +the setup example of cluster.conf file is as the following 
195 +
196 +#============================================================
197 +#                Cluster DB Server configuration file
198 +#-------------------------------------------------------------
199 +# file: cluster.conf
200 +#-------------------------------------------------------------
201 +# This file controls:
202 +#         o which hosts & port are replication server
203 +#         o which port use for replication request to replication server
204 +#         o which command use for recovery function
205 +#
206 +#============================================================
207 +#-------------------------------------------------------------
208 +# set cluster DB server information
209 +#              o Host_Name :           hostname
210 +#              o Port :                Connection port for postmaster
211 +#              o Recovery_Port :       Connection for recovery process
212 +#-------------------------------------------------------------
213 +<Replicate_Server_Info>
214 +       <Host_Name>             pgr.pgcluster.org       </Host_Name>
215 +       <Port>                  8001                    </Port>
216 +       <Recovery_Port>         8101                    </Recovery_Port>
217 +</Replicate_Server_Info>
218 +#-------------------------------------------------------------
219 +# set Cluster DB Server information
220 +#              o Host_Name :           Host name which connect with replication server
221 +#              o Recovery_Port :       Connection port for recovery
222 +#              o Rsync_Path :          Path of rsync command 
223 +#              o Rsync_Option :        File transfer option for rsync
224 +#              o Rsync_Compress :      Use compression option for rsync
225 +#                                      [yes/no]. default : yes
226 +#              o Pg_Dump_Path :        path of pg_dump
227 +#              o When_Stand_Alone :            When all replication servers are down,
228 +#                                      you can set up two kinds of permission,
229 +#                                      "read_only" or "read_write".
230 +#              o Replication_Timeout : Timeout of each replication request
231 +#              o Lifecheck_Timeout :   Timeout of the lifecheck response
232 +#              o Lifecheck_Interval :  Interval time of the lifecheck
233 +#                              (range 1s - 1h)
234 +#                              10s   -- 10 seconds
235 +#                              10min -- 10 minutes
236 +#                              1h    -- 1 hours
237 +#-------------------------------------------------------------
238 +<Host_Name>                    c1.pgcluster.org                        </Host_Name>
239 +<Recovery_Port>                7001                                    </Recovery_Port>
240 +<Rsync_Path>                   /usr/bin/rsync                          </Rsync_Path>
241 +<Rsync_Option>                 ssh -1                                  </Rsync_Option>
242 +<Rsync_Compress>               yes                                     </Rsync_Compress>
243 +<Pg_Dump_Path>                 /usr/local/pgsql/bin/pg_dump            </Pg_Dump_Path>
244 +<When_Stand_Alone>             read_only                               </When_Stand_Alone>
245 +<Replication_Timeout>          1min                                    </Replication_Timeout>
246 +<LifeCheck_Timeout>            3s                                      </LifeCheck_Timeout>
247 +<LifeCheck_Interval>           11s                                     </LifeCheck_Interval>
248 +#-------------------------------------------------------------
249 +# set partitional replicate control information
250 +#       set DB name and Table name to stop replication
251 +#         o DB_Name :          DB name
252 +#         o Table_Name :       Table name
253 +#-------------------------------------------------------------
254 +#<Not_Replicate_Info>
255 +#      <DB_Name>               test_db         </DB_Name>
256 +#      <Table_Name>            log_table       </Table_Name>
257 +#</Not_Replicate_Info>
258 +
259 +3-3. Replication Server
260 +
261 +The setup file of the replication server is copied from the sample file and edited.
262 +(the sample file is installed in '/usr/local/pgsql/share' by default)
263 +----------------------------------------------------------------
264 +$cd /usr/local/pgsql/share
265 +$cp pgreplicate.conf.sample pgreplicate.conf
266 +----------------------------------------------------------------
267 +In the case of the above system composition example,
268 +the setup example of pgreplicate.conf file is as the following 
269 +
270 +#============================================================
271 +#                               PGReplicate configuration file
272 +#-------------------------------------------------------------
273 +# file: pgreplicate.conf
274 +#-------------------------------------------------------------
275 +# This file controls:
276 +#         o which hosts & port are cluster server
277 +#         o which port use for replication request from cluster server
278 +#============================================================
279 +#-------------------------------------------------------------
280 +# set cluster DB server information
281 +#              o Host_Name :           hostname
282 +#              o Port :                Connection port for postmaster
283 +#              o Recovery_Port :       Connection port for recovery
284 +#-------------------------------------------------------------
285 +<Cluster_Server_Info>
286 +       <Host_Name>             c1.pgcluster.org        </Host_Name>
287 +       <Port>                  5432                    </Port>
288 +       <Recovery_Port>         7001                    </Recovery_Port>
289 +</Cluster_Server_Info>
290 +<Cluster_Server_Info>
291 +       <Host_Name>             c2.pgcluster.org        </Host_Name>
292 +       <Port>                  5432                    </Port>
293 +       <Recovery_Port>         7002                    </Recovery_Port>
294 +</Cluster_Server_Info>
295 +#-------------------------------------------------------------
296 +# set Load Balance server information
297 +#              o Host_Name :           hostname
298 +#              o Recovery_Port :       Connection port for recovery
299 +#-------------------------------------------------------------
300 +<LoadBalance_Server_Info>
301 +       <Host_Name>             lb.pgcluster.org        </Host_Name>
302 +       <Recovery_Port>         6001                    </Recovery_Port>
303 +</LoadBalance_Server_Info>
304 +#------------------------------------------------------------
305 +# A setup of the cascade connection between replication servers.
306 +# When you do not use RLOG recovery, you can skip this setup
307 +#
308 +#              o Host_Name :           The host name of the upper replication server.
309 +#                                      Please write a host name by FQDN or IP address.
310 +#              o Port :                The connection port with postmaster.
311 +#              o Recovery_Port :       The connection port at the time of 
312 +#                                      a recovery sequence .
313 +#------------------------------------------------------------
314 +#<Replicate_Server_Info>
315 +#      <Host_Name>             upper_replicate.pgcluster.org           </Host_Name>
316 +#      <Port>                  8002                                    </Port>
317 +#      <Recovery_Port>         8102                                    </Recovery_Port>
318 +#</Replicate_Server_Info>
319 +#
320 +#-------------------------------------------------------------
321 +# A setup of a replication server
322 +#
323 +#              o Host_Name :           The host name of this replication server.
324 +#                                      Please write a host name by FQDN or IP address.
325 +#              o Replication_Port :    Connection port for replication
326 +#              o Recovery_Port :       Connection port for recovery
327 +#              o RLOG_Port :           Connection port for replication log
328 +#              o Response_Mode :       Timing at which a response is returned
329 +#                                       - normal   -- return result of DB which received the query
330 +#                                       - reliable -- return result after waiting for response of 
331 +#                                      all Cluster DBs.
332 +#              o Use_Replication_Log : Use replication log
333 +#                                      [yes/no]. default : no
334 +#              o Replication_Timeout : Timeout of each replication response
335 +#              o Lifecheck_Timeout :   Timeout of the lifecheck response
336 +#              o Lifecheck_Interval :  Interval time of the lifecheck
337 +#                      (range 1s - 1h)
338 +#                      10s             -- 10 seconds
339 +#                      10min           -- 10 minutes
340 +#                      1h              -- 1 hours
341 +#-------------------------------------------------------------
342 +<Host_Name>                    pgr.pgcluster.org       </Host_Name>
343 +<Replication_Port>             8001                    </Replication_Port>
344 +<Recovery_Port>                8101                    </Recovery_Port>
345 +<RLOG_Port>                    8301                    </RLOG_Port>
346 +<Response_Mode>                normal                  </Response_Mode>
347 +<Use_Replication_Log>          no                      </Use_Replication_Log>
348 +<Replication_Timeout>          1min                    </Replication_Timeout>
349 +<LifeCheck_Timeout>            3s                      </LifeCheck_Timeout>
350 +<LifeCheck_Interval>           15s                     </LifeCheck_Interval>
351 +#-------------------------------------------------------------
352 +# A setup of a log files 
353 +#
354 +#              o File_Name :   Log file name with full path
355 +#              o File_Size :   maximum size of each log files
356 +#                              Please specify in a number and unit(K or M)
357 +#                                10    -- 10 Byte
358 +#                                10K   -- 10 KByte
359 +#                                10M   -- 10 MByte
360 +#              o Rotate :      Rotation times
361 +#                              If specified 0, old versions are removed.
362 +#-------------------------------------------------------------
363 +<Log_File_Info>
364 +       <File_Name>     /tmp/pgreplicate.log    </File_Name>
365 +       <File_Size>     1M                      </File_Size>
366 +       <Rotate>        3                       </Rotate>
367 +</Log_File_Info>
368 +
369 +=============================================================
370 +4. Start Up / Stop
371 +=============================================================
372 +
373 +4-1. replication server
374 +
375 +A. Start replication server
376 +----------------------------------------------------------------
377 +$ /usr/local/pgsql/bin/pgreplicate -D /usr/local/pgsql/share
378 +----------------------------------------------------------------
379 +
380 +B. Stop replication server
381 +----------------------------------------------------------------
382 +$ /usr/local/pgsql/bin/pgreplicate -D /usr/local/pgsql/share stop
383 +----------------------------------------------------------------
384 +
385 +usage: pgreplicate [-D path_of_config_file] [-W path_of_work_files]
386 +[-w wait time before fork process][-U login user][-l][-n][-v][-h][stop]
387 +       -l: print error logs in the log file.
388 +       -n: don't run in daemon mode.
389 +       -v: debug mode. need '-n' flag
390 +       -h: print this help
391 +       stop: stop pgreplicate
392 +(config file default path: ./pgreplicate.conf)
393 +
394 +4-2. cluster DB server
395 +$PG_HOME = /usr/local/pgsql
396 +$PG_DATA = /usr/local/pgsql/data
397 +
398 +A. Start cluster DB server
399 +----------------------------------------------------------------
400 +$ /usr/local/pgsql/bin/pg_ctl start -D /usr/local/pgsql/data 
401 +----------------------------------------------------------------
402 +
403 +B. Stop cluster DB server
404 +----------------------------------------------------------------
405 +$ /usr/local/pgsql/bin/pg_ctl stop -D /usr/local/pgsql/data
406 +----------------------------------------------------------------
407 +
408 +C-1. Restart (recovery) cluster DB server with backup
409 +----------------------------------------------------------------
410 +$ /usr/local/pgsql/bin/pg_ctl start -D /usr/local/pgsql/data -o "-R"
411 +----------------------------------------------------------------
412 +
413 +C-2. Restart (recovery) cluster DB server without backup
414 +----------------------------------------------------------------
415 +$ /usr/local/pgsql/bin/pg_ctl start -D /usr/local/pgsql/data -o "-r"
416 +----------------------------------------------------------------
417 +
418 +D. Upgrade cluster DB server with pg_dump
419 +----------------------------------------------------------------
420 +$ /usr/local/pgsql/bin/pg_ctl start -D /usr/local/pgsql/data -o "-U"
421 +----------------------------------------------------------------
422 +
423 +4-3. load balance server
424 +
425 +A. Start load balance server
426 +----------------------------------------------------------------
427 +$ /usr/local/pgsql/bin/pglb -D /usr/local/pgsql/share
428 +----------------------------------------------------------------
429 +
430 +B. Stop load balance server
431 +----------------------------------------------------------------
432 +$ /usr/local/pgsql/bin/pglb -D /usr/local/pgsql/share stop
433 +----------------------------------------------------------------
434 +
435 +usage: pglb [-D path_of_config_file] [-W path_of_work_files] [-n][-v][-h][stop]
436 +       -l: print error logs in the log file.
437 +       -n: don't run in daemon mode.
438 +       -v: debug mode. need '-n' flag
439 +       -h: print this help
440 +       stop: stop pglb
441 +       (config file default path: ./pglb.conf)
442 diff -aruN postgresql-8.2.4/README_PGCLUSTER pgcluster-1.7.0rc7/README_PGCLUSTER
443 --- postgresql-8.2.4/README_PGCLUSTER   1970-01-01 01:00:00.000000000 +0100
444 +++ pgcluster-1.7.0rc7/README_PGCLUSTER 2007-02-19 01:00:40.000000000 +0100
445 @@ -0,0 +1,118 @@
446 +PGCluster: Multi-Master Synchronous Replication System for PostgreSQL
447 +===========================================================
448 +
449 +PGCluster is a multi-master and synchronous replication system that supports load balancing of PostgreSQL.
450 +
451 +Changed:
452 +       $INSTALL_DIR/GNUmakefile.in
453 +       $INSTALL_DIR/INSTALL_PGCLUSTER
454 +       $INSTALL_DIR/README_PGCLUSTER
455 +       $INSTALL_DIR/configure
456 +       $INSTALL_DIR/configure.in
457 +       $INSTALL_DIR/pgcluster.sh.tmpl
458 +       $INSTALL_DIR/src/Makefile
459 +       $INSTALL_DIR/src/Makefile.global.in
460 +       $INSTALL_DIR/src/backend/Makefile
461 +       $INSTALL_DIR/src/backend/access/transam/clog.c
462 +       $INSTALL_DIR/src/backend/access/transam/xact.c
463 +       $INSTALL_DIR/src/backend/catalog/catalog.c
464 +       $INSTALL_DIR/src/backend/commands/analyze.c
465 +       $INSTALL_DIR/src/backend/commands/copy.c
466 +       $INSTALL_DIR/src/backend/commands/sequence.c
467 +       $INSTALL_DIR/src/backend/executor/functions.c
468 +       $INSTALL_DIR/src/backend/libpq/Makefile
469 +       $INSTALL_DIR/src/backend/libpq/be-fsstubs.c
470 +       $INSTALL_DIR/src/backend/libpq/cluster.conf.sample
471 +       $INSTALL_DIR/src/backend/libpq/recovery.c
472 +       $INSTALL_DIR/src/backend/libpq/lifecheck.c
473 +       $INSTALL_DIR/src/backend/libpq/replicate.c
474 +       $INSTALL_DIR/src/backend/libpq/replicate_com.c
475 +       $INSTALL_DIR/src/backend/main/main.c
476 +       $INSTALL_DIR/src/backend/parser/gram.y
477 +       $INSTALL_DIR/src/backend/parser/keywords.c
478 +       $INSTALL_DIR/src/backend/parser/parse_clause.c
479 +       $INSTALL_DIR/src/backend/parser/parse_relation.c
480 +       $INSTALL_DIR/src/backend/postmaster/postmaster.c
481 +       $INSTALL_DIR/src/backend/storage/large_object/inv_api.c
482 +       $INSTALL_DIR/src/backend/storage/lmgr/deadlock.c
483 +       $INSTALL_DIR/src/backend/storage/lmgr/lmgr.c
484 +       $INSTALL_DIR/src/backend/storage/lmgr/lock.c
485 +       $INSTALL_DIR/src/backend/storage/lmgr/proc.c
486 +       $INSTALL_DIR/src/backend/tcop/postgres.c
487 +       $INSTALL_DIR/src/backend/tcop/pquery.c
488 +       $INSTALL_DIR/src/backend/tcop/utility.c
489 +       $INSTALL_DIR/src/backend/utils/adt/float.c
490 +       $INSTALL_DIR/src/backend/utils/adt/nabstime.c
491 +       $INSTALL_DIR/src/backend/utils/adt/ri_triggers.c
492 +       $INSTALL_DIR/src/backend/utils/adt/timestamp.c
493 +       $INSTALL_DIR/src/backend/utils/error/assert.c
494 +       $INSTALL_DIR/src/backend/utils/error/elog.c
495 +       $INSTALL_DIR/src/backend/utils/fmgr/fmgr.c
496 +       $INSTALL_DIR/src/backend/utils/mb/mbutils.c
497 +       $INSTALL_DIR/src/backend/utils/misc/guc.c
498 +       $INSTALL_DIR/src/backend/utils/misc/postgresql.conf.sample
499 +       $INSTALL_DIR/src/bin/initdb/initdb.c
500 +       $INSTALL_DIR/src/bin/pg_dump/pg_dump.c
501 +       $INSTALL_DIR/src/bin/pg_dump/pg_dumpall.c
502 +       $INSTALL_DIR/src/include/pg_config.h.in
503 +       $INSTALL_DIR/src/include/replicate.h
504 +       $INSTALL_DIR/src/include/replicate_com.h
505 +       $INSTALL_DIR/src/include/storage/lmgr.h
506 +       $INSTALL_DIR/src/include/storage/proc.h
507 +       $INSTALL_DIR/src/interfaces/libpq/Makefile
508 +       $INSTALL_DIR/src/makefiles/Makefile.aix
509 +       $INSTALL_DIR/src/makefiles/Makefile.freebsd
510 +       $INSTALL_DIR/src/makefiles/Makefile.hpux
511 +       $INSTALL_DIR/src/makefiles/Makefile.linux
512 +       $INSTALL_DIR/src/makefiles/Makefile.netbsd
513 +       $INSTALL_DIR/src/makefiles/Makefile.openbsd
514 +       $INSTALL_DIR/src/makefiles/Makefile.solaris
515 +       $INSTALL_DIR/src/makefiles/Makefile.sunos4
516 +Added:
517 +       $INSTALL_DIR/src/pgcluster/Makefile
518 +       $INSTALL_DIR/src/pgcluster/libpgc/Makefile
519 +       $INSTALL_DIR/src/pgcluster/libpgc/libpgc.h
520 +       $INSTALL_DIR/src/pgcluster/libpgc/sem.c
521 +       $INSTALL_DIR/src/pgcluster/libpgc/show.c
522 +       $INSTALL_DIR/src/pgcluster/libpgc/signal.c
523 +       $INSTALL_DIR/src/pgcluster/pglb/AUTHORS
524 +       $INSTALL_DIR/src/pgcluster/pglb/COPYING
525 +       $INSTALL_DIR/src/pgcluster/pglb/Makefile
526 +       $INSTALL_DIR/src/pgcluster/pglb/child.c
527 +       $INSTALL_DIR/src/pgcluster/pglb/cluster_table.c
528 +       $INSTALL_DIR/src/pgcluster/pglb/lifecheck.c
529 +       $INSTALL_DIR/src/pgcluster/pglb/load_balance.c
530 +       $INSTALL_DIR/src/pgcluster/pglb/main.c
531 +       $INSTALL_DIR/src/pgcluster/pglb/pglb.conf.sample
532 +       $INSTALL_DIR/src/pgcluster/pglb/pglb.h
533 +       $INSTALL_DIR/src/pgcluster/pglb/pool_auth.c
534 +       $INSTALL_DIR/src/pgcluster/pglb/pool_connection_pool.c
535 +       $INSTALL_DIR/src/pgcluster/pglb/pool_params.c
536 +       $INSTALL_DIR/src/pgcluster/pglb/pool_process_query.c
537 +       $INSTALL_DIR/src/pgcluster/pglb/pool_stream.c
538 +       $INSTALL_DIR/src/pgcluster/pglb/recovery.c
539 +       $INSTALL_DIR/src/pgcluster/pglb/socket.c
540 +       $INSTALL_DIR/src/pgcluster/pgrp/AUTHORS
541 +       $INSTALL_DIR/src/pgcluster/pgrp/COPYING
542 +       $INSTALL_DIR/src/pgcluster/pgrp/Makefile
543 +       $INSTALL_DIR/src/pgcluster/pgrp/cascade.c
544 +       $INSTALL_DIR/src/pgcluster/pgrp/conf.c
545 +       $INSTALL_DIR/src/pgcluster/pgrp/lifecheck.c
546 +       $INSTALL_DIR/src/pgcluster/pgrp/main.c
547 +       $INSTALL_DIR/src/pgcluster/pgrp/pgreplicate.conf.sample
548 +       $INSTALL_DIR/src/pgcluster/pgrp/pgreplicate.h
549 +       $INSTALL_DIR/src/pgcluster/pgrp/pqformat.c
550 +       $INSTALL_DIR/src/pgcluster/pgrp/recovery.c
551 +       $INSTALL_DIR/src/pgcluster/pgrp/replicate.c
552 +       $INSTALL_DIR/src/pgcluster/pgrp/rlog.c
553 +       $INSTALL_DIR/src/pgcluster/tool/Makefile
554 +       $INSTALL_DIR/src/pgcluster/tool/README.jp
555 +       $INSTALL_DIR/src/pgcluster/tool/pgcbench.c
556 +       $INSTALL_DIR/src/pgcluster/tool/pgcbench.sh
557 +       $INSTALL_DIR/src/pgcluster/tool/tpc-b_like.sql
558 +
559 +The latest version of this software may be obtained at
560 +http://pgfoundry.org/projects/pgcluster/
561 +
562 +For more information, see the pgFoundry web site located at
563 +http://pgcluster.projects.postgresql.org/
564 diff -aruN postgresql-8.2.4/configure pgcluster-1.7.0rc7/configure
565 --- postgresql-8.2.4/configure  2007-02-07 04:48:58.000000000 +0100
566 +++ pgcluster-1.7.0rc7/configure        2007-03-01 16:27:35.000000000 +0100
567 @@ -275,6 +275,8 @@
568  PACKAGE_STRING='PostgreSQL 8.2.4'
569  PACKAGE_BUGREPORT='pgsql-bugs@postgresql.org'
570  
571 +PGCLUSTER_VERSION='1.7.0rc7'
572 +
573  ac_unique_file="src/backend/access/common/heaptuple.c"
574  ac_default_prefix=/usr/local/pgsql
575  # Factoring default headers for most tests.
576 @@ -314,7 +316,7 @@
577  # include <unistd.h>
578  #endif"
579  
580 -ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS configure_args build build_cpu build_vendor build_os host host_cpu host_vendor host_os PORTNAME docdir enable_nls WANTED_LANGUAGES default_port enable_shared enable_rpath enable_debug DTRACE DTRACEFLAGS enable_dtrace CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP GCC TAS autodepend INCLUDES enable_thread_safety with_tcl with_perl with_python with_krb5 krb_srvtab with_pam with_ldap with_bonjour with_openssl with_zlib EGREP ELF_SYS LDFLAGS_SL AWK FLEX FLEXFLAGS LN_S LD with_gnu_ld ld_R_works RANLIB ac_ct_RANLIB TAR STRIP ac_ct_STRIP STRIP_STATIC_LIB STRIP_SHARED_LIB YACC YFLAGS PERL perl_archlibexp perl_privlibexp perl_useshrplib perl_embed_ldflags PYTHON python_version python_configdir python_includespec python_libdir python_libspec python_additional_libs HAVE_IPV6 LIBOBJS acx_pthread_config PTHREAD_CC PTHREAD_LIBS PTHREAD_CFLAGS LDAP_LIBS_FE LDAP_LIBS_BE HAVE_POSIX_SIGNALS MSGFMT MSGMERGE XGETTEXT localedir TCLSH TCL_CONFIG_SH TCL_INCLUDE_SPEC TCL_LIB_FILE TCL_LIBS TCL_LIB_SPEC TCL_SHARED_BUILD TCL_SHLIB_LD_LIBS NSGMLS JADE have_docbook DOCBOOKSTYLE COLLATEINDEX SGMLSPL vpath_build LTLIBOBJS'
581 +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS configure_args build build_cpu build_vendor build_os host host_cpu host_vendor host_os PORTNAME docdir enable_nls WANTED_LANGUAGES default_port enable_shared enable_rpath enable_debug DTRACE DTRACEFLAGS enable_dtrace CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP GCC TAS autodepend INCLUDES enable_thread_safety with_tcl with_perl with_python with_krb5 krb_srvtab with_pam with_ldap with_bonjour with_openssl with_zlib EGREP ELF_SYS LDFLAGS_SL AWK FLEX FLEXFLAGS LN_S LD with_gnu_ld ld_R_works RANLIB ac_ct_RANLIB TAR STRIP ac_ct_STRIP STRIP_STATIC_LIB STRIP_SHARED_LIB YACC YFLAGS PERL perl_archlibexp perl_privlibexp perl_useshrplib perl_embed_ldflags PYTHON python_version python_configdir python_includespec python_libdir python_libspec python_additional_libs HAVE_IPV6 LIBOBJS acx_pthread_config PTHREAD_CC PTHREAD_LIBS PTHREAD_CFLAGS LDAP_LIBS_FE LDAP_LIBS_BE HAVE_POSIX_SIGNALS MSGFMT MSGMERGE XGETTEXT localedir TCLSH TCL_CONFIG_SH TCL_INCLUDE_SPEC TCL_LIB_FILE TCL_LIBS TCL_LIB_SPEC TCL_SHARED_BUILD TCL_SHLIB_LD_LIBS NSGMLS JADE have_docbook DOCBOOKSTYLE COLLATEINDEX SGMLSPL vpath_build LTLIBOBJS PGCLUSTER_VERSION'
582  ac_subst_files=''
583  
584  # Initialize some variables set by options.
585 @@ -1241,6 +1243,10 @@
586  #define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
587  _ACEOF
588  
589 +cat >>confdefs.h <<_ACEOF
590 +#define PGCLUSTER_VERSION "$PGCLUSTER_VERSION"
591 +_ACEOF
592 +
593  
594  # Let the site file select an alternate cache file if it wants to.
595  # Prefer explicitly selected file to automatically selected ones.
596 @@ -23555,6 +23561,7 @@
597  s,@host_os@,$host_os,;t t
598  s,@PORTNAME@,$PORTNAME,;t t
599  s,@docdir@,$docdir,;t t
600 +s,@PGCLUSTER_VERSION@,$PGCLUSTER_VERSION,;t t
601  s,@enable_nls@,$enable_nls,;t t
602  s,@WANTED_LANGUAGES@,$WANTED_LANGUAGES,;t t
603  s,@default_port@,$default_port,;t t
604 diff -aruN postgresql-8.2.4/configure.in pgcluster-1.7.0rc7/configure.in
605 --- postgresql-8.2.4/configure.in       2007-02-07 04:48:58.000000000 +0100
606 +++ pgcluster-1.7.0rc7/configure.in     2007-02-18 22:52:16.000000000 +0100
607 @@ -27,6 +27,7 @@
608  AC_SUBST(configure_args, [$ac_configure_args])
609  
610  AC_DEFINE_UNQUOTED(PG_VERSION, "$PACKAGE_VERSION", [PostgreSQL version as a string])
611 +AC_DEFINE_UNQUOTED(PGCLUSTER_VERSION, "$PGCLUSTER_VERSION", [PGCluster version])
612  
613  AC_CANONICAL_HOST
614  
615 diff -aruN postgresql-8.2.4/pgcluster.sh.tmpl pgcluster-1.7.0rc7/pgcluster.sh.tmpl
616 --- postgresql-8.2.4/pgcluster.sh.tmpl  1970-01-01 01:00:00.000000000 +0100
617 +++ pgcluster-1.7.0rc7/pgcluster.sh.tmpl        2007-02-18 22:52:16.000000000 +0100
618 @@ -0,0 +1,56 @@
619 +#!/bin/sh
620 +#
621 +# $FreeBSD: ports/databases/pgcluster/files/pgcluster.sh.tmpl,v 1.1 2004/01/26 09:02:45 kuriyama Exp $
622 +#
623 +# PROVIDE: pgcluster
624 +# REQUIRE: DAEMON
625 +# BEFORE:  pgreplicate
626 +# KEYWORD: FreeBSD
627 +#
628 +# Add the following line to /etc/rc.conf to enable pgcluster:
629 +#
630 +# pgcluster_enable="YES"
631 +# # optional
632 +# pgcluster_data="/home/pgsql/data"
633 +# pgcluster_flags="-w -s"
634 +#
635 +
636 +pgcluster_enable="NO"
637 +pgcluster_data="%%PREFIX%%/pgsql/data"
638 +pgcluster_flags="-w -s"
639 +
640 +. %%RC_SUBR%%
641 +
642 +load_rc_config pgcluster
643 +
644 +name=pgcluster
645 +command=%%PREFIX%%/bin/pg_ctl
646 +pgcluster_user=pgsql
647 +extra_commands="initdb recover"
648 +initdb_cmd="pgcluster_initdb"
649 +recover_cmd="pgcluster_recover"
650 +start_cmd="pgcluster_start"
651 +stop_cmd="pgcluster_stop"
652 +
653 +pgcluster_flags="${pgcluster_flags} -D ${pgcluster_data}"
654 +pidfile="${pgcluster_data}/postmaster.pid"
655 +
656 +pgcluster_start()
657 +{
658 +    su -m ${pgcluster_user} -c "exec ${command} start ${pgcluster_flags} -o '-i'"
659 +}
660 +pgcluster_stop()
661 +{
662 +    su -m ${pgcluster_user} -c "exec ${command} stop ${pgcluster_flags} -m i"
663 +}
664 +pgcluster_recover()
665 +{
666 +    su -m ${pgcluster_user} -c "exec ${command} start ${pgcluster_flags} -o '-i -R'"
667 +}
668 +pgcluster_initdb()
669 +{
670 +    su -m ${pgcluster_user} -c "exec %%PREFIX%%/bin/initdb -D ${pgcluster_data}"
671 +}
672 +
673 +# rc.conf is already loaded above; a second load_rc_config would re-source /etc/rc.conf.d/pgcluster and drop the -D added to pgcluster_flags
674 +run_rc_command "$1"
675 diff -aruN postgresql-8.2.4/src/Makefile pgcluster-1.7.0rc7/src/Makefile
676 --- postgresql-8.2.4/src/Makefile       2006-06-23 01:50:35.000000000 +0200
677 +++ pgcluster-1.7.0rc7/src/Makefile     2007-02-18 22:52:16.000000000 +0100
678 @@ -16,14 +16,15 @@
679  all install installdirs uninstall dep depend distprep:
680         $(MAKE) -C port $@
681         $(MAKE) -C timezone $@
682 +       $(MAKE) -C interfaces $@
683         $(MAKE) -C backend $@
684         $(MAKE) -C backend/utils/mb/conversion_procs $@
685         $(MAKE) -C include $@
686 -       $(MAKE) -C interfaces $@
687         $(MAKE) -C bin $@
688         $(MAKE) -C pl $@
689         $(MAKE) -C makefiles $@
690         $(MAKE) -C test/regress $@
691 +       $(MAKE) -C pgcluster $@
692  
693  install: install-local
694  
695 @@ -44,6 +45,7 @@
696         rm -f $(addprefix '$(DESTDIR)$(pgxsdir)/$(subdir)'/, Makefile.global Makefile.port Makefile.shlib nls-global.mk)
697  
698  clean:
699 +       $(MAKE) -C pgcluster $@
700         $(MAKE) -C port $@
701         $(MAKE) -C timezone $@
702         $(MAKE) -C backend $@
703 @@ -57,6 +59,7 @@
704         $(MAKE) -C test/thread $@
705  
706  distclean maintainer-clean:
707 +       -$(MAKE) -C pgcluster $@
708         -$(MAKE) -C port $@
709         -$(MAKE) -C timezone $@
710         -$(MAKE) -C backend $@
711 diff -aruN postgresql-8.2.4/src/Makefile.global.in pgcluster-1.7.0rc7/src/Makefile.global.in
712 --- postgresql-8.2.4/src/Makefile.global.in     2006-10-08 19:15:33.000000000 +0200
713 +++ pgcluster-1.7.0rc7/src/Makefile.global.in   2007-02-18 22:52:16.000000000 +0100
714 @@ -31,6 +31,9 @@
715  # PostgreSQL version number
716  VERSION = @PACKAGE_VERSION@
717  
718 +# PGCluster version number
719 +PGCLUSTER_VERSION = @PGCLUSTER_VERSION@
720 +
721  # Support for VPATH builds
722  vpath_build = @vpath_build@
723  abs_top_srcdir = @abs_top_srcdir@
724 @@ -207,6 +210,7 @@
725  GCC = @GCC@
726  CFLAGS = @CFLAGS@
727  
728 +CFLAGS += -DUSE_REPLICATION -DPRINT_DEBUG
729  # Kind-of compilers
730  
731  YACC = @YACC@
732 diff -aruN postgresql-8.2.4/src/backend/Makefile pgcluster-1.7.0rc7/src/backend/Makefile
733 --- postgresql-8.2.4/src/backend/Makefile       2006-10-08 19:15:33.000000000 +0200
734 +++ pgcluster-1.7.0rc7/src/backend/Makefile     2007-02-18 22:52:16.000000000 +0100
735 @@ -39,7 +39,7 @@
736  ifneq ($(PORTNAME), win32)
737  ifneq ($(PORTNAME), aix)
738  
739 -postgres: $(OBJS)
740 +postgres: $(OBJS) $(libpq_srcdir)/libpq.a
741         $(CC) $(CFLAGS) $(LDFLAGS) $(export_dynamic) $^ $(LIBS) -o $@
742  
743  endif
744 @@ -169,6 +169,7 @@
745         $(INSTALL_DATA) $(srcdir)/libpq/pg_ident.conf.sample '$(DESTDIR)$(datadir)/pg_ident.conf.sample'
746         $(INSTALL_DATA) $(srcdir)/utils/misc/postgresql.conf.sample '$(DESTDIR)$(datadir)/postgresql.conf.sample'
747         $(INSTALL_DATA) $(srcdir)/access/transam/recovery.conf.sample '$(DESTDIR)$(datadir)/recovery.conf.sample'
748 +       $(INSTALL_DATA) $(srcdir)/libpq/cluster.conf.sample '$(DESTDIR)$(datadir)/cluster.conf.sample'
749  
750  install-bin: postgres $(POSTGRES_IMP) installdirs
751         $(INSTALL_PROGRAM) postgres$(X) '$(DESTDIR)$(bindir)/postgres$(X)'
752 @@ -221,8 +222,9 @@
753         $(MAKE) -C catalog uninstall-data
754         rm -f '$(DESTDIR)$(datadir)/pg_hba.conf.sample' \
755               '$(DESTDIR)$(datadir)/pg_ident.conf.sample' \
756 -              '$(DESTDIR)$(datadir)/postgresql.conf.sample' \
757 -             '$(DESTDIR)$(datadir)/recovery.conf.sample'
758 +                 '$(DESTDIR)$(datadir)/postgresql.conf.sample' \
759 +             '$(DESTDIR)$(datadir)/recovery.conf.sample' \
760 +                 '$(DESTDIR)$(datadir)/cluster.conf.sample'
761  
762  
763  ##########################################################################
764 diff -aruN postgresql-8.2.4/src/backend/access/transam/clog.c pgcluster-1.7.0rc7/src/backend/access/transam/clog.c
765 --- postgresql-8.2.4/src/backend/access/transam/clog.c  2006-11-05 23:42:07.000000000 +0100
766 +++ pgcluster-1.7.0rc7/src/backend/access/transam/clog.c        2007-02-18 22:52:16.000000000 +0100
767 @@ -57,6 +57,9 @@
768  #define TransactionIdToByte(xid)       (TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE)
769  #define TransactionIdToBIndex(xid)     ((xid) % (TransactionId) CLOG_XACTS_PER_BYTE)
770  
771 +#ifdef USE_REPLICATION
772 +#include "replicate.h"
773 +#endif /* USE_REPLICATION */
774  
775  /*
776   * Link to shared-memory data structures for CLOG control
777 @@ -335,7 +338,16 @@
778  
779         /* Check to see if there's any files that could be removed */
780         if (!SlruScanDirectory(ClogCtl, cutoffPage, false))
781 +#ifdef USE_REPLICATION
782 +        {
783 +               /* Perform a forced CHECKPOINT */
784 +               /* CreateCheckPoint(false, true); */
785 +               RequestCheckpoint(true, false);
786 +#endif  /* USE_REPLICATION */
787                 return;                                 /* nothing to remove */
788 +#ifdef USE_REPLICATION
789 +        }
790 +#endif  /* USE_REPLICATION */
791  
792         /* Write XLOG record and flush XLOG to disk */
793         WriteTruncateXlogRec(cutoffPage);
794 diff -aruN postgresql-8.2.4/src/backend/access/transam/xact.c pgcluster-1.7.0rc7/src/backend/access/transam/xact.c
795 --- postgresql-8.2.4/src/backend/access/transam/xact.c  2006-11-23 02:14:59.000000000 +0100
796 +++ pgcluster-1.7.0rc7/src/backend/access/transam/xact.c        2007-02-18 22:52:16.000000000 +0100
797 @@ -44,6 +44,9 @@
798  #include "utils/relcache.h"
799  #include "utils/guc.h"
800  
801 +#ifdef USE_REPLICATION
802 +#include "replicate.h"
803 +#endif /* USE_REPLICATION */
804  
805  /*
806   *     User-tweakable parameters
807 @@ -4335,3 +4338,11 @@
808         else
809                 appendStringInfo(buf, "UNKNOWN");
810  }
811 +
812 +#ifdef USE_REPLICATION
813 +void
814 +PGR_Reload_Start_Time(void)
815 +{
816 +       xactStartTimestamp = GetCurrentTimestamp();     /* overwrite the recorded start timestamp with the current time */
817 +}
818 +#endif /* USE_REPLICATION */
819 diff -aruN postgresql-8.2.4/src/backend/catalog/catalog.c pgcluster-1.7.0rc7/src/backend/catalog/catalog.c
820 --- postgresql-8.2.4/src/backend/catalog/catalog.c      2006-10-04 02:29:50.000000000 +0200
821 +++ pgcluster-1.7.0rc7/src/backend/catalog/catalog.c    2007-02-18 22:52:16.000000000 +0100
822 @@ -38,6 +38,9 @@
823  #include "utils/fmgroids.h"
824  #include "utils/relcache.h"
825  
826 +#ifdef USE_REPLICATION
827 +#include "replicate.h"
828 +#endif /* USE_REPLICATION */
829  
830  #define OIDCHARS       10                      /* max chars printed by %u */
831  
832 @@ -360,7 +363,7 @@
833  Oid
834  GetNewOidWithIndex(Relation relation, Relation indexrel)
835  {
836 -       Oid                     newOid;
837 +       Oid                     newOid = 0;
838         IndexScanDesc scan;
839         ScanKeyData key;
840         bool            collides;
841 @@ -368,8 +371,18 @@
842         /* Generate new OIDs until we find one not in the table */
843         do
844         {
845 +#ifdef USE_REPLICATION
846 +               if (PGR_Is_Sync_OID == true)
847 +               {
848 +                       newOid = PGRGetNewObjectId(newOid);
849 +               }
850 +               else
851 +               {
852 +                       newOid = GetNewObjectId();
853 +               }
854 +#else
855                 newOid = GetNewObjectId();
856 -
857 +#endif /* USE_REPLICATION */
858                 ScanKeyInit(&key,
859                                         (AttrNumber) 1,
860                                         BTEqualStrategyNumber, F_OIDEQ,
861 @@ -454,3 +467,4 @@
862  
863         return rnode.relNode;
864  }
865 +
866 diff -aruN postgresql-8.2.4/src/backend/commands/analyze.c pgcluster-1.7.0rc7/src/backend/commands/analyze.c
867 --- postgresql-8.2.4/src/backend/commands/analyze.c     2006-11-05 23:42:08.000000000 +0100
868 +++ pgcluster-1.7.0rc7/src/backend/commands/analyze.c   2007-02-18 22:52:16.000000000 +0100
869 @@ -36,6 +36,9 @@
870  #include "utils/syscache.h"
871  #include "utils/tuplesort.h"
872  
873 +#ifdef USE_REPLICATION
874 +#include "replicate.h"
875 +#endif /* USE_REPLICATION */
876  
877  /* Data structure for Algorithm S from Knuth 3.4.2 */
878  typedef struct
879 @@ -934,7 +937,11 @@
880  static double
881  random_fract(void)
882  {
883 +#ifdef USE_REPLICATION
884 +       return ((double) PGR_Random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
885 +#else
886         return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
887 +#endif /* USE_REPLICATION */
888  }
889  
890  /*
891 diff -aruN postgresql-8.2.4/src/backend/commands/copy.c pgcluster-1.7.0rc7/src/backend/commands/copy.c
892 --- postgresql-8.2.4/src/backend/commands/copy.c        2006-10-06 19:13:58.000000000 +0200
893 +++ pgcluster-1.7.0rc7/src/backend/commands/copy.c      2007-02-18 22:52:16.000000000 +0100
894 @@ -41,6 +41,9 @@
895  #include "utils/lsyscache.h"
896  #include "utils/memutils.h"
897  
898 +#ifdef USE_REPLICATION
899 +#include "replicate.h"
900 +#endif /* USE_REPLICATION */
901  
902  #define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
903  #define OCTVALUE(c) ((c) - '0')
904 @@ -488,6 +491,9 @@
905  CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
906  {
907         int                     bytesread = 0;
908 +#ifdef USE_REPLICATION
909 +       char * ptr = (char *)databuf;
910 +#endif
911  
912         switch (cstate->copy_dest)
913         {
914 @@ -578,6 +584,9 @@
915                         }
916                         break;
917         }
918 +#ifdef USE_REPLICATION
919 +       PGR_Set_Copy_Data(PGRCopyData,ptr,bytesread,0);
920 +#endif /* USE_REPLICATION */
921  
922         return bytesread;
923  }
924 @@ -2093,6 +2102,13 @@
925                 }
926         }
927  
928 +#ifdef USE_REPLICATION
929 +       if (done)
930 +       {
931 +               PGR_Set_Copy_Data(PGRCopyData,(char *)NULL,0,1);
932 +       }
933 +#endif /* USE_REPLICATION */
934 +
935         /* Done, clean up */
936         error_context_stack = errcontext.previous;
937  
938 @@ -2201,6 +2217,11 @@
939                                 break;
940                 }
941         }
942 +#ifdef USE_REPLICATION
943 +       /*
944 +       PGR_Set_Copy_Data(PGRCopyData,cstate->line_buf.data,cstate->line_buf.len,0);
945 +       */
946 +#endif
947  
948         /* Done reading the line.  Convert it to server encoding. */
949         if (cstate->need_transcoding)
950 diff -aruN postgresql-8.2.4/src/backend/commands/prepare.c pgcluster-1.7.0rc7/src/backend/commands/prepare.c
951 --- postgresql-8.2.4/src/backend/commands/prepare.c     2006-10-04 02:29:51.000000000 +0200
952 +++ pgcluster-1.7.0rc7/src/backend/commands/prepare.c   2007-02-18 22:52:16.000000000 +0100
953 @@ -29,6 +29,9 @@
954  #include "utils/builtins.h"
955  #include "utils/memutils.h"
956  
957 +#ifdef USE_REPLICATION
958 +#include "replicate.h"
959 +#endif /* USE_REPLICATION */
960  
961  /*
962   * The hash table in which prepared queries are stored. This is
963 @@ -793,3 +796,27 @@
964         result = construct_array(tmp_ary, len, REGTYPEOID, 4, true, 'i');
965         return PointerGetDatum(result);
966  }
967 +
968 +
969 +#ifdef USE_REPLICATION
970 +/*
971 + * PGR_is_select_prepared_statement
972 + *      True only when stmt names an existing prepared statement whose
973 + *      command tag is "SELECT"; false for NULL input or an unknown name.
974 + */
975 +bool
976 +PGR_is_select_prepared_statement(PrepareStmt *stmt)
977 +{
978 +       PreparedStatement *entry;
979 +
980 +       if ((stmt == NULL) || (stmt->name == NULL))
981 +               return false;
982 +
983 +       /* throwError = false: an unknown name must come back as NULL, not ereport(ERROR) */
984 +       entry = FetchPreparedStatement(stmt->name, false);
985 +       if ((entry == NULL) || (entry->commandTag == NULL))
986 +               return false;
987 +       return (strcmp(entry->commandTag, "SELECT") == 0);
988 +}
989 +#endif /* USE_REPLICATION */
990 +
991 diff -aruN postgresql-8.2.4/src/backend/commands/sequence.c pgcluster-1.7.0rc7/src/backend/commands/sequence.c
992 --- postgresql-8.2.4/src/backend/commands/sequence.c    2006-10-06 19:13:58.000000000 +0200
993 +++ pgcluster-1.7.0rc7/src/backend/commands/sequence.c  2007-02-18 22:52:16.000000000 +0100
994 @@ -31,6 +31,9 @@
995  #include "utils/resowner.h"
996  #include "utils/syscache.h"
997  
998 +#ifdef USE_REPLICATION
999 +#include "replicate.h"
1000 +#endif /* USE_REPLICATION */
1001  
1002  /*
1003   * We don't want to log each fetching of a value from a sequence,
1004 @@ -396,6 +399,9 @@
1005         RangeVar   *sequence;
1006         Oid                     relid;
1007  
1008 +#ifdef USE_REPLICATION
1009 +       Xlog_Check_Replicate(CMD_UTILITY);
1010 +#endif /* USE_REPLICATION */
1011         sequence = makeRangeVarFromNameList(textToQualifiedNameList(seqin));
1012         relid = RangeVarGetRelid(sequence, false);
1013  
1014 @@ -622,6 +628,10 @@
1015         SeqTable        elm;
1016         Relation        seqrel;
1017  
1018 +#ifdef USE_REPLICATION
1019 +       Xlog_Check_Replicate(CMD_UTILITY);
1020 +#endif /* USE_REPLICATION */
1021 +
1022         /* open and AccessShareLock sequence */
1023         init_sequence(relid, &elm, &seqrel);
1024  
1025 diff -aruN postgresql-8.2.4/src/backend/executor/functions.c pgcluster-1.7.0rc7/src/backend/executor/functions.c
1026 --- postgresql-8.2.4/src/backend/executor/functions.c   2007-02-02 01:03:17.000000000 +0100
1027 +++ pgcluster-1.7.0rc7/src/backend/executor/functions.c 2007-02-18 22:52:16.000000000 +0100
1028 @@ -30,6 +30,9 @@
1029  #include "utils/syscache.h"
1030  #include "utils/typcache.h"
1031  
1032 +#ifdef USE_REPLICATION
1033 +#include "replicate.h"
1034 +#endif /* USE_REPLICATION */
1035  
1036  /*
1037   * We have an execution_state record for each query in a function.     Each
1038 @@ -454,6 +457,13 @@
1039         Datum           value;
1040         MemoryContext oldcontext;
1041  
1042 +#ifdef USE_REPLICATION
1043 +       if ((es != NULL) && (es->qd != NULL))
1044 +       {
1045 +               Xlog_Check_Replicate(es->qd->operation);
1046 +       }
1047 +#endif /* USE_REPLICATION */
1048 +
1049         if (es->status == F_EXEC_START)
1050                 postquel_start(es, fcache);
1051  
1052 diff -aruN postgresql-8.2.4/src/backend/libpq/Makefile pgcluster-1.7.0rc7/src/backend/libpq/Makefile
1053 --- postgresql-8.2.4/src/backend/libpq/Makefile 2003-11-29 20:51:49.000000000 +0100
1054 +++ pgcluster-1.7.0rc7/src/backend/libpq/Makefile       2007-02-18 22:52:16.000000000 +0100
1055 @@ -15,7 +15,8 @@
1056  # be-fsstubs is here for historical reasons, probably belongs elsewhere
1057  
1058  OBJS = be-fsstubs.o be-secure.o auth.o crypt.o hba.o ip.o md5.o pqcomm.o \
1059 -       pqformat.o pqsignal.o
1060 +       pqformat.o pqsignal.o \
1061 +          replicate.o replicate_com.o recovery.o lifecheck.o
1062  
1063  
1064  all: SUBSYS.o
1065 diff -aruN postgresql-8.2.4/src/backend/libpq/auth.c pgcluster-1.7.0rc7/src/backend/libpq/auth.c
1066 --- postgresql-8.2.4/src/backend/libpq/auth.c   2006-11-06 02:27:52.000000000 +0100
1067 +++ pgcluster-1.7.0rc7/src/backend/libpq/auth.c 2007-02-18 22:52:16.000000000 +0100
1068 @@ -31,6 +31,9 @@
1069  #include "libpq/pqformat.h"
1070  #include "storage/ipc.h"
1071  
1072 +#ifdef USE_REPLICATION
1073 +#include "replicate.h"
1074 +#endif /* USE_REPLICATION */
1075  
1076  static void sendAuthRequest(Port *port, AuthRequest areq);
1077  static void auth_failed(Port *port, int status);
1078 @@ -888,6 +891,12 @@
1079  {
1080         StringInfoData buf;
1081  
1082 +#ifdef USE_REPLICATION
1083 +       if (PGR_password == NULL)
1084 +       {
1085 +               return NULL;
1086 +       }
1087 +#endif /* USE_REPLICATION */
1088         if (PG_PROTOCOL_MAJOR(port->proto) >= 3)
1089         {
1090                 /* Expect 'p' message type */
1091 @@ -939,6 +948,19 @@
1092         ereport(DEBUG5,
1093                         (errmsg("received password packet")));
1094  
1095 +#ifdef USE_REPLICATION
1096 +       if (strncmp(buf.data,"md5",3) == 0)
1097 +       {
1098 +               char * ptr = NULL;
1099 +               ptr = strchr(buf.data,'(');
1100 +               if (ptr != NULL)
1101 +               {
1102 +                       PGR_get_md5salt(PGR_password->md5Salt,ptr);
1103 +                       *ptr='\0';
1104 +               }
1105 +       }
1106 +       strncpy(PGR_password->password,buf.data, PASSWORD_MAX_LENGTH );
1107 +#endif /* USE_REPLICATION */
1108         /*
1109          * Return the received string.  Note we do not attempt to do any
1110          * character-set conversion on it; since we don't yet know the client's
1111 diff -aruN postgresql-8.2.4/src/backend/libpq/be-fsstubs.c pgcluster-1.7.0rc7/src/backend/libpq/be-fsstubs.c
1112 --- postgresql-8.2.4/src/backend/libpq/be-fsstubs.c     2006-09-07 17:37:25.000000000 +0200
1113 +++ pgcluster-1.7.0rc7/src/backend/libpq/be-fsstubs.c   2007-02-18 22:52:16.000000000 +0100
1114 @@ -49,6 +49,9 @@
1115  #include "storage/large_object.h"
1116  #include "utils/memutils.h"
1117  
1118 +#ifdef USE_REPLICATION
1119 +#include "replicate.h"
1120 +#endif /* USE_REPLICATION */
1121  
1122  /*#define FSDB 1*/
1123  #define BUFSIZE                        8192
1124 @@ -93,6 +96,19 @@
1125         LargeObjectDesc *lobjDesc;
1126         int                     fd;
1127  
1128 +#ifdef USE_REPLICATION
1129 +       if ((PGR_Stand_Alone != NULL) &&
1130 +               (PGR_lo_open(lobjId,mode) != STATUS_OK))
1131 +       {
1132 +               if ((mode & INV_WRITE) &&
1133 +                       (PGR_Is_Stand_Alone() == true) &&
1134 +                       (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
1135 +               {
1136 +                       elog(WARNING, "This query is not permitted when all replication servers fell down ");
1137 +                       PG_RETURN_INT32(-1);
1138 +               }
1139 +       }
1140 +#endif /* USE_REPLICATION */
1141  #if FSDB
1142         elog(DEBUG4, "lo_open(%u,%d)", lobjId, mode);
1143  #endif
1144 @@ -126,6 +142,9 @@
1145                                  errmsg("invalid large-object descriptor: %d", fd)));
1146                 PG_RETURN_INT32(-1);
1147         }
1148 +#ifdef USE_REPLICATION
1149 +       PGR_lo_close(fd);
1150 +#endif
1151  #if FSDB
1152         elog(DEBUG4, "lo_close(%d)", fd);
1153  #endif
1154 @@ -183,6 +202,18 @@
1155                           errmsg("large object descriptor %d was not opened for writing",
1156                                          fd)));
1157  
1158 +#ifdef USE_REPLICATION
1159 +       if ((PGR_Stand_Alone != NULL) &&
1160 +               (PGR_lo_write(fd, buf, len) != STATUS_OK))
1161 +       {
1162 +               if ((PGR_Is_Stand_Alone() == true) &&
1163 +                       (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
1164 +               {
1165 +                       elog(WARNING, "This query is not permitted when all replication servers fell down ");
1166 +                       return -1;
1167 +               }
1168 +       }
1169 +#endif
1170         status = inv_write(cookies[fd], buf, len);
1171  
1172         return status;
1173 @@ -205,6 +236,10 @@
1174                 PG_RETURN_INT32(-1);
1175         }
1176  
1177 +#ifdef USE_REPLICATION
1178 +       PGR_lo_lseek(fd, offset, whence);
1179 +#endif /* USE_REPLICATION */
1180 +
1181         status = inv_seek(cookies[fd], offset, whence);
1182  
1183         PG_RETURN_INT32(status);
1184 @@ -221,6 +256,18 @@
1185          */
1186         CreateFSContext();
1187  
1188 +#ifdef USE_REPLICATION
1189 +       if ((PGR_Stand_Alone != NULL) &&
1190 +               (PGR_lo_create(InvalidOid) != STATUS_OK))
1191 +       {
1192 +               if ((PGR_Is_Stand_Alone() == true) &&
1193 +                       (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
1194 +               {
1195 +                       elog(WARNING, "This query is not permitted when all replication servers fell down ");
1196 +                       PG_RETURN_INT32(-1);
1197 +               }
1198 +       }
1199 +#endif /* USE_REPLICATION */
1200         lobjId = inv_create(InvalidOid);
1201  
1202         PG_RETURN_OID(lobjId);
1203 @@ -231,6 +278,18 @@
1204  {
1205         Oid                     lobjId = PG_GETARG_OID(0);
1206  
1207 +#ifdef USE_REPLICATION
1208 +       if ((PGR_Stand_Alone != NULL) &&
1209 +               (PGR_lo_create(lobjId) != STATUS_OK))
1210 +       {
1211 +               if ((PGR_Is_Stand_Alone() == true) &&
1212 +                       (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
1213 +               {
1214 +                       elog(WARNING, "This query is not permitted when all replication servers fell down ");
1215 +                       PG_RETURN_INT32(-1);
1216 +               }
1217 +       }
1218 +#endif /* USE_REPLICATION */
1219         /*
1220          * We don't actually need to store into fscxt, but create it anyway to
1221          * ensure that AtEOXact_LargeObject knows there is state to clean up
1222 @@ -263,6 +322,18 @@
1223  {
1224         Oid                     lobjId = PG_GETARG_OID(0);
1225  
1226 +#ifdef USE_REPLICATION
1227 +       if ((PGR_Stand_Alone != NULL)  &&
1228 +               (PGR_lo_unlink(lobjId) != STATUS_OK))
1229 +       {
1230 +               if ((PGR_Is_Stand_Alone() == true) &&
1231 +                       (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
1232 +               {
1233 +                       elog(WARNING, "This query is not permitted when all replication servers fell down ");
1234 +                       return -1;
1235 +               }
1236 +       }
1237 +#endif /* USE_REPLICATION */
1238         /*
1239          * If there are any open LO FDs referencing that ID, close 'em.
1240          */
1241 @@ -360,6 +431,19 @@
1242                 nbytes = MAXPGPATH - 1;
1243         memcpy(fnamebuf, VARDATA(filename), nbytes);
1244         fnamebuf[nbytes] = '\0';
1245 +
1246 +#ifdef USE_REPLICATION
1247 +       if ((PGR_Stand_Alone != NULL) &&
1248 +               (PGR_lo_import((char*)fnamebuf) != STATUS_OK))
1249 +       {
1250 +               if ((PGR_Is_Stand_Alone() == true) &&
1251 +                       (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
1252 +               {
1253 +                       elog(WARNING, "This query is not permitted when all replication servers fell down ");
1254 +                       return -1;
1255 +               }
1256 +       }
1257 +#endif
1258         fd = PathNameOpenFile(fnamebuf, O_RDONLY | PG_BINARY, 0666);
1259         if (fd < 0)
1260                 ereport(ERROR,
1261 @@ -372,6 +456,7 @@
1262          */
1263         lobjOid = inv_create(InvalidOid);
1264  
1265 +
1266         /*
1267          * read in from the filesystem and write to the inversion object
1268          */
1269 diff -aruN postgresql-8.2.4/src/backend/libpq/cluster.conf.sample pgcluster-1.7.0rc7/src/backend/libpq/cluster.conf.sample
1270 --- postgresql-8.2.4/src/backend/libpq/cluster.conf.sample      1970-01-01 01:00:00.000000000 +0100
1271 +++ pgcluster-1.7.0rc7/src/backend/libpq/cluster.conf.sample    2007-02-18 22:52:16.000000000 +0100
1272 @@ -0,0 +1,71 @@
1273 +#============================================================
1274 +#          Cluster DB Server configuration file
1275 +#------------------------------------------------------------
1276 +# file: cluster.conf
1277 +#------------------------------------------------------------
1278 +# This file controls:
1279 +#       o which hosts & ports act as replication servers
1280 +#       o which port is used for replication requests to the replication server
1281 +#       o which commands are used for the recovery function
1282 +#============================================================
1283 +#------------------------------------------------------------
1284 +# set Replication Server information
1285 +#              o Host_Name :           hostname
1286 +#              o Port :                Connection port for postmaster
1287 +#              o Recovery_Port :       Connection port for recovery process
1288 +#------------------------------------------------------------
1289 +<Replicate_Server_Info>
1290 +       <Host_Name>             replicate1.pgcluster.org        </Host_Name>
1291 +       <Port>                  8001                            </Port>
1292 +       <Recovery_Port>         8101                            </Recovery_Port>
1293 +</Replicate_Server_Info>
1294 +#<Replicate_Server_Info>
1295 +#      <Host_Name>             replicate2.pgcluster.org        </Host_Name>
1296 +#      <Port>                  8002                            </Port>
1297 +#      <Recovery_Port>         8102                            </Recovery_Port>
1298 +#</Replicate_Server_Info>
1299 +#<Replicate_Server_Info>
1300 +#      <Host_Name>             replicate3.pgcluster.org        </Host_Name>
1301 +#      <Port>                  8003                            </Port>
1302 +#      <Recovery_Port>         8103                            </Recovery_Port>
1303 +#</Replicate_Server_Info>
1304 +#-------------------------------------------------------------
1305 +# set Cluster DB Server information
1306 +#              o Host_Name :           Host name which connect with replication server
1307 +#              o Recovery_Port :       Connection port for recovery
1308 +#              o Rsync_Path :          Path of rsync command 
1309 +#              o Rsync_Option :        File transfer option for rsync
1310 +#              o Rsync_Compress :      Use compression option for rsync
1311 +#                                      [yes/no]. default : yes
1312 +#              o Pg_Dump_Path :        Path of pg_dump
1313 +#              o When_Stand_Alone :    When all replication servers are down,
1314 +#                                      one of two permissions can be set:
1315 +#                                      "read_only" or "read_write".
1316 +#              o Replication_Timeout : Timeout of each replication request
1317 +#              o LifeCheck_Timeout :   Timeout of the lifecheck response
1318 +#              o LifeCheck_Interval :  Interval time of the lifecheck
1319 +#                              (range 1s - 1h)
1320 +#                              10s   -- 10 seconds
1321 +#                              10min -- 10 minutes
1322 +#                              1h    -- 1 hour
1323 +#-------------------------------------------------------------
1324 +<Host_Name>                    cluster1.pgcluster.org          </Host_Name>
1325 +<Recovery_Port>                7001                            </Recovery_Port>
1326 +<Rsync_Path>                   /usr/bin/rsync                  </Rsync_Path>
1327 +<Rsync_Option>                 ssh -1                          </Rsync_Option>
1328 +<Rsync_Compress>               yes                             </Rsync_Compress>
1329 +<Pg_Dump_Path>                 /usr/local/pgsql/bin/pg_dump    </Pg_Dump_Path>
1330 +<When_Stand_Alone>             read_only                       </When_Stand_Alone>
1331 +<Replication_Timeout>          1 min                           </Replication_Timeout>
1332 +<LifeCheck_Timeout>            3s                              </LifeCheck_Timeout>
1333 +<LifeCheck_Interval>           11s                             </LifeCheck_Interval>
1334 +#-------------------------------------------------------------
1335 +# set partial replication control information
1336 +#     set DB name and Table name to stop replication
1337 +#       o DB_Name :            DB name
1338 +#       o Table_Name :         Table name
1339 +#-------------------------------------------------------------
1340 +#<Not_Replicate_Info>
1341 +#      <DB_Name>               test_db         </DB_Name>
1342 +#      <Table_Name>            log_table       </Table_Name>
1343 +#</Not_Replicate_Info>
1344 diff -aruN postgresql-8.2.4/src/backend/libpq/crypt.c pgcluster-1.7.0rc7/src/backend/libpq/crypt.c
1345 --- postgresql-8.2.4/src/backend/libpq/crypt.c  2006-07-14 16:52:19.000000000 +0200
1346 +++ pgcluster-1.7.0rc7/src/backend/libpq/crypt.c        2007-02-18 22:52:16.000000000 +0100
1347 @@ -23,6 +23,9 @@
1348  #include "libpq/crypt.h"
1349  #include "libpq/md5.h"
1350  
1351 +#ifdef USE_REPLICATION
1352 +#include "replicate.h"
1353 +#endif /* USE_REPLICATION */
1354  
1355  int
1356  md5_crypt_verify(const Port *port, const char *role, char *client_pass)
1357 @@ -72,13 +75,34 @@
1358                         if (isMD5(shadow_pass))
1359                         {
1360                                 /* stored password already encrypted, only do salt */
1361 -                               if (!pg_md5_encrypt(shadow_pass + strlen("md5"),
1362 -                                                                       (char *) port->md5Salt,
1363 +#ifdef USE_REPLICATION
1364 +                               if ((PGR_password != NULL) && 
1365 +                                               ((PGR_password->md5Salt[0] | 
1366 +                                               PGR_password->md5Salt[1] | 
1367 +                                               PGR_password->md5Salt[2] | 
1368 +                                               PGR_password->md5Salt[3]) != 0 ))
1369 +                               {
1370 +                                       if (!pg_md5_encrypt(shadow_pass + strlen("md5"),
1371 +                                                                       (char *) PGR_password->md5Salt,
1372                                                                         sizeof(port->md5Salt), crypt_pwd))
1373 +                                       {
1374 +                                               pfree(crypt_pwd);
1375 +                                               return STATUS_ERROR;
1376 +                                       }
1377 +                               }
1378 +                               else
1379                                 {
1380 -                                       pfree(crypt_pwd);
1381 -                                       return STATUS_ERROR;
1382 +#endif /* USE_REPLICATION */
1383 +                                       if (!pg_md5_encrypt(shadow_pass + strlen("md5"),
1384 +                                                                               (char *) port->md5Salt,
1385 +                                                                               sizeof(port->md5Salt), crypt_pwd))
1386 +                                       {
1387 +                                               pfree(crypt_pwd);
1388 +                                               return STATUS_ERROR;
1389 +                                       }
1390 +#ifdef USE_REPLICATION
1391                                 }
1392 +#endif /* USE_REPLICATION */
1393                         }
1394                         else
1395                         {
1396 @@ -134,6 +158,16 @@
1397  
1398         if (strcmp(crypt_client_pass, crypt_pwd) == 0)
1399         {
1400 +#ifdef USE_REPLICATION
1401 +               /*
1402 +               if (*(PGR_password->password) != '\0')
1403 +               {
1404 +                       memset(PGR_password->password,0,PASSWORD_MAX_LENGTH);
1405 +                       memset(PGR_password->md5Salt,0,sizeof(PGR_password->md5Salt));
1406 +                       memset(PGR_password->cryptSalt,0,sizeof(PGR_password->cryptSalt));
1407 +               }
1408 +               */
1409 +#endif /* USE_REPLICATION */
1410                 /*
1411                  * Password OK, now check to be sure we are not past valuntil
1412                  */
1413 diff -aruN postgresql-8.2.4/src/backend/libpq/lifecheck.c pgcluster-1.7.0rc7/src/backend/libpq/lifecheck.c
1414 --- postgresql-8.2.4/src/backend/libpq/lifecheck.c      1970-01-01 01:00:00.000000000 +0100
1415 +++ pgcluster-1.7.0rc7/src/backend/libpq/lifecheck.c    2007-03-01 16:27:15.000000000 +0100
1416 @@ -0,0 +1,281 @@
1417 +/*--------------------------------------------------------------------
1418 + * FILE:
1419 + *     lifecheck.c
1420 + *
1421 + * NOTE:
1422 + *     This file contains the functions that the backend calls
1423 + *     to perform the lifecheck.
1424 + *     The low-level I/O functions called by these functions are
1425 + *     contained in 'replicate_com.c'.
1426 + *
1427 + *--------------------------------------------------------------------
1428 + */
1429 +
1430 +#ifdef USE_REPLICATION
1431 +
1432 +#include "postgres.h"
1433 +
1434 +#include <stdio.h>
1435 +#include <unistd.h>
1436 +#include <signal.h>
1437 +#include <sys/wait.h>
1438 +#include <ctype.h>
1439 +#include <time.h>
1440 +#include <pwd.h>
1441 +#include <sys/time.h>
1442 +#include <sys/types.h>
1443 +#include <sys/stat.h>
1444 +#include <sys/socket.h>
1445 +#include <sys/ipc.h>
1446 +#include <sys/shm.h>
1447 +#include <netdb.h>
1448 +#include <netinet/in.h>
1449 +#include <errno.h>
1450 +#include <fcntl.h>
1451 +#include <time.h>
1452 +#include <sys/param.h>
1453 +#include <sys/select.h>
1454 +#include <netinet/tcp.h>
1455 +#include <arpa/inet.h>
1456 +#include <sys/file.h>
1457 +#include <dirent.h>
1458 +
1459 +#include "libpq/pqsignal.h"
1460 +#include "utils/guc.h"
1461 +#include "miscadmin.h"
1462 +#include "nodes/nodes.h"
1463 +#include "nodes/parsenodes.h"
1464 +#include "access/xact.h"
1465 +#include "access/xlog.h"
1466 +#include "tcop/tcopprot.h"
1467 +#include "postmaster/postmaster.h"
1468 +
1469 +#include "replicate.h"
1470 +
1471 +#ifdef WIN32
1472 +#include "win32.h"
1473 +#else
1474 +#ifdef HAVE_NETINET_TCP_H
1475 +#include <netinet/tcp.h>
1476 +#endif
1477 +#include <arpa/inet.h>
1478 +#endif
1479 +
1480 +#ifndef HAVE_STRDUP
1481 +#include "strdup.h"
1482 +#endif
1483 +#ifdef HAVE_CRYPT_H
1484 +#include <crypt.h>
1485 +#endif
1486 +
1487 +#ifdef MULTIBYTE
1488 +#include "mb/pg_wchar.h"
1489 +#endif
1490 +
1491 +static void set_replication_server_status(int status);
1492 +static int send_lifecheck(int sock);
1493 +static int recv_lifecheck(int sock);
1494 +static void set_timeout(SIGNAL_ARGS);
1495 +static void exit_lifecheck(SIGNAL_ARGS);
1496 +
1497 +ReplicateServerInfo * PGR_Replicator_4_Lifecheck = NULL;
1498 +/* Fork the lifecheck process: the parent returns fork()'s result, the child polls the replication server. */
1499 +int
1500 +PGR_Lifecheck_Main(void)
1501 +{
1502 +       int status = STATUS_OK;
1503 +       int sock = -1;
1504 +       int pid = 0;
1505 +
1506 +       if ((pid = fork()) != 0 )
1507 +       {
1508 +               return pid;     /* parent, or fork() failure (pid < 0) */
1509 +       }
1510 +
1511 +       pqsignal(SIGHUP, exit_lifecheck);
1512 +       pqsignal(SIGTERM, exit_lifecheck);
1513 +       pqsignal(SIGINT, exit_lifecheck);
1514 +       pqsignal(SIGQUIT, exit_lifecheck);
1515 +       pqsignal(SIGALRM, set_timeout);
1516 +       PG_SETMASK(&UnBlockSig);
1517 +
1518 +       for (;;)
1519 +       {
1520 +               /* while the check returns NULL: cancel any alarm, idle one interval, try again */
1521 +               PGR_Replicator_4_Lifecheck = PGR_check_replicate_server_info();
1522 +               if (PGR_Replicator_4_Lifecheck == NULL)
1523 +               {
1524 +                       alarm(0);
1525 +                       sleep(PGR_Lifecheck_Interval);
1526 +                       continue;
1527 +               }
1528 +               /* get replication server information */
1529 +               PGR_Replicator_4_Lifecheck = PGR_get_replicate_server_info();
1530 +               if (PGR_Replicator_4_Lifecheck == NULL)
1531 +               {
1532 +                       if (Debug_pretty_print)
1533 +                       {
1534 +                               elog(DEBUG1,"not found replication server");
1535 +                       }
1536 +                       return STATUS_ERROR;    /* NOTE(review): this returns from the forked child -- confirm the caller expects that */
1537 +               }
1538 +               sock = PGR_get_replicate_server_socket( PGR_Replicator_4_Lifecheck , PGR_QUERY_SOCKET );
1539 +               if (sock < 0)
1540 +               {
1541 +                       set_replication_server_status(DATA_ERR);
1542 +                       if (Debug_pretty_print)
1543 +                               elog(DEBUG1,"get_replicate_server_socket failed");
1544 +                       continue;       /* NOTE(review): retries immediately, without sleeping */
1545 +               }
1546 +
1547 +               /* arm SIGALRM at twice the lifecheck timeout; set_timeout() handles it */
1548 +               alarm(PGR_Lifecheck_Timeout * 2);
1549 +
1550 +               /* send lifecheck to replication server */
1551 +               status = send_lifecheck(sock);
1552 +               if (status != STATUS_OK)
1553 +               {
1554 +                       set_replication_server_status(DATA_ERR);
1555 +                       close(sock);
1556 +                       sock = -1;
1557 +                       if (Debug_pretty_print)
1558 +                               elog(DEBUG1,"send life check failed");
1559 +                       continue;
1560 +               }
1561 +
1562 +               /* receive lifecheck response */
1563 +               status = recv_lifecheck(sock);
1564 +               if (status != STATUS_OK)
1565 +               {
1566 +                       set_replication_server_status(DATA_ERR);
1567 +                       close(sock);
1568 +                       sock = -1;
1569 +                       if (Debug_pretty_print)
1570 +                               elog(DEBUG1,"receive life check failed");
1571 +                       continue;
1572 +               }
1573 +               
1574 +               /* the round trip completed: cancel the pending alarm */
1575 +               alarm(0);
1576 +               set_replication_server_status(DATA_USE);
1577 +
1578 +               /* sleep until the next lifecheck is due */
1579 +               sleep(PGR_Lifecheck_Interval);
1580 +       }
1581 +}
1582 +/* Record a lifecheck result for PGR_Replicator_4_Lifecheck; DATA_ERR is reported only after repeated failures. */
1583 +static void
1584 +set_replication_server_status(int status)
1585 +{
1586 +       if (status == DATA_ERR)
1587 +       {
1588 +               PGR_Replicator_4_Lifecheck->retry_count ++;
1589 +               if (PGR_Replicator_4_Lifecheck->retry_count > MAX_RETRY_TIMES)  /* report only once the failure count exceeds the limit */
1590 +               {
1591 +                       PGR_Set_Replication_Server_Status(PGR_Replicator_4_Lifecheck, status);
1592 +               }
1593 +       }
1594 +       else
1595 +       {
1596 +               PGR_Replicator_4_Lifecheck->retry_count = 0;    /* any other status clears the failure count */
1597 +               PGR_Set_Replication_Server_Status(PGR_Replicator_4_Lifecheck, status);
1598 +       }
1599 +}
1600 +/* Send one lifecheck header on sock: STATUS_OK once fully sent (or when interrupted), STATUS_ERROR on failure. */
1601 +static int
1602 +send_lifecheck(int sock)
1603 +{
1604 +       ReplicateHeader header;
1605 +       fd_set    wmask;
1606 +       struct timeval timeout;
1607 +       int send_size = 0;
1608 +       int buf_size = 0;
1609 +       char * send_ptr = (char *)&header;
1610 +       int s = 0;
1611 +       int rtn = 0;
1612 +
1613 +       memset(&header,0,sizeof(ReplicateHeader));
1614 +       header.cmdSys = CMD_SYS_LIFECHECK;
1615 +       header.cmdSts = CMD_STS_CLUSTER;
1616 +       buf_size = sizeof(ReplicateHeader);
1617 +
1618 +       for (;;)
1619 +       {
1620 +               /* select() may modify timeout, so re-arm it on every pass */
1621 +               timeout.tv_sec = PGR_Lifecheck_Timeout;
1622 +               timeout.tv_usec = 0;
1623 +               FD_ZERO(&wmask);
1624 +               FD_SET(sock,&wmask);
1625 +               rtn = select(sock+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
1626 +               if (rtn < 0)
1627 +               {
1628 +                       if (errno == EINTR)
1629 +                       {
1630 +                               return STATUS_OK;       /* interrupted by a signal: reported as success */
1631 +                       }
1632 +                       else
1633 +                       {
1634 +                               elog(DEBUG1, "send_lifecheck():select() failed");
1635 +                               return STATUS_ERROR;
1636 +                       }
1637 +               }
1638 +               else if (rtn && FD_ISSET(sock, &wmask))
1639 +               {
1640 +                       s = send(sock,send_ptr + send_size,buf_size - send_size ,0);
1641 +                       if (s < 0){
1642 +                               if (errno == EINTR)
1643 +                               {
1644 +                                       return STATUS_OK;       /* interrupted by a signal: reported as success */
1645 +                               }
1646 +                               if (errno == EAGAIN)
1647 +                               {
1648 +                                       continue;
1649 +                               }
1650 +                               elog(DEBUG1, "send_lifecheck():send error");
1651 +       
1652 +                               /* EPIPE || ENCONNREFUSED || ENSOCK || EHOSTUNREACH */
1653 +                               return STATUS_ERROR;
1654 +                       } else if (s == 0) {
1655 +                               elog(DEBUG1, "send_lifecheck():unexpected EOF");
1656 +                               return STATUS_ERROR;
1657 +                       } else /*if (s > 0)*/ {
1658 +                               send_size += s;         /* partial send: loop to push the remainder */
1659 +                               if (send_size == buf_size)
1660 +                               {
1661 +                                       return STATUS_OK;
1662 +                               }
1663 +                       }
1664 +               }
1665 +       }
1666 +}
1667 +/* Receive the lifecheck reply via PGR_recv_replicate_result(); a negative result maps to STATUS_ERROR. */
1668 +static int
1669 +recv_lifecheck(int sock)
1670 +{
1671 +       int status = STATUS_OK;
1672 +       char result[PGR_MESSAGE_BUFSIZE];       /* reply buffer; its contents are not examined here */
1673 +
1674 +       memset(result,0,PGR_MESSAGE_BUFSIZE);
1675 +       status = PGR_recv_replicate_result(sock,result, PGR_Lifecheck_Timeout);
1676 +       return ((status >= 0) ?STATUS_OK:STATUS_ERROR);
1677 +}
1678 +/* SIGALRM handler of the lifecheck process: count the overdue lifecheck as a failure of the current server. */
1679 +static void
1680 +set_timeout(SIGNAL_ARGS)
1681 +{
1682 +       if (PGR_Replicator_4_Lifecheck != NULL) /* no server selected yet: nothing to mark */
1683 +       {
1684 +               set_replication_server_status(DATA_ERR);
1685 +               if (Debug_pretty_print)
1686 +                       elog(DEBUG1,"time out is occured in life check");       /* NOTE(review): elog() inside a signal handler -- confirm this is safe */
1687 +       }
1688 +}
1689 +/* Handler installed for SIGHUP/SIGTERM/SIGINT/SIGQUIT in the lifecheck process: log to stderr and exit. */
1690 +static void
1691 +exit_lifecheck(SIGNAL_ARGS)
1692 +{
1693 +       fprintf(stderr,"lifecheck stopped\n");
1694 +       exit(0);        /* always exits with status 0 */
1695 +}
1696 +
1697 +#endif /* USE_REPLICATION */
1698 diff -aruN postgresql-8.2.4/src/backend/libpq/recovery.c pgcluster-1.7.0rc7/src/backend/libpq/recovery.c
1699 --- postgresql-8.2.4/src/backend/libpq/recovery.c       1970-01-01 01:00:00.000000000 +0100
1700 +++ pgcluster-1.7.0rc7/src/backend/libpq/recovery.c     2007-02-18 22:52:16.000000000 +0100
1701 @@ -0,0 +1,1566 @@
1702 +/*--------------------------------------------------------------------
1703 + * FILE:
1704 + *     recovery.c
1705 + *
1706 + * NOTE:
1707 + *     This file contains the functions that the backend calls
1708 + *     to perform the recovery.
1709 + *     The low-level I/O functions called by these functions are
1710 + *     contained in 'replicate_com.c'.
1711 + *
1712 + *--------------------------------------------------------------------
1713 + */
1714 +
1715 +/*--------------------------------------
1716 + * INTERFACE ROUTINES
1717 + *
1718 + * I/O call:
1719 + *      PGR_recovery_finish_send
1720 + * master module:
1721 + *      PGR_Master_Main(void);
1722 + * recovery module:
1723 + *      PGR_Recovery_Main
1724 + *-------------------------------------
1725 + */
1726 +#ifdef USE_REPLICATION
1727 +
1728 +#include "postgres.h"
1729 +
1730 +#include <stdio.h>
1731 +#include <unistd.h>
1732 +#include <signal.h>
1733 +#include <sys/wait.h>
1734 +#include <ctype.h>
1735 +#include <time.h>
1736 +#include <pwd.h>
1737 +#include <sys/time.h>
1738 +#include <sys/types.h>
1739 +#include <sys/stat.h>
1740 +#include <sys/socket.h>
1741 +#include <sys/ipc.h>
1742 +#include <sys/shm.h>
1743 +#include <netdb.h>
1744 +#include <netinet/in.h>
1745 +#include <errno.h>
1746 +#include <fcntl.h>
1747 +#include <time.h>
1748 +#include <sys/param.h>
1749 +#include <sys/select.h>
1750 +#include <netinet/tcp.h>
1751 +#include <arpa/inet.h>
1752 +#include <sys/file.h>
1753 +#include <dirent.h>
1754 +
1755 +#include "libpq/pqsignal.h"
1756 +#include "utils/guc.h"
1757 +#include "miscadmin.h"
1758 +#include "nodes/nodes.h"
1759 +#include "nodes/parsenodes.h"
1760 +#include "access/xact.h"
1761 +#include "access/xlog.h"
1762 +#include "tcop/tcopprot.h"
1763 +#include "postmaster/postmaster.h"
1764 +
1765 +#include "../interfaces/libpq/libpq-fe.h"
1766 +#include "../interfaces/libpq/libpq-int.h"
1767 +#include "../interfaces/libpq/fe-auth.h"
1768 +
1769 +#include "replicate.h"
1770 +
1771 +#ifdef WIN32
1772 +#include "win32.h"
1773 +#else
1774 +#ifdef HAVE_NETINET_TCP_H
1775 +#include <netinet/tcp.h>
1776 +#endif
1777 +#include <arpa/inet.h>
1778 +#endif
1779 +
1780 +#ifndef HAVE_STRDUP
1781 +#include "strdup.h"
1782 +#endif
1783 +#ifdef HAVE_CRYPT_H
1784 +#include <crypt.h>
1785 +#endif
1786 +
1787 +#ifdef MULTIBYTE
1788 +#include "mb/pg_wchar.h"
1789 +#endif
1790 +
1791 +#define RECOVERY_LOOP_END      (0)
1792 +#define RECOVERY_LOOP_CONTINUE (1)
1793 +#define RECOVERY_LOOP_FAIL     (2)
1794 +char Local_Host_Name[HOSTNAME_MAX_LENGTH];
1795 +int PGR_Recovery_Mode = 0;
1796 +
1797 +static int read_packet(int sock,RecoveryPacket * packet);
1798 +static int send_recovery_packet(int  sock, RecoveryPacket * packet);
1799 +static int send_packet(int * sock, RecoveryPacket * packet );
1800 +static void master_loop(int fd);
1801 +static int start_recovery_send(int * sock, ReplicateServerInfo * host);
1802 +static int stop_recovery_send(int * sock, ReplicateServerInfo * host);
1803 +static int rsync_pg_data(char * src , char * dest);
1804 +static int remove_dir(char * dir_name);
1805 +static int clear_bkup_dir(char * dir_name);
1806 +static int bkup_dir(char * dir_name);
1807 +static int restore_dir(char * dir_name);
1808 +static int rsync_global_dir(char * src, char * dest, int stage);
1809 +static int first_recovery(char * src, char * dest, char * dir);
1810 +static int second_recovery(char * src, char * dest, char * dir);
1811 +static int recovery_rsync(char * src , char * dest, int stage);
1812 +static int recovery_loop(int fd, int mode);
1813 +static void show_recovery_packet(RecoveryPacket * packet);
1814 +static int direct_send_packet(int packet_no);
1815 +static void set_recovery_packet(RecoveryPacket * packet, int packet_no);
1816 +static int cold_recovery(char * src, RecoveryPacket *packet, bool need_sync_table_space, int stage);
1817 +static int hot_recovery(RecoveryPacket *packet, int stage);
1818 +static int restore_from_dumpall( char * hostName, uint16_t portNum, char * userName);
1819 +static int restore_from_dump( char * hostName, uint16_t portNum, char * userName, char * dbName);
1820 +static int restore_from_each_dump( char * hostName, uint16_t portNum, char * userName);
1821 +static PGresult * get_dbName(char * hostName, uint16_t portNum, char * userName);
1822 +
1823 +static int sync_table_space(char * hostName, uint16_t portNum, char * userName, int stage);
1824 +static PGresult * get_table_space_location(char * hostName, uint16_t portNum, char * userName);
1825 +static int rsync_table_space(char * hostName, char * location, int stage);
1826 +
1827 +int PGR_recovery_error_send(void);
1828 +int PGR_recovery_finish_send(void);
1829 +int PGR_recovery_queue_data_req(void);
1830 +int PGR_Master_Main(void);
1831 +int PGR_Recovery_Main(int mode);
1832 +/* Read exactly one RecoveryPacket from sock into *packet; returns its size, or -1 on error or EOF. */
1833 +static int
1834 +read_packet(int sock,RecoveryPacket * packet)
1835 +{
1836 +       int r;
1837 +       char * read_ptr;
1838 +       int read_size = 0;
1839 +       int packet_size = 0;
1840 +
1841 +       read_ptr = (char*)packet;
1842 +       packet_size = sizeof(RecoveryPacket);
1843 +       /* request only the bytes still missing, so a short read cannot overrun *packet */
1844 +       for (;;){
1845 +               r = recv(sock,read_ptr + read_size ,packet_size - read_size, MSG_WAITALL);
1846 +               if (r < 0) {
1847 +                       if (errno == EINTR || errno == EAGAIN) {
1848 +                               continue;       /* transient condition: retry the read */
1849 +                       } else {
1850 +                               elog(DEBUG1, "read_packet():recv failed");
1851 +                               return -1;
1852 +                       }
1853 +               } else if (r == 0) {
1854 +                       elog(DEBUG1, "read_packet():unexpected EOF");
1855 +                       return -1;
1856 +               } else /*if (r > 0)*/ {
1857 +                       read_size += r;
1858 +                       if (read_size == packet_size) {
1859 +                               show_recovery_packet(packet);
1860 +                               return read_size;
1861 +                       }
1862 +               }
1863 +       }
1864 +       return -1;      /* not reached: the loop above only leaves via return */
1865 +}
1866 +/* Send one whole RecoveryPacket on sock: STATUS_OK when every byte is sent, STATUS_ERROR otherwise. */
1867 +static int
1868 +send_recovery_packet(int  sock, RecoveryPacket * packet)
1869 +{
1870 +       char * send_ptr;
1871 +       int send_size= 0;
1872 +       int buf_size = 0;
1873 +       int s;
1874 +       int rtn;        
1875 +       fd_set    wmask;
1876 +       struct timeval timeout;
1877 +
1878 +       timeout.tv_sec = RECOVERY_TIMEOUT;      /* set again at the top of every pass below */
1879 +       timeout.tv_usec = 0;
1880 +
1881 +       /*
1882 +        * Wait until sock is writable, then send; repeat until the whole packet is out.
1883 +        */
1884 +       rtn = 1;
1885 +       while (rtn)
1886 +       {
1887 +               for (;;)
1888 +               {
1889 +                       timeout.tv_sec = RECOVERY_TIMEOUT;
1890 +                       timeout.tv_usec = 0;
1891 +
1892 +                       FD_ZERO(&wmask);
1893 +                       FD_SET(sock,&wmask);
1894 +                       rtn = select(sock+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
1895 +
1896 +                       if (rtn < 0)
1897 +                       {
1898 +                               if (errno == EINTR || errno == EAGAIN)
1899 +                               {
1900 +                                       continue;
1901 +                               }
1902 +                               else
1903 +                               {
1904 +                                       rtn = 0;        /* other select() error: leave both loops and fail */
1905 +                                       break;
1906 +                               }
1907 +                       }
1908 +                       else if (rtn && FD_ISSET(sock, &wmask))
1909 +                       {
1910 +                               send_ptr = (char *)packet;
1911 +                               buf_size = sizeof(RecoveryPacket);
1912 +
1913 +                               s = send(sock,send_ptr + send_size,buf_size - send_size ,0);
1914 +                               if (s < 0) {
1915 +                                       if (errno == EINTR || errno == EAGAIN) {
1916 +                                               continue;
1917 +                                       }
1918 +                                       elog(DEBUG1, "send_recovery_packet():send error");
1919 +
1920 +                                       /* EPIPE || ENCONNREFUSED || ENSOCK || EHOSTUNREACH */
1921 +                                       return STATUS_ERROR;
1922 +                               } else if (s == 0) {
1923 +                                       elog(DEBUG1, "send_recovery_packet():unexpected EOF");
1924 +                                       return STATUS_ERROR;
1925 +                               } else /*if (s > 0)*/ {
1926 +                                       send_size += s; /* partial send: loop to push the remainder */
1927 +                                       if (send_size == buf_size)
1928 +                                       {
1929 +                                               return STATUS_OK;
1930 +                                       }
1931 +                               }
1932 +                       }
1933 +               }
1934 +       }
1935 +       return STATUS_ERROR;
1936 +}
1937 +/* Send packet on *sock, retrying; after too many failures mark the server DATA_ERR and reconnect to another. */
1938 +static int
1939 +send_packet(int * sock, RecoveryPacket * packet )
1940 +{
1941 +       int count = 0;
1942 +       ReplicateServerInfo * host = NULL;
1943 +
1944 +       host = PGR_get_replicate_server_info();
1945 +       if (host == (ReplicateServerInfo*)NULL)
1946 +       {
1947 +               return STATUS_ERROR;
1948 +       }
1949 +       count = 0;
1950 +       while (send_recovery_packet(*sock,packet) != STATUS_OK)
1951 +       {
1952 +               if (count < MAX_RETRY_TIMES )
1953 +               {
1954 +                       count ++;
1955 +                       continue;       /* retry on the same socket, without delay */
1956 +               }
1957 +               count = 0;
1958 +               close(*sock);
1959 +               PGR_Set_Replication_Server_Status(host,DATA_ERR);
1960 +               host = PGR_get_replicate_server_info(); /* ask for a replication server again after marking this one DATA_ERR */
1961 +               if (host == (ReplicateServerInfo*)NULL)
1962 +               {
1963 +                       return STATUS_ERROR;
1964 +               }
1965 +               PGR_Set_Replication_Server_Status(host,DATA_USE);
1966 +               PGR_Create_Socket_Connect(sock, host->hostName , host->recoveryPortNumber);     /* NOTE(review): result is ignored -- confirm a failed connect is caught by the next send */
1967 +       }
1968 +       return STATUS_OK;
1969 +}
1970 +/* Accept one recovery connection on fd and answer its packets until a terminating packet or a read error. */
1971 +static void
1972 +master_loop(int fd)
1973 +{
1974 +       int count;
1975 +       int sock;
1976 +       int status = STATUS_OK;
1977 +       RecoveryPacket packet;
1978 +       int r_size = 0;
1979 +       bool loop_end = false;
1980 +
1981 +       count = 0;
1982 +       while ((status = PGR_Create_Acception(fd,&sock,"",RecoveryPortNumber)) != STATUS_OK)
1983 +       {
1984 +               PGR_Close_Sock(&sock);  /* NOTE(review): sock has no initializer -- confirm PGR_Create_Acception always sets it */
1985 +               sock = -1;
1986 +               if ( count > MAX_RETRY_TIMES)
1987 +               {
1988 +                       return;
1989 +               }
1990 +               count ++;
1991 +       }
1992 +       for(;;)
1993 +       {
1994 +               int     rtn;
1995 +               fd_set    rmask;
1996 +               struct timeval timeout;
1997 +
1998 +               timeout.tv_sec = RECOVERY_TIMEOUT;
1999 +               timeout.tv_usec = 0;
2000 +
2001 +               /*
2002 +                * Wait for a packet on the accepted socket; a timeout just polls again.
2003 +                */
2004 +               FD_ZERO(&rmask);
2005 +               FD_SET(sock,&rmask);
2006 +               memset(&packet,0,sizeof(RecoveryPacket));
2007 +               rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
2008 +               if (rtn && FD_ISSET(sock, &rmask))
2009 +               {
2010 +                       r_size = read_packet(sock,&packet);
2011 +                       if (r_size == 0)
2012 +                       {
2013 +                               continue;
2014 +                       }
2015 +                       else if (r_size < 0)
2016 +                       {
2017 +                           loop_end=true;
2018 +                           break;
2019 +                       }
2020 +               }
2021 +               else
2022 +               {
2023 +                       continue;
2024 +               }
2025 +               switch (ntohs(packet.packet_no))
2026 +               {
2027 +                       case RECOVERY_PGDATA_REQ :
2028 +                               /*
2029 +                                * PGDATA information request
2030 +                                */
2031 +                               /*
2032 +                                * build and send the RECOVERY_PGDATA_ANS reply
2033 +                                */
2034 +                               memset(&packet,0,sizeof(packet));
2035 +                               set_recovery_packet(&packet, RECOVERY_PGDATA_ANS) ;
2036 +                               status = send_packet(&sock,&packet);
2037 +                               PGR_Set_Cluster_Status(STATUS_RECOVERY);
2038 +                               break;
2039 +                       case RECOVERY_FSYNC_REQ : 
2040 +                               /*
2041 +                                * build and send the RECOVERY_FSYNC_ANS reply, then end the session
2042 +                                */
2043 +                               memset(&packet,0,sizeof(packet));
2044 +                               set_recovery_packet(&packet, RECOVERY_FSYNC_ANS );
2045 +                               status = send_packet(&sock,&packet);
2046 +                               PGR_Set_Cluster_Status(STATUS_RECOVERY);
2047 +                               loop_end = true;
2048 +                               break;
2049 +                       case RECOVERY_ERROR_TARGET_ONLY:        
2050 +                               memset(&packet,0,sizeof(packet));
2051 +                               set_recovery_packet(&packet, RECOVERY_ERROR_ANS );
2052 +                               status = send_packet(&sock,&packet);
2053 +                               PGR_Set_Cluster_Status(STATUS_REPLICATED);
2054 +                               break;
2055 +                       case RECOVERY_ERROR_CONNECTION:
2056 +                               memset(&packet,0,sizeof(packet));
2057 +                               set_recovery_packet(&packet, RECOVERY_ERROR_ANS );
2058 +                               status = send_packet(&sock,&packet);
2059 +                               PGR_Set_Cluster_Status(STATUS_REPLICATED);
2060 +                               /**
2061 +                                * Kill the broken cluster DB.
2062 +                                * FIXME: the signal must go to the postmaster's pid, not to this process.
2063 +                                * Because of a bug, MyProcPid is not initialized properly here, so MyProcPid = postmaster's pid.
2064 +                                * To fix this, define a dedicated variable that holds the postmaster's pid.
2065 +                                */
2066 +                               kill(MyProcPid,SIGQUIT);
2067 +                               loop_end = true;
2068 +                               break;
2069 +                       case RECOVERY_ERROR_ANS:
2070 +                         /* TODO: recovery failed; this postmaster should be shut down */
2071 +                               loop_end = true;
2072 +                               break;
2073 +                       case RECOVERY_FINISH:
2074 +                               PGR_Set_Cluster_Status(STATUS_REPLICATED);
2075 +                               loop_end = true;
2076 +                               break;
2077 +                       default:
2078 +                               loop_end = true;
2079 +                               break;
2080 +               }
2081 +               if (loop_end)
2082 +               {
2083 +                       break;
2084 +               }
2085 +       }
2086 +       PGR_Close_Sock(&sock);
2087 +}
2088 +/* Fork the recovery master process: the parent returns fork()'s result, the child serves the recovery port. */
2089 +int
2090 +PGR_Master_Main(void)
2091 +{
2092 +       int status;
2093 +       int fd = -1;
2094 +       int rtn;
2095 +       int pid;
2096 +
2097 +       if ((pid = fork()) != 0 )
2098 +       {
2099 +               return pid;     /* parent, or fork() failure (pid < 0) */
2100 +       }
2101 +       
2102 +       memset(Local_Host_Name,0,sizeof(Local_Host_Name));
2103 +       gethostname(Local_Host_Name,sizeof(Local_Host_Name));
2104 +       pqsignal(SIGHUP, authdie);
2105 +       pqsignal(SIGTERM, authdie);
2106 +       pqsignal(SIGINT, authdie);
2107 +       pqsignal(SIGQUIT, authdie);
2108 +       pqsignal(SIGALRM, authdie);
2109 +       PG_SETMASK(&UnBlockSig);
2110 +
2111 +       status = STATUS_ERROR;
2112 +       status = PGR_Create_Socket_Bind(&fd, "", RecoveryPortNumber);
2113 +
2114 +       if (status != STATUS_OK)
2115 +       {
2116 +               return pid;     /* NOTE(review): pid is 0 here, and this returns from the forked child -- confirm the caller expects that */
2117 +       }
2118 +       for (;;)
2119 +       {
2120 +               fd_set    rmask;
2121 +               struct timeval timeout;
2122 +
2123 +               timeout.tv_sec = 60;
2124 +               timeout.tv_usec = 0;
2125 +
2126 +               /*
2127 +                * Wait up to 60 seconds per pass for activity on the bound recovery socket.
2128 +                */
2129 +               FD_ZERO(&rmask);
2130 +               FD_SET(fd,&rmask);
2131 +               rtn = select(fd+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
2132 +               if (rtn && FD_ISSET(fd, &rmask))
2133 +               {
2134 +                       master_loop(fd);
2135 +               }
2136 +       }
2137 +       return pid;     /* not reached: the loop above never exits */
2138 +}
2139 +/* Connect *sock to host's recovery port and send a RECOVERY_PREPARE_REQ packet; returns STATUS_ERROR if the connect fails, otherwise send_packet()'s status. */
2140 +static int
2141 +start_recovery_send(int * sock, ReplicateServerInfo * host)
2142 +{
2143 +       int status;
2144 +       RecoveryPacket packet;
2145 +       status = PGR_Create_Socket_Connect(sock, host->hostName, host->recoveryPortNumber);
2146 +       if (status != STATUS_OK)
2147 +       {
2148 +               if (Debug_pretty_print)
2149 +               {
2150 +                       elog(DEBUG1,"connection error to replication server");
2151 +               }
2152 +               return STATUS_ERROR;
2153 +       }
2154 +
2155 +       memset(&packet,0,sizeof(packet));
2156 +       set_recovery_packet(&packet, RECOVERY_PREPARE_REQ );
2157 +       status = send_packet(sock,&packet);
2158 +
2159 +       return status;  /* *sock is not closed here, whatever the send result */
2160 +}
2161 +/* Send a RECOVERY_ERROR_ANS packet on *sock (no connection is made here) and return send_packet()'s status. */
2162 +static int
2163 +stop_recovery_send(int * sock, ReplicateServerInfo * host)
2164 +{
2165 +       int status;
2166 +       RecoveryPacket packet;
2167 +
2168 +       memset(&packet,0,sizeof(packet));
2169 +       set_recovery_packet(&packet, RECOVERY_ERROR_ANS );
2170 +       status = send_packet(sock,&packet);     /* the host argument is not used by this function */
2171 +       return status;
2172 +}
2173 +/* One-shot send: look up the replication server, connect to its recovery port, send a packet of type packet_no, close the socket and return the send status. */
2174 +static int
2175 +direct_send_packet(int packet_no)
2176 +{
2177 +
2178 +       int status;
2179 +       int fd = -1;
2180 +       ReplicateServerInfo * host;
2181 +       RecoveryPacket packet;
2182 +
2183 +       host = PGR_get_replicate_server_info();
2184 +       if (host == NULL)
2185 +       {
2186 +               return STATUS_ERROR;
2187 +       }
2188 +       status = PGR_Create_Socket_Connect(&fd, host->hostName, host->recoveryPortNumber);
2189 +       if (status != STATUS_OK)
2190 +       {
2191 +               PGR_Set_Replication_Server_Status(host,DATA_ERR);       /* flag the unreachable server */
2192 +               return STATUS_ERROR;
2193 +       }
2194 +
2195 +       memset(&packet,0,sizeof(packet));
2196 +       set_recovery_packet(&packet, packet_no );
2197 +       status = send_packet(&fd,&packet);
2198 +
2199 +       close(fd);      /* no reply is read */
2200 +
2201 +       return status;
2202 +}
2203 +/* Send a RECOVERY_ERROR_ANS packet to the replication server via direct_send_packet(). */
2204 +int
2205 +PGR_recovery_error_send(void)
2206 +{
2207 +       return direct_send_packet(RECOVERY_ERROR_ANS);
2208 +}
2209 +/* Send a RECOVERY_FINISH packet to the replication server via direct_send_packet(). */
2210 +int
2211 +PGR_recovery_finish_send(void)
2212 +{
2213 +       return direct_send_packet(RECOVERY_FINISH);
2214 +}
2215 +/* Connect to the replication server, send RECOVERY_QUEUE_DATA_REQ and return STATUS_OK only if a RECOVERY_QUEUE_DATA_ANS packet is read back; every path after the host lookup sets the cluster status to STATUS_REPLICATED. */
2216 +int
2217 +PGR_recovery_queue_data_req(void)
2218 +{
2219 +       int status = STATUS_OK;
2220 +       int r_size = 0;
2221 +       int rtn = STATUS_OK;
2222 +       int fd = -1;
2223 +       ReplicateServerInfo * host = NULL;
2224 +       RecoveryPacket packet;
2225 +
2226 +       host = PGR_get_replicate_server_info();
2227 +       if (host == NULL)
2228 +       {
2229 +               return STATUS_ERROR;
2230 +       }
2231 +       status = PGR_Create_Socket_Connect(&fd, host->hostName, host->recoveryPortNumber);
2232 +       if (status != STATUS_OK)
2233 +       {
2234 +               PGR_Set_Replication_Server_Status(host,DATA_ERR);
2235 +               PGR_Set_Cluster_Status(STATUS_REPLICATED);
2236 +               close(fd);      /* NOTE(review): fd may still be -1 or already closed here - confirm against PGR_Create_Socket_Connect */
2237 +               return STATUS_ERROR;
2238 +       }
2239 +
2240 +       memset(&packet,0,sizeof(packet));
2241 +       PGRset_recovery_packet_no(&packet, RECOVERY_QUEUE_DATA_REQ );   /* only the packet number is filled in; the other fields stay zero */
2242 +       status = send_packet(&fd,&packet);
2243 +       if (status != STATUS_OK)
2244 +       {
2245 +               status = stop_recovery_send(&fd,host);  /* result is not examined; STATUS_ERROR is returned either way */
2246 +               PGR_Set_Cluster_Status(STATUS_REPLICATED);
2247 +               close(fd);
2248 +               return STATUS_ERROR;
2249 +       }
2250 +       memset(&packet,0,sizeof(RecoveryPacket));
2251 +       r_size = read_packet(fd,&packet);
2252 +       if (r_size <= 0)
2253 +       {
2254 +               rtn =  STATUS_ERROR;    /* the switch below still runs and assigns rtn again */
2255 +       }
2256 +       switch (ntohs(packet.packet_no))
2257 +       {
2258 +               case RECOVERY_QUEUE_DATA_ANS:
2259 +                       rtn =  STATUS_OK;
2260 +                       break;
2261 +               default:
2262 +                       rtn =  STATUS_ERROR;
2263 +                       break;
2264 +       }
2265 +       PGR_Set_Cluster_Status(STATUS_REPLICATED);
2266 +       close(fd);
2267 +       return rtn;
2268 +}
2269 +/* Run "RsyncPath -a -r [-z] --delete -e RsyncOption src dest" in a child process and wait for it; STATUS_OK only if that child exits normally with status 0. */
2270 +static int
2271 +rsync_pg_data(char * src, char * dest)
2272 +{
2273 +       int status;
2274 +       char *args[12];
2275 +       int pid, i = 0;
2276 +
2277 +       args[i++] = "rsync";
2278 +       args[i++] = "-a";
2279 +       args[i++] = "-r";
2280 +       if (RsyncCompress)
2281 +               args[i++] = "-z";
2282 +       args[i++] = "--delete";
2283 +       args[i++] = "-e";
2284 +       args[i++] = RsyncOption;
2285 +       args[i++] = src;
2286 +       args[i++] = dest;
2287 +       args[i++] = NULL;
2288 +
2289 +       pid = fork();
2290 +       if (pid < 0)
2291 +               return STATUS_ERROR;    /* fork failed: there is no child to wait for */
2292 +       if (pid == 0)
2293 +       {
2294 +               execv(RsyncPath,args);
2295 +               /* execv() returns only on failure; the child must never continue in the caller's code */
2296 +               _exit(127);
2297 +       }
2298 +       for (;;)
2299 +       {
2300 +               int result;
2301 +               result = waitpid(pid, &status, 0);      /* wait for our own child, not any child */
2302 +               if (result < 0)
2303 +               {
2304 +                       if (errno == EINTR)
2305 +                               continue;
2306 +                       return STATUS_ERROR;
2307 +               }
2308 +
2309 +               if (WIFEXITED(status) == 0 || WEXITSTATUS(status) != 0)
2310 +                       return STATUS_ERROR;
2311 +               break;
2312 +       }
2313 +       return STATUS_OK;
2314 +}
2315 +/* Recursively delete dir_name and everything below it; STATUS_OK only if the directory itself could finally be removed. */
2316 +static int
2317 +remove_dir(char * dir_name)
2318 +{
2319 +       DIR * dp = NULL;
2320 +       struct dirent *dirp = NULL;
2321 +       char fname[256];
2322 +       int status = 0;
2323 +
2324 +       if ((dp = opendir(dir_name)) == NULL)
2325 +       {
2326 +               return STATUS_ERROR;
2327 +       }
2328 +       while ((dirp = readdir(dp)) != NULL)
2329 +       {
2330 +               if ((!strcmp(dirp->d_name,".")) ||
2331 +                       (!strcmp(dirp->d_name,"..")))
2332 +               {
2333 +                       continue;
2334 +               }
2335 +               /* skip over-long paths: never overflow fname and never delete a truncated name */
2336 +               if (snprintf(fname,sizeof(fname),"%s/%s",dir_name,dirp->d_name) >= (int) sizeof(fname))
2337 +                       continue;
2338 +               status = remove(fname);
2339 +               if (status < 0)
2340 +                       remove_dir(fname);      /* presumably a non-empty directory: empty it recursively (result ignored) */
2341 +       }
2342 +       closedir(dp);
2343 +       if (remove(dir_name) < 0)
2344 +       {
2345 +               return STATUS_ERROR;    /* also reached when a skipped or undeletable entry is left behind */
2346 +       }
2347 +       return STATUS_OK;
2348 +}
2349 +/* Delete the "<dir_name>_<pid>" directory of the current process (the name bkup_dir() renames to); returns remove_dir()'s status, or STATUS_ERROR if the name does not fit. */
2350 +static int
2351 +clear_bkup_dir(char * dir_name)
2352 +{
2353 +       char bkp_dir[256];
2354 +
2355 +       if (snprintf(bkp_dir,sizeof(bkp_dir),"%s_%d",dir_name,(int) getpid()) >= (int) sizeof(bkp_dir))
2356 +               return STATUS_ERROR;    /* too long for bkp_dir: refuse rather than overflow or truncate */
2357 +       return (remove_dir(bkp_dir));
2358 +}
2359 +/* Back up dir_name by renaming it to "<dir_name>_<pid>" (pid of the current process); STATUS_ERROR if the name does not fit or rename() fails. */
2360 +static int
2361 +bkup_dir(char * dir_name)
2362 +{
2363 +       int status;
2364 +       char bkp_dir[256];
2365 +       pid_t pid = getpid();
2366 +
2367 +       /* a name that does not fit is reported as an error instead of overflowing the buffer */
2368 +       if (snprintf(bkp_dir,sizeof(bkp_dir),"%s_%d",dir_name,(int) pid) >= (int) sizeof(bkp_dir))
2369 +               return STATUS_ERROR;
2370 +       status = rename(dir_name,bkp_dir);
2371 +       if (status < 0)
2372 +       {
2373 +               return STATUS_ERROR;
2374 +       }
2375 +       return STATUS_OK;
2376 +}
2377 +/* Move "<dir_name>_<pid>" back to dir_name; if the first rename() fails, dir_name is removed with remove_dir() and the rename is retried once. */
2378 +static int
2379 +restore_dir(char * dir_name)
2380 +{
2381 +       int status;
2382 +       char bkp_dir[256];
2383 +       pid_t pid = getpid();
2384 +
2385 +       /* a name that does not fit is reported as an error instead of overflowing the buffer */
2386 +       if (snprintf(bkp_dir,sizeof(bkp_dir),"%s_%d",dir_name,(int) pid) >= (int) sizeof(bkp_dir))
2387 +               return STATUS_ERROR;
2388 +       status = rename(bkp_dir,dir_name);
2389 +       if (status < 0)
2390 +       {
2391 +               remove_dir(dir_name);   /* clear whatever occupies the target, then retry */
2392 +               status = rename(bkp_dir,dir_name);
2393 +               if (status < 0)
2394 +               {
2395 +                       return STATUS_ERROR;
2396 +               }
2397 +       }
2398 +       return STATUS_OK;
2399 +}
2400 +/* Rsync "<src>/global" into dest, backing up "<dest>/global" first in stage 1 and clearing that backup in stage 2 (unless PGR_WITHOUT_BACKUP), then wait for "<dest>/global/pg_control" to appear. */
2401 +static int
2402 +rsync_global_dir(char * src, char * dest, int stage)
2403 +{
2404 +       int status;
2405 +       char control_file[256];
2406 +       char org_dir[256];
2407 +       char src_dir[256];
2408 +       struct stat fstat;
2409 +       int cnt;
2410 +
2411 +       /* build all three paths up front; one that does not fit is an error, not a buffer overflow */
2412 +       if ((snprintf(org_dir,sizeof(org_dir),"%s/global",dest) >= (int) sizeof(org_dir)) ||
2413 +               (snprintf(control_file,sizeof(control_file),"%s/global/pg_control",dest) >= (int) sizeof(control_file)) ||
2414 +               (snprintf(src_dir,sizeof(src_dir),"%s/global",src) >= (int) sizeof(src_dir)))
2415 +       {
2416 +               return STATUS_ERROR;
2417 +       }
2418 +       if ((stage == PGR_1ST_RECOVERY) && (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP) &&
2419 +               (bkup_dir(org_dir) != STATUS_OK))
2420 +               return STATUS_ERROR;
2421 +       status = rsync_pg_data(src_dir, dest);
2422 +       if (status != STATUS_OK )
2423 +       {
2424 +               restore_dir(org_dir);
2425 +               return STATUS_ERROR;
2426 +       }
2427 +       /* check pg_control file */
2428 +       cnt = 0;
2429 +       while (stat(control_file, &fstat) < 0)
2430 +       {
2431 +               if (cnt > MAX_RETRY_TIMES )
2432 +               {
2433 +                       restore_dir(org_dir);
2434 +                       return STATUS_ERROR;
2435 +               }
2436 +               cnt ++;
2437 +               sleep(1);       /* poll once per second until the file shows up */
2438 +       }
2439 +       if ((stage == PGR_2ND_RECOVERY) && (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP))
2440 +       {
2441 +               clear_bkup_dir(org_dir);
2442 +       }
2443 +       return STATUS_OK;
2444 +}
2445 +/* Stage-1 sync of one sub-directory: back up "<dest>/<dir>" (unless PGR_WITHOUT_BACKUP), then rsync "<src>/<dir>" into dest; on rsync failure restore_dir() is called. */
2446 +static int
2447 +first_recovery(char * src, char * dest, char * dir)
2448 +{
2449 +       int status = STATUS_OK;
2450 +       char src_dir[256];
2451 +       char dest_dir[256];
2452 +
2453 +       /* bounded formatting: a path that does not fit is an error, not a buffer overflow */
2454 +       if ((snprintf(src_dir,sizeof(src_dir),"%s/%s",src,dir) >= (int) sizeof(src_dir)) ||
2455 +               (snprintf(dest_dir,sizeof(dest_dir),"%s/%s",dest,dir) >= (int) sizeof(dest_dir)))
2456 +               return STATUS_ERROR;
2457 +       if (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP)
2458 +       {
2459 +               status = bkup_dir(dest_dir);
2460 +               if (status < 0)
2461 +               {
2462 +                       return STATUS_ERROR;
2463 +               }
2464 +       }
2465 +       status = rsync_pg_data(src_dir, dest);
2466 +       if (status != STATUS_OK )
2467 +       {
2468 +               restore_dir(dest_dir);
2469 +               return STATUS_ERROR;
2470 +       }
2471 +       return STATUS_OK;
2472 +}
2473 +/* Stage-2 sync of one sub-directory: rsync "<src>/<dir>" into dest again; on failure restore_dir() is called, on success the backup is cleared (unless PGR_WITHOUT_BACKUP). */
2474 +static int
2475 +second_recovery(char * src, char * dest, char * dir)
2476 +{
2477 +       int status = STATUS_OK;
2478 +       char src_dir[256];
2479 +       char dest_dir[256];
2480 +
2481 +       /* bounded formatting: a path that does not fit is an error, not a buffer overflow */
2482 +       if ((snprintf(src_dir,sizeof(src_dir),"%s/%s",src,dir) >= (int) sizeof(src_dir)) ||
2483 +               (snprintf(dest_dir,sizeof(dest_dir),"%s/%s",dest,dir) >= (int) sizeof(dest_dir)))
2484 +               return STATUS_ERROR;
2485 +
2486 +       status = rsync_pg_data(src_dir, dest);
2487 +       if (status != STATUS_OK )
2488 +       {
2489 +               restore_dir(dest_dir);
2490 +               return STATUS_ERROR;
2491 +       }
2492 +       if (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP)
2493 +       {
2494 +               clear_bkup_dir(dest_dir);
2495 +       }
2496 +
2497 +       return STATUS_OK;
2498 +}
2499 +/* Sync global, then base, pg_clog and pg_xlog from src into dest, using first_recovery() when stage is PGR_1ST_RECOVERY and second_recovery() otherwise; progress is written to stderr and the first failing step aborts with STATUS_ERROR. */
2500 +static int
2501 +recovery_rsync(char * src , char * dest, int stage)
2502 +{
2503 +       if ((src== NULL) || ( dest == NULL))
2504 +       {
2505 +               return STATUS_ERROR;
2506 +       }
2507 +
2508 +       /* recovery step of "global" directory */
2509 +       fprintf(stderr,"%s recovery step of [global] directory...",
2510 +                       ((stage == 1)?"1st":"2nd"));    /* NOTE(review): compares with literal 1, not PGR_1ST_RECOVERY as below - confirm they are equal */
2511 +       if (rsync_global_dir(src, dest, stage) != STATUS_OK)
2512 +       {
2513 +               fprintf(stderr,"NG\n");
2514 +               return STATUS_ERROR;
2515 +       }
2516 +       fprintf(stderr,"OK\n");
2517 +
2518 +       if (stage == PGR_1ST_RECOVERY)
2519 +       {
2520 +               /* 1st recovery step of "base" directory */
2521 +               fprintf(stderr,"1st recovery step of [base] directory...");
2522 +               if (first_recovery(src,dest,"base") != STATUS_OK)
2523 +               {
2524 +                       fprintf(stderr,"NG\n");
2525 +                       return STATUS_ERROR;
2526 +               }
2527 +               fprintf(stderr,"OK\n");
2528 +
2529 +               fprintf(stderr,"1st recovery step of [pg_clog] directory...");
2530 +               /* 1st recovery step of "pg_clog" directory */
2531 +               if (first_recovery(src,dest,"pg_clog") != STATUS_OK)
2532 +               {
2533 +                       fprintf(stderr,"NG\n");
2534 +                       return STATUS_ERROR;
2535 +               }
2536 +               fprintf(stderr,"OK\n");
2537 +
2538 +               /* 1st recovery step of "pg_xlog" directory */
2539 +               fprintf(stderr,"1st recovery step of [pg_xlog] directory...");
2540 +               if (first_recovery(src,dest,"pg_xlog") != STATUS_OK)
2541 +               {
2542 +                       fprintf(stderr,"NG\n");
2543 +                       return STATUS_ERROR;
2544 +               }
2545 +               fprintf(stderr,"OK\n");
2546 +       }
2547 +       else
2548 +       {
2549 +               /* 2nd recovery step of "base" directory */
2550 +               fprintf(stderr,"2nd recovery step of [base] directory...");
2551 +               if (second_recovery(src,dest,"base") != STATUS_OK)
2552 +               {
2553 +                       fprintf(stderr,"NG\n");
2554 +                       return STATUS_ERROR;
2555 +               }
2556 +               fprintf(stderr,"OK\n");
2557 +
2558 +               /* 2nd recovery step of "pg_clog" directory */
2559 +               fprintf(stderr,"2nd recovery step of [pg_clog] directory...");
2560 +               if (second_recovery(src,dest,"pg_clog") != STATUS_OK)
2561 +               {
2562 +                       fprintf(stderr,"NG\n");
2563 +                       return STATUS_ERROR;
2564 +               }
2565 +               fprintf(stderr,"OK\n");
2566 +
2567 +               /* 2nd recovery step of "pg_xlog" directory */
2568 +               fprintf(stderr,"2nd recovery step of [pg_xlog] directory...");
2569 +               if (second_recovery(src,dest,"pg_xlog") != STATUS_OK)
2570 +               {
2571 +                       fprintf(stderr,"NG\n");
2572 +                       return STATUS_ERROR;
2573 +               }
2574 +               fprintf(stderr,"OK\n");
2575 +       }
2576 +
2577 +       return STATUS_OK;
2578 +}
2579 +/* Read one recovery packet from fd and act on it: run the 1st/2nd recovery stage and send the follow-up request. Returns RECOVERY_LOOP_CONTINUE, RECOVERY_LOOP_END or RECOVERY_LOOP_FAIL. */
2580 +static int
2581 +recovery_loop(int fd, int mode)
2582 +{
2583 +       int src_len = 0;
2584 +       int status = STATUS_OK;
2585 +       RecoveryPacket packet;
2586 +       int r_size = 0;
2587 +       int rtn = RECOVERY_LOOP_END;
2588 +       char src[256];
2589 +       bool need_sync_table_space = false;
2590 +
2591 +       memset(&packet,0,sizeof(RecoveryPacket));
2592 +       r_size = read_packet(fd,&packet);
2593 +       if (r_size <= 0)
2594 +       {
2595 +               return RECOVERY_LOOP_FAIL;      /* read failed or returned no data: do not dispatch on the packet */
2596 +       }
2597 +       switch (ntohs(packet.packet_no))
2598 +       {
2599 +               case RECOVERY_PREPARE_ANS :
2600 +                       /*
2601 +                        * get master information
2602 +                        */
2603 +                       /*
2604 +                        * sync master data before recovery
2605 +                        */
2606 +                       if (Debug_pretty_print)
2607 +                       {
2608 +                               elog(DEBUG1,"local host : %s  master:%s",Local_Host_Name,packet.hostName);
2609 +                       }
2610 +                       need_sync_table_space = (strncmp(Local_Host_Name,packet.hostName,strlen(Local_Host_Name)) != 0);
2611 +                       if (need_sync_table_space)
2612 +                               src_len = snprintf(src,sizeof(src),"%s:%s",packet.hostName,packet.pg_data);    /* remote master: host:path */
2613 +                       else
2614 +                               src_len = snprintf(src,sizeof(src),"%s",packet.pg_data);        /* master on this host: plain path */
2615 +                       if (src_len < 0 || src_len >= (int) sizeof(src))
2616 +                       {
2617 +                               rtn = RECOVERY_LOOP_FAIL;       /* packet-supplied path does not fit: refuse rather than overflow or truncate */
2618 +                               break;
2619 +                       }
2620 +                       if (PGR_Recovery_Mode == PGR_COLD_RECOVERY)
2621 +                       {
2622 +                               rtn = cold_recovery(src,&packet,need_sync_table_space,PGR_1ST_RECOVERY);
2623 +                       }
2624 +                       else
2625 +                       {
2626 +                               rtn = hot_recovery(&packet,PGR_1ST_RECOVERY);
2627 +                       }
2628 +                       if (rtn != STATUS_OK)
2629 +                       {
2630 +                               rtn = RECOVERY_LOOP_FAIL;
2631 +                               break;
2632 +                       }
2633 +
2634 +                       /*
2635 +                        * send recovery start request
2636 +                        */
2637 +                       PGRset_recovery_packet_no(&packet, RECOVERY_START_REQ );
2638 +                       status = send_packet(&fd,&packet);
2639 +                       if (status != STATUS_OK)
2640 +                       {
2641 +                               fprintf(stderr,"RECOVERY_START_REQ send error\n");
2642 +                               rtn = RECOVERY_LOOP_FAIL;
2643 +                               break;
2644 +                       }
2645 +                       rtn = RECOVERY_LOOP_CONTINUE;
2646 +                       break;
2647 +               case RECOVERY_START_ANS : 
2648 +                       /*
2649 +                        * sync master data for recovery
2650 +                        */
2651 +                       need_sync_table_space = (strncmp(Local_Host_Name,packet.hostName,strlen(Local_Host_Name)) != 0);
2652 +                       if (need_sync_table_space)
2653 +                               src_len = snprintf(src,sizeof(src),"%s:%s",packet.hostName,packet.pg_data);    /* remote master: host:path */
2654 +                       else
2655 +                               src_len = snprintf(src,sizeof(src),"%s",packet.pg_data);        /* master on this host: plain path */
2656 +                       if (src_len < 0 || src_len >= (int) sizeof(src))
2657 +                       {
2658 +                               rtn = RECOVERY_LOOP_FAIL;       /* packet-supplied path does not fit: refuse rather than overflow or truncate */
2659 +                               break;
2660 +                       }
2661 +                       if (PGR_Recovery_Mode == PGR_COLD_RECOVERY)
2662 +                       {
2663 +                               rtn = cold_recovery(src,&packet,need_sync_table_space,PGR_2ND_RECOVERY);
2664 +                       }
2665 +                       else
2666 +                       {
2667 +                               rtn = hot_recovery(&packet,PGR_2ND_RECOVERY);
2668 +                       }
2669 +
2670 +                       if (rtn == STATUS_OK)
2671 +                       {
2672 +                               fprintf(stderr,"2nd recovery successed\n");
2673 +                               if (mode == PGR_HOT_RECOVERY)
2674 +                               {
2675 +                                       rtn = RECOVERY_LOOP_CONTINUE;
2676 +                                       /*
2677 +                                        * send recovery queued data request
2678 +                                        */
2679 +                                       PGRset_recovery_packet_no(&packet, RECOVERY_QUEUE_DATA_REQ );
2680 +                                       status = send_packet(&fd,&packet);
2681 +                                       if (status != STATUS_OK)
2682 +                                       {
2683 +                                               fprintf(stderr,"RECOVERY_QUEUE_DATA_REQ send error\n");
2684 +                                               rtn = RECOVERY_LOOP_FAIL;
2685 +                                               break;
2686 +                                       }
2687 +                               }
2688 +                               else
2689 +                               {
2690 +                                       rtn = RECOVERY_LOOP_END;
2691 +                               }
2692 +                       }
2693 +                       else
2694 +                       {
2695 +                               fprintf(stderr,"2nd hot recovery failed\n");
2696 +                               rtn = RECOVERY_LOOP_FAIL;
2697 +                       }
2698 +                       break;
2699 +               case RECOVERY_QUEUE_DATA_ANS:
2700 +                       rtn = RECOVERY_LOOP_END;
2701 +                       break;
2702 +               case RECOVERY_ERROR_OCCUPIED:
2703 +                       fprintf(stderr,"already in use for another recovery\n");
2704 +                       rtn = RECOVERY_LOOP_FAIL;
2705 +                       break;
2706 +               case RECOVERY_ERROR_CONNECTION:
2707 +                       fprintf(stderr,"connection failed\n");
2708 +                       rtn = RECOVERY_LOOP_FAIL;
2709 +                       break;
2710 +               default:
2711 +                       fprintf(stderr,"unknown packet received\n");
2712 +                       rtn = RECOVERY_LOOP_FAIL;
2713 +                       break;
2714 +       }
2715 +
2716 +       return rtn;
2717 +}
2718 +/* Recover this node in the given mode: send a prepare request to a replication server, then feed each reply to recovery_loop() until it reports END (STATUS_OK) or anything other than CONTINUE/END (STATUS_ERROR). */
2719 +int
2720 +PGR_Recovery_Main(int mode)
2721 +{
2722 +       int status;
2723 +       int fd = -1;
2724 +       int rtn;
2725 +       ReplicateServerInfo * host;
2726 +
2727 +       memset(Local_Host_Name,0,sizeof(Local_Host_Name));
2728 +       gethostname(Local_Host_Name,sizeof(Local_Host_Name));
2729 +       PGR_Recovery_Mode = mode;
2730 +
2731 +       status = STATUS_ERROR;
2732 +
2733 +Retry_Start_Recovery:
2734 +       host = PGR_get_replicate_server_info();
2735 +       if (host == NULL)
2736 +       {
2737 +               if (Debug_pretty_print)
2738 +               {
2739 +                       elog(DEBUG1,"not found replication server");
2740 +               }
2741 +               PGR_Set_Cluster_Status(STATUS_REPLICATED);
2742 +               return STATUS_ERROR;
2743 +       }
2744 +
2745 +       PGR_Set_Cluster_Status(STATUS_RECOVERY);
2746 +       status = start_recovery_send(&fd,host);
2747 +       if (status != STATUS_OK)
2748 +       {
2749 +               PGR_Set_Replication_Server_Status(host,DATA_ERR);       /* flag this server, then look one up again */
2750 +               close(fd);
2751 +               if (Debug_pretty_print)
2752 +               {
2753 +                       elog(DEBUG1,"start recovery packet send error");
2754 +               }
2755 +               goto Retry_Start_Recovery;      /* the retry ends only when the lookup returns NULL or a send succeeds */
2756 +       }
2757 +
2758 +       for (;;)
2759 +       {
2760 +               fd_set    rmask;
2761 +               struct timeval timeout;
2762 +
2763 +               timeout.tv_sec = RECOVERY_TIMEOUT;
2764 +               timeout.tv_usec = 0;
2765 +
2766 +               /*
2767 +                * Wait for something to happen.
2768 +                */
2769 +               FD_ZERO(&rmask);
2770 +               FD_SET(fd,&rmask);
2771 +               rtn = select(fd+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
2772 +               if (rtn && FD_ISSET(fd, &rmask))        /* NOTE(review): a negative rtn also passes this test; a timeout (0) simply loops again */
2773 +               {
2774 +                       status = recovery_loop(fd, mode);
2775 +                       if (status == RECOVERY_LOOP_CONTINUE)
2776 +                       {
2777 +                               continue;
2778 +                       }
2779 +                       else if (status == RECOVERY_LOOP_END)
2780 +                       {
2781 +                               close(fd);
2782 +                               break;
2783 +                       }
2784 +                       else if (status == RECOVERY_LOOP_FAIL)
2785 +                       {
2786 +                               status = stop_recovery_send(&fd,host);
2787 +                               PGR_Set_Cluster_Status(STATUS_REPLICATED);
2788 +                               if (status != STATUS_OK)        /* both outcomes below close fd and return STATUS_ERROR */
2789 +                               {
2790 +                                       close(fd);
2791 +                                       return STATUS_ERROR;
2792 +                               }
2793 +                               close(fd);
2794 +                               return STATUS_ERROR;
2795 +                       }
2796 +                       else 
2797 +                       {
2798 +                           close(fd);
2799 +                               PGR_Set_Cluster_Status(STATUS_REPLICATED);
2800 +                           return STATUS_ERROR;
2801 +                       }
2802 +               }
2803 +       }
2804 +       PGR_Set_Cluster_Status(STATUS_REPLICATED);
2805 +       return STATUS_OK;
2806 +}
2807 +/* Log the fields of a recovery packet at DEBUG1; does nothing unless Debug_pretty_print is set. */
2808 +static void
2809 +show_recovery_packet(RecoveryPacket * packet)
2810 +{
2811 +
2812 +       if (Debug_pretty_print)
2813 +       {
2814 +               elog(DEBUG1,"no = %d",ntohs(packet->packet_no));        /* numeric fields are converted with ntohs() before printing */
2815 +               elog(DEBUG1,"max_connect = %d",ntohs(packet->max_connect));
2816 +               elog(DEBUG1,"port = %d",ntohs(packet->port));
2817 +               elog(DEBUG1,"recoveryPort = %d",ntohs(packet->recoveryPort));
2818 +               if (packet->hostName != NULL)
2819 +                       elog(DEBUG1,"hostName = %s",packet->hostName);
2820 +               if (packet->pg_data != NULL)
2821 +                       elog(DEBUG1,"pg_data = %s",packet->pg_data);
2822 +       }
2823 +}
2824 +/* Fill *packet for sending: packet number, MaxBackends, PostPortNumber and RecoveryPortNumber (via htons), local host name, DataDir and the effective user's name. A NULL packet is ignored. */
2825 +static void
2826 +set_recovery_packet(RecoveryPacket * packet, int packet_no)
2827 +{
2828 +       struct passwd * pw = NULL;
2829 +
2830 +       if (packet == NULL)
2831 +               return;
2832 +       PGRset_recovery_packet_no(packet, packet_no );
2833 +       packet->max_connect = htons(MaxBackends);
2834 +       packet->port = htons(PostPortNumber);
2835 +       packet->recoveryPort = htons(RecoveryPortNumber);
2836 +       gethostname(packet->hostName,sizeof(packet->hostName));
2837 +       /* copy DataDir as a string: a memcpy of sizeof(pg_data) bytes would read past the end of a shorter DataDir */
2838 +       memset(packet->pg_data,0,sizeof(packet->pg_data));
2839 +       strncpy(packet->pg_data,DataDir,sizeof(packet->pg_data) - 1);
2840 +       memset(packet->userName,0,sizeof(packet->userName));
2841 +       if ((pw = getpwuid(geteuid())) != NULL)
2842 +       {
2843 +               strncpy(packet->userName,pw->pw_name,sizeof(packet->userName) - 1);     /* - 1 keeps the terminating NUL from the memset */
2844 +       }
2845 +       else
2846 +       {
2847 +               cuserid(packet->userName);
2848 +       }
2849 +}
2850 +/* Fetch the tablespace locations from the server at hostName:portNum and call rsync_table_space() for every non-empty one, reporting each result on stderr. */
2851 +static int
2852 +sync_table_space(char * hostName, uint16_t portNum, char * userName, int stage)
2853 +{
2854 +       PGresult * res = (PGresult *)NULL;
2855 +       int i = 0;
2856 +       int row_num = 0;
2857 +       char * location = NULL;
2858 +       int rtn = STATUS_OK;
2859 +
2860 +       res = get_table_space_location(hostName, portNum, userName);
2861 +       if (res == (PGresult *)NULL)
2862 +       {
2863 +               return STATUS_ERROR;
2864 +       }
2865 +       row_num = PQntuples(res);
2866 +       for ( i = 0 ; i < row_num ; i ++)
2867 +       {
2868 +               location = PQgetvalue(res,i,0);
2869 +               if (strlen(location) > 0 )
2870 +               {
2871 +                       fprintf(stderr,"sync tablespace[%s]...",location);
2872 +                       rtn = rsync_table_space(hostName, location, stage);
2873 +                       fprintf(stderr,"%s\n", (rtn == STATUS_OK)?"OK":"NG");
2874 +               }
2875 +       }
2876 +       if (res != (PGresult *)NULL)
2877 +       {
2878 +               PQclear(res);
2879 +       }
2880 +
2881 +       return STATUS_OK;       /* NOTE(review): rtn is only printed, never returned - confirm this best-effort result is intended */
2882 +}
2883 +/* Connect to database template1 on hostName:portNum as userName and run the spclocation query; returns the PGresult (not cleared here) or NULL on bad arguments, connection failure or query failure. */
2884 +static PGresult *
2885 +get_table_space_location(char * hostName, uint16_t portNum, char * userName)
2886 +{
2887 +       PGresult * res = (PGresult *)NULL;
2888 +       int cnt = 0;
2889 +       PGconn * conn = (PGconn *)NULL;
2890 +       char port[8];
2891 +       char *database = "template1";
2892 +       char * query = "select spclocation from pg_tablespace where spcname not like 'pg_%'";
2893 +
2894 +       if ( (hostName == NULL) ||
2895 +               (portNum <= 0)      ||
2896 +               (userName == NULL))
2897 +       {
2898 +               return (PGresult *)NULL;
2899 +       }
2900 +       snprintf(port,sizeof(port),"%d", portNum);
2901 +
2902 +       /* create connection to master */
2903 +       conn = PQsetdbLogin(hostName, port, NULL, NULL, database, userName, NULL);
2904 +       if (conn == NULL)
2905 +       {
2906 +               return (PGresult *)NULL;
2907 +       }
2908 +       /* check to see that the backend Connection was successfully made */
2909 +       cnt = 0;
2910 +       while (PQstatus(conn) == CONNECTION_BAD)        /* reconnect immediately, with no delay between attempts */
2911 +       {
2912 +               if (conn != NULL)
2913 +               {
2914 +                       PQfinish(conn);
2915 +               }
2916 +               if (cnt > MAX_RETRY_TIMES )
2917 +               {
2918 +                       return (PGresult *)NULL;
2919 +               }
2920 +               conn = PQsetdbLogin(hostName, port, NULL, NULL, database, userName, NULL);
2921 +               cnt ++;
2922 +       }
2923 +       res = PQexec(conn , query);
2924 +       if ((res == NULL) ||
2925 +               (PQresultStatus(res) != PGRES_TUPLES_OK))
2926 +       {
2927 +               PQclear(res);
2928 +               res = (PGresult *)NULL;
2929 +       }
2930 +       if (conn != NULL)
2931 +       {
2932 +               PQfinish(conn);
2933 +       }
2934 +
2935 +       return res;
2936 +}
2937 +/* Rsync one tablespace directory from "hostName:location" to the local path location, with the same backup (stage 1) / clear-backup (stage 2) handling as the data directories, then wait for location to exist. */
2938 +static int
2939 +rsync_table_space(char * hostName, char * location, int stage)
2940 +{
2941 +       int status = STATUS_OK;
2942 +       char src_dir[256];
2943 +       char dest_dir[256];
2944 +       struct stat fstat;
2945 +       int cnt = 0;
2946 +
2947 +       if ((snprintf(src_dir,sizeof(src_dir),"%s:%s",hostName,location) >= (int) sizeof(src_dir)) ||
2948 +               (snprintf(dest_dir,sizeof(dest_dir),"%s",location) >= (int) sizeof(dest_dir)))
2949 +               return STATUS_ERROR;    /* path does not fit: refuse rather than overflow or truncate */
2950 +       if ((stage == PGR_1ST_RECOVERY) && (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP))
2951 +       {
2952 +               status = bkup_dir(location);    /* result is not checked; the rsync below runs either way */
2953 +       }
2954 +       status = rsync_pg_data(src_dir, dest_dir);
2955 +       if (status != STATUS_OK )
2956 +       {
2957 +               restore_dir(location);
2958 +               return STATUS_ERROR;
2959 +       }
2960 +       /* check file status */
2961 +       cnt = 0;
2962 +       while (stat(location,&fstat) < 0)
2963 +       {
2964 +               if (cnt > MAX_RETRY_TIMES )
2965 +               {
2966 +                       restore_dir(location);
2967 +                       return STATUS_ERROR;
2968 +               }
2969 +               cnt ++;
2970 +               sleep(1);
2971 +       }
2972 +       if ((stage == PGR_2ND_RECOVERY) && (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP))
2973 +       {
2974 +               clear_bkup_dir(location);
2975 +       }
2976 +       return STATUS_OK;
2977 +}
2978 +/* One stage of cold recovery: rsync src into DataDir with recovery_rsync() and, when need_sync_table_space is true, sync the tablespaces of the host named in *packet as well. */
2979 +static int
2980 +cold_recovery(char * src, RecoveryPacket *packet, bool need_sync_table_space, int stage)
2981 +{
2982 +       int status = STATUS_OK;
2983 +
2984 +       status = recovery_rsync(src,DataDir,stage);
2985 +       if (status != STATUS_OK)
2986 +       {
2987 +               if (Debug_pretty_print)
2988 +               {
2989 +                       elog(DEBUG1,"%s rsync error",
2990 +                               ((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
2991 +               }
2992 +               return STATUS_ERROR;
2993 +       }
2994 +       if (need_sync_table_space == true)
2995 +       {
2996 +               status = sync_table_space(packet->hostName, ntohs(packet->port), packet->userName, stage);
2997 +               fprintf(stderr,"%s sync_table_space ",
2998 +                       ((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));     /* summary label is printed after the sync has already run */
2999 +               if (status != STATUS_OK)
3000 +               {
3001 +                       if (Debug_pretty_print)
3002 +                       {
3003 +                               elog(DEBUG1,"%s sync table space error",
3004 +                                       ((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
3005 +                       }
3006 +                       fprintf(stderr,"NG\n");
3007 +                       return STATUS_ERROR;
3008 +               }
3009 +               fprintf(stderr,"OK\n");
3010 +       }
3011 +       return STATUS_OK;
3012 +}
3013 +
3014 +static int
3015 +hot_recovery(RecoveryPacket *packet, int stage)
3016 +{
3017 +       int status = STATUS_OK;
3018 +       /* Restore over pg_dump from the host named in packet; returns STATUS_OK or STATUS_ERROR. */
3019 +       fprintf(stderr,"%s restore from pg_dump ",
3020 +               ((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
3021 +       if (stage == PGR_1ST_RECOVERY)
3022 +       {
3023 +               status = restore_from_dumpall(packet->hostName, ntohs(packet->port), packet->userName );        /* single pg_dumpall pass */
3024 +       }
3025 +       else
3026 +       {
3027 +               status = restore_from_each_dump(packet->hostName, ntohs(packet->port), packet->userName );      /* one dump per database */
3028 +       }
3029 +       if (status != STATUS_OK)
3030 +       {
3031 +               if (Debug_pretty_print)
3032 +               {
3033 +                       elog(DEBUG1,"%s restore from pg_dump error",
3034 +                               ((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
3035 +               }
3036 +               fprintf(stderr,"->NG\n");
3037 +               return STATUS_ERROR;
3038 +       }
3039 +       fprintf(stderr,"->OK\n");
3040 +       return STATUS_OK;
3041 +}
3042 +
3043 +static int
3044 +restore_from_dumpall( char * hostName, uint16_t portNum, char * userName)
3045 +{
3046 +       int status;
3047 +       char exec_command[512];
3048 +       int pid;
3049 +       char pg_dumpall[256];
3050 +       char psql[256];
3051 +       char *p=NULL;
3052 +       /* Pipe pg_dumpall (run against hostName:portNum as userName) into psql on port PostPortNumber; returns STATUS_OK or STATUS_ERROR. */
3053 +       /* set pg_dumpall path: PgDumpPath with its last component replaced; the bounded copy leaves room for the new name and its NUL */
3054 +       memset(pg_dumpall, 0, sizeof(pg_dumpall));
3055 +       strncpy(pg_dumpall, PgDumpPath, sizeof(pg_dumpall) - sizeof("pg_dumpall"));
3056 +       p = strrchr(pg_dumpall,'/');
3057 +       if (p == NULL)
3058 +       {
3059 +               return STATUS_ERROR;
3060 +       }
3061 +       p++;
3062 +       strcpy(p,"pg_dumpall");
3063 +
3064 +       /* set psql path the same way (again bounded so "psql" always fits) */
3065 +       p = NULL;
3066 +       memset(psql, 0, sizeof(psql));
3067 +       strncpy(psql, PgDumpPath, sizeof(psql) - sizeof("psql"));
3068 +       p = strrchr(psql,'/');
3069 +       if (p == NULL)
3070 +       {
3071 +               return STATUS_ERROR;
3072 +       }
3073 +       p++;
3074 +       strcpy(p,"psql");
3075 +       p+=4;
3076 +       *p = '\0';
3077 +
3078 +       snprintf(exec_command,sizeof(exec_command),"%s -i -o -c -h %s -p %d -U %s | %s -p %d template1",
3079 +               pg_dumpall,
3080 +               hostName,
3081 +               portNum,
3082 +               userName,
3083 +               psql,
3084 +               PostPortNumber
3085 +       );
3086 +       fprintf(stderr,"1st exec:[%s]\n",exec_command);
3087 +       /* run the pipeline through the shell in a child process */
3088 +       pid = fork();
3089 +       if (pid < 0)
3090 +       {
3091 +               return STATUS_ERROR;    /* fork failed: there is no child to wait for */
3092 +       }
3093 +       if (pid == 0)
3094 +       {
3095 +               system(exec_command);
3096 +               exit(0);
3097 +       }
3098 +       for (;;)
3099 +       {
3100 +               /* wait for this child only, so another child's exit is never mistaken for it */
3101 +               if (waitpid(pid, &status, 0) < 0)
3102 +               {
3103 +                       if (errno == EINTR)
3104 +                               continue;
3105 +                       return STATUS_ERROR;
3106 +               }
3107 +               if (WIFEXITED(status) == 0 || WEXITSTATUS(status) != 0)
3108 +                       return STATUS_ERROR;
3109 +               break;
3110 +       }
3111 +       /* NOTE(review): the child exits 0 whatever system() returned, so a failing */
3112 +       /* pipeline is still reported as STATUS_OK -- left unchanged, confirm intent */
3113 +       return STATUS_OK;
3114 +}
3115 +
3116 +static int
3117 +restore_from_dump( char * hostName, uint16_t portNum, char * userName, char * dbName)
3118 +{
3119 +       int status;
3120 +       char exec_command[512];
3121 +       int pid= 0;
3122 +       char pg_restore[256];
3123 +       char *p=NULL;
3124 +       /* Pipe a PgDumpPath dump of dbName (taken from hostName:portNum) into pg_restore on port PostPortNumber; returns STATUS_OK or STATUS_ERROR. */
3125 +       /* set pg_restore path: PgDumpPath with its last component replaced; the bounded copy leaves room for the new name and its NUL */
3126 +       p = NULL;
3127 +       memset(pg_restore, 0, sizeof(pg_restore));
3128 +       strncpy(pg_restore, PgDumpPath, sizeof(pg_restore) - sizeof("pg_restore"));
3129 +       p = strrchr(pg_restore,'/');
3130 +       if (p == NULL)
3131 +       {
3132 +               return STATUS_ERROR;
3133 +       }
3134 +       p++;
3135 +       strcpy(p,"pg_restore");
3136 +
3137 +       snprintf(exec_command,sizeof(exec_command),"%s -i -Fc -o -b -h %s -p %d -U %s %s | %s -i -c -p %d -d %s",
3138 +               PgDumpPath,
3139 +               hostName,
3140 +               portNum,
3141 +               userName,
3142 +               dbName,
3143 +               pg_restore,
3144 +               PostPortNumber,
3145 +               dbName
3146 +       );
3147 +
3148 +       fprintf(stderr,"2nd exec:[%s]\n",exec_command);
3149 +       pid = fork();
3150 +       if (pid < 0)
3151 +       {
3152 +               return STATUS_ERROR;    /* fork failed: there is no child to wait for */
3153 +       }
3154 +       if (pid == 0)
3155 +       {
3156 +               system(exec_command);
3157 +               exit(0);
3158 +       }
3159 +       for (;;)
3160 +       {
3161 +               /* wait for this child only, so another child's exit is never mistaken for it */
3162 +               if (waitpid(pid, &status, 0) < 0)
3163 +               {
3164 +                       if (errno == EINTR)
3165 +                               continue;
3166 +                       return STATUS_ERROR;
3167 +               }
3168 +               if (WIFEXITED(status) == 0 || WEXITSTATUS(status) != 0)
3169 +                       return STATUS_ERROR;
3170 +               break;
3171 +       }
3172 +       /* NOTE(review): the child exits 0 whatever system() returned, so a failing */
3173 +       /* pipeline is still reported as STATUS_OK -- left unchanged, confirm intent */
3174 +       return STATUS_OK;
3175 +}
3176 +
3177 +static int
3178 +restore_from_each_dump( char * hostName, uint16_t portNum, char * userName)
3179 +{
3180 +       PGresult * res = (PGresult *)NULL;
3181 +       int i = 0;
3182 +       int row_num = 0;
3183 +       char * dbName = NULL;
3184 +       int rtn = STATUS_OK;
3185 +       /* Restore every database listed by get_dbName() except template0/template1; STATUS_ERROR if the list or any restore fails. */
3186 +       res = get_dbName(hostName, portNum, userName);
3187 +       if (res == (PGresult *)NULL)
3188 +       {
3189 +               return STATUS_ERROR;
3190 +       }
3191 +       row_num = PQntuples(res);
3192 +       for ( i = 0 ; i < row_num ; i ++)
3193 +       {
3194 +               dbName = PQgetvalue(res,i,0);
3195 +               if (strlen(dbName) > 0 )
3196 +               {
3197 +                       if ((strcmp("template0",dbName)) &&
3198 +                               (strcmp("template1",dbName)))
3199 +                       {
3200 +                               /* keep restoring the remaining databases, but remember the failure */
3201 +                               if (restore_from_dump(hostName, portNum, userName, dbName) != STATUS_OK)
3202 +                                       rtn = STATUS_ERROR;
3203 +                               fprintf(stderr,".");
3204 +                       }
3205 +               }
3206 +       }
3207 +       PQclear(res);
3208 +
3209 +       /* report a failed restore instead of always claiming success */
3210 +       return rtn;
3211 +}
3212 +
3213 +static PGresult *
3214 +get_dbName(char * hostName, uint16_t portNum, char * userName)
3215 +{
3216 +       PGresult * res = (PGresult *)NULL;
3217 +       int cnt = 0;
3218 +       PGconn * conn = (PGconn *)NULL;
3219 +       char port[8];
3220 +       char *database = "template1";
3221 +       char * query = "SELECT datname FROM pg_database";
3222 +       /* Run the datname query on hostName:portNum as userName; returns the result (the caller clears it) or NULL on failure. */
3223 +       if ( (hostName == NULL) ||
3224 +               (portNum <= 0)      ||
3225 +               (userName == NULL))
3226 +       {
3227 +               return (PGresult *)NULL;
3228 +       }
3229 +       snprintf(port,sizeof(port),"%d", portNum);
3230 +
3231 +       /* create connection to master */
3232 +       conn = PQsetdbLogin(hostName, port, NULL, NULL, database, userName, NULL);
3233 +       if (conn == NULL)
3234 +       {
3235 +               return (PGresult *)NULL;
3236 +       }
3237 +       /* check that the backend connection was made; reconnect while it is reported bad */
3238 +       cnt = 0;
3239 +       while (PQstatus(conn) == CONNECTION_BAD)
3240 +       {
3241 +               if (conn != NULL)
3242 +               {
3243 +                       PQfinish(conn);  /* release the failed connection before retrying */
3244 +               }
3245 +               if (cnt > MAX_RETRY_TIMES )
3246 +               {
3247 +                       return (PGresult *)NULL;         /* retries used up; conn was already released above */
3248 +               }
3249 +               conn = PQsetdbLogin(hostName, port, NULL, NULL, database, userName, NULL);
3250 +               cnt ++;
3251 +       }
3252 +       res = PQexec(conn , query);
3253 +       if ((res == NULL) ||
3254 +               (PQresultStatus(res) != PGRES_TUPLES_OK))
3255 +       {
3256 +               PQclear(res);    /* anything other than a tuple result is discarded */
3257 +               res = (PGresult *)NULL;
3258 +       }
3259 +       if (conn != NULL)
3260 +       {
3261 +               PQfinish(conn);
3262 +       }
3263 +       /* res is returned after the connection has been closed */
3264 +       return res;
3265 +}
3266 +
3267 +#endif /* USE_REPLICATION */
3268 diff -aruN postgresql-8.2.4/src/backend/libpq/replicate.c pgcluster-1.7.0rc7/src/backend/libpq/replicate.c
3269 --- postgresql-8.2.4/src/backend/libpq/replicate.c      1970-01-01 01:00:00.000000000 +0100
3270 +++ pgcluster-1.7.0rc7/src/backend/libpq/replicate.c    2007-02-18 22:52:16.000000000 +0100
3271 @@ -0,0 +1,4021 @@
3272 +/*--------------------------------------------------------------------
3273 + * FILE:
3274 + *     replicate.c
3275 + *
3276 + * NOTE:
3277 + *     This file contains the functions that the backend source calls
3278 + *     to perform replication.
3279 + *     Low level I/O functions that are called by these functions are
3280 + *     contained in 'replicate_com.c'.
3281 + *
3282 + *--------------------------------------------------------------------
3283 + */
3284 +
3285 +/*--------------------------------------
3286 + * INTERFACE ROUTINES
3287 + *
3288 + * setup/teardown:
3289 + *      PGR_Init_Replicate_Server_Data
3290 + *      PGR_Set_Replicate_Server_Socket
3291 + *      PGR_delete_shm
3292 + * I/O call:
3293 + *      PGR_Send_Replicate_Command
3294 + * table handling:
3295 + *      PGR_get_replicate_server_info
3296 + * status distinction:
3297 + *      PGR_Is_Replicated_Command
3298 + *      Xlog_Check_Replicatec
3299 + * replication main:
3300 + *      PGR_replication 
3301 + *-------------------------------------
3302 + */
3303 +#ifdef USE_REPLICATION
3304 +
3305 +#include "postgres.h"
3306 +
3307 +#include <stdio.h>
3308 +#include <strings.h>
3309 +#include <signal.h>
3310 +#include <errno.h>
3311 +#include <fcntl.h>
3312 +#include <grp.h>
3313 +#include <unistd.h>
3314 +#include <ctype.h>
3315 +#include <time.h>
3316 +#include <sys/time.h>
3317 +#include <sys/types.h>
3318 +#include <sys/stat.h>
3319 +#include <sys/socket.h>
3320 +#include <sys/ipc.h>
3321 +#include <sys/shm.h>
3322 +#include <netdb.h>
3323 +#include <netinet/in.h>
3324 +#ifdef HAVE_NETINET_TCP_H
3325 +#include <netinet/tcp.h>
3326 +#endif
3327 +#include <arpa/inet.h>
3328 +#include <sys/file.h>
3329 +#include <netdb.h>
3330 +
3331 +#include "access/transam.h"
3332 +#include "bootstrap/bootstrap.h"
3333 +#include "libpq/libpq.h"
3334 +#include "libpq/pqformat.h"
3335 +#include "miscadmin.h"
3336 +#include "commands/prepare.h"
3337 +#include "nodes/nodes.h"
3338 +#include "nodes/print.h"
3339 +#include "utils/guc.h"
3340 +#include "parser/parser.h"
3341 +#include "access/xact.h"
3342 +#include "storage/proc.h"
3343 +#include "tcop/tcopprot.h"
3344 +#include "tcop/utility.h"
3345 +#include "postmaster/postmaster.h"
3346 +#include "replicate.h"
3347 +
3348 +/* the source of this value is 'access/transam/varsup.c' */
3349 +#define VAR_OID_PREFETCH                (8192)
3350 +
3351 +PGR_ReplicationLog_Info ReplicationLog_Info;
3352 +bool pgr_skip_in_prepared_query = false;
3353 +
3354 +/*--------------------------------------
3355 + * PROTOTYPE DECLARATION
3356 + *--------------------------------------
3357 + */
3358 +static int set_command_args(char argv[PGR_CMD_ARG_NUM][256],char *str);
3359 +static bool is_same_replication_server(ReplicateServerInfo * sp1, ReplicateServerInfo * sp2 );
3360 +static ReplicateServerInfo * search_new_replication_server ( ReplicateServerInfo * sp , int socket_type );
3361 +
3362 +static int close_replicate_server_socket ( ReplicateServerInfo * sp , int socket_type );
3363 +static int recv_message(int sock,char * buf,int flag);
3364 +static int send_replicate_packet(int sock,ReplicateHeader * header, char * query_string);
3365 +static bool is_copy_from(char * query);
3366 +static int get_words( char words[MAX_WORDS][MAX_WORD_LETTERS] ,char * string,int length,int upper);
3367 +static int get_table_name(char * table_name, char * query, int position );
3368 +static bool is_not_replication_query(char * query_string, int query_len, char cmdType);
3369 +static int Comp_Not_Replicate(PGR_Not_Replicate_Type * nrp1,PGR_Not_Replicate_Type* nrp2);
3370 +static bool is_serial_control_query(char cmdType,char * query);
3371 +static bool is_select_into_query(char cmdType,char * query);
3372 +static int send_response_to_replication_server(const char * notice);
3373 +static bool do_not_replication_command(const char * commandTag);
3374 +static bool is_create_temp_table(char * query);
3375 +static int add_replication_server(char * hostname,char * port, char * recovery_port);
3376 +static int change_replication_server(char * hostname,char * port, char * recovery_port);
3377 +static int get_new_replication_socket( ReplicateServerInfo * base, ReplicateServerInfo * sp, int socket_type);
3378 +static char * get_hostName(char * str);
3379 +static void set_response_mode(char * mode);
3380 +static void PGR_Set_Current_Replication_Query_ID(char *id);
3381 +#ifdef CONTROL_LOCK_CONFLICT
3382 +static int wait_lock_answer(void);
3383 +static int read_trigger(char * result, int buf_size);
3384 +#endif /* CONTROL_LOCK_CONFLICT */
3385 +static int check_conf_data(void);
3386 +
3387 +static unsigned int get_next_request_id(void);
3388 +static bool is_this_query_replicated(char * id);
3389 +static int set_replication_id(char * id);
3390 +static int return_current_oid(void);
3391 +static int sync_oid(char * oid);
3392 +static bool is_concerned_with_prepared_select(char cmdType, char * query_string);
3393 +static int skip_non_blank(char * ptr, int max);
3394 +static int skip_blank(char * ptr, int max);
3395 +static int parse_message(char * query_string);
3396 +static bool is_prepared_as_select(char * query_string);
3397 +static bool is_statement_as_select(char * query_string);
3398 +
3399 +extern ssize_t secure_read(Port *, void *, size_t);
3400 +/*--------------------------------------------------------------------
3401 + * SYMBOL
3402 + *    PGR_Init_Replicate_Server_Data()
3403 + * NOTES
3404 + *    Read Configuration file and create ReplicateServerData table
3405 + * ARGS
3406 + *    void
3407 + * RETURN
3408 + *    OK: STATUS_OK
3409 + *    NG: STATUS_ERROR
3410 + *--------------------------------------------------------------------
3411 + */
3412 +int
3413 +PGR_Init_Replicate_Server_Data(void)
3414 +{
3415 +       int table_size,str_size;
3416 +       ReplicateServerInfo  *sp;
3417 +       PGR_Not_Replicate_Type * nrp;
3418 +       ConfDataType * conf;
3419 +       int rec_no,cnt;
3420 +       unsigned int ip;
3421 +       char HostName[HOSTNAME_MAX_LENGTH];
3422 +
3423 +       memset (HostName,0,sizeof(HostName));
3424 +       if (ConfData_Top == (ConfDataType *)NULL)
3425 +       {
3426 +               return STATUS_ERROR;
3427 +       }
3428 +
3429 +       /* allocate replication server information table */
3430 +       table_size = sizeof(ReplicateServerInfo) * MAX_SERVER_NUM;
3431 +       ReplicateServerShmid = shmget(IPC_PRIVATE,table_size,IPC_CREAT | IPC_EXCL | 0600);
3432 +       if (ReplicateServerShmid < 0)
3433 +       {
3434 +               return STATUS_ERROR;
3435 +       }
3436 +       ReplicateServerData = (ReplicateServerInfo *)shmat(ReplicateServerShmid,0,0);
3437 +       if (ReplicateServerData == (ReplicateServerInfo *)-1)
3438 +       {
3439 +               return STATUS_ERROR;
3440 +       }
3441 +       memset(ReplicateServerData,0,table_size);
3442 +       sp = ReplicateServerData;
3443 +
3444 +       /* allocate cluster db information table */
3445 +       ClusterDBShmid = shmget(IPC_PRIVATE,sizeof(ClusterDBInfo),IPC_CREAT | IPC_EXCL | 0600);
3446 +       if (ClusterDBShmid < 0)
3447 +       {
3448 +               return STATUS_ERROR;
3449 +       }
3450 +       ClusterDBData = (ClusterDBInfo *)shmat(ClusterDBShmid,0,0);
3451 +       if (ClusterDBData == (ClusterDBInfo *)-1)
3452 +       {
3453 +               return STATUS_ERROR;
3454 +       }
3455 +       memset(ClusterDBData,0,sizeof(ClusterDBInfo));
3456 +       PGR_Set_Cluster_Status(STATUS_REPLICATED);
3457 +
3458 +       /* allocate partial replicate table */
3459 +       table_size = sizeof(PGR_Not_Replicate_Type) * MAX_SERVER_NUM;
3460 +       PGR_Not_Replicate = malloc(table_size);
3461 +       if (PGR_Not_Replicate == (PGR_Not_Replicate_Type*)NULL)
3462 +       {
3463 +               return STATUS_ERROR;
3464 +       }
3465 +       memset(PGR_Not_Replicate, 0, table_size);
3466 +       nrp = PGR_Not_Replicate;
3467 +       cnt = 0;
3468 +       conf = ConfData_Top;
3469 +       while ((conf != (ConfDataType *)NULL) && (cnt < MAX_SERVER_NUM))
3470 +       {
3471 +               /* set replication server table */
3472 +               if (!strcmp(conf->table,REPLICATION_SERVER_INFO_TAG))
3473 +               {
3474 +                       cnt = rec_no = conf->rec_no;    /* cnt also feeds the loop limit above */
3475 +                       if ((rec_no < 0) || (rec_no >= MAX_SERVER_NUM - 1)) return STATUS_ERROR;        /* slot rec_no + 1 must exist for the DATA_END mark */
3476 +                       if (!strcmp(conf->key,HOST_NAME_TAG))
3477 +                       {
3478 +                               strncpy((sp + rec_no)->hostName,conf->value,sizeof(sp->hostName) - 1);  /* table was zeroed, so the last byte stays NUL */
3479 +                               conf = (ConfDataType *)conf->next;
3480 +                               continue;
3481 +                       }
3482 +                       if (!strcmp(conf->key,PORT_TAG))
3483 +                       {
3484 +                               (sp + rec_no)->portNumber = atoi(conf->value);
3485 +                               (sp + rec_no)->sock = -1;
3486 +                               if ((sp + rec_no)->useFlag != DATA_USE)
3487 +                               {
3488 +                                       PGR_Set_Replication_Server_Status((sp+rec_no), DATA_INIT);
3489 +                               }
3490 +                               memset((sp + rec_no + 1)->hostName,0,sizeof(sp->hostName));
3491 +                               (sp + rec_no + 1)->useFlag = DATA_END;
3492 +                               conf = (ConfDataType *)conf->next;
3493 +                               continue;
3494 +                       }
3495 +                       if (!strcmp(conf->key,RECOVERY_PORT_TAG))
3496 +                       {
3497 +                               (sp + rec_no)->recoveryPortNumber = atoi(conf->value);
3498 +                               if ((sp + rec_no)->useFlag != DATA_USE)
3499 +                               {
3500 +                                       PGR_Set_Replication_Server_Status((sp+rec_no), DATA_INIT);
3501 +                               }
3502 +                               memset((sp + rec_no + 1)->hostName,0,sizeof(sp->hostName));
3503 +                               (sp + rec_no + 1)->useFlag = DATA_END;
3504 +                               conf = (ConfDataType *)conf->next;
3505 +                               continue;
3506 +                       }
3507 +               }
3508 +               /* set part replication table */
3509 +               if (!strcmp(conf->table,NOT_REPLICATE_INFO_TAG))
3510 +               {
3511 +                       cnt = rec_no = conf->rec_no;    /* cnt also feeds the loop limit above */
3512 +                       if ((rec_no < 0) || (rec_no >= MAX_SERVER_NUM)) return STATUS_ERROR;    /* the table holds MAX_SERVER_NUM entries */
3513 +                       if (PGR_Not_Replicate_Rec_Num < rec_no +1)
3514 +                       {
3515 +                               PGR_Not_Replicate_Rec_Num = rec_no +1;
3516 +                       }
3517 +                       if (!strcmp(conf->key,DB_NAME_TAG))
3518 +                       {
3519 +                               strncpy((nrp + rec_no)->db_name,conf->value,sizeof(nrp->db_name) - 1);  /* table was zeroed, so the last byte stays NUL */
3520 +                               conf = (ConfDataType *)conf->next;
3521 +                               continue;
3522 +                       }
3523 +                       if (!strcmp(conf->key,TABLE_NAME_TAG))
3524 +                       {
3525 +                               strncpy((nrp + rec_no)->table_name,conf->value,sizeof(nrp->table_name) - 1);    /* table was zeroed, so the last byte stays NUL */
3526 +                               conf = (ConfDataType *)conf->next;
3527 +                               continue;
3528 +                       }
3529 +               }
3530 +               if (!strcmp(conf->key,HOST_NAME_TAG))   /* keys not consumed above are matched here */
3531 +               {
3532 +                       str_size = sizeof(HostName) ;
3533 +                       memset(HostName,0,str_size);
3534 +                       strncpy(HostName,conf->value,str_size-1);
3535 +               }
3536 +               else if (!strcmp(conf->key,RECOVERY_PORT_TAG))
3537 +               {
3538 +                       RecoveryPortNumber = atoi(conf->value);
3539 +               }
3540 +               else if (!strcmp(conf->key,RSYNC_PATH_TAG))
3541 +               {
3542 +                       str_size = strlen(conf->value) ;
3543 +                       RsyncPath = malloc(str_size + 1);
3544 +                       if (RsyncPath == NULL)
3545 +                       {
3546 +                               return STATUS_ERROR;
3547 +                       }
3548 +                       memset(RsyncPath,0,str_size + 1);
3549 +                       strncpy(RsyncPath,conf->value,str_size);
3550 +               }
3551 +               else if (!strcmp(conf->key,RSYNC_OPTION_TAG))
3552 +               {
3553 +                       str_size = strlen(conf->value) ;
3554 +                       RsyncOption = malloc(str_size + 1);
3555 +                       if (RsyncOption == NULL)
3556 +                       {
3557 +                               return STATUS_ERROR;
3558 +                       }
3559 +                       memset(RsyncOption,0,str_size + 1);
3560 +                       strncpy(RsyncOption,conf->value,str_size);
3561 +               }
3562 +               else if (!strcmp(conf->key,RSYNC_COMPRESS_TAG))
3563 +               {
3564 +                       if (!strcmp(conf->value, "yes"))
3565 +                               RsyncCompress = true;
3566 +                       else if (!strcmp(conf->value, "no"))
3567 +                               RsyncCompress = false;
3568 +               }
3569 +               else if (!strcmp(conf->key,PG_DUMP_PATH_TAG))
3570 +               {
3571 +                       str_size = strlen(conf->value) ;
3572 +                       PgDumpPath = malloc(str_size + 1);
3573 +                       if (PgDumpPath == NULL)
3574 +                       {
3575 +                               return STATUS_ERROR;
3576 +                       }
3577 +                       memset(PgDumpPath,0,str_size + 1);
3578 +                       strncpy(PgDumpPath,conf->value,str_size);
3579 +               }
3580 +               else if (!strcmp(conf->key,STAND_ALONE_TAG))
3581 +               {
3582 +                       PGR_Stand_Alone = (PGR_Stand_Alone_Type*)malloc(sizeof(PGR_Stand_Alone_Type));
3583 +                       if (PGR_Stand_Alone == (PGR_Stand_Alone_Type *)NULL)
3584 +                       {
3585 +                               return STATUS_ERROR;
3586 +                       }
3587 +                       PGR_Stand_Alone->is_stand_alone = false;
3588 +                       if (!strcmp(conf->value,READ_WRITE_IF_STAND_ALONE))
3589 +                       {
3590 +                               PGR_Stand_Alone->permit = PERMIT_READ_WRITE;
3591 +                       }
3592 +                       else
3593 +                       {
3594 +                               PGR_Stand_Alone->permit = PERMIT_READ_ONLY;
3595 +                       }
3596 +               }
3597 +               else if (!strcmp(conf->key,TIMEOUT_TAG))
3598 +               {
3599 +                       /* get replication timeout */
3600 +                       PGR_Replication_Timeout = PGRget_time_value(conf->value);
3601 +                       if ((PGR_Replication_Timeout < 1) || (PGR_Replication_Timeout > 3600))
3602 +                       {
3603 +                               fprintf(stderr,"%s is out of range. It should be between 1sec-1hr.\n",TIMEOUT_TAG);
3604 +                               return STATUS_ERROR;
3605 +                       }
3606 +               }
3607 +               else if (!strcmp(conf->key,LIFECHECK_TIMEOUT_TAG))
3608 +               {
3609 +                       /* get lifecheck timeout */
3610 +                       PGR_Lifecheck_Timeout = PGRget_time_value(conf->value);
3611 +                       if ((PGR_Lifecheck_Timeout < 1) || (PGR_Lifecheck_Timeout > 3600))
3612 +                       {
3613 +                               fprintf(stderr,"%s is out of range. It should be between 1sec-1hr.\n",LIFECHECK_TIMEOUT_TAG);
3614 +                               return STATUS_ERROR;
3615 +                       }
3616 +               }
3617 +               else if (!strcmp(conf->key,LIFECHECK_INTERVAL_TAG))
3618 +               {
3619 +                       /* get lifecheck interval */
3620 +                       PGR_Lifecheck_Interval = PGRget_time_value(conf->value);
3621 +                       if ((PGR_Lifecheck_Interval < 1) || (PGR_Lifecheck_Interval > 3600))
3622 +                       {
3623 +                               fprintf(stderr,"%s is out of range. It should between 1sec-1hr.\n",LIFECHECK_INTERVAL_TAG);
3624 +                               return STATUS_ERROR;
3625 +                       }
3626 +               }
3627 +               conf = (ConfDataType *)conf->next;
3628 +       }
3629 +       TransactionSock = -1;
3630 +       ReplicateCurrentTime = (ReplicateNow *)malloc(sizeof(ReplicateNow));
3631 +       if (ReplicateCurrentTime == (ReplicateNow *)NULL)
3632 +       {
3633 +               return STATUS_ERROR;
3634 +       }
3635 +       memset(ReplicateCurrentTime,0,sizeof(ReplicateNow));
3636 +
3637 +       PGRCopyData = (CopyData *)malloc(sizeof(CopyData));
3638 +       if (PGRCopyData == (CopyData *)NULL)
3639 +       {
3640 +               return STATUS_ERROR;
3641 +       }
3642 +       memset(PGRCopyData,0,sizeof(CopyData));
3643 +
3644 +       if (PGR_Not_Replicate_Rec_Num  == 0)
3645 +       {
3646 +               free(PGR_Not_Replicate);
3647 +               PGR_Not_Replicate = NULL;
3648 +       }
3649 +       else
3650 +       {
3651 +               qsort((char *)PGR_Not_Replicate,PGR_Not_Replicate_Rec_Num,sizeof(PGR_Not_Replicate_Type), (int (*)(const void*,const void*))Comp_Not_Replicate);
3652 +       }
3653 +
3654 +       PGRSelfHostName = malloc(HOSTNAME_MAX_LENGTH);
3655 +       if (PGRSelfHostName == NULL)
3656 +       {
3657 +               return STATUS_ERROR;
3658 +       }
3659 +       memset(PGRSelfHostName,0,HOSTNAME_MAX_LENGTH);
3660 +
3661 +       PGR_password = malloc(sizeof(PGR_Password_Info));
3662 +       if (PGR_password == NULL)
3663 +       {
3664 +               return STATUS_ERROR;
3665 +       }
3666 +       memset(PGR_password,0,sizeof(PGR_Password_Info));
3667 +       PGR_password->password = malloc(PASSWORD_MAX_LENGTH);
3668 +       if (PGR_password->password == NULL)
3669 +       {
3670 +               return STATUS_ERROR;
3671 +       }
3672 +       memset(PGR_password->password,0,PASSWORD_MAX_LENGTH);
3673 +
3674 +       if (HostName[0] == 0)
3675 +       {
3676 +               if (gethostname(HostName,sizeof(HostName) - 1) < 0)     /* keep the final NUL of the zeroed buffer */
3677 +               {
3678 +                       return STATUS_ERROR;
3679 +               }
3680 +       }
3681 +       ip=PGRget_ip_by_name(HostName);
3682 +       /* store the own address as dotted text, lowest-order byte of ip first */
3683 +       snprintf(PGRSelfHostName, HOSTNAME_MAX_LENGTH,
3684 +                                "%u.%u.%u.%u",
3685 +                                (ip      ) & 0xff ,
3686 +                                (ip >>  8) & 0xff ,
3687 +                                (ip >> 16) & 0xff ,
3688 +                                (ip >> 24) & 0xff );
3689 +       if (RsyncPath == NULL)
3690 +       {
3691 +               RsyncPath = strdup(DEFAULT_RSYNC);
3692 +       }
3693 +       if (PgDumpPath == NULL)
3694 +       {
3695 +               PgDumpPath = strdup(DEFAULT_PG_DUMP);
3696 +       }
3697 +
3698 +       return (check_conf_data());
3699 +}
3700 +
3701 +static int
3702 +check_conf_data(void)
3703 +{
3704 +       /* Validate the values loaded from the configuration; prints the reason to stderr and returns STATUS_ERROR on the first problem. */
3705 +       int i = 0;
3706 +       ReplicateServerInfo  *sp = ReplicateServerData;
3707 +       while ((i < MAX_SERVER_NUM) && ((sp + i)->useFlag != DATA_END))  /* never scan past the end of the table */
3708 +       {
3709 +               if (*((sp + i)->hostName) == 0)
3710 +               {
3711 +                       fprintf(stderr,"Hostname of replication server is not valid.\n");
3712 +                       return STATUS_ERROR;
3713 +               }
3714 +               if ((sp + i)->portNumber < 1024)
3715 +               {
3716 +                       fprintf(stderr,"Replication Port of replication server is not valid. It must be 1024 or larger.\n");
3717 +                       return STATUS_ERROR;
3718 +               }
3719 +               if ((sp + i)->recoveryPortNumber < 1024)
3720 +               {
3721 +                       fprintf(stderr,"RecoveryPort of replication server is not valid. It must be 1024 or larger.\n");
3722 +                       return STATUS_ERROR;
3723 +               }
3724 +               if ((sp + i)->portNumber == (sp + i)->recoveryPortNumber)
3725 +               {
3726 +                       fprintf(stderr,"Replication Port and RecoveryPort is conflicted.\n");
3727 +                       return STATUS_ERROR;
3728 +               }
3729 +               i++;
3730 +       }
3731 +       if (RecoveryPortNumber < 1024)
3732 +       {
3733 +               fprintf(stderr,"RecoveryPort of Cluster DB is not valid. It must be 1024 or larger.\n");
3734 +               return STATUS_ERROR;
3735 +       }
3736 +       if (PGR_Stand_Alone == NULL)
3737 +       {
3738 +               fprintf(stderr,"Stand Alone Mode is not specified.\n");
3739 +               return STATUS_ERROR;
3740 +       }
3741 +       if (RsyncOption == NULL)
3742 +       {
3743 +               fprintf(stderr,"Option of rsync command is not specified.\n");
3744 +               return STATUS_ERROR;
3745 +       }
3746 +       if (strlen(PGRSelfHostName) <= 0)
3747 +       {
3748 +               fprintf(stderr,"Hostname of Cluster DB is not valid.\n");
3749 +               return STATUS_ERROR;
3750 +       }
3751 +       if (PGR_Lifecheck_Timeout > PGR_Lifecheck_Interval)
3752 +       {
3753 +               fprintf(stderr,"The lifecheck timeout(%d) should be shorter than interval(%d).\n",PGR_Lifecheck_Timeout,PGR_Lifecheck_Interval);
3754 +               return STATUS_ERROR;
3755 +       }
3756 +       return STATUS_OK;
3757 + }
3758 +
3759 +/*--------------------------------------------------------------------
3760 + * SYMBOL
3761 + *    PGR_Set_Replicate_Server_Socket()
3762 + * NOTES
3763 + *    Create new socket and set ReplicateServerData table
3764 + * ARGS
3765 + *    void
3766 + * RETURN
3767 + *    OK: STATUS_OK
3768 + *    NG: STATUS_ERROR
3769 + *--------------------------------------------------------------------
3770 + */
3771 +int
3772 +PGR_Set_Replicate_Server_Socket(void)
3773 +{
3774 +       ReplicateServerInfo *server;
3775 +
3776 +       /* nothing can be connected without the server table */
3777 +       if (ReplicateServerData == NULL)
3778 +               return STATUS_ERROR;
3779 +       /* visit every entry up to the DATA_END mark; the connect result is not checked */
3780 +       for (server = ReplicateServerData; server->useFlag != DATA_END; server++)
3781 +       {
3782 +               server->sock = -1;
3783 +               PGR_Create_Socket_Connect(&server->sock, server->hostName, server->portNumber);
3784 +       }
3785 +       return STATUS_OK;
3786 +}
3787 +
3788 +/*--------------------------------------------------------------------
3789 + * SYMBOL
3790 + *    PGR_get_replicate_server_socket()
3791 + * NOTES
3792 + *    Hand back the already open TransactionSock, or connect it to sp.
3793 + *    While connecting fails, the server is marked DATA_ERR and the
3794 + *    next one offered by PGR_get_replicate_server_info() is tried.
3795 + * ARGS
3796 + *    ReplicateServerInfo * sp: replication server data (I)
3797 + *    int socket_type: socket type (I), not examined by this function
3798 + *                       -PGR_TRANSACTION_SOCKET:
3799 + *                       -PGR_QUERY_SOCKET:
3800 + * RETURN
3801 + *    OK: the connected TransactionSock
3802 + *    NG: -1 (no usable sp, or no further server to try)
3803 + *--------------------------------------------------------------------
3804 + */
3805 +int
3806 +PGR_get_replicate_server_socket ( ReplicateServerInfo * sp , int socket_type )
3807 +{
3808 +       ReplicateServerInfo * server = sp;
3809 +
3810 +       if ((server == (ReplicateServerInfo *) NULL) || (server->hostName[0] == '\0'))
3811 +               return -1;
3812 +
3813 +       /* a transaction socket that is already open is simply handed back */
3814 +       if (TransactionSock != -1)
3815 +               return TransactionSock;
3816 +
3817 +       /* keep trying servers until one accepts the connection */
3818 +       for (;;)
3819 +       {
3820 +               if (PGR_Create_Socket_Connect(&TransactionSock,server->hostName,server->portNumber) == STATUS_OK)
3821 +                       break;
3822 +
3823 +               /* connect failed: drop the socket and flag this server */
3824 +               close(TransactionSock);
3825 +               TransactionSock = -1;
3826 +               PGR_Set_Replication_Server_Status(server, DATA_ERR);
3827 +               usleep(20);
3828 +
3829 +               /* pick another server; give up when none is offered */
3830 +               server = PGR_get_replicate_server_info();
3831 +               if (server == (ReplicateServerInfo *)NULL)
3832 +                       return -1;
3833 +               PGR_Set_Replication_Server_Status(server, DATA_USE);
3834 +               usleep(10);
3835 +       }
3836 +       return TransactionSock;
3837 +}
3838 +
3839 +/*--------------------------------------------------------------------
3840 + * SYMBOL
3841 + *    close_replicate_server_socket()
3842 + * NOTES
3843 + *    Close the sockets held for a replication server and put the
3844 + *    server back into DATA_INIT state.  An open TransactionSock is
3845 + *    closed whatever socket_type is; the per-server socket sp->sock
3846 + *    is closed only for PGR_QUERY_SOCKET and merely reset to -1 for
3847 + *    PGR_TRANSACTION_SOCKET.
3848 + * ARGS
3849 + *    ReplicateServerInfo * sp: replication server data (I)
3850 + *    int socket_type: socket type (I)
3851 + *                       -PGR_TRANSACTION_SOCKET:
3852 + *                       -PGR_QUERY_SOCKET:
3853 + * RETURN
3854 + *    OK: STATUS_OK
3855 + *    NG: STATUS_ERROR (sp is NULL or has an empty host name)
3856 + *--------------------------------------------------------------------
3857 + */
3858 +static int
3859 +close_replicate_server_socket ( ReplicateServerInfo * sp , int socket_type )
3860 +{
3861 +       if ((sp == (ReplicateServerInfo *)NULL ) || (sp->hostName[0] == '\0'))
3862 +               return STATUS_ERROR;
3863 +
3864 +       /* the shared transaction socket goes away for every socket type */
3865 +       if (TransactionSock != -1)
3866 +       {
3867 +               PGR_Close_Sock(&(TransactionSock));
3868 +               TransactionSock = -1;
3869 +       }
3870 +
3871 +       if (socket_type == PGR_TRANSACTION_SOCKET)
3872 +       {
3873 +               /*
3874 +                * TransactionSock has just been closed above, so only the
3875 +                * bookkeeping is left; sp->sock is forgotten, not closed.
3876 +                */
3877 +               TransactionSock = -1;
3878 +               sp->sock = -1;
3879 +       }
3880 +       else if (socket_type == PGR_QUERY_SOCKET)
3881 +       {
3882 +               if (sp->sock != -1)
3883 +                       PGR_Close_Sock(&(sp->sock));
3884 +               sp->sock = -1;
3885 +       }
3886 +
3887 +       /* the server's status goes back to DATA_INIT */
3888 +       PGR_Set_Replication_Server_Status(sp, DATA_INIT);
3889 +       return STATUS_OK;
3890 +}
3891 +
3892 +/* Two server entries are the same when their host name, port number
3893 + * and recovery port number all match; a NULL entry never matches. */
3894 +static bool
3895 +is_same_replication_server(ReplicateServerInfo * sp1, ReplicateServerInfo * sp2 )
3896 +{
3897 +       if ((sp1 == NULL) || (sp2 == NULL))
3898 +               return false;
3899 +       if (strcmp(sp1->hostName,sp2->hostName) != 0)
3900 +               return false;
3901 +       if (sp1->portNumber != sp2->portNumber)
3902 +               return false;
3903 +       if (sp1->recoveryPortNumber != sp2->recoveryPortNumber)
3904 +               return false;
3905 +       return true;
3906 +}
3907 +
3908 +/*
3909 + * Look for another replication server after sp has failed.
3910 + *
3911 + * sp's sockets are closed, then the ReplicateServerData table is
3912 + * scanned from the entry after sp, wrapping at the DATA_END marker.
3913 + * Each candidate gets a CMD_SYS_CALL notice that carries
3914 + * PGR_CHANGE_REPLICATION_SERVER_FUNC_NO; the first one that takes the
3915 + * packet is marked DATA_USE and returned.  NULL is returned when the
3916 + * scan comes back to sp or when the arguments are unusable.
3917 + */
3918 +static ReplicateServerInfo *
3919 +search_new_replication_server ( ReplicateServerInfo * sp , int socket_type )
3920 +{
3921 +       ReplicateHeader notice;
3922 +       ReplicateServerInfo * start;
3923 +       char sql[256];
3924 +       int fd = -1;
3925 +       int failures = 0;
3926 +
3927 +       if ((ReplicateServerData == NULL) || ( sp == NULL))
3928 +               return NULL;
3929 +
3930 +       start = sp;
3931 +       close_replicate_server_socket ( sp , socket_type);
3932 +       for (sp++; is_same_replication_server(sp,start) != true; )
3933 +       {
3934 +               /* wrap around at the end-of-table marker */
3935 +               if (sp->useFlag == DATA_END)
3936 +                       sp = ReplicateServerData;
3937 +
3938 +               fd = PGR_get_replicate_server_socket( sp , socket_type);
3939 +               if (fd < 0 )
3940 +               {
3941 +                       if (is_same_replication_server(sp,start) == true)
3942 +                               return NULL;
3943 +                       sp++;
3944 +                       continue;
3945 +               }
3946 +
3947 +               /* build and send the change-server notice */
3948 +               memset(&notice, 0, sizeof(ReplicateHeader));
3949 +               memset(sql,0,sizeof(sql));
3950 +               snprintf(sql,sizeof(sql)-1,"SELECT %s(%d,%s,%d,%d)",
3951 +                               PGR_SYSTEM_COMMAND_FUNC,
3952 +                               PGR_CHANGE_REPLICATION_SERVER_FUNC_NO,
3953 +                               sp->hostName,
3954 +                               sp->portNumber,
3955 +                               sp->recoveryPortNumber);
3956 +               notice.cmdSys = CMD_SYS_CALL;
3957 +               notice.cmdSts = CMD_STS_NOTICE;
3958 +               notice.query_size = htonl(strlen(sql));
3959 +               if (send_replicate_packet(fd,&notice,sql) == STATUS_OK)
3960 +               {
3961 +                       PGR_Set_Replication_Server_Status(sp, DATA_USE);
3962 +                       return sp;
3963 +               }
3964 +
3965 +               /* send failed: retry this entry until the retries run out */
3966 +               failures++;
3967 +               close_replicate_server_socket ( sp , socket_type);
3968 +               PGR_Set_Replication_Server_Status(sp, DATA_ERR);
3969 +               if (failures > MAX_RETRY_TIMES )
3970 +               {
3971 +                       sp++;
3972 +                       failures = 0;
3973 +               }
3974 +       }
3975 +       return NULL;
3976 +}
3977 +
3978 +/*
3979 + * Copy the position-th word of query (counted from 1) into table_name.
3980 + *
3981 + * Words are separated by white space; a word also stops in front of
3982 + * ';' or '(' and scanning never moves past such a character, so every
3983 + * later word is empty.  table_name is always NUL terminated when
3984 + * STATUS_OK is returned, and is empty if query has too few words.
3985 + * The caller must provide a buffer that can hold the whole word; its
3986 + * size is not known here.
3987 + *
3988 + * Characters go through isspace() as unsigned char, and the scan is
3989 + * driven by the terminating NUL of query, so a short trailing table
3990 + * name (e.g. "DELETE FROM t") is not missed.
3991 + * Returns STATUS_ERROR for a NULL argument or position < 1.
3992 + */
3993 +static int
3994 +get_table_name(char * table_name, char * query, int position )
3995 +{
3996 +       char * src;
3997 +       char * dst;
3998 +       int word;
3999 +
4000 +       if ((table_name == NULL) || (query == NULL) || (position < 1))
4001 +               return STATUS_ERROR;
4002 +
4003 +       src = query;
4004 +       dst = table_name;
4005 +       *dst = '\0';
4006 +       for (word = 1; word <= position; word++)
4007 +       {
4008 +               /* skip the blanks in front of the word */
4009 +               while (isspace((unsigned char) *src))
4010 +                       src++;
4011 +               /* a word ends at white space, ';', '(' or the end of the query */
4012 +               while ((*src != '\0') && (! isspace((unsigned char) *src)) &&
4013 +                          (*src != ';') && (*src != '('))
4014 +               {
4015 +                       if (word == position)
4016 +                               *dst++ = *src;
4017 +                       src++;
4018 +               }
4019 +       }
4020 +       *dst = '\0';
4021 +       return STATUS_OK;
4022 +}
4023 +
4024 +/*
4025 + * Tell whether the table a statement works on is listed in the
4026 + * PGR_Not_Replicate table.
4027 + *
4028 + * Returns false when PGR_Not_Replicate_Rec_Num is not positive or cmdType
4029 + * is not INSERT, UPDATE, DELETE or COPY, and true for a NULL query_string.
4030 + * Otherwise the current database name and the table name taken from the
4031 + * statement are looked up in PGR_Not_Replicate with bsearch(); true means
4032 + * an entry was found.  query_len is not used.
4033 + */
4034 +static bool
4035 +is_not_replication_query(char * query_string, int query_len, char cmdType)
4036 +{
4037 +       PGR_Not_Replicate_Type lookup;
4038 +       int name_position;
4039 +
4040 +       if (PGR_Not_Replicate_Rec_Num <= 0)
4041 +               return false;
4042 +       if (query_string == NULL)
4043 +               return true;
4044 +
4045 +       memset(&lookup,0,sizeof(PGR_Not_Replicate_Type));
4046 +       strncpy(lookup.db_name ,(char *)(MyProcPort->database_name),sizeof(lookup.db_name)-1);
4047 +
4048 +       /* which word of the statement holds the table name */
4049 +       if ((cmdType == CMD_TYPE_INSERT) || (cmdType == CMD_TYPE_DELETE))
4050 +               name_position = 3;
4051 +       else if ((cmdType == CMD_TYPE_UPDATE) || (cmdType == CMD_TYPE_COPY))
4052 +               name_position = 2;
4053 +       else
4054 +               return false;
4055 +       get_table_name(lookup.table_name,query_string,name_position);
4056 +
4057 +       return (bsearch((void*)&lookup,(void*)PGR_Not_Replicate,PGR_Not_Replicate_Rec_Num,
4058 +                               sizeof(PGR_Not_Replicate_Type),
4059 +                               (int (*)(const void*,const void*))Comp_Not_Replicate) != NULL);
4060 +}
4061 +
4062 +/*--------------------------------------------------------------------
4063 + * SYMBOL
4064 + *    PGR_Send_Replicate_Command()
4065 + * NOTES
4066 + *    send a query to the replication server (a CMD_SYS_PREREPLICATE packet, then a CMD_SYS_REPLICATE packet) and hand back the answer; another server is searched when sending or receiving fails
4067 + * ARGS
4068 + *    char * query_string: query strings (I), int query_len: its length (I)
4069 + *    char cmdSts: command status (I), selects transaction or query socket
4070 + *    char cmdType: command type (I)
4071 + * RETURN
4072 + *    OK: result (malloc'ed buffer; holds PGR_REPLICATION_ABORT_MSG when a transaction loses all servers)
4073 + *    NG: NULL
4074 + *--------------------------------------------------------------------
4075 + */
4076 +char *
4077 +PGR_Send_Replicate_Command(char * query_string, int query_len, char cmdSts ,char cmdType)
4078 +{
4079 +       int sock = -1;
4080 +       int cnt = 0;
4081 +       ReplicateHeader header;
4082 +       char * serverName = NULL;
4083 +       int portNumber=0;
4084 +       char * result = NULL;
4085 +       ReplicateServerInfo * sp = NULL;
4086 +       ReplicateServerInfo * base = NULL;
4087 +       int socket_type = 0;
4088 +       char argv[ PGR_CMD_ARG_NUM ][256];
4089 +       int argc = 0;
4090 +       int func_no = 0;
4091 +       int check_flag =0;
4092 +       bool in_transaction = false;
4093 +
4094 +
4095 +       /*
4096 +        * check query string
4097 +        */
4098 +       if ((query_string == NULL)  ||
4099 +               (query_len < 0))
4100 +       {
4101 +               return NULL;
4102 +       }
4103 +       /* check not replication query */
4104 +       if (is_not_replication_query(query_string, query_len, cmdType) == true)
4105 +       {
4106 +               PGR_Copy_Data_Need_Replicate = false;
4107 +               return NULL;
4108 +       }
4109 +       /* transaction-like statements travel on the transaction socket, everything else on the query socket */
4110 +       if ((cmdSts == CMD_STS_TRANSACTION ) ||
4111 +               (cmdSts == CMD_STS_SET_SESSION_AUTHORIZATION ) ||
4112 +               (cmdSts == CMD_STS_TEMP_TABLE ))
4113 +       {
4114 +               socket_type = PGR_TRANSACTION_SOCKET ;
4115 +       }
4116 +       else
4117 +       {
4118 +               socket_type = PGR_QUERY_SOCKET ;
4119 +       }
4120 +       /* BEGIN and ROLLBACK themselves do not count as being inside a transaction */
4121 +       if(cmdSts==CMD_STS_TRANSACTION
4122 +          && (cmdType!=CMD_TYPE_BEGIN && cmdType!=CMD_TYPE_ROLLBACK))
4123 +       {
4124 +               in_transaction = true;
4125 +       }
4126 +       /* choose the replication server and get a socket to it */
4127 +       sp = PGR_get_replicate_server_info();
4128 +       if (sp == NULL)
4129 +       {
4130 +               if (Debug_pretty_print)
4131 +                       elog(DEBUG1,"PGR_get_replicate_server_info get error");
4132 +               return NULL;
4133 +       }
4134 +       sock = PGR_get_replicate_server_socket( sp , socket_type);
4135 +       if (sock < 0)
4136 +       {
4137 +               if (Debug_pretty_print)
4138 +                       elog(DEBUG1,"PGR_get_replicate_server_socket fail");
4139 +               return NULL;
4140 +       }
4141 +       result = malloc(PGR_MESSAGE_BUFSIZE + 4);
4142 +       if (result == NULL)
4143 +       {
4144 +               return NULL;
4145 +       }
4146 +
4147 +       serverName = sp->hostName;
4148 +       portNumber = (int)sp->portNumber;
4149 +       memset(&header,0,sizeof(ReplicateHeader));
4150 +       /* fill in the packet header */
4151 +       header.cmdSts = cmdSts;
4152 +       header.cmdType = cmdType;
4153 +       header.port = htons(PostPortNumber);
4154 +       header.pid = htons(getpid());
4155 +       header.query_size = htonl(query_len);
4156 +       strncpy(header.dbName ,(char *)(MyProcPort->database_name),sizeof(header.dbName)-1);
4157 +       strncpy(header.userName , (char *)(MyProcPort->user_name),sizeof(header.userName)-1);
4158 +       strncpy(header.password , PGR_password->password, PASSWORD_MAX_LENGTH );
4159 +       memcpy(header.md5Salt ,MyProcPort->md5Salt, sizeof(header.md5Salt));
4160 +       memcpy(header.cryptSalt ,MyProcPort->cryptSalt, sizeof(header.cryptSalt));
4161 +       header.request_id = htonl(get_next_request_id());
4162 +       header.rlog = 0;
4163 +
4164 +       if (PGRSelfHostName != NULL)
4165 +       {
4166 +               strncpy(header.from_host, PGRSelfHostName, HOSTNAME_MAX_LENGTH);
4167 +       }
4168 +
4169 +       base = sp;
4170 +       PGR_Sock_To_Replication_Server = sock;
4171 +
4172 +retry_send_prereplicate_packet:
4173 +       /* phase 1: announce the query with a PREREPLICATE packet */
4174 +       memset(result,0,PGR_MESSAGE_BUFSIZE + 4);
4175 +       cnt = 0;
4176 +       header.cmdSys=CMD_SYS_PREREPLICATE;
4177 +
4178 +       while (send_replicate_packet(sock,&header,query_string) != STATUS_OK)
4179 +       {
4180 +               cnt++;
4181 +               if (cnt >= MAX_RETRY_TIMES )
4182 +               {
4183 +                       sock = get_new_replication_socket( base, sp, socket_type);
4184 +                       if (sock < 0)
4185 +                       {
4186 +                               if (Debug_pretty_print)
4187 +                                       elog(DEBUG1,"all replication servers may be down");
4188 +                               PGR_Stand_Alone->is_stand_alone = true;
4189 +                               if (cmdSts == CMD_STS_TRANSACTION )
4190 +                               {
4191 +                                       strcpy(result,PGR_REPLICATION_ABORT_MSG);
4192 +                                       return result;
4193 +                               }
4194 +                               free(result);
4195 +                               result = NULL;
4196 +                               return NULL;
4197 +
4198 +                       }
4199 +                       if(in_transaction)
4200 +                       {
4201 +                               free(result);   /* elog(ERROR) does not return: release the buffer first */
4202 +                               elog(ERROR,"replicate server down during replicating transaction. aborted.");
4203 +                               return NULL;    /* not reached */
4204 +                       }
4205 +                       PGR_Sock_To_Replication_Server = sock;
4206 +                       cnt = 0;
4207 +               }
4208 +       }
4209 +
4210 +       memset(result,0,PGR_MESSAGE_BUFSIZE);
4211 +       if (PGR_recv_replicate_result(sock,result,0) < 0)
4212 +       {
4213 +               /* no answer: look for another replication server */
4214 +               sock = get_new_replication_socket( base, sp, socket_type);
4215 +               if (sock < 0)
4216 +               {
4217 +                       if (Debug_pretty_print)
4218 +                               elog(DEBUG1,"all replication servers may be down");
4219 +                       PGR_Stand_Alone->is_stand_alone = true;
4220 +
4221 +                       if (cmdSts == CMD_STS_TRANSACTION )
4222 +                       {
4223 +                               strcpy(result,PGR_REPLICATION_ABORT_MSG);
4224 +                               return result;
4225 +                       }
4226 +                       if(result!=NULL) {
4227 +                               free(result);
4228 +                               result = NULL;
4229 +                       }
4230 +                       return NULL;
4231 +               }
4232 +               PGR_Sock_To_Replication_Server = sock;
4233 +               /* replication server should be down */
4234 +
4235 +               if(in_transaction)
4236 +               {
4237 +                       free(result);   /* elog(ERROR) does not return: release the buffer first */
4238 +                       elog(ERROR,"replicate server down during replicating transaction. aborted.");
4239 +                       return NULL;    /* not reached */
4240 +               }
4241 +
4242 +               goto retry_send_prereplicate_packet;
4243 +       }
4244 +
4245 +
4246 +       argc = set_command_args(argv,result);
4247 +       func_no = (argc >= 1) ? atoi(argv[0]) : 0;      /* an answer without arguments is handled like function number 0 */
4248 +       if(func_no==0) {
4249 +               /* this server is not primary replicate server*/
4250 +               sock=-1;        /* makes the send loop fail, which ends in the search for another server */
4251 +               goto retry_send_prereplicate_packet;
4252 +       }
4253 +retry_send_replicate_packet:
4254 +       /* phase 2: send the query itself as a REPLICATE packet */
4255 +       memset(result,0,PGR_MESSAGE_BUFSIZE + 4);
4256 +       cnt = 0;
4257 +       header.cmdSys = CMD_SYS_REPLICATE;
4258 +       while (send_replicate_packet(sock,&header,query_string) != STATUS_OK)
4259 +       {
4260 +               if (cnt > MAX_RETRY_TIMES )
4261 +               {
4262 +                       sock = get_new_replication_socket( base, sp, socket_type);
4263 +                       if (sock < 0)
4264 +                       {
4265 +                               if (Debug_pretty_print)
4266 +                                 elog(DEBUG1,"all replication servers may be down");
4267 +                               PGR_Stand_Alone->is_stand_alone = true;
4268 +                               if (cmdSts == CMD_STS_TRANSACTION )
4269 +                               {
4270 +                                       strcpy(result,PGR_REPLICATION_ABORT_MSG);
4271 +                                       return result;
4272 +                               }
4273 +                               free(result);
4274 +                               result = NULL;
4275 +                               return NULL;
4276 +
4277 +                       }
4278 +                       PGR_Sock_To_Replication_Server = sock;
4279 +                       header.rlog = CONNECTION_SUSPENDED_TYPE;
4280 +                       cnt = 0;
4281 +               }
4282 +               cnt ++;
4283 +       }
4284 +
4285 +       memset(result,0,PGR_MESSAGE_BUFSIZE);
4286 +       if (PGR_recv_replicate_result(sock,result,0) < 0)
4287 +       {
4288 +               /* replication server should be down */
4289 +               sock = get_new_replication_socket( base, sp, socket_type);
4290 +               if (sock < 0)
4291 +               {
4292 +                       if (Debug_pretty_print)
4293 +                               elog(DEBUG1,"all replication servers may be down");
4294 +                       PGR_Stand_Alone->is_stand_alone = true;
4295 +
4296 +                       if (cmdSts == CMD_STS_TRANSACTION )
4297 +                       {
4298 +                               strcpy(result,PGR_REPLICATION_ABORT_MSG);
4299 +                               return result;
4300 +                       }
4301 +                       if(result!=NULL) {
4302 +                               free(result);
4303 +                               result = NULL;
4304 +                       }
4305 +                       return NULL;
4306 +               }
4307 +               PGR_Sock_To_Replication_Server = sock;
4308 +               header.rlog = CONNECTION_SUSPENDED_TYPE;
4309 +
4310 +               goto retry_send_replicate_packet;
4311 +       }
4312 +       /* act on the function number that leads the server's answer */
4313 +       argc = set_command_args(argv,result);
4314 +       if (argc >= 1)
4315 +       {
4316 +               func_no = atoi(argv[0]);
4317 +               if (func_no == PGR_SET_CURRENT_TIME_FUNC_NO)
4318 +               {
4319 +                       if(! in_transaction)
4320 +                               PGR_Set_Current_Time(argv[1],argv[2]);
4321 +                       set_replication_id(argv[3]);
4322 +                       set_response_mode(argv[4]);
4323 +                       PGR_Set_Current_Replication_Query_ID(argv[5]);
4324 +               }
4325 +               else if (func_no == PGR_NOTICE_DEADLOCK_DETECTION_FUNC_NO)
4326 +               {
4327 +                       memset(result,0,PGR_MESSAGE_BUFSIZE);
4328 +                       strcpy(result,PGR_DEADLOCK_DETECTION_MSG);
4329 +               }
4330 +               else if (func_no == PGR_SET_CURRENT_REPLICATION_QUERY_ID_NO)
4331 +               {
4332 +                       PGR_Set_Current_Replication_Query_ID(argv[1]);
4333 +               }
4334 +               else if (func_no == PGR_QUERY_CONFIRM_ANSWER_FUNC_NO)
4335 +               {
4336 +                       check_flag = atoi(argv[1]);
4337 +                       if (check_flag == PGR_ALREADY_COMMITTED )
4338 +                       {
4339 +                               if(! in_transaction)
4340 +                                       PGR_Set_Current_Time(argv[2],argv[3]);
4341 +                               set_replication_id(argv[4]);
4342 +                       }
4343 +                       else
4344 +                       {
4345 +                               if(! in_transaction)
4346 +                                       PGR_Set_Current_Time(argv[1],argv[2]);
4347 +                               set_replication_id(argv[3]);
4348 +                               /* this query is not replicated */
4349 +                               /*
4350 +                               free(result);
4351 +                               return NULL;
4352 +                               */
4353 +                       }
4354 +               }
4355 +       }
4356 +       return result;
4357 +}
4358 +
4359 +/* Return the replicate id currently stored in ReplicationLog_Info. */
4360 +uint32_t PGRget_replication_id(void)
4361 +{
4362 +       return ReplicationLog_Info.PGR_Replicate_ID;
4363 +}
4364 +
4365 +/*
4366 + * Record the replication id received as the decimal string id.
4367 + *
4368 + * The id is parsed with strtoul(): it is an unsigned 32 bit counter and
4369 + * atol() cannot represent its upper half where long has 32 bits.  A
4370 + * value of 0 leaves everything untouched.  Otherwise the id is stored
4371 + * in ReplicationLog_Info and, when a current replication server is
4372 + * known, in that server's entry too.
4373 + * Returns STATUS_ERROR for a NULL id, STATUS_OK in every other case.
4374 + */
4375 +static int
4376 +set_replication_id(char * id)
4377 +{
4378 +       uint32_t rid;
4379 +       uint32_t saved_id;
4380 +
4381 +       if (id == NULL)
4382 +               return STATUS_ERROR;
4383 +
4384 +       rid = (uint32_t) strtoul(id, NULL, 10);
4385 +       if (rid == 0)
4386 +               return STATUS_OK;
4387 +
4388 +       /* set replicate id in this process */
4389 +       needToUpdateReplicateIdOnNextQueryIsDone = true;
4390 +       ReplicationLog_Info.PGR_Replicate_ID = rid;
4391 +
4392 +       if (CurrentReplicateServer == NULL)
4393 +               PGR_get_replicate_server_info();
4394 +       if (CurrentReplicateServer != NULL)
4395 +       {
4396 +               /* set replicate id in this system */
4397 +               saved_id = CurrentReplicateServer->replicate_id;
4398 +               elog(DEBUG1, "replication id set from %u to %u", saved_id, rid);
4399 +               CurrentReplicateServer->replicate_id = rid;
4400 +       }
4401 +       return STATUS_OK;
4402 +}
4403 +
4404 +
4405 +/*
4406 + * Advance ReplicationLog_Info.PGR_Request_ID by one and return the new
4407 + * value.  The counter restarts from 0 instead of reaching
4408 + * PGR_MAX_COUNTER.
4409 + */
4410 +static unsigned int
4411 +get_next_request_id(void)
4412 +{
4413 +       if (ReplicationLog_Info.PGR_Request_ID +1 >= PGR_MAX_COUNTER)
4414 +               ReplicationLog_Info.PGR_Request_ID = 0;
4415 +       else
4416 +               ReplicationLog_Info.PGR_Request_ID ++;
4417 +       return ReplicationLog_Info.PGR_Request_ID ;
4418 +}
4419 +
4420 +/*
4421 + * Return true when the replication id in the decimal string id logically
4422 + * precedes the id known here; false for a NULL id or without a server entry.
4423 + */
4424 +static bool
4425 +is_this_query_replicated(char * id)
4426 +{
4427 +       uint32_t replicate_id;
4428 +       uint32_t saved_id;
4429 +       int32_t diff;
4430 +       ReplicateServerInfo * server = NULL;
4431 +
4432 +       if (id == NULL)
4433 +               return false;
4434 +
4435 +       /* unsigned 32 bit id: atol() would clamp large values where long has 32 bits */
4436 +       replicate_id = (uint32_t) strtoul(id, NULL, 10);
4437 +       elog(DEBUG1, "check for replication id , input=%u", replicate_id);
4438 +
4439 +       if (CurrentReplicateServer == NULL)
4440 +               PGR_get_replicate_server_info();
4441 +
4442 +       /* prefer the current server, fall back to the last one */
4443 +       if (CurrentReplicateServer != NULL)
4444 +               server = CurrentReplicateServer;
4445 +       else if (LastReplicateServer != NULL)
4446 +               server = LastReplicateServer;
4447 +
4448 +       if (server == NULL)
4449 +       {
4450 +               elog(DEBUG1, "check for replication id check failed. no replication server");
4451 +               return false;
4452 +       }
4453 +
4454 +       /* use the larger of the server entry's id and this process's id */
4455 +       saved_id = server->replicate_id;
4456 +       if (saved_id < ReplicationLog_Info.PGR_Replicate_ID)
4457 +               saved_id = ReplicationLog_Info.PGR_Replicate_ID;
4458 +       elog(DEBUG1, "check for replication id , now=%u", saved_id);
4459 +
4460 +       /*
4461 +        * check replicate_id < saved_id logically
4462 +        *
4463 +        * see also:
4464 +        *  backend/transam/transam.c#TransactionIdPrecedes
4465 +        */
4466 +       diff = (int32) (saved_id-replicate_id);
4467 +       return (diff > 0);
4468 +}
4469 +
4470 +
4471 +/*
4472 + * Give up on server sp (closed and marked DATA_ERR), search a replacement
4473 + * starting from base and return a socket to it; -1 if none is found.
4474 + */
4475 +static int
4476 +get_new_replication_socket( ReplicateServerInfo * base, ReplicateServerInfo * sp, int socket_type)
4477 +{
4478 +       ReplicateServerInfo * next;
4479 +
4480 +       if (( base == NULL) || ( sp == NULL))
4481 +               return -1;
4482 +
4483 +       close_replicate_server_socket ( sp , socket_type);
4484 +       PGR_Set_Replication_Server_Status(sp, DATA_ERR);
4485 +       next = search_new_replication_server(base, socket_type);
4486 +       if (next != NULL)
4487 +               return PGR_get_replicate_server_socket( next , socket_type);
4488 +
4489 +       if (Debug_pretty_print)
4490 +               elog(DEBUG1,"all replication servers may be down");
4491 +       PGR_Stand_Alone->is_stand_alone = true;
4492 +       return -1;
4493 +}
4494 +
4495 +
4496 +/*
4497 + * Wait until sock is readable and read the answer into result.  The wait
4498 + * lasts user_timeout seconds, or PGR_Replication_Timeout when that is 0.
4499 + * Returns what recv_message() returns; -1 on time-out, error or NULL result.
4500 + */
4501 +int
4502 +PGR_recv_replicate_result(int sock,char * result,int user_timeout)
4503 +{
4504 +       fd_set      rmask;
4505 +       struct timeval timeout;
4506 +       int rtn;
4507 +
4508 +       if (result == NULL)
4509 +               return -1;
4510 +
4511 +       for (;;)
4512 +       {
4513 +               /* select() may change its arguments, so set them up on every pass */
4514 +               timeout.tv_sec = (user_timeout == 0) ? PGR_Replication_Timeout : user_timeout;
4515 +               timeout.tv_usec = 0;
4516 +               FD_ZERO(&rmask);
4517 +               FD_SET(sock,&rmask);
4518 +
4519 +               rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
4520 +               if (rtn < 0)
4521 +               {
4522 +                       /* interrupted by a signal: wait again; any other error is final */
4523 +                       if (errno == EINTR)
4524 +                               continue;
4525 +                       return -1;
4526 +               }
4527 +               /* 0 means time-out; errno is not set then and must not be consulted */
4528 +               if (rtn == 0)
4529 +                       return -1;
4530 +
4531 +               if (FD_ISSET(sock, &rmask))
4532 +                       return (recv_message(sock, result,0));
4533 +       }
4534 +       return -1;
4535 +}
4536 +
4537 +/*
4538 + * Read from sock until exactly PGR_MESSAGE_BUFSIZE bytes are stored in buf.
4539 + * Returns that byte count, or -1 on EOF or a recv() error other than EINTR/EAGAIN.
4540 + */
4541 +static int
4542 +recv_message(int sock,char * buf,int flag)
4543 +{
4544 +       int received = 0;
4545 +       int n;
4546 +
4547 +       for (;;)
4548 +       {
4549 +               n = recv(sock,buf + received ,PGR_MESSAGE_BUFSIZE - received, flag);
4550 +               if (n > 0)
4551 +               {
4552 +                       received += n;
4553 +                       if (received == PGR_MESSAGE_BUFSIZE)
4554 +                               return received;
4555 +                       continue;
4556 +               }
4557 +               if (n == 0)
4558 +               {
4559 +                       elog(DEBUG1, "recv_message():unexpected EOF");
4560 +                       return -1;
4561 +               }
4562 +               if ((errno != EINTR) && (errno != EAGAIN))
4563 +               {
4564 +                       elog(DEBUG1, "recv_message():recv failed");
4565 +                       return -1;
4566 +               }
4567 +       }
4568 +       return -1;
4569 +}
4570 +
4571 +/*--------------------------------------------------------------------
4572 + * SYMBOL
4573 + *    send_replicate_packet()
4574 + * NOTES
4575 + *    Send a replication header followed by the query text.
4576 + *    Both are copied into one temporary buffer which is written
4577 + *    with send() as soon as select() reports the socket writable
4578 + *    (each wait lasts PGR_Replication_Timeout seconds and is repeated
4579 + *    on time-out).  The buffer is released on every exit path.
4580 + *    Note that query_size + 1 bytes are read from query_string.
4581 + * ARGS
4582 + *    int sock: socket to write to (I)
4583 + *    ReplicateHeader * header: packet header (I); its query_size
4584 + *                       (network byte order) gives the query length
4585 + *    char * query_string: query text (I), may be NULL
4586 + * RETURN
4587 + *    OK: STATUS_OK (everything was sent)
4588 + *    NG: STATUS_ERROR (bad argument, no memory, select()/send() error)
4589 + *--------------------------------------------------------------------
4590 + */
4591 +static int
4592 +send_replicate_packet(int sock,ReplicateHeader * header, char * query_string)
4593 +{
4594 +       int s = 0;
4595 +       char * buf = NULL;
4596 +       int send_size = 0;
4597 +       int buf_size = 0;
4598 +       int header_size = 0;
4599 +       int rtn = 0;
4600 +       int status = STATUS_ERROR;
4601 +       fd_set      wmask;
4602 +       struct timeval timeout;
4603 +       int query_size = 0;
4604 +
4605 +       /* check parameter */
4606 +       if ((sock < 0) || (header == NULL))
4607 +               return STATUS_ERROR;
4608 +
4609 +       /* header + query in one zeroed buffer; 4 spare bytes are allocated but not sent */
4610 +       query_size = ntohl(header->query_size);
4611 +       header_size = sizeof(ReplicateHeader);
4612 +       buf_size = header_size + query_size + 4;
4613 +       buf = malloc(buf_size);
4614 +       if (buf == NULL)
4615 +               return STATUS_ERROR;
4616 +       memset(buf,0,buf_size);
4617 +       buf_size -= 4;
4618 +       memcpy(buf,header,header_size);
4619 +       if (query_string != NULL)
4620 +               memcpy((char *)(buf+header_size),query_string,query_size+1);
4621 +
4622 +       /* write until the whole packet is out or an error stops the loop */
4623 +       for (;;)
4624 +       {
4625 +               timeout.tv_sec = PGR_Replication_Timeout;
4626 +               timeout.tv_usec = 0;
4627 +               FD_ZERO(&wmask);
4628 +               FD_SET(sock,&wmask);
4629 +               rtn = select(sock+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
4630 +               if (rtn < 0)
4631 +               {
4632 +                       if (errno == EINTR)
4633 +                               continue;
4634 +                       elog(DEBUG1, "send_replicate_packet():select() failed");
4635 +                       break;
4636 +               }
4637 +               if ((rtn == 0) || !FD_ISSET(sock, &wmask))
4638 +                       continue;
4639 +
4640 +               s = send(sock,buf + send_size,buf_size - send_size ,0);
4641 +               if (s < 0)
4642 +               {
4643 +                       if (errno == EINTR || errno == EAGAIN)
4644 +                               continue;
4645 +                       elog(DEBUG1, "send_replicate_packet():send error");
4646 +                       /* EPIPE || ENCONNREFUSED || ENSOCK || EHOSTUNREACH */
4647 +                       break;
4648 +               }
4649 +               if (s == 0)
4650 +               {
4651 +                       elog(DEBUG1, "send_replicate_packet():unexpected EOF");
4652 +                       break;
4653 +               }
4654 +
4655 +               send_size += s;
4656 +               if (send_size == buf_size)
4657 +               {
4658 +                       status = STATUS_OK;
4659 +                       break;
4660 +               }
4661 +       }
4662 +
4663 +       /* single exit: the buffer is released whatever ended the loop */
4664 +       free(buf);
4665 +       return status;
4666 +}
4667 +
4668 +bool
4669 +PGR_Is_Replicated_Command(char * query)
4670 +{
4671 +       bool is_system_call = PGR_Is_System_Command(query); /* the sole criterion */
4672 +       return is_system_call;
4673 +}
4674 +
4675 +int
4676 +Xlog_Check_Replicate(int operation)
4677 +{
4678 +       /* while the cluster status is STATUS_RECOVERY nothing is forwarded */
4679 +       if (PGR_Get_Cluster_Status() == STATUS_RECOVERY)
4680 +               return STATUS_OK;
4681 +       switch (operation)
4682 +       {
4683 +               case CMD_UTILITY:
4684 +               case CMD_INSERT:
4685 +               case CMD_UPDATE:
4686 +               case CMD_DELETE:
4687 +                       return (PGR_Replicate_Function_Call());
4688 +               default:
4689 +                       return STATUS_OK;
4690 +       }
4691 +}
4692 +
4693 +int 
4694 +PGR_Replicate_Function_Call(void)
4695 +{
4696 +       char *reply = NULL;
4697 +       int rc = STATUS_OK;
4698 +
4699 +       /* nothing to do in recovery or while PGR_Stand_Alone is not set */
4700 +       if (PGR_Get_Cluster_Status() == STATUS_RECOVERY)
4701 +               return STATUS_OK;
4702 +       if (PGR_Stand_Alone == NULL)
4703 +               return STATUS_OK;
4704 +
4705 +       /* no query was saved for sending: report success */
4706 +       if (Query_String == NULL)
4707 +               return rc;
4708 +
4709 +       /* a read-only stand-alone node drops the saved query and fails */
4710 +       if ((PGR_Is_Stand_Alone() == true) &&
4711 +               (PGR_Stand_Alone->permit == PERMIT_READ_ONLY))
4712 +       {
4713 +               Query_String = NULL;
4714 +               return STATUS_ERROR;
4715 +       }
4716 +
4717 +       PGR_Need_Notice = true;
4718 +       PGR_Check_Lock.check_lock_conflict = true;
4719 +       reply = PGR_Send_Replicate_Command(Query_String,strlen(Query_String), CMD_STS_QUERY,CMD_TYPE_SELECT);
4720 +       if (reply == NULL)
4721 +       {
4722 +               rc = STATUS_ERROR;
4723 +       }
4724 +       else
4725 +       {
4726 +               PGR_Reload_Start_Time();
4727 +               /* only a reply starting with the deadlock message changes rc */
4728 +               if (strncmp(reply,PGR_DEADLOCK_DETECTION_MSG,strlen(PGR_DEADLOCK_DETECTION_MSG)) == 0)
4729 +                       rc = STATUS_DEADLOCK_DETECT;
4730 +               free(reply);
4731 +       }
4732 +       Query_String = NULL;
4733 +       return rc;
4734 +}
4735 +
4736 +/*
4737 + * PGR_delete_shm()
4738 + *   Detach and remove the replication shared memory segments, close the
4739 + *   transaction socket and free the process-local replication objects.
4740 + *   Everything released is reset (NULL / -1) so a second call is harmless.
4741 + */
4742 +void
4743 +PGR_delete_shm(void)
4744 +{
4745 +       if (ReplicateServerData != NULL)
4746 +       {
4747 +               shmdt(ReplicateServerData);
4748 +               ReplicateServerData = NULL;
4749 +               shmctl(ReplicateServerShmid,IPC_RMID,(struct shmid_ds *)NULL);
4750 +       }
4751 +       if (ClusterDBData != NULL)
4752 +       {
4753 +               shmdt(ClusterDBData);
4754 +               ClusterDBData = NULL;
4755 +               shmctl(ClusterDBShmid,IPC_RMID,(struct shmid_ds *)NULL);
4756 +       }
4757 +
4758 +       if (TransactionSock != -1)
4759 +       {
4760 +               close(TransactionSock);
4761 +               /* forget the descriptor so it is never closed a second time */
4762 +               TransactionSock = -1;
4763 +       }
4764 +
4765 +       if (RsyncPath != NULL)
4766 +       {
4767 +               free(RsyncPath);
4768 +               RsyncPath = NULL;
4769 +       }
4770 +       if (RsyncOption != NULL)
4771 +       {
4772 +               free(RsyncOption);
4773 +               RsyncOption = NULL;
4774 +       }
4775 +       if (ReplicateCurrentTime != NULL)
4776 +       {
4777 +               free(ReplicateCurrentTime);
4778 +               ReplicateCurrentTime = NULL;
4779 +       }
4780 +       if (PGRCopyData != NULL)
4781 +       {
4782 +               free (PGRCopyData);
4783 +               PGRCopyData = NULL;
4784 +       }
4785 +       if (PGR_Stand_Alone != NULL)
4786 +       {
4787 +               free(PGR_Stand_Alone);
4788 +               PGR_Stand_Alone = NULL;
4789 +       }
4790 +       if (PGR_Not_Replicate != NULL)
4791 +       {
4792 +               free(PGR_Not_Replicate);
4793 +               PGR_Not_Replicate = NULL;
4794 +       }
4795 +       if (PGRSelfHostName != NULL)
4796 +       {
4797 +               free(PGRSelfHostName);
4798 +               PGRSelfHostName = NULL;
4799 +       }
4800 +       if (PGR_password != NULL)
4801 +       {
4802 +               if (PGR_password->password != NULL)
4803 +                       free(PGR_password->password);   /* nested allocation first */
4804 +               free(PGR_password);
4805 +               PGR_password = NULL;
4806 +       }
4807 +}
4808 +
4809 +/*
4810 + * PGR_get_replicate_server_info()
4811 + *   Return the replication server to talk to and record it in
4812 + *   CurrentReplicateServer (the previous one, if any, is kept in
4813 + *   LastReplicateServer).
4814 + *
4815 + *   The entry already flagged DATA_USE is preferred.  Otherwise the first
4816 + *   entry not flagged DATA_ERR is promoted to DATA_USE.  When no such
4817 + *   entry exists the node is marked stand-alone and NULL is returned.
4818 + *   NULL is also returned when ReplicateServerData is not set.
4819 + */
4820 +ReplicateServerInfo * 
4821 +PGR_get_replicate_server_info(void)
4822 +{
4823 +       ReplicateServerInfo * sp;
4824 +       bool promote = false;   /* true when sp still has to become DATA_USE */
4825 +
4826 +       if (ReplicateServerData == NULL)
4827 +               return (ReplicateServerInfo *)NULL;
4828 +       /* check current using replication server */
4829 +       sp = PGR_check_replicate_server_info();
4830 +       if (sp == NULL)
4831 +       {
4832 +               /* none in use; an entry may still be in its initial status */
4833 +               for (sp = ReplicateServerData; sp->useFlag != DATA_END; sp++)
4834 +               {
4835 +                       if (sp->useFlag != DATA_ERR)
4836 +                       {
4837 +                               promote = true;
4838 +                               break;
4839 +                       }
4840 +               }
4841 +               if (!promote)
4842 +               {
4843 +                       sp = NULL;
4844 +                       /* PGR_Stand_Alone may be unset: never dereference NULL */
4845 +                       if (PGR_Stand_Alone != NULL)
4846 +                               PGR_Stand_Alone->is_stand_alone = true;
4847 +               }
4848 +       }
4849 +
4850 +       /* remember the previous server before switching to the new one */
4851 +       if (CurrentReplicateServer != NULL)
4852 +               LastReplicateServer = CurrentReplicateServer;
4853 +       CurrentReplicateServer = sp;
4854 +       if (promote)
4855 +               PGR_Set_Replication_Server_Status(sp, DATA_USE);
4856 +       return sp;
4857 +}
4858 +
4859 +ReplicateServerInfo * 
4860 +PGR_check_replicate_server_info(void)
4861 +{
4862 +       ReplicateServerInfo * entry = ReplicateServerData;
4863 +
4864 +       /* no server table available */
4865 +       if (entry == NULL)
4866 +               return (ReplicateServerInfo *)NULL;
4867 +
4868 +       /* walk the table up to its DATA_END sentinel */
4869 +       for (; entry->useFlag != DATA_END; entry++)
4870 +       {
4871 +               if (entry->useFlag != DATA_USE)
4872 +                       continue;
4873 +               /* first entry flagged DATA_USE wins */
4874 +               return entry;
4875 +       }
4876 +
4877 +       /* nothing is flagged DATA_USE */
4878 +       return NULL;
4879 +} 
4880 +/* PGR_Send_Copy(): pass the COPY data buffered in copy to PGR_Send_Replicate_Command(); a non-zero end marks the last chunk. */
4881 +int
4882 +PGR_Send_Copy(CopyData * copy,int end )
4883 +{
4884 +       /* Returns STATUS_OK when a reply came back, STATUS_ERROR when it did not or when copy is NULL. */
4885 +       char cmdSts,cmdType;
4886 +       char * p = NULL;
4887 +       char *result = NULL;
4888 +       char term[8];
4889 +       /*int status = 0; */
4890 +
4891 +       if (copy == NULL)
4892 +       {
4893 +               return STATUS_ERROR;
4894 +       }
4895 +       /* default status; each mode check below overrides it, the later one winning */
4896 +       cmdSts = CMD_STS_COPY;
4897 +
4898 +       if (Transaction_Mode > 0)
4899 +       {
4900 +               cmdSts = CMD_STS_TRANSACTION ;
4901 +       }
4902 +       if (Session_Authorization_Mode)
4903 +       {
4904 +               cmdSts = CMD_STS_SET_SESSION_AUTHORIZATION ;
4905 +       }
4906 +       cmdType = CMD_TYPE_COPY_DATA;
4907 +       /* terminate the buffered data so it can be searched as a C string */
4908 +       copy->copy_data[copy->cnt] = '\0';
4909 +       if (end)
4910 +       {
4911 +               memset(term,0,sizeof(term)); /* term becomes: backslash, '.', newline, then NUL padding */
4912 +               term[0]='\\';
4913 +               term[1]='.';
4914 +               term[2]='\n';
4915 +
4916 +               cmdType = CMD_TYPE_COPY_DATA_END;
4917 +               p = NULL;
4918 +               if (copy->cnt > 0)
4919 +               {
4920 +                       copy->copy_data[copy->cnt] = '\0';
4921 +                       p = strstr(copy->copy_data,term); /* is the marker already in the data? */
4922 +                       if (p == NULL)
4923 +                       {
4924 +                               p = &(copy->copy_data[copy->cnt-1]); /* no: the marker will overwrite the last data byte */
4925 +                               copy->cnt--;
4926 +                       }
4927 +                       else
4928 +                       {
4929 +                               p = NULL; /* yes: nothing to append */
4930 +                       }
4931 +               }
4932 +               if (p != NULL)
4933 +               {
4934 +                       strncpy(p,term,sizeof(term)); /* writes all 8 bytes of term: 3 marker bytes plus NUL padding */
4935 +                       copy->cnt += 4; /* with the decrement above, cnt ends 3 larger than before */
4936 +               }
4937 +       }
4938 +       result = PGR_Send_Replicate_Command(copy->copy_data, copy->cnt, cmdSts, cmdType);
4939 +       memset(copy,0,sizeof(CopyData)); /* the buffer is cleared whether or not the send succeeded */
4940 +
4941 +       if (result != NULL)
4942 +       {
4943 +               PGR_Reload_Start_Time();
4944 +               free(result);
4945 +               result = NULL;
4946 +               return STATUS_OK;
4947 +       }
4948 +       else
4949 +       {
4950 +               return STATUS_ERROR;
4951 +       }
4952 +}
4953 +/* PGR_Set_Copy_Data(): append len bytes of str to copy's buffer, sending the buffer with PGR_Send_Copy() whenever it fills up. */
4954 +CopyData * 
4955 +PGR_Set_Copy_Data(CopyData * copy, char *str, int len,int end)
4956 +{
4957 +       CopyData save;
4958 +       int save_len = 0;
4959 +       int read_index = 0;
4960 +       int send_size = 0;
4961 +       int buf_size = 0;
4962 +       int rest_len = 0;
4963 +       int rest_buf_size = 0;
4964 +       int status = STATUS_OK;
4965 +       char * ep = NULL;
4966 +       char term[4];
4967 +       /* Returns copy, or NULL when replication of COPY data is off, copy is NULL or the last send failed. */
4968 +       #define BUFF_OFFSET (8) /* bytes of COPYBUFSIZ left unused by this function (see buf_size) */
4969 +
4970 +       if ((PGR_Copy_Data_Need_Replicate == false) ||
4971 +               (copy == NULL))
4972 +       {
4973 +               return (CopyData *)NULL;
4974 +       }
4975 +       memset(term,0,sizeof(term)); /* term becomes: newline, backslash, '.' */
4976 +       term[0]='\n';
4977 +       term[1]='\\';
4978 +       term[2]='.';
4979 +       buf_size = COPYBUFSIZ - BUFF_OFFSET;
4980 +       read_index = 0;
4981 +       rest_len = len; /* bytes of str not yet copied */
4982 +       rest_buf_size = buf_size - copy->cnt; /* room left in the buffer */
4983 +       while ((rest_len > 0) && (rest_buf_size > 0))
4984 +       {
4985 +               if (rest_buf_size < rest_len) /* copy only as much as fits */
4986 +               {
4987 +                       send_size = rest_buf_size;
4988 +                       rest_len -= send_size;
4989 +               }
4990 +               else
4991 +               {
4992 +                       send_size = rest_len;
4993 +                       rest_len = 0;
4994 +               }
4995 +               memcpy(&(copy->copy_data[copy->cnt]) ,str + read_index ,send_size);
4996 +               copy->cnt += send_size;
4997 +               read_index += send_size;
4998 +               rest_buf_size = buf_size - copy->cnt; 
4999 +               if (strstr(copy->copy_data,term) != NULL) /* end marker found in the buffer: stop filling */
5000 +               {
5001 +                       break; /* NOTE(review): bytes of str not copied yet are not picked up after the loop - confirm */
5002 +               }
5003 +               if (rest_buf_size <= 0) /* buffer full: send it on */
5004 +               {
5005 +                       ep = strrchr(copy->copy_data,'\n'); /* last line break in the buffer */
5006 +                       if (ep != NULL)
5007 +                       {
5008 +                               *ep = '\0'; /* cut temporarily so strlen() measures up to the line break */
5009 +                               save_len = copy->cnt - strlen(copy->copy_data) -1; /* bytes following the line break */
5010 +                               copy->cnt -= save_len ;
5011 +                               memset(&save,0,sizeof(CopyData));
5012 +                               memcpy(save.copy_data,(ep+1),save_len+1); /* carry those bytes (and one more) over in save */
5013 +                               save.cnt = save_len;
5014 +                               *ep = '\n'; /* restore the line break ... */
5015 +                               *(ep+1) = '\0'; /* ... and terminate right after it */
5016 +                               status = PGR_Send_Copy(copy,0); /* send everything up to and including the line break */
5017 +                               memset(copy,0,sizeof(CopyData));
5018 +                               if (save_len > 0) /* start the next buffer with the carried-over bytes */
5019 +                               {
5020 +                                       memcpy(copy,&save,sizeof(CopyData));
5021 +                               }
5022 +                               rest_buf_size = buf_size - copy->cnt; 
5023 +
5024 +                       }
5025 +                       else
5026 +                       {
5027 +                               /* one record is bigger than COPYBUFSIZ */
5028 +                               /* buffer would be over flow*/
5029 +                               status = PGR_Send_Copy(copy,0); /* no line break to split at: send the whole buffer */
5030 +                               memset(copy,0,sizeof(CopyData));
5031 +                               rest_buf_size = buf_size - copy->cnt; 
5032 +                       }
5033 +               }
5034 +       }
5035 +       if (end) /* final call: send whatever is buffered as the last chunk */
5036 +       {
5037 +               status = PGR_Send_Copy(copy,end);
5038 +               memset(copy,0,sizeof(CopyData));
5039 +       }
5040 +       if (status != STATUS_OK) /* only the most recent PGR_Send_Copy() result is checked */
5041 +       {
5042 +               return (CopyData *)NULL;
5043 +       }
5044 +       return copy;
5045 +}
5046 +/* PGR_replication(): classify a statement by its command tag and, where the checks below allow it, send it with PGR_Send_Replicate_Command(). */
5047 +int
5048 +PGR_replication(char * query_string, CommandDest dest, Node *parsetree, const char * commandTag)
5049 +{
5050 +       char *result = NULL;
5051 +       char cmdSts = CMD_STS_OTHER;
5052 +       char cmdType = CMD_TYPE_OTHER;
5053 +       int query_len = 0;
5054 +
5055 +       if ((query_string == NULL) ||
5056 +               (commandTag == NULL))
5057 +       {
5058 +               return STATUS_ERROR;
5059 +       }
5060 +       /* drop any query an earlier call saved in Query_String */
5061 +       Query_String = NULL;
5062 +       query_len = strlen(query_string);
5063 +
5064 +       /* save query data for retry */
5065 +       PGR_Retry_Query.query_string = query_string;
5066 +       PGR_Retry_Query.query_len = query_len;
5067 +       PGR_Retry_Query.cmdSts = cmdSts; /* still the initial CMD_STS_OTHER here */
5068 +       PGR_Retry_Query.cmdType = cmdType; /* still CMD_TYPE_OTHER; both are refreshed only on the first send path below */
5069 +       PGR_Retry_Query.useFlag = DATA_USE;
5070 +       /* set cmdType */
5071 +       if (!strcmp(commandTag,"BEGIN")) cmdType = CMD_TYPE_BEGIN ;
5072 +       else if (!strcmp(commandTag,"COMMIT")) cmdType = CMD_TYPE_COMMIT ;
5073 +       else if (!strcmp(commandTag,"SELECT")) cmdType = CMD_TYPE_SELECT ;
5074 +       else if (!strcmp(commandTag,"INSERT")) cmdType = CMD_TYPE_INSERT ;
5075 +       else if (!strcmp(commandTag,"UPDATE")) cmdType = CMD_TYPE_UPDATE ;
5076 +       else if (!strcmp(commandTag,"DELETE")) cmdType = CMD_TYPE_DELETE ;
5077 +       else if (!strcmp(commandTag,"VACUUM")) cmdType = CMD_TYPE_VACUUM ;
5078 +       else if (!strcmp(commandTag,"ANALYZE")) cmdType = CMD_TYPE_ANALYZE ;
5079 +       else if (!strcmp(commandTag,"REINDEX")) cmdType = CMD_TYPE_REINDEX ;
5080 +       else if (!strcmp(commandTag,"ROLLBACK")) cmdType = CMD_TYPE_ROLLBACK ;
5081 +       else if (!strcmp(commandTag,"RESET")) cmdType = CMD_TYPE_RESET ;
5082 +       else if (!strcmp(commandTag,"START TRANSACTION")) cmdType = CMD_TYPE_BEGIN ;
5083 +
5084 +       /* only "replication_server" statement-name is replicated for SHOW. */
5085 +       /*   see CreateCommandTag() @ backend/tcop/postgres.c      */
5086 +
5087 +       else if (!strcmp(commandTag,"COPY"))
5088 +       {
5089 +               cmdType = CMD_TYPE_COPY ;
5090 +               if (is_copy_from(query_string))
5091 +               {
5092 +                       PGR_Copy_Data_Need_Replicate = true;
5093 +               }
5094 +               else
5095 +               {
5096 +                       PGR_Copy_Data_Need_Replicate = false;
5097 +                       return STATUS_NOT_REPLICATE; /* is_copy_from() said no: this COPY is not sent */
5098 +               }
5099 +       }
5100 +       else if (!strcmp(commandTag,"SET")) 
5101 +       {
5102 +               cmdType = CMD_TYPE_SET;
5103 +               /*
5104 +               VariableSetStmt *stmt = (VariableSetStmt *)parsetree;
5105 +               if (strcmp(stmt->name, "TRANSACTION ISOLATION LEVEL") &&
5106 +                       strcmp(stmt->name, "datestyle") &&
5107 +                       strcmp(stmt->name, "autocommit") &&
5108 +                       strcmp(stmt->name, "client_encoding") &&
5109 +                       strcmp(stmt->name, "password_encryption") &&
5110 +                       strcmp(stmt->name, "search_path") &&
5111 +                       strcmp(stmt->name, "session_authorization") &&
5112 +                       strcmp(stmt->name, "timezone"))
5113 +
5114 +                       return STATUS_NOT_REPLICATE;
5115 +               */
5116 +               if (strstr(query_string,SYS_QUERY_1) != NULL) /* a SET containing SYS_QUERY_1 is never sent */
5117 +               {
5118 +                       return STATUS_NOT_REPLICATE;
5119 +               }
5120 +       }
5121 +       else if (!strcmp(commandTag,"CREATE TABLE")) 
5122 +       {
5123 +               if (is_create_temp_table(query_string))
5124 +               {
5125 +                       Create_Temp_Table_Mode = true;
5126 +               }
5127 +       }
5128 +       if (Create_Temp_Table_Mode) /* status precedence, lowest to highest: temp table, transaction, session authorization */
5129 +       {
5130 +               cmdSts = CMD_STS_TEMP_TABLE ;
5131 +       }
5132 +       if (Transaction_Mode > 0)
5133 +       {
5134 +               cmdSts = CMD_STS_TRANSACTION ;
5135 +       }
5136 +       else
5137 +       {
5138 +               if ((cmdType == CMD_TYPE_COMMIT ) || /* COMMIT/ROLLBACK with Transaction_Mode <= 0 still gets transaction status */
5139 +                       (cmdType == CMD_TYPE_ROLLBACK ))
5140 +               {
5141 +                       cmdSts = CMD_STS_TRANSACTION ;
5142 +                       if (ReplicateCurrentTime != NULL)
5143 +                       {
5144 +                               ReplicateCurrentTime->useFlag = DATA_INIT; /* put the saved time back to DATA_INIT */
5145 +                               ReplicateCurrentTime->use_seed = 0;
5146 +                       }
5147 +               }
5148 +       }
5149 +       if (Session_Authorization_Mode)
5150 +       {
5151 +               cmdSts = CMD_STS_SET_SESSION_AUTHORIZATION ;
5152 +               if (cmdType == CMD_TYPE_SESSION_AUTHORIZATION_END) /* NOTE(review): no branch above assigns this value to cmdType - confirm it can be true */
5153 +               {
5154 +                       Session_Authorization_Mode = false;
5155 +               }
5156 +       }
5157 +       if ((cmdSts == CMD_STS_TRANSACTION ) ||
5158 +               (cmdSts == CMD_STS_SET_SESSION_AUTHORIZATION ) ||
5159 +               (cmdSts == CMD_STS_TEMP_TABLE ))
5160 +       {
5161 +               /* check partitional replication table */
5162 +               if (is_not_replication_query(query_string, query_len, cmdType)== true )
5163 +               {
5164 +                       PGR_Copy_Data_Need_Replicate = false;
5165 +                       return STATUS_NOT_REPLICATE;
5166 +               }
5167 +               Query_String = NULL;
5168 +               if (( do_not_replication_command(commandTag) == true) && /* excluded commands are skipped here, except SELECT */
5169 +                       (strcmp(commandTag,"SELECT")))
5170 +               {
5171 +                       return STATUS_NOT_REPLICATE;
5172 +               }
5173 +
5174 +               if (Debug_pretty_print)
5175 +                       elog(DEBUG1,"transaction query send :%s",(char *)query_string);
5176 +               PGR_Retry_Query.cmdSts = cmdSts; /* refresh the retry record with the final status and type */
5177 +               PGR_Retry_Query.cmdType = cmdType;
5178 +               result = PGR_Send_Replicate_Command(query_string,query_len, cmdSts,cmdType);
5179 +               if (result != NULL)
5180 +               {
5181 +                       if (!strncmp(result,PGR_DEADLOCK_DETECTION_MSG,strlen(PGR_DEADLOCK_DETECTION_MSG)))
5182 +                       {
5183 +                               /*
5184 +                               PGR_Send_Message_To_Frontend(result);
5185 +                               */
5186 +                               free(result);
5187 +                               result = NULL;
5188 +                               return STATUS_DEADLOCK_DETECT;
5189 +                       }
5190 +                       else if (!strncmp(result,PGR_REPLICATION_ABORT_MSG,strlen(PGR_REPLICATION_ABORT_MSG)))
5191 +                       {
5192 +                               free(result);
5193 +                               result = NULL;
5194 +                               return STATUS_REPLICATION_ABORT;
5195 +                       }
5196 +                       free(result);
5197 +                       result = NULL;
5198 +                       return STATUS_CONTINUE; /* any other reply */
5199 +               }
5200 +               else
5201 +               {
5202 +                       return STATUS_ERROR; /* PGR_Send_Replicate_Command() returned NULL */
5203 +               }
5204 +       }
5205 +       else
5206 +       {
5207 +               cmdSts = CMD_STS_QUERY ; /* none of the three special statuses applies */
5208 +               if ( do_not_replication_command(commandTag) == false)
5209 +               {
5210 +                       Query_String = NULL;
5211 +                       /* check partitional replication table */
5212 +                       if (is_not_replication_query(query_string, query_len, cmdType)== true )
5213 +                       {
5214 +                               PGR_Copy_Data_Need_Replicate = false;
5215 +                               return STATUS_NOT_REPLICATE;
5216 +                       }
5217 +                       result = PGR_Send_Replicate_Command(query_string,query_len,cmdSts,cmdType);
5218 +                       if (result != NULL)
5219 +                       {
5220 +                               if (!strncmp(result,PGR_DEADLOCK_DETECTION_MSG,strlen(PGR_DEADLOCK_DETECTION_MSG)))
5221 +                               {
5222 +                                       free(result);
5223 +                                       result = NULL;
5224 +                                       return STATUS_DEADLOCK_DETECT;
5225 +                               }
5226 +                               else if (!strncmp(result,PGR_REPLICATION_ABORT_MSG,strlen(PGR_REPLICATION_ABORT_MSG)))
5227 +                               {
5228 +                                       free(result);
5229 +                                       result = NULL;
5230 +                                       return STATUS_REPLICATION_ABORT;
5231 +                               }
5232 +                               /*
5233 +                               PGR_Send_Message_To_Frontend(result);
5234 +                               */
5235 +                               free(result);
5236 +                               result = NULL;
5237 +                               return STATUS_CONTINUE;
5238 +                       }
5239 +                       else
5240 +                       {
5241 +                               return STATUS_ERROR;
5242 +                       }
5243 +               }
5244 +               else
5245 +               {
5246 +                       if (( is_serial_control_query(cmdType,query_string) == true) || /* these two kinds are sent even though the command is excluded */
5247 +                               ( is_select_into_query(cmdType,query_string) == true))
5248 +                       {
5249 +                               Query_String = NULL;
5250 +                               PGR_Need_Notice = true;
5251 +                               PGR_Check_Lock.check_lock_conflict = true;
5252 +                               result = PGR_Send_Replicate_Command(query_string,query_len,cmdSts,cmdType);
5253 +                               if (result != NULL)
5254 +                               {
5255 +                                       /*
5256 +                                       PGR_Send_Message_To_Frontend(result);
5257 +                                       */
5258 +                                       if (!strncmp(result,PGR_DEADLOCK_DETECTION_MSG,strlen(PGR_DEADLOCK_DETECTION_MSG)))
5259 +                                       {
5260 +                                               free(result); /* this path has no check for PGR_REPLICATION_ABORT_MSG, unlike the two above */
5261 +                                               return STATUS_DEADLOCK_DETECT;
5262 +                                       }
5263 +                                       free(result);
5264 +                                       result = NULL;
5265 +                                       return STATUS_CONTINUE;
5266 +                               }
5267 +                               else
5268 +                               {
5269 +                                       return STATUS_ERROR;
5270 +                               }
5271 +                       }
5272 +                       else
5273 +                       {
5274 +                               Query_String = query_string; /* keep the query; PGR_Replicate_Function_Call() sends Query_String when it runs */
5275 +                               /*PGR_Sock_To_Replication_Server = -1;*/
5276 +                       }
5277 +                       return STATUS_CONTINUE_SELECT;
5278 +               }
5279 +       }
5280 +       return STATUS_CONTINUE; /* not reached: every branch above returns */
5281 +}
5282 +
5283 +
5284 +/* True when query names PGR_SYSTEM_COMMAND_FUNC and an opening
5285 +   parenthesis follows it somewhere later in the string. */
5286 +bool
5287 +PGR_Is_System_Command(char * query)
5288 +{
5289 +       char * func_pos = NULL;
5290 +
5291 +       /* a missing query is never a system command */
5292 +       if (query == NULL)
5293 +               return false;
5294 +
5295 +       /* the function name has to appear in the query ... */
5296 +       func_pos = strstr(query,PGR_SYSTEM_COMMAND_FUNC);
5297 +       if (func_pos == NULL)
5298 +               return false;
5299 +
5300 +       /* ... with a '(' at or after that position */
5301 +       return (strchr(func_pos,'(') != NULL) ? true : false;
5302 +}
5303 +
5304 +/*
5305 + * set_command_args()
5306 + *   Split str at each ',' or ')' and store the pieces, NUL terminated, in
5307 + *   argv[0..].  At most PGR_CMD_ARG_NUM pieces are stored, and a piece too
5308 + *   long for its 256-byte slot is truncated instead of overrunning it.
5309 + *   Returns 0 when str is NULL, otherwise the number of delimiters
5310 + *   consumed plus one (for the trailing piece).
5311 + */
5312 +static int
5313 +set_command_args(char argv[ PGR_CMD_ARG_NUM ][256],char *str)
5314 +{
5315 +       int cnt = 0;            /* index of the piece being filled */
5316 +       int j = 0;              /* write position inside that piece */
5317 +       char * ptr;
5318 +
5319 +       if (str == NULL)
5320 +               return 0;
5321 +       for (ptr = str; (*ptr != '\0') && (cnt < PGR_CMD_ARG_NUM); ptr++)
5322 +       {
5323 +               if (( *ptr == ',') || (*ptr == ')'))
5324 +               {
5325 +                       argv[cnt][j] = '\0';
5326 +                       cnt ++;
5327 +                       j = 0;
5328 +               }
5329 +               else if (j < (int) sizeof(argv[0]) - 1)
5330 +                       argv[cnt][j++] = *ptr;  /* last byte stays free for the NUL */
5331 +       }
5332 +       if (cnt < PGR_CMD_ARG_NUM)
5333 +               argv[cnt][j] = '\0';
5334 +       return cnt + 1;
5335 +}
5336 +
5337 +/*
5338 + * add_replication_server()
5339 + *   Register a replication server in the ReplicateServerData table.
5340 + *   An entry with the same host and ports is reused (and put back to
5341 + *   DATA_INIT unless it is flagged DATA_USE); otherwise a new entry is
5342 + *   written over the DATA_END sentinel and a new sentinel follows it.
5343 + *   Returns STATUS_OK, or STATUS_ERROR for a NULL argument, a missing
5344 + *   table, a full table or a host name too long to store terminated.
5345 + */
5346 +static int
5347 +add_replication_server(char * hostname,char * port, char * recovery_port)
5348 +{
5349 +       int cnt = 0;
5350 +       int portNumber;
5351 +       int recoveryPortNumber;
5352 +       ReplicateServerInfo * sp;
5353 +
5354 +       if ((hostname == NULL) || (port == NULL ) || (recovery_port == NULL ))
5355 +               return STATUS_ERROR;
5356 +       if (ReplicateServerData == NULL)
5357 +               return STATUS_ERROR;
5358 +       portNumber = atoi(port);
5359 +       recoveryPortNumber = atoi(recovery_port);
5360 +       for (sp = ReplicateServerData; sp->useFlag != DATA_END; sp++, cnt++)
5361 +       {
5362 +               if((!strncmp(sp->hostName,hostname,sizeof(sp->hostName))) &&
5363 +                       (sp->portNumber == portNumber) &&
5364 +                       (sp->recoveryPortNumber == recoveryPortNumber))
5365 +               {
5366 +                       if (sp->useFlag != DATA_USE)
5367 +                               PGR_Set_Replication_Server_Status(sp, DATA_INIT);
5368 +                       return STATUS_OK;
5369 +               }
5370 +       }
5371 +       if (cnt >= MAX_SERVER_NUM)
5372 +               return STATUS_ERROR;
5373 +       /*
5374 +        * strncpy() leaves the field unterminated when the name fills it, and
5375 +        * change_replication_server() reads hostName with strcmp(): refuse it.
5376 +        */
5377 +       if (strlen(hostname) >= sizeof(sp->hostName))
5378 +               return STATUS_ERROR;
5379 +
5380 +       strncpy(sp->hostName,hostname,sizeof(sp->hostName));
5381 +       sp->portNumber = portNumber;
5382 +       sp->recoveryPortNumber = recoveryPortNumber;
5383 +       PGR_Set_Replication_Server_Status(sp, DATA_INIT);
5384 +       memset((sp+1),0,sizeof(ReplicateServerInfo));
5385 +       (sp + 1)->useFlag = DATA_END;
5386 +       return  STATUS_OK;
5387 +}
5388 +
5389 +/*
5390 + * change_replication_server()
5391 + *   Flag the table entry matching host and both ports as DATA_USE and
5392 + *   put every other entry that was DATA_USE back to DATA_INIT.
5393 + *   Returns STATUS_ERROR for a NULL argument or a missing table,
5394 + *   STATUS_OK otherwise (even when no entry matches).
5395 + */
5396 +static int
5397 +change_replication_server(char * hostname,char * port, char * recovery_port)
5398 +{
5399 +       ReplicateServerInfo * entry;
5400 +       int wanted_port;
5401 +       int wanted_recovery_port;
5402 +       bool is_target;
5403 +
5404 +       if (hostname == NULL || port == NULL || recovery_port == NULL)
5405 +               return STATUS_ERROR;
5406 +       if (ReplicateServerData == NULL)
5407 +               return STATUS_ERROR;
5408 +
5409 +       wanted_port = atoi(port);
5410 +       wanted_recovery_port = atoi(recovery_port);
5411 +
5412 +       for (entry = ReplicateServerData; entry->useFlag != DATA_END; entry++)
5413 +       {
5414 +               is_target = (strcmp(entry->hostName,hostname) == 0) &&
5415 +                       (entry->portNumber == wanted_port) &&
5416 +                       (entry->recoveryPortNumber == wanted_recovery_port);
5417 +
5418 +               if (is_target)
5419 +               {
5420 +                       PGR_Set_Replication_Server_Status(entry, DATA_USE);
5421 +               }
5422 +               else if (entry->useFlag == DATA_USE)
5423 +               {
5424 +                       /* a different server was flagged in use: demote it */
5425 +                       PGR_Set_Replication_Server_Status(entry, DATA_INIT);
5426 +               }
5427 +       }
5428 +       return  STATUS_OK;
5429 +}
5430 +
5431 +/*
5432 + * PGR_Set_Current_Time()
5433 + *   Store the time given as decimal strings in ReplicateCurrentTime,
5434 + *   together with its offset from the local clock.  Returns STATUS_ERROR
5435 + *   for a NULL argument or when ReplicateCurrentTime is NULL.
5436 + */
5437 +int
5438 +PGR_Set_Current_Time(char * sec, char * usec)
5439 +{
5440 +       struct timeval local_tp;
5441 +       struct timeval tv;
5442 +
5443 +       if ((sec == NULL) || (usec == NULL) || (ReplicateCurrentTime == NULL))
5444 +               return STATUS_ERROR;
5445 +       gettimeofday(&local_tp, NULL);  /* the time zone argument is obsolete */
5446 +       tv.tv_sec = atol(sec);
5447 +       tv.tv_usec = atol(usec);
5448 +       ReplicateCurrentTime->offset_sec = local_tp.tv_sec - tv.tv_sec;
5449 +       ReplicateCurrentTime->offset_usec = local_tp.tv_usec - tv.tv_usec;
5450 +       ReplicateCurrentTime->tp.tv_sec = tv.tv_sec;
5451 +       ReplicateCurrentTime->tp.tv_usec = tv.tv_usec;
5452 +       ReplicateCurrentTime->useFlag = DATA_USE;
5453 +       ReplicateCurrentTime->use_seed = 0;
5454 +       return  STATUS_OK;
5455 +}
5456 +/* Store the replication query id, parsed from the decimal string id with atol(), in MyProc->replicationId. */
5457 +static void
5458 +PGR_Set_Current_Replication_Query_ID(char *id) {
5459 +      MyProc->replicationId=atol(id);  /* id and MyProc are used without NULL checks */
5460 +      return;
5461 +}
5462 +/* Parse mode with atoi() and store it as CurrentReplicateServer->response_mode; NULL or negative input is ignored. */
5463 +static void
5464 +set_response_mode(char * mode)
5465 +{
5466 +       int response_mode = 0;
5467 +
5468 +       if (mode == NULL)
5469 +               return;
5470 +       response_mode = atoi(mode);
5471 +       if (response_mode < 0)
5472 +               return;
5473 +       if (CurrentReplicateServer == NULL)
5474 +       {
5475 +               PGR_get_replicate_server_info();        /* presumably sets CurrentReplicateServer as a side effect - confirm */
5476 +               if (CurrentReplicateServer == NULL)
5477 +               {
5478 +                       return; /* still no current server: nothing to update */
5479 +               }
5480 +       }
5481 +       if (CurrentReplicateServer->response_mode != response_mode)     /* write only when the value changes */
5482 +       {
5483 +               CurrentReplicateServer->response_mode = response_mode;
5484 +       }
5485 +}
5486 +/* Parse "<PGR_SYSTEM_COMMAND_FUNC>(func_no,args...)" out of command (modified in place) and run the numbered system function. */
5487 +int
5488 +PGR_Call_System_Command(char * command)
5489 +{
5490 +       char * ptr;
5491 +       char * args;
5492 +       char argv[ PGR_CMD_ARG_NUM ][256];
5493 +       int argc = 0;
5494 +       int func_no;
5495 +       char * hostName = NULL;
5496 +       memset(argv,0,sizeof(argv));    /* arguments missing from the command then read as "" / 0, not stack garbage */
5497 +       if ((command == NULL) || (ReplicateCurrentTime == NULL))
5498 +       {
5499 +               return STATUS_ERROR;
5500 +       }
5501 +       ptr = strstr(command,PGR_SYSTEM_COMMAND_FUNC);
5502 +       if (ptr == NULL)
5503 +               return STATUS_ERROR;
5504 +       ptr = strchr(ptr,'(');
5505 +       if (ptr == NULL)
5506 +               return STATUS_ERROR;
5507 +       args = ptr+1;
5508 +       ptr = strchr(ptr,')');
5509 +       if (ptr == NULL)
5510 +               return STATUS_ERROR;
5511 +       *ptr = '\0';    /* cut the caller's string at the first ')' so args holds only the argument list */
5512 +       argc = set_command_args(argv,args);
5513 +       if (argc < 1)
5514 +               return STATUS_ERROR;
5515 +       func_no = atoi(argv[0]);        /* first argument selects the function; unknown numbers fall through to STATUS_OK */
5516 +       switch (func_no)
5517 +       {
5518 +               /* set current system time */
5519 +               case PGR_SET_CURRENT_TIME_FUNC_NO:
5520 +                       if (atol(argv[1]) == 0) /* a zero seconds argument triggers a checkpoint instead of a time update */
5521 +                       {
5522 +                               CreateCheckPoint(false,true);
5523 +                       }
5524 +                       else
5525 +                       {
5526 +                         /*
5527 +                         if ((atoi(argv[3]) > 0) &&
5528 +                               (is_this_query_replicated(argv[3]) == true))
5529 +                           {                           
5530 +                             return STATUS_SKIP_QUERY;
5531 +                           }
5532 +                         */
5533 +                               PGR_Set_Current_Time(argv[1],argv[2]);
5534 +                               set_replication_id(argv[3]);
5535 +                               set_response_mode(argv[4]);
5536 +                               PGR_Set_Current_Replication_Query_ID(argv[5]);
5537 +
5538 +                       }
5539 +                       break;
5540 +               /* add new replication server data */
5541 +               case PGR_STARTUP_REPLICATION_SERVER_FUNC_NO:
5542 +                       hostName = get_hostName(argv[1]);
5543 +                       add_replication_server(hostName,argv[2],argv[3]);
5544 +                       break;
5545 +               /* change new replication server */
5546 +               case PGR_CHANGE_REPLICATION_SERVER_FUNC_NO:
5547 +                       hostName = get_hostName(argv[1]);
5548 +                       change_replication_server(hostName,argv[2],argv[3]);
5549 +                       break;
5550 +               case PGR_SET_CURRENT_REPLICATION_QUERY_ID_NO:
5551 +                 PGR_Set_Current_Replication_Query_ID(argv[1]);
5552 +                       break;
5553 +               case PGR_QUERY_CONFIRM_ANSWER_FUNC_NO:
5554 +                       if ((atoi(argv[3]) > 0) &&
5555 +                               (is_this_query_replicated(argv[3]) == true))
5556 +                       {
5557 +                               /* skip this query */
5558 +                         return STATUS_SKIP_QUERY;
5559 +                       }
5560 +                       else
5561 +                       {
5562 +                               PGR_Set_Current_Time(argv[1],argv[2]);
5563 +                               set_replication_id(argv[3]);
5564 +                       }
5565 +                       break;
5566 +               /* get current oid */
5567 +               case PGR_GET_OID_FUNC_NO:
5568 +                       return_current_oid();
5569 +                       break;
5570 +               /* set current oid */
5571 +               case PGR_SET_OID_FUNC_NO:
5572 +                       sync_oid(argv[1]);
5573 +                       break;
5574 +               /* set noticed session abort */
5575 +               case PGR_NOTICE_ABORT_FUNC_NO:
5576 +                       PGR_Noticed_Abort = true;
5577 +                       break;
5578 +       }
5579 +       return STATUS_OK;
5580 +}
5581 +/* gettimeofday() wrapper: while a replicated time is in use (useFlag == DATA_USE) it reports that time instead of the raw local clock. */
5582 +int
5583 +PGR_GetTimeOfDay(struct timeval *tp, struct timezone *tpz)
5584 +{
5585 +
5586 +       int rtn;
5587 +
5588 +       rtn = gettimeofday(tp, tpz);
5589 +       if (ReplicateCurrentTime == NULL)
5590 +       {
5591 +               return rtn;     /* no replicated clock available: plain gettimeofday() result */
5592 +       }
5593 +       if (ReplicateCurrentTime->useFlag == DATA_USE)
5594 +       {
5595 +               if (ReplicateCurrentTime->use_seed != 0)
5596 +               {
5597 +                       /* local time minus the recorded offset, borrowing one second when needed */
5598 +                       tp->tv_sec -= ReplicateCurrentTime->offset_sec;
5599 +                       if (tp->tv_usec < ReplicateCurrentTime->offset_usec)
5600 +                       {
5601 +                               tp->tv_usec += (1000000 -  ReplicateCurrentTime->offset_usec);
5602 +                               tp->tv_sec -= 1;
5603 +                       }
5604 +                       else
5605 +                       {
5606 +                               tp->tv_usec -= ReplicateCurrentTime->offset_usec;       /* NOTE(review): a negative offset_usec is not normalized here - confirm tv_usec stays below 1000000 */
5607 +                       }
5608 +               }
5609 +               else
5610 +               {
5611 +                       tp->tv_sec = ReplicateCurrentTime->tp.tv_sec;   /* report exactly the replicated timestamp */
5612 +                       tp->tv_usec = ReplicateCurrentTime->tp.tv_usec;
5613 +               }
5614 +               rtn = 0;        /* overridden time always reports success */
5615 +       }
5616 +       return rtn;
5617 +}
5617 +/* random() wrapper: before the first draw after a time update, seed the generator from the replicated microseconds. */
5618 +long
5619 +PGR_Random(void)
5620 +{
5621 +       long rtn;       /* random() returns long; no detour through double */
5622 +       if (ReplicateCurrentTime != NULL)
5623 +       {
5624 +               if ( ReplicateCurrentTime->use_seed == 0)
5625 +               {
5626 +                       srandom( ReplicateCurrentTime->tp.tv_usec );    /* random() is seeded by srandom(); srand() only seeds rand() */
5627 +                       ReplicateCurrentTime->use_seed = 1;     /* seed once per PGR_Set_Current_Time() call */
5628 +               }
5629 +       }
5630 +       rtn = random();
5631 +       return rtn;
5632 +}
5633 +/* Return the first ';' in str that lies outside '...', "..." and $tag$ quoting and outside
5634 + * -- or // line comments; NULL when str is NULL or contains no such ';'. */
5635 +char *
5636 +PGR_scan_terminate( char * str)
5637 +{
5638 +       char * p;
5639 +       int sflag = 0, dflag = 0, lflag = 0, i = 0;     /* inside '...', inside "...", inside $tag$, index into tag */
5640 +       char tag[256];
5641 +
5642 +       if (str == NULL)
5643 +               return NULL;
5644 +       p = str;
5645 +       memset(tag,0,sizeof(tag));
5646 +       while ( *p != '\0' )
5647 +       {
5648 +               if ((!strncmp(p,"--",2)) ||
5649 +                       (!strncmp(p,"//",2)))
5650 +               {
5651 +                       while (( *p != '\n') && (*p != '\0'))
5652 +                       {
5653 +                               p++;
5654 +                       }
5655 +                       continue;
5656 +               }
5657 +               switch (*p)
5658 +               {
5659 +                       case '\'':
5660 +                               sflag ^= 1;
5661 +                               break;
5662 +                       case '\"':
5663 +                               dflag ^= 1;
5664 +                               break;
5665 +                       case '$':
5666 +                               i = 0;
5667 +                               p++;
5668 +                               while (( *p != '\n') && (*p != '\0'))
5669 +                               {
5670 +                                       if (isalnum((unsigned char)*p) == 0)
5671 +                                       {
5672 +                                               if (*p == '$')
5673 +                                               {
5674 +                                                       lflag ^= 1;
5675 +                                               }
5676 +                                               break;
5677 +                                       }
5678 +                                       else
5679 +                                       {
5680 +                                               if (i >= sizeof(tag))
5681 +                                                       break;
5682 +                                               if (lflag == 0)
5683 +                                               {
5684 +                                                       tag[i] = *p;
5685 +                                               }
5686 +                                               else
5687 +                                               {
5688 +                                                       if (tag[i] != *p)
5689 +                                                       {
5690 +                                                               break;
5691 +                                                       }
5692 +                                               }
5693 +                                               i++;
5694 +                                       }
5695 +                                       p++;
5696 +                               }
5697 +                               if (*p == '\0')
5698 +                                       return NULL;    /* text ended inside the tag; the p++ below would step past the NUL */
5699 +                               break;
5700 +                       case '\\':
5701 +                               if (*(p+1) == '\0')
5702 +                                       return NULL;    /* lone trailing backslash; p += 2 would step past the NUL */
5703 +                               p +=2;
5704 +                               continue;
5705 +                       case ';':
5706 +                               if ((!sflag) && (!dflag) && (!lflag))
5707 +                                       return p;
5708 +                               break;
5709 +               }
5710 +               p++;
5711 +       }
5712 +       return NULL;
5713 +}
5714 +/* Return true when the word after "<word> <table>[(columns)]" in query is FROM (any case); the first word itself is not checked here. */
5715 +static bool
5716 +is_copy_from(char * query)
5717 +{
5718 +       char * p;
5719 +       int i;
5720 +       char buf[12];   /* holds the upper-cased keyword that follows the table/column part */
5721 +       int c_flag = 0; /* parenthesis nesting depth while skipping the column list */
5722 +       if (query == NULL)
5723 +               return false;
5724 +       p = query;
5725 +       for ( i = 0 ; i <= 1 ; i ++)    /* two passes: one word each */
5726 +       {
5727 +               /* get 'copy table_name' string */
5728 +               while(isspace(*p))
5729 +                       p++;
5730 +               while ((*p != '\0') && (*p  != '(') && (!isspace(*p)))
5731 +                       p++;
5732 +       }
5733 +       while(isspace(*p))
5734 +               p++;
5735 +       /* skip table column */
5736 +       if (*p == '(')
5737 +       {
5738 +               c_flag = 1;
5739 +               p++;
5740 +               while (*p != '\0') 
5741 +               {
5742 +                       if (*p == '(')
5743 +                               c_flag ++;
5744 +                       if (*p == ')')
5745 +                               c_flag --;
5746 +                       if (c_flag == 0)        /* matching ')' reached: step over it and stop */
5747 +                       {
5748 +                               p++;
5749 +                               break;
5750 +                       }
5751 +                       p++;
5752 +               }
5753 +               while(isspace(*p))
5754 +                       p++;
5755 +       }
5756 +       /* get 'from' or 'to' */
5757 +       i = 0;
5758 +       memset(buf,0,sizeof(buf));
5759 +       while ((*p != '\0') && (!isspace(*p)) && ( i < sizeof(buf)-1))  /* at most 11 characters, so buf stays NUL-terminated */
5760 +       {
5761 +               buf[i] = (char)toupper(*p);
5762 +               p++;
5763 +               i++;
5764 +       }
5765 +       if (!strcmp(buf,"FROM"))
5766 +       {
5767 +               return true;
5768 +       }
5769 +       else
5770 +       {
5771 +               return false;
5772 +       }
5773 +}
5774 +/* Return true when query has at least four words and its first three begin with CREATE, TEMP and TABLE (compared upper-cased). */
5775 +static bool
5776 +is_create_temp_table(char * query)
5777 +{
5778 +       int len,wc;
5779 +       char buf[MAX_WORDS][MAX_WORD_LETTERS];
5780 +
5781 +       if (query == NULL)
5782 +               return false;
5783 +       len = strlen(query);
5784 +       wc = get_words(buf,query,len,1);        /* last argument 1: words are stored upper-cased */
5785 +       if (wc < 4)
5786 +               return false;
5787 +       if ((!strncmp(buf[0],"CREATE", strlen("CREATE"))) &&
5788 +               (!strncmp(buf[1],"TEMP",strlen("TEMP"))) &&     /* prefix compare, so TEMPORARY matches as well */
5789 +               (!strncmp(buf[2],"TABLE",strlen("TABLE"))))
5790 +       {
5791 +               return true;
5792 +       }
5793 +       return false;
5794 +}
5795 +/*
5796 + * Split string into whitespace-separated words, upper-casing them when upper is
5797 + * non-zero, and store at most MAX_WORDS of them in words.  A word longer than
5798 + * MAX_WORD_LETTERS-1 characters is truncated, so every stored word is NUL-terminated.
5799 + * The length limit is tested between words only.  Returns the number of words
5800 + * stored, or STATUS_ERROR when string is NULL.
5801 + */
5802 +static int
5803 +get_words( char words[MAX_WORDS][MAX_WORD_LETTERS] ,char * string,int length,int upper)
5804 +{
5805 +       int i,wc,lc;
5806 +       char * p = NULL;
5807 +
5808 +       if (string == NULL)
5809 +               return STATUS_ERROR;
5810 +
5811 +       p = string;
5812 +       wc = 0;
5813 +       /* i counts consumed characters, wc stored words, lc letters of the current word */
5814 +       for (i = 0 ; i < length ; i ++)
5815 +       {
5816 +               if ((*p == '\0') || (wc >= MAX_WORDS))
5817 +                       break;
5818 +               while (isspace((unsigned char)*p))
5819 +               {
5820 +                       p++;
5821 +                       i++;
5822 +               }
5823 +               /* trailing whitespace must not be counted as an extra, empty word */
5824 +               if (*p == '\0')
5825 +                       break;
5826 +
5827 +               lc = 0;
5828 +               memset(words[wc],0,MAX_WORD_LETTERS);
5829 +               while ((*p != '\0') && (! isspace((unsigned char)*p)))
5830 +               {
5831 +                       /* write straight into the slot and drop what does not fit */
5832 +                       if (lc < MAX_WORD_LETTERS - 1)
5833 +                               words[wc][lc] = upper ? (char)toupper((unsigned char)*p) : *p;
5834 +                       p++;
5835 +                       i++;
5836 +                       lc++;
5837 +               }
5838 +               wc++;
5839 +       }
5840 +       return wc;
5841 +}
5842 +/* Compare two PGR_Not_Replicate_Type entries case-insensitively by table_name, then by db_name; a NULL argument yields 0. */
5843 +static int
5844 +Comp_Not_Replicate(PGR_Not_Replicate_Type * nrp1,PGR_Not_Replicate_Type* nrp2)
5845 +{
5846 +       int rtn;
5847 +
5848 +       if ((nrp1 == NULL) ||
5849 +               (nrp2 == NULL))
5850 +       {
5851 +               return 0;
5852 +       }
5853 +       rtn = strcasecmp(nrp1->table_name,nrp2->table_name);
5854 +       if (rtn == 0)   /* same table name: the database name decides */
5855 +       {
5856 +               rtn = strcasecmp(nrp1->db_name,nrp2->db_name);
5857 +       }
5858 +       return rtn;
5859 +}
5860 +/* Return true when PGR_Stand_Alone is NULL, or when it is flagged stand-alone and no replication server can be obtained. */
5861 +bool
5862 +PGR_Is_Stand_Alone(void)
5863 +{
5864 +       ReplicateServerInfo * sp = NULL;
5865 +
5866 +       if (PGR_Stand_Alone == NULL)
5867 +               return true;
5868 +       if (PGR_Stand_Alone->is_stand_alone == true)
5869 +       {
5870 +               sp = PGR_get_replicate_server_info();   /* re-check: a server may be obtainable again */
5871 +               if (sp == NULL)
5872 +               {
5873 +                       return true;
5874 +               }
5875 +       }
5876 +       return false;   /* note: the is_stand_alone flag itself is not changed by this function */
5877 +}
5878 +/* Send msg to the client as an 'N' (notice) message with severity "NOTICE" and flush it; a NULL msg is sent as "missing error text". */
5879 +void
5880 +PGR_Send_Message_To_Frontend(char * msg)
5881 +{
5882 +       StringInfoData msgbuf;
5883 +
5884 +       pq_beginmessage(&msgbuf, 'N');
5885 +
5886 +       if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
5887 +       {
5888 +               /* New style with separate fields */
5889 +               char            tbuf[12];
5890 +               int                     ssval;
5891 +               int                     i;
5892 +
5893 +               pq_sendbyte(&msgbuf, PG_DIAG_SEVERITY);
5894 +               pq_sendstring(&msgbuf, "NOTICE" );
5895 +
5896 +               /* unpack MAKE_SQLSTATE code */
5897 +               ssval = ERRCODE_WARNING ;
5898 +               for (i = 0; i < 5; i++) /* five characters, six bits each */
5899 +               {
5900 +                       tbuf[i] = PGUNSIXBIT(ssval);
5901 +                       ssval >>= 6;
5902 +               }
5903 +               tbuf[i] = '\0';
5904 +
5905 +               pq_sendbyte(&msgbuf, PG_DIAG_SQLSTATE);
5906 +               pq_sendstring(&msgbuf, tbuf);
5907 +
5908 +               /* M field is required per protocol, so always send something */
5909 +               pq_sendbyte(&msgbuf, PG_DIAG_MESSAGE_PRIMARY);
5910 +               if (msg)
5911 +                       pq_sendstring(&msgbuf, msg);
5912 +               else
5913 +                       pq_sendstring(&msgbuf, _("missing error text"));
5914 +
5915 +               pq_sendbyte(&msgbuf, '\0');             /* terminator */
5916 +       }
5917 +       else
5918 +       {
5919 +               /* Old style --- gin up a backwards-compatible message */
5920 +               StringInfoData buf;
5921 +
5922 +               initStringInfo(&buf);
5923 +
5924 +               appendStringInfo(&buf, "%s:  ", "NOTICE");
5925 +
5926 +               if (msg)
5927 +                       appendStringInfoString(&buf, msg);
5928 +               else
5929 +                       appendStringInfoString(&buf, _("missing error text"));
5930 +
5931 +               appendStringInfoChar(&buf, '\n');
5932 +
5933 +               pq_sendstring(&msgbuf, buf.data);
5934 +
5935 +               pfree(buf.data);        /* the text was handed to pq_sendstring() above; release the scratch buffer */
5936 +       }
5937 +
5938 +       pq_endmessage(&msgbuf);
5939 +
5940 +       /*
5941 +        * This flush is normally not necessary, since postgres.c will flush out
5942 +        * waiting data when control returns to the main loop. But it seems best
5943 +        * to leave it here, so that the client has some clue what happened if the
5944 +        * backend dies before getting back to the main loop ... error/notice
5945 +        * messages should not be a performance-critical path anyway, so an extra
5946 +        * flush won't hurt much ...
5947 +        */
5948 +       pq_flush();
5949 +}
5950 +/* Return true when cmdType is CMD_TYPE_SELECT and query contains NEXTVAL or SETVAL anywhere (case-insensitive substring search). */
5951 +static bool
5952 +is_serial_control_query(char cmdType,char * query)
5953 +{
5954 +       char * buf = NULL;
5955 +       int len = 0;
5956 +       int i = 0;
5957 +       char * p = NULL;        /* assigned below but not used afterwards */
5958 +
5959 +       if ((cmdType != CMD_TYPE_SELECT ) ||
5960 +               ( query == NULL))
5961 +       {
5962 +               return false;
5963 +       }
5964 +
5965 +       p = query;
5966 +       len = strlen(query) +1; /* includes the terminating NUL */
5967 +       buf = malloc(len);
5968 +       if (buf == NULL)
5969 +               return false;   /* allocation failure is reported as "not a sequence query" */
5970 +
5971 +       memset(buf,0,len);
5972 +       for ( i = 0 ; i < len ; i ++)   /* upper-cased copy of the whole query */
5973 +       {
5974 +               *(buf+i) = toupper(*(query+i));
5975 +       }
5976 +       if ((strstr(buf,"NEXTVAL") != NULL) ||
5977 +               (strstr(buf,"SETVAL") != NULL))
5978 +       {
5979 +               free(buf);
5980 +               buf = NULL;
5981 +               return true;
5982 +       }
5983 +       free(buf);
5984 +       buf = NULL;
5985 +       return false;
5986 +}
5987 +/* Return true when cmdType is CMD_TYPE_SELECT and query contains INTO or CREATE anywhere (case-insensitive substring search). */
5988 +static bool
5989 +is_select_into_query(char cmdType,char * query)
5990 +{
5991 +       char * buf = NULL;
5992 +       int len = 0;
5993 +       int i = 0;
5994 +       char * p = NULL;        /* assigned below but not used afterwards */
5995 +
5996 +       if ((cmdType != CMD_TYPE_SELECT ) ||
5997 +               ( query == NULL))
5998 +       {
5999 +               return false;
6000 +       }
6001 +
6002 +       p = query;
6003 +       len = strlen(query) +1; /* includes the terminating NUL */
6004 +       buf = malloc(len);
6005 +       if (buf == NULL)
6006 +               return false;   /* allocation failure is reported as "no match" */
6007 +
6008 +       memset(buf,0,len);
6009 +       for ( i = 0 ; i < len ; i ++)   /* upper-cased copy of the whole query */
6010 +       {
6011 +               *(buf+i) = toupper(*(query+i));
6012 +       }
6013 +       if (strstr(buf,"INTO") != NULL) /* plain substring test: also matches inside identifiers or literals */
6014 +       {
6015 +               free(buf);
6016 +               buf = NULL;
6017 +               return true;
6018 +       }
6019 +       if (strstr(buf,"CREATE") != NULL)
6020 +       {
6021 +               free(buf);
6022 +               buf = NULL;
6023 +               return true;
6024 +       }
6025 +       free(buf);
6026 +       buf = NULL;
6027 +       return false;
6028 +}
6029 +/* Send notice to the replication server as a CMD_SYS_CALL / CMD_STS_RESPONSE packet; does nothing (STATUS_OK) once PGR_Lock_Noticed is set. */
6030 +static int
6031 +send_response_to_replication_server(const char * notice)
6032 +{
6033 +       ReplicateHeader header;
6034 +       int status;
6035 +
6036 +       if (PGR_Lock_Noticed)
6037 +       {
6038 +               return STATUS_OK;
6039 +       }
6040 +       if ((notice == NULL) ||
6041 +               (PGR_Sock_To_Replication_Server < 0))
6042 +       {
6043 +               return STATUS_ERROR;
6044 +       }
6045 +
6046 +       memset(&header,0,sizeof(ReplicateHeader));      /* every field not set below goes out as zero */
6047 +       header.cmdSys = CMD_SYS_CALL;
6048 +       header.cmdSts = CMD_STS_RESPONSE;
6049 +       if (!strcmp(notice,PGR_QUERY_ABORTED_NOTICE_CMD))
6050 +       {
6051 +               header.cmdType = CMD_TYPE_FRONTEND_CLOSED;      /* only the aborted notice carries a command type */
6052 +       }
6053 +       header.query_size = htonl(strlen(notice));
6054 +       status = send_replicate_packet(PGR_Sock_To_Replication_Server,&header,(char *)notice);
6055 +       return status;  /* whatever send_replicate_packet() reported */
6056 +}
6057 +/* Send PGR_QUERY_DONE_NOTICE_CMD to the replication server; the send status is discarded. */
6058 +void
6059 +PGR_Notice_Transaction_Query_Done(void)
6060 +{
6061 +       send_response_to_replication_server(PGR_QUERY_DONE_NOTICE_CMD);
6062 +}
6063 +/* Send PGR_QUERY_ABORTED_NOTICE_CMD to the replication server; the send status is discarded. */
6064 +void
6065 +PGR_Notice_Transaction_Query_Aborted(void)
6066 +{
6067 +       send_response_to_replication_server(PGR_QUERY_ABORTED_NOTICE_CMD);
6068 +}
6069 +/* Report a lock conflict (or a deadlock, when PGR_Check_Lock.deadlock is set) to the frontend or to the replication server. */
6070 +int
6071 +PGR_Notice_Conflict(void)
6072 +{
6073 +       const char * msg = NULL ;
6074 +       int rtn = STATUS_OK;    /* stays STATUS_OK unless CONTROL_LOCK_CONFLICT code below overwrites it */
6075 +
6076 +       msg = PGR_LOCK_CONFLICT_NOTICE_CMD ;
6077 +       if (PGR_Check_Lock.deadlock == true)
6078 +       {
6079 +               msg = PGR_DEADLOCK_DETECT_NOTICE_CMD ;
6080 +       }
6081 +       if (PGR_Check_Lock.dest == TO_FRONTEND)
6082 +       {
6083 +               ReadyForQuery(DestRemote);
6084 +               EndCommand(msg,DestRemote);
6085 +#ifdef CONTROL_LOCK_CONFLICT
6086 +               rtn = wait_lock_answer();
6087 +#endif /* CONTROL_LOCK_CONFLICT */
6088 +       }
6089 +       else
6090 +       {
6091 +               send_response_to_replication_server(msg);       /* send status is not checked */
6092 +#ifdef CONTROL_LOCK_CONFLICT
6093 +               rtn = PGR_Recv_Trigger (PGR_Replication_Timeout);
6094 +#endif /* CONTROL_LOCK_CONFLICT */
6095 +       }
6096 +       return rtn;
6097 +}
6098 +/* wait_lock_answer: read one reply via read_trigger() and map a negative result to STATUS_ERROR, anything else to STATUS_OK. */
6099 +#ifdef CONTROL_LOCK_CONFLICT
6100 +static int
6101 +wait_lock_answer(void)
6102 +{
6103 +       char result[PGR_MESSAGE_BUFSIZE+4];
6104 +       int rtn = 0;
6105 +
6106 +       memset(result,0,sizeof(result));
6107 +       rtn = read_trigger(result, PGR_MESSAGE_BUFSIZE);        /* the reply text itself is not inspected */
6108 +       if (rtn < 0)
6109 +               return STATUS_ERROR;
6110 +       return STATUS_OK;
6111 +}
6112 +/* Read bytes one at a time with pq_getbytes() into result until a NUL byte, a read failure, or buf_size-1 stored bytes. */
6113 +static int
6114 +read_trigger(char * result, int buf_size)
6115 +{
6116 +       int i = 0;
6117 +       char c;
6118 +       int r = 0;      /* last pq_getbytes() result; this is what the function returns */
6119 +
6120 +       if ((result == NULL) || (buf_size <= 0 ))
6121 +       {
6122 +               return EOF;
6123 +       }
6124 +       /*
6125 +       pq_getbytes(result,buf_size);
6126 +       */
6127 +       while ((r = pq_getbytes(&c,1)) == 0)
6128 +       {
6129 +               if (i < buf_size -1)
6130 +               {
6131 +                       *(result + i) = c;
6132 +               }
6133 +               else
6134 +               {
6135 +                       break;  /* buffer full: the byte just read is dropped */
6136 +               }
6137 +               if (c == '\0')
6138 +                       break;
6139 +               i++;
6140 +       }
6141 +
6142 +       return r;
6143 +}
6144 +#endif /* CONTROL_LOCK_CONFLICT */
6145 +/* Wait for a reply from the replication server and return the positive number it carries, STATUS_OK, or STATUS_ERROR. */
6146 +int
6147 +PGR_Recv_Trigger (int user_timeout)
6148 +{
6149 +       char result[PGR_MESSAGE_BUFSIZE];
6150 +       int rtn = 0;
6151 +       int func_no = 0;
6152 +
6153 +       
6154 +       if (PGR_Lock_Noticed)
6155 +       {
6156 +               return STATUS_OK;       /* nothing to wait for once the lock was already noticed */
6157 +       }
6158 +       if (PGR_Sock_To_Replication_Server < 0)
6159 +               return STATUS_ERROR;
6160 +       memset(result,0,sizeof(result));
6161 +       rtn = PGR_recv_replicate_result(PGR_Sock_To_Replication_Server,result,user_timeout);
6162 +       if (rtn > 0)
6163 +       {
6164 +               func_no = atoi(result);
6165 +               if (func_no  <= 0)      /* non-numeric or non-positive replies count as plain success */
6166 +               {
6167 +                       func_no = STATUS_OK;
6168 +               }
6169 +               return func_no;
6170 +       }
6171 +       else 
6172 +       {
6173 +               if (user_timeout == 0)  /* only a failure with timeout 0 marks the current server as DATA_ERR */
6174 +               {
6175 +                       PGR_Set_Replication_Server_Status(CurrentReplicateServer, DATA_ERR);
6176 +               }
6177 +               return STATUS_ERROR;
6178 +       }
6179 +       return STATUS_OK;       /* not reached: both branches above return */
6180 +}
6181 +
6182 +/* Return the transaction nesting counter after commandTag: +1 for BEGIN / START TRANSACTION, -1 for COMMIT... / ROLLBACK... while positive. */
6183 +int
6184 +PGR_Set_Transaction_Mode(int mode,const char * commandTag)
6185 +{
6186 +       if (commandTag == NULL)
6187 +       {
6188 +               return mode;
6189 +       }
6190 +       if ((!strcmp(commandTag,"BEGIN")) ||
6191 +               (!strcmp(commandTag,"START TRANSACTION")) )
6192 +       {
6193 +               return (++mode);
6194 +       }
6195 +       if (mode > 0)   /* never decrement below zero */
6196 +       {
6197 +               if ((!strncmp(commandTag,"COMMIT",strlen("COMMIT"))) || /* prefix compare: any tag starting with COMMIT */
6198 +                       (!strncmp(commandTag,"ROLLBACK",strlen("ROLLBACK"))))
6199 +               {
6200 +                       return (--mode);
6201 +               }
6202 +       }
6203 +       return mode;    /* any other tag leaves the counter unchanged */
6204 +}
6205 +/* Return true for a NULL tag or for the exact tags SELECT, CLOSE CURSOR, MOVE, FETCH and EXPLAIN; false for every other tag. */
6206 +static bool
6207 +do_not_replication_command(const char * commandTag)
6208 +{
6209 +       if (commandTag == NULL)
6210 +       {
6211 +               return true;
6212 +       }
6213 +       if ((!strcmp(commandTag,"SELECT")) ||
6214 +               (!strcmp(commandTag,"CLOSE CURSOR")) ||
6215 +               (!strcmp(commandTag,"MOVE")) ||
6216 +               (!strcmp(commandTag,"FETCH")) ||
6217 +               (!strcmp(commandTag,"EXPLAIN")))
6218 +       {
6219 +               return true;
6220 +       }
6221 +       else
6222 +       {
6223 +               return false;
6224 +       }
6225 +}
6226 +/* Set sp->useFlag to status; a NULL sp is ignored and the field is written only when the value differs. */
6227 +void
6228 +PGR_Set_Replication_Server_Status( ReplicateServerInfo * sp, int status)
6229 +{
6230 +       if (sp == NULL)
6231 +       {
6232 +               return;
6233 +       }
6234 +       if (sp->useFlag != status)
6235 +       {
6236 +               sp->useFlag = status;
6237 +       }
6238 +}
6239 +/* Classify query by prefix: 3 for SKIP_QUERY_1 / SKIP_QUERY_2, 1 for SKIP_QUERY_3 / SKIP_QUERY_4, 0 otherwise, -1 on NULL input. */
6240 +int
6241 +PGR_Is_Skip_Replication(char * query)
6242 +{
6243 +       char skip_2[256];
6244 +
6245 +       if ((query == NULL) ||
6246 +               (MyProcPort == NULL))
6247 +       {
6248 +               return -1;
6249 +       }
6250 +       snprintf(skip_2,sizeof(skip_2),SKIP_QUERY_2,MyProcPort->user_name);     /* SKIP_QUERY_2 is a format string filled with the session user name */
6251 +       if ((strncmp(query,SKIP_QUERY_1,strlen(SKIP_QUERY_1)) == 0) ||
6252 +               (strncmp(query,skip_2,strlen(skip_2)) == 0))
6253 +       {
6254 +               return 3;
6255 +       }
6256 +       if ((strncmp(query,SKIP_QUERY_3,strlen(SKIP_QUERY_3)) == 0) ||
6257 +               (strncmp(query,SKIP_QUERY_4,strlen(SKIP_QUERY_4)) == 0))
6258 +       {
6259 +               return 1;
6260 +       }
6261 +       return 0;
6262 +}
6263 +/* Ask the replication server whether this transaction was already committed on another
6264 + * server: true only for a PGR_TRANSACTION_CONFIRM_ANSWER_FUNC_NO reply carrying
6265 + * PGR_ALREADY_COMMITTED, false on every failure path. */
6266 +bool
6267 +PGR_Did_Commit_Transaction(void)
6268 +{
6269 +       int sock = -1;
6270 +       int cnt = 0;
6271 +       ReplicateHeader header;
6272 +       char * result = NULL;
6273 +       ReplicateServerInfo * sp = NULL;
6274 +       ReplicateServerInfo * base = NULL;
6275 +       int socket_type = 0;
6276 +       char argv[ PGR_CMD_ARG_NUM ][256];
6277 +       int argc = 0;
6278 +       int func_no = 0;
6279 +
6280 +       /* same NULL guard as PGR_GetTimeOfDay() and PGR_Random() */
6281 +       if ((ReplicateCurrentTime == NULL) ||
6282 +               (ReplicateCurrentTime->useFlag != DATA_USE))
6283 +       {
6284 +               return false;
6285 +       }
6286 +       sp = PGR_get_replicate_server_info();
6287 +       if (sp == NULL)
6288 +       {
6289 +               if (Debug_pretty_print)
6290 +                       elog(DEBUG1,"PGR_get_replicate_server_info get error");
6291 +               return false;
6292 +       }
6293 +       sock = PGR_get_replicate_server_socket( sp , PGR_QUERY_SOCKET);
6294 +       if (sock < 0)
6295 +       {
6296 +               if (Debug_pretty_print)
6297 +                       elog(DEBUG1,"PGR_get_replicate_server_socket fail");
6298 +               return false;
6299 +       }
6300 +       result = malloc(PGR_MESSAGE_BUFSIZE);
6301 +       if (result == NULL)
6302 +       {
6303 +               return false;
6304 +       }
6305 +       memset(result,0,PGR_MESSAGE_BUFSIZE);
6306 +
6307 +       /* Zero the header first, as send_response_to_replication_server() does: it is sent
6308 +        * as-is, and bytes not written below would otherwise be stack garbage. */
6309 +       memset(&header,0,sizeof(ReplicateHeader));
6310 +       header.cmdSys = CMD_SYS_CALL;
6311 +       header.cmdSts = CMD_STS_TRANSACTION_ABORT;
6312 +       header.cmdType = CMD_TYPE_COMMIT_CONFIRM;
6313 +       header.port = htons(PostPortNumber);
6314 +       header.pid = htons(getpid());   /* NOTE(review): htons() keeps only 16 bits of the pid */
6315 +       header.tv.tv_sec = htonl(ReplicateCurrentTime->tp.tv_sec);
6316 +       header.tv.tv_usec = htonl(ReplicateCurrentTime->tp.tv_usec);
6317 +       header.query_size = htonl(0);
6318 +       strncpy(header.dbName ,(char *)(MyProcPort->database_name),sizeof(header.dbName)-1);
6319 +       strncpy(header.userName , (char *)(MyProcPort->user_name),sizeof(header.userName)-1);
6320 +       strncpy(header.password , PGR_password->password, PASSWORD_MAX_LENGTH );
6321 +       memcpy(header.md5Salt ,MyProcPort->md5Salt, sizeof(header.md5Salt));
6322 +       memcpy(header.cryptSalt ,MyProcPort->cryptSalt, sizeof(header.cryptSalt));
6323 +       if (PGRSelfHostName != NULL)
6324 +       {
6325 +               strncpy(header.from_host, PGRSelfHostName, HOSTNAME_MAX_LENGTH);
6326 +       }
6327 +       header.replicate_id = htonl(ReplicationLog_Info.PGR_Replicate_ID);
6328 +       header.request_id = 0;
6329 +
6330 +       base = sp;
6331 +       PGR_Sock_To_Replication_Server = sock;
6332 +       cnt = 0;        /* resend until accepted; ask for another socket after MAX_RETRY_TIMES failures */
6333 +       while (send_replicate_packet(sock,&header,"") != STATUS_OK)
6334 +       {
6335 +               if (cnt > MAX_RETRY_TIMES )
6336 +               {
6337 +                       sock = get_new_replication_socket( base, sp, socket_type);
6338 +                       if (sock < 0)
6339 +                       {
6340 +                               if (Debug_pretty_print)
6341 +                                       elog(DEBUG1,"all replication servers may be down");
6342 +                               PGR_Stand_Alone->is_stand_alone = true;
6343 +                               free(result);
6344 +                               result = NULL;
6345 +                               return false;
6346 +                       }
6347 +                       PGR_Sock_To_Replication_Server = sock;
6348 +                       cnt = 0;
6349 +               }
6350 +               cnt ++;
6351 +       }
6352 +       if (PGR_recv_replicate_result(sock,result,6) < 0)
6353 +       {
6354 +               free(result);
6355 +               result = NULL;
6356 +               return false;
6357 +       }
6358 +       /* read answer */
6359 +       argc = set_command_args(argv,result);
6360 +       if (argc >= 2)  /* argv[1] is read below, so two parsed arguments are required */
6361 +       {
6362 +               func_no = atoi(argv[0]);
6363 +               if (func_no == PGR_TRANSACTION_CONFIRM_ANSWER_FUNC_NO)
6364 +               {
6365 +                       /* the transaction was committed in other server */
6366 +                       if (atoi(argv[1]) == PGR_ALREADY_COMMITTED)
6367 +                       {
6368 +                               free(result);
6369 +                               result = NULL;
6370 +                               return true;
6371 +                       }
6372 +               }
6373 +       }
6374 +       free(result);
6375 +       result = NULL;
6376 +       return false;
6377 +}
6378 +
6379 +/*
6380 + * PGRsend_system_command()
6381 + *     Send a CMD_SYS_CALL header (empty query body) carrying cmdSts and
6382 + *     cmdType.  The send is retried; after more than MAX_RETRY_TIMES
6383 + *     failures another socket is requested, and when none is obtained
6384 + *     PGR_Stand_Alone->is_stand_alone is set.
6385 + * RETURN
6386 + *     STATUS_OK when the packet was sent, STATUS_ERROR otherwise.
6387 + */
6388 +int
6389 +PGRsend_system_command(char cmdSts, char cmdType)
6390 +{
6391 +       ReplicateServerInfo * sp = NULL;
6392 +       int sock = -1;
6393 +       int socket_type = 0;    /* NOTE(review): the first socket is opened as PGR_QUERY_SOCKET - confirm 0 is intended for reconnects */
6394 +       ReplicateHeader header;
6395 +       int cnt = 0;
6396 +       ReplicateServerInfo * base = NULL;
6397 +
6398 +       sp = PGR_get_replicate_server_info();
6399 +       if (sp == NULL)
6400 +       {
6401 +               if (Debug_pretty_print)
6402 +                       elog(DEBUG1,"PGR_get_replicate_server_info get error");
6403 +               return STATUS_ERROR;
6404 +       }
6405 +       sock = PGR_get_replicate_server_socket( sp , PGR_QUERY_SOCKET);
6406 +       if (sock < 0)
6407 +       {
6408 +               if (Debug_pretty_print)
6409 +                       elog(DEBUG1,"PGR_get_replicate_server_socket fail");
6410 +               return STATUS_ERROR;
6411 +       }
6412 +
6413 +       /* zero the header first: unset fields and the tails of the strncpy'd strings are sent as-is */
6414 +       memset(&header, 0, sizeof(ReplicateHeader));
6415 +       header.cmdSys = CMD_SYS_CALL;
6416 +       header.cmdSts = cmdSts;
6417 +       header.cmdType = cmdType;
6418 +       header.port = htons(PostPortNumber);
6419 +       header.pid = htons(getpid());
6420 +       header.tv.tv_sec = htonl(ReplicateCurrentTime->tp.tv_sec);
6421 +       header.tv.tv_usec = htonl(ReplicateCurrentTime->tp.tv_usec);
6422 +       header.query_size = htonl(0);
6423 +       strncpy(header.dbName ,(char *)(MyProcPort->database_name),sizeof(header.dbName)-1);
6424 +       strncpy(header.userName , (char *)(MyProcPort->user_name),sizeof(header.userName)-1);
6425 +       strncpy(header.password , PGR_password->password, PASSWORD_MAX_LENGTH );
6426 +       memcpy(header.md5Salt ,MyProcPort->md5Salt, sizeof(header.md5Salt));
6427 +       memcpy(header.cryptSalt ,MyProcPort->cryptSalt, sizeof(header.cryptSalt));
6428 +       if (PGRSelfHostName != NULL)
6429 +       {
6430 +               strncpy(header.from_host, PGRSelfHostName, HOSTNAME_MAX_LENGTH);
6431 +       }
6432 +       header.replicate_id = htonl(ReplicationLog_Info.PGR_Replicate_ID);
6433 +       header.request_id = 0;
6434 +
6435 +       base = sp;
6436 +       PGR_Sock_To_Replication_Server = sock;
6437 +       cnt = 0;
6438 +       /* the query string is empty: the header alone carries the command */
6439 +       while (send_replicate_packet(sock,&header,"") != STATUS_OK)
6440 +       {
6441 +               /* too many failed sends on this socket: request a replacement */
6442 +               if (cnt > MAX_RETRY_TIMES )
6443 +               {
6444 +                       sock = get_new_replication_socket( base, sp, socket_type);
6445 +                       if (sock < 0)
6446 +                       {
6447 +                               if (Debug_pretty_print)
6448 +                                       elog(DEBUG1,"all replication servers may be down");
6449 +                               /* no replacement socket: flag this node as stand-alone */
6450 +                               PGR_Stand_Alone->is_stand_alone = true;
6451 +                               return STATUS_ERROR;
6452 +                       }
6453 +                       PGR_Sock_To_Replication_Server = sock;
6454 +                       cnt = 0;
6455 +               }
6456 +               cnt ++;
6457 +       }
6458 +
6459 +       return STATUS_OK;
6460 +}
6461 +
6462 +static char *
6463 +get_hostName(char * str)
6464 +{
6465 +       char * top = NULL;
6466 +       char * p = NULL;
6467 +
6468 +       /* Cut str in place at every single quote and return the text that
6469 +        * follows the first quote, or NULL when str holds no quote. */
6470 +       for (p = str; *p != '\0'; p++)
6471 +       {
6472 +               if (*p == '\'')
6473 +               {
6474 +                       *p = '\0';
6475 +                       /* do not also step over the next byte here: after a
6476 +                        * closing quote it is the terminator, and skipping it
6477 +                        * sent the scan past the end of the buffer */
6478 +                       if (top == NULL)
6479 +                               top = p + 1;
6480 +               }
6481 +       }
6482 +       return top;
6483 +}
6484 +
6485 +char *
6486 +PGR_Remove_Comment(char * str)
6487 +{
6488 +       char * p = str;
6489 +       /* Return a pointer into str past leading white space and "--" / "//" line comments; str is not modified. */
6490 +       while( *p != '\0')
6491 +       {
6492 +               /* cast: isspace() is undefined for negative char values */
6493 +               while(isspace((unsigned char)*p))
6494 +                       p++;
6495 +               /* strncmp (not memcmp) never reads beyond the terminating NUL */
6496 +               if ((!strncmp(p,"--",2)) ||
6497 +                       (!strncmp(p,"//",2)))
6498 +               {
6499 +                       while((*p != '\n') && (*p != '\0'))
6500 +                       {
6501 +                               p++;
6502 +                       }
6503 +                       continue;
6504 +               }
6505 +               break;
6506 +       }
6507 +       return p;
6508 +}
6509 +
6510 +void
6511 +PGR_Force_Replicate_Query(void)
6512 +{
6513 +       if (PGR_Retry_Query.useFlag == DATA_USE)
6514 +       {
6515 +               /* resend the saved query; the other callers in this file free() the returned buffer, so release it here too (free(NULL) is a no-op) */
6516 +               free(PGR_Send_Replicate_Command(PGR_Retry_Query.query_string,
6517 +                       PGR_Retry_Query.query_len,
6518 +                       PGR_Retry_Query.cmdSts, PGR_Retry_Query.cmdType));
6519 +       }
6520 +}
6521 +
6522 +void
6523 +PGR_Notice_DeadLock(void)
6524 +{
6525 +       ReplicateHeader header;
6526 +       /* send a zeroed header that carries only CMD_SYS_CALL / CMD_STS_NOTICE / CMD_TYPE_DEADLOCK_DETECT */
6527 +       memset(&header,0,sizeof(ReplicateHeader));
6528 +       header.cmdSys = CMD_SYS_CALL;
6529 +       header.cmdSts = CMD_STS_NOTICE;
6530 +       header.cmdType = CMD_TYPE_DEADLOCK_DETECT;
6531 +       header.query_size = 0;
6532 +       send_replicate_packet(PGR_Sock_To_Replication_Server,&header,(char *)NULL);     /* no query body; the return value is ignored */
6533 +}
6534 +
6535 +void
6536 +PGR_Set_Cluster_Status(int status)
6537 +{
6538 +       /* nothing to update while ClusterDBData is not set */
6539 +       if (ClusterDBData == NULL)
6540 +               return;
6541 +       /* store only on change, so an unchanged value causes no write */
6542 +       if (ClusterDBData->status == status)
6543 +               return;
6544 +       ClusterDBData->status = status;
6545 +}
6546 +
6547 +int
6548 +PGR_Get_Cluster_Status(void)
6549 +{
6550 +       /* 0 is reported while ClusterDBData is not set */
6551 +       if (ClusterDBData == NULL)
6552 +               return 0;
6553 +       /* otherwise hand back the stored status */
6554 +       return (ClusterDBData->status);
6555 +}
6556 +
6557 +int
6558 +PGR_Check_Replicate_Server_Status(ReplicateServerInfo * sp)
6559 +{
6560 +       ReplicateHeader header;
6561 +       char * result = NULL;
6562 +       int status;
6563 +       int fdP = -1;   /* stays -1 when the connect call never stores a descriptor */
6564 +       /* Lifecheck: connect to sp, send a CMD_SYS_PREREPLICATE header and read the reply; returns STATUS_OK / STATUS_ERROR. */
6565 +       result = malloc(PGR_MESSAGE_BUFSIZE + 4);
6566 +       if (result == NULL)
6567 +       {
6568 +               if (Debug_pretty_print)
6569 +                       elog(DEBUG1,"malloc failed in PGR_Check_Replicate_Server_Status()");
6570 +               return STATUS_ERROR;
6571 +       }
6572 +
6573 +       memset(&header, 0, sizeof(ReplicateHeader));
6574 +       memset(result,  0, PGR_MESSAGE_BUFSIZE + 4);
6575 +
6576 +       header.cmdSys = CMD_SYS_PREREPLICATE;
6577 +       header.cmdSts = CMD_STS_OTHER;
6578 +       header.cmdType = CMD_TYPE_OTHER;
6579 +       header.port = htons(PostPortNumber);
6580 +       header.pid = htons(getpid());
6581 +       header.query_size = 0;
6582 +       strncpy(header.dbName ,(char *)(MyProcPort->database_name),sizeof(header.dbName)-1);
6583 +       strncpy(header.userName , (char *)(MyProcPort->user_name),sizeof(header.userName)-1);
6584 +       strncpy(header.password , PGR_password->password, PASSWORD_MAX_LENGTH );
6585 +       memcpy(header.md5Salt ,MyProcPort->md5Salt, sizeof(header.md5Salt));
6586 +       memcpy(header.cryptSalt ,MyProcPort->cryptSalt, sizeof(header.cryptSalt));
6587 +       header.request_id = htonl(get_next_request_id());
6588 +       header.rlog = 0;
6589 +       if (PGRSelfHostName != NULL) {
6590 +               strncpy(header.from_host, PGRSelfHostName, HOSTNAME_MAX_LENGTH);
6591 +       }
6592 +
6593 +       /* open a new socket for lifecheck */
6594 +       if ((status = PGR_Create_Socket_Connect(&fdP, sp->hostName, sp->portNumber)) == STATUS_ERROR) {
6595 +               if (Debug_pretty_print) {
6596 +                       elog(DEBUG1,"create socket failed in PGR_Check_Replicate_Server_Status()");
6597 +               }
6598 +
6599 +       /* status = STATUS_OK */
6600 +       } else {
6601 +               if ((status = send_replicate_packet(fdP, &header, (char *)NULL)) == STATUS_OK) {
6602 +                       /* receive result to check for possible deadlock; NOTE(review): a return value <= 0 is mapped to STATUS_OK here - confirm */
6603 +                       status = (0 >= PGR_recv_replicate_result(fdP, result ,0))
6604 +                               ? STATUS_OK : STATUS_ERROR;
6605 +               }
6606 +       }
6607 +       /* close only a descriptor that was really stored (fdP used to reach PGR_Close_Sock uninitialised) */
6608 +       free(result);
6609 +       if (fdP >= 0)
6610 +               PGR_Close_Sock(&fdP);
6611 +       return status;
6612 +}
6613 +
6614 +static int
6615 +return_current_oid(void)
6616 +{
6617 +       char msg[PGR_MESSAGE_BUFSIZE];
6618 +       /* Send ShmemVariableCache->nextOid as text to the frontend or to the replication server (chosen by PGR_Check_Lock.dest). */
6619 +       LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
6620 +
6621 +       if (ShmemVariableCache->nextOid < ((Oid) FirstBootstrapObjectId))
6622 +       {
6623 +               ShmemVariableCache->nextOid = FirstBootstrapObjectId;
6624 +               ShmemVariableCache->oidCount = 0;
6625 +       }
6626 +
6627 +       if (ShmemVariableCache->oidCount == 0)
6628 +       {
6629 +               XLogPutNextOid(ShmemVariableCache->nextOid + VAR_OID_PREFETCH);
6630 +               ShmemVariableCache->oidCount = VAR_OID_PREFETCH;
6631 +       }
6632 +       /* format the reply while OidGenLock still protects nextOid; it used to be read after the release */
6633 +       memset(msg,0,sizeof(msg));
6634 +       snprintf(msg, sizeof(msg), "%u", ShmemVariableCache->nextOid);
6635 +       LWLockRelease(OidGenLock);
6636 +       if (PGR_Check_Lock.dest == TO_FRONTEND)
6637 +       {
6638 +               pq_puttextmessage('C',msg);
6639 +               pq_flush();
6640 +       }
6641 +       else
6642 +       {
6643 +               send_response_to_replication_server(msg);
6644 +       }
6645 +       return STATUS_OK;
6646 +}
6647 +
6648 +static int
6649 +sync_oid(char * oid)
6650 +{
6651 +       uint32_t next_oid = 0;
6652 +       int offset = 0;
6653 +       char msg[PGR_MESSAGE_BUFSIZE];
6654 +
6655 +       /* Advance nextOid past the decimal OID in oid and report it; parse before locking so a bad argument cannot leave OidGenLock held */
6656 +       next_oid =  strtoul(oid, NULL, 10);
6657 +       if (next_oid <= 0)
6658 +               return STATUS_ERROR;
6659 +       next_oid ++;
6660 +       LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
6661 +       offset = next_oid - ShmemVariableCache->nextOid ;
6662 +       if (offset <= 0)
6663 +       {
6664 +               LWLockRelease(OidGenLock);      /* this path used to return with the lock held */
6665 +               return STATUS_ERROR;
6666 +       }
6667 +       if (next_oid < FirstBootstrapObjectId)
6668 +       {
6669 +               ShmemVariableCache->nextOid = FirstBootstrapObjectId;
6670 +               ShmemVariableCache->oidCount = 0;
6671 +       }
6672 +       /* If we run out of logged for use oids then we must log more */
6673 +       /* (compare instead of subtracting: oidCount is uint32 in PostgreSQL, so "oidCount - offset <= 0" was true only on equality) */
6674 +       while (ShmemVariableCache->oidCount <= (uint32_t) offset)
6675 +       {
6676 +               offset -= (ShmemVariableCache->oidCount) ;
6677 +               (ShmemVariableCache->nextOid) += (ShmemVariableCache->oidCount);
6678 +               XLogPutNextOid(ShmemVariableCache->nextOid + VAR_OID_PREFETCH);
6679 +               ShmemVariableCache->oidCount = VAR_OID_PREFETCH;
6680 +       }
6681 +       (ShmemVariableCache->nextOid) += offset;
6682 +       (ShmemVariableCache->oidCount) -= offset;
6683 +       /* format the reply before the release, while nextOid is still protected */
6684 +       memset(msg,0,sizeof(msg));
6685 +       snprintf(msg, sizeof(msg), "%u", ShmemVariableCache->nextOid);
6686 +       LWLockRelease(OidGenLock);
6687 +       if (PGR_Check_Lock.dest == TO_FRONTEND)
6688 +       {
6689 +               pq_puttextmessage('C',msg);
6690 +               pq_flush();
6691 +       }
6692 +       else
6693 +       {
6694 +               send_response_to_replication_server(msg);
6695 +       }
6696 +       return STATUS_OK;
6697 +}
6698 +
6699 +int
6700 +PGR_lo_import(char * filename)
6701 +{
6702 +       char * result = NULL;
6703 +       LOArgs *lo_args;
6704 +       int len = 0;
6705 +       int buf_size = 0;
6706 +       /* Send filename as a CMD_STS_LARGE_OBJECT / CMD_TYPE_LO_IMPORT command; STATUS_OK when a reply buffer comes back. */
6707 +       if ((PGR_Is_Replicated_Query == true) ||
6708 +               (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION))
6709 +       {
6710 +               return STATUS_OK;       /* already replicated, or cmdSts is CMD_STS_TRANSACTION: nothing is sent */
6711 +       }
6712 +       if ((PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6713 +               (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6714 +       {
6715 +               return STATUS_OK;       /* only CMD_STS_QUERY with CMD_TYPE_SELECT is forwarded */
6716 +       }
6717 +       /* payload: an LOArgs immediately followed by the file name bytes (4 spare bytes are allocated and zeroed) */
6718 +       len = strlen(filename);
6719 +       buf_size = sizeof(LOArgs) + len;
6720 +       lo_args = (LOArgs *)malloc(buf_size + 4);
6721 +       if (lo_args == (LOArgs *)NULL)
6722 +       {
6723 +               return STATUS_ERROR;
6724 +       }
6725 +       memset(lo_args, 0, buf_size + 4);
6726 +       lo_args->arg1 = htonl((uint32_t)len);   /* name length, network byte order */
6727 +       memcpy(lo_args->buf, filename, len);
6728 +
6729 +       result = PGR_Send_Replicate_Command((char *)lo_args,
6730 +               buf_size,
6731 +               CMD_STS_LARGE_OBJECT,
6732 +               CMD_TYPE_LO_IMPORT);
6733 +
6734 +       free(lo_args);
6735 +       if (result != NULL)
6736 +       {
6737 +               free(result);   /* the reply content is not inspected */
6738 +               return STATUS_OK;
6739 +       }
6740 +       
6741 +       return STATUS_ERROR;
6742 +}
6743 +
6744 +int
6745 +PGR_lo_create(int flags)
6746 +{
6747 +       char * result = NULL;
6748 +       LOArgs lo_args;
6749 +       /* Send flags as a CMD_STS_LARGE_OBJECT / CMD_TYPE_LO_CREATE command; STATUS_OK when a reply buffer comes back. */
6750 +       if ((PGR_Is_Replicated_Query == true) ||
6751 +               (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION))
6752 +       {
6753 +               return STATUS_OK;       /* already replicated, or cmdSts is CMD_STS_TRANSACTION: nothing is sent */
6754 +       }
6755 +       if ((PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6756 +               (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6757 +       {
6758 +               return STATUS_OK;       /* only CMD_STS_QUERY with CMD_TYPE_SELECT is forwarded */
6759 +       }
6760 +       memset(&lo_args, 0, sizeof(LOArgs));
6761 +       lo_args.arg1 = htonl(flags);    /* network byte order */
6762 +
6763 +       result = PGR_Send_Replicate_Command((char *)&lo_args,
6764 +               sizeof(LOArgs),
6765 +               CMD_STS_LARGE_OBJECT,
6766 +               CMD_TYPE_LO_CREATE);
6767 +
6768 +       if (result != NULL)
6769 +       {
6770 +               free(result);   /* the reply content is not inspected */
6771 +               return STATUS_OK;
6772 +       }
6773 +       
6774 +       return STATUS_ERROR;
6775 +}
6776 +
6777 +int
6778 +PGR_lo_open(Oid lobjId,int32 mode)
6779 +{
6780 +       char * result = NULL;
6781 +       LOArgs lo_args;
6782 +       /* Send lobjId and mode as a CMD_STS_LARGE_OBJECT / CMD_TYPE_LO_OPEN command; STATUS_OK when a reply buffer comes back. */
6783 +       if ((PGR_Is_Replicated_Query == true) ||
6784 +               (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION))
6785 +       {
6786 +               return STATUS_OK;       /* already replicated, or cmdSts is CMD_STS_TRANSACTION: nothing is sent */
6787 +       }
6788 +       if ((PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6789 +               (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6790 +       {
6791 +               return STATUS_OK;       /* only CMD_STS_QUERY with CMD_TYPE_SELECT is forwarded */
6792 +       }
6793 +       memset(&lo_args, 0, sizeof(LOArgs));
6794 +       lo_args.arg1 = htonl((uint32_t)lobjId); /* both arguments go out in network byte order */
6795 +       lo_args.arg2 = htonl((uint32_t)mode);
6796 +
6797 +       result = PGR_Send_Replicate_Command((char *)&lo_args,
6798 +               sizeof(LOArgs),
6799 +               CMD_STS_LARGE_OBJECT,
6800 +               CMD_TYPE_LO_OPEN);
6801 +       
6802 +       if (result != NULL)
6803 +       {
6804 +               free(result);   /* the reply content is not inspected */
6805 +               return STATUS_OK;
6806 +       }
6807 +       
6808 +       return STATUS_ERROR;
6809 +}
6810 +
6811 +int
6812 +PGR_lo_close(int32 fd)
6813 +{
6814 +       char * result = NULL;
6815 +       LOArgs lo_args;
6816 +       /* Send fd as a CMD_STS_LARGE_OBJECT / CMD_TYPE_LO_CLOSE command; STATUS_OK when a reply buffer comes back. */
6817 +       if ((PGR_Is_Replicated_Query == true) ||
6818 +               (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION))
6819 +       {
6820 +               return STATUS_OK;       /* already replicated, or cmdSts is CMD_STS_TRANSACTION: nothing is sent */
6821 +       }
6822 +       if ((PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6823 +               (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6824 +       {
6825 +               return STATUS_OK;       /* only CMD_STS_QUERY with CMD_TYPE_SELECT is forwarded */
6826 +       }
6827 +       memset(&lo_args, 0, sizeof(LOArgs));
6828 +       lo_args.arg1 = htonl((uint32_t)fd);     /* network byte order */
6829 +
6830 +       result = PGR_Send_Replicate_Command((char *)&lo_args,
6831 +               sizeof(LOArgs),
6832 +               CMD_STS_LARGE_OBJECT,
6833 +               CMD_TYPE_LO_CLOSE);
6834 +
6835 +       if (result != NULL)
6836 +       {
6837 +               free(result);   /* the reply content is not inspected */
6838 +               return STATUS_OK;
6839 +       }
6840 +       
6841 +       return STATUS_ERROR;
6842 +}
6843 +
6844 +int
6845 +PGR_lo_write(int fd, char *buf, int len)
6846 +{
6847 +       char * result = NULL;
6848 +       LOArgs *lo_args = NULL;
6849 +       int buf_size = 0;
6850 +       /* Send fd plus len bytes of buf as a CMD_STS_LARGE_OBJECT / CMD_TYPE_LO_WRITE command; STATUS_OK when a reply buffer comes back. */
6851 +       if ((PGR_Is_Replicated_Query == true) ||
6852 +               (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION))
6853 +       {
6854 +               return STATUS_OK;       /* already replicated, or cmdSts is CMD_STS_TRANSACTION: nothing is sent */
6855 +       }
6856 +       if ((PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6857 +               (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6858 +       {
6859 +               return STATUS_OK;       /* only CMD_STS_QUERY with CMD_TYPE_SELECT is forwarded */
6860 +       }
6861 +       buf_size = sizeof(LOArgs) + len;        /* NOTE(review): len is not checked for negative values before it sizes the buffer and the memcpy */
6862 +       lo_args = malloc(buf_size + 4);
6863 +       if (lo_args == (LOArgs *)NULL)
6864 +       {
6865 +               return STATUS_ERROR;
6866 +       }
6867 +       memset(lo_args, 0, buf_size + 4);
6868 +       lo_args->arg1 = htonl((uint32_t)fd);    /* both arguments go out in network byte order */
6869 +       lo_args->arg2 = htonl((uint32_t)len);
6870 +       memcpy(lo_args->buf, buf, len);         /* data bytes follow the LOArgs fields */
6871 +       result = PGR_Send_Replicate_Command((char *)lo_args,
6872 +               buf_size,
6873 +               CMD_STS_LARGE_OBJECT,
6874 +               CMD_TYPE_LO_WRITE);
6875 +
6876 +       free(lo_args);
6877 +       if (result != NULL)
6878 +       {
6879 +               free(result);   /* the reply content is not inspected */
6880 +               return STATUS_OK;
6881 +       }
6882 +       
6883 +       return STATUS_ERROR;
6884 +}
6885 +
6886 +int
6887 +PGR_lo_lseek(int32 fd, int32 offset, int32 whence)
6888 +{
6889 +       char * result = NULL;
6890 +       LOArgs lo_args;
6891 +       /* Send fd, offset and whence as a CMD_STS_LARGE_OBJECT / CMD_TYPE_LO_LSEEK command; STATUS_OK when a reply buffer comes back. */
6892 +       if ((PGR_Is_Replicated_Query == true) ||
6893 +               (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION))
6894 +       {
6895 +               return STATUS_OK;       /* already replicated, or cmdSts is CMD_STS_TRANSACTION: nothing is sent */
6896 +       }
6897 +       if ((PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6898 +               (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6899 +       {
6900 +               return STATUS_OK;       /* only CMD_STS_QUERY with CMD_TYPE_SELECT is forwarded */
6901 +       }
6902 +       memset(&lo_args, 0, sizeof(LOArgs));
6903 +       lo_args.arg1 = htonl((uint32_t)fd);     /* all three arguments go out in network byte order */
6904 +       lo_args.arg2 = htonl((uint32_t)offset);
6905 +       lo_args.arg3 = htonl((uint32_t)whence);
6906 +
6907 +       result = PGR_Send_Replicate_Command((char *)&lo_args,
6908 +               sizeof(LOArgs),
6909 +               CMD_STS_LARGE_OBJECT,
6910 +               CMD_TYPE_LO_LSEEK);
6911 +
6912 +       if (result != NULL)
6913 +       {
6914 +               free(result);   /* the reply content is not inspected */
6915 +               return STATUS_OK;
6916 +       }
6917 +       
6918 +       return STATUS_ERROR;
6919 +}
6920 +
6921 +int
6922 +PGR_lo_unlink(Oid lobjId)
6923 +{
6924 +       char * result = NULL;
6925 +       LOArgs lo_args;
6926 +       /* Send lobjId as a CMD_STS_LARGE_OBJECT / CMD_TYPE_LO_UNLINK command; STATUS_OK when a reply buffer comes back. */
6927 +       if ((PGR_Is_Replicated_Query == true) ||
6928 +               (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION))
6929 +       {
6930 +               return STATUS_OK;       /* already replicated, or cmdSts is CMD_STS_TRANSACTION: nothing is sent */
6931 +       }
6932 +       if ((PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6933 +               (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6934 +       {
6935 +               return STATUS_OK;       /* only CMD_STS_QUERY with CMD_TYPE_SELECT is forwarded */
6936 +       }
6937 +       memset(&lo_args, 0, sizeof(LOArgs));
6938 +       lo_args.arg1 = htonl((uint32_t)lobjId); /* network byte order */
6939 +
6940 +       result = PGR_Send_Replicate_Command((char *)&lo_args,
6941 +               sizeof(LOArgs),
6942 +               CMD_STS_LARGE_OBJECT,
6943 +               CMD_TYPE_LO_UNLINK);
6944 +
6945 +       if (result != NULL)
6946 +       {
6947 +               free(result);   /* the reply content is not inspected */
6948 +               return STATUS_OK;
6949 +       }
6950 +       
6951 +       return STATUS_ERROR;
6952 +}
6953 +
6954 +Oid
6955 +PGRGetNewObjectId(Oid last_id)
6956 +{
6957 +       /*
6958 +        * When last_id is 0 the new id comes from PGRget_replication_id();
6959 +        * otherwise it is simply last_id + 1.
6960 +        */
6961 +       Oid newId = last_id + 1;
6962 +
6963 +       if (last_id == 0)
6964 +       {
6965 +               newId = (Oid)PGRget_replication_id();
6966 +       }
6967 +       return newId;
6968 +}
6969 +
6970 +int
6971 +PGR_Send_Input_Message(char cmdType,StringInfo input_message)
6972 +{
6973 +       int len = 0;
6974 +       char * ptr = NULL;
6975 +       char * result = NULL;
6976 +       /* Forward input_message as a CMD_STS_PREPARE command of type cmdType; STATUS_OK when a reply buffer comes back. */
6977 +       if (input_message == NULL)
6978 +       {
6979 +               return STATUS_ERROR;
6980 +       }
6981 +       if (PGR_Is_Replicated_Query == true)
6982 +       {
6983 +               return STATUS_OK;       /* nothing is sent for an already replicated query */
6984 +       }
6985 +       len = input_message->len+1;     /* one byte more than the recorded length is sent */
6986 +       ptr = input_message->data;
6987 +
6988 +       /* check setting of configuration value */
6989 +       if ( PGRnotReplicatePreparedSelect == true)
6990 +       {
6991 +               if (is_concerned_with_prepared_select(cmdType, ptr+1) == true)  /* the text examined starts at the second byte of the message */
6992 +               {
6993 +                       return STATUS_OK;       /* skipped: reported as success without sending */
6994 +               }
6995 +       }
6996 +       result = PGR_Send_Replicate_Command(ptr,len, CMD_STS_PREPARE,cmdType);
6997 +       if (result != NULL)
6998 +       {
6999 +               PGR_Reload_Start_Time();
7000 +               free(result);   /* the reply content is not inspected */
7001 +               result = NULL;
7002 +               return STATUS_OK;
7003 +       }
7004 +       else
7005 +       {
7006 +               return STATUS_ERROR;
7007 +       }
7008 +}
7009 +
7010 +static bool
7011 +is_concerned_with_prepared_select(char cmdType, char * query_string)   /* true while pgr_skip_in_prepared_query is (or has just been) set */
7012 +{
7013 +       if (cmdType == CMD_TYPE_P_PARSE)        /* only a Parse message can raise the skip flag */
7014 +       {
7015 +               switch (parse_message(query_string))
7016 +               {
7017 +                       case PGR_MESSAGE_SELECT:
7018 +                               pgr_skip_in_prepared_query = true;
7019 +                               break;  
7020 +                       case PGR_MESSAGE_PREPARE:
7021 +                               if (is_prepared_as_select(query_string) == true)
7022 +                               {
7023 +                                       pgr_skip_in_prepared_query = true;
7024 +                               }
7025 +                               break;  
7026 +                       case PGR_MESSAGE_EXECUTE:
7027 +                       case PGR_MESSAGE_DEALLOCATE:
7028 +                               if (is_statement_as_select(query_string) == true)
7029 +                               {
7030 +                                       pgr_skip_in_prepared_query = true;
7031 +                               }
7032 +                               break;  
7033 +               }
7034 +               if (pgr_skip_in_prepared_query == true)
7035 +               {
7036 +                       return true;
7037 +               }
7038 +       }
7039 +       if (pgr_skip_in_prepared_query == true) /* the flag is not local, so it is still set for the messages that follow the Parse */
7040 +       {
7041 +               if (cmdType == CMD_TYPE_P_SYNC) /* CMD_TYPE_P_SYNC clears the flag, but this message still returns true */
7042 +               {
7043 +                       pgr_skip_in_prepared_query = false;
7044 +               }
7045 +               return true;
7046 +       }
7047 +       return false;
7048 +}
7049 +
7050 +static int
7051 +skip_non_blank(char * ptr, int max)
7052 +{
7053 +       int i= 0;
7054 +       /* Length of the word at ptr: it ends at white space or at a parenthesis; -1 once more than max characters were scanned. */
7055 +       while(!isspace((unsigned char)*(ptr+i)))
7056 +       {
7057 +               /* test the current character (this used to read ptr+1, a fixed position) */
7058 +               if ((*(ptr+i) == '(') || (*(ptr+i) == ')'))
7059 +                       return i;
7060 +               i++;
7061 +               if (i > max)
7062 +                       return -1;
7063 +       }
7064 +       return i;
7065 +}
7066 +
7067 +static int
7068 +skip_blank(char * ptr, int max)
7069 +{
7070 +       int skipped;
7071 +       /* count the leading white space at ptr; -1 once the count exceeds max */
7072 +       for (skipped = 0; isspace(*(ptr+skipped)); )
7073 +       {
7074 +               if (++skipped > max)
7075 +                       return -1;
7076 +       }
7077 +       return skipped;
7078 +}
7079 +
7080 +static int
7081 +parse_message(char * query_string)     /* returns one of the PGR_MESSAGE_* codes for the first word of query_string */
7082 +{
7083 +       char * ptr =NULL;
7084 +       int rtn = 0;
7085 +       int i = 0;
7086 +       int len = 0;
7087 +       if (query_string == NULL)
7088 +       {
7089 +               return PGR_MESSAGE_OTHER;
7090 +       }
7091 +       len = strlen (query_string);
7092 +       if (len <= 0)
7093 +       {
7094 +               return PGR_MESSAGE_OTHER;
7095 +       }
7096 +       ptr = (char *)query_string;
7097 +       i = 0;
7098 +       /* skip leading white space */
7099 +       rtn = skip_blank(ptr+i, len-i);
7100 +       if (rtn < 0)
7101 +               return PGR_MESSAGE_OTHER;
7102 +       i += rtn;
7103 +       /* case-insensitive prefix match on the keyword; the character after the keyword is not checked */
7104 +       if (!strncasecmp(ptr+i,"SELECT",strlen("SELECT")))
7105 +       {
7106 +               return PGR_MESSAGE_SELECT;
7107 +       }
7108 +       if (!strncasecmp(ptr+i,"PREPARE",strlen("PREPARE")))
7109 +       {
7110 +               return PGR_MESSAGE_PREPARE;
7111 +       }
7112 +       if (!strncasecmp(ptr+i,"EXECUTE",strlen("EXECUTE")))
7113 +       {
7114 +               return PGR_MESSAGE_EXECUTE;
7115 +       }
7116 +       if (!strncasecmp(ptr+i,"DEALLOCATE",strlen("DEALLOCATE")))
7117 +       {
7118 +               return PGR_MESSAGE_DEALLOCATE;
7119 +       }
7120 +       return PGR_MESSAGE_OTHER;
7121 +}
7122 +
7123 +static bool
7124 +is_prepared_as_select(char * query_string)     /* true when the text after "<word> <name> [(...)] AS" starts with SELECT */
7125 +{
7126 +       char * ptr =NULL;
7127 +       int rtn = 0;
7128 +       int i = 0;      /* current offset into query_string */
7129 +       int len = 0;
7130 +       int args =0;    /* nesting depth of open parentheses */
7131 +       if (query_string == NULL)
7132 +       {
7133 +               return false;
7134 +       }
7135 +       ptr = (char *)query_string;
7136 +       len = strlen (query_string);
7137 +       i = 0;
7138 +       /* skip "PREPARE" word */
7139 +       rtn = skip_non_blank(ptr+i, len-i);     /* the word itself is not compared against "PREPARE" */
7140 +       if (rtn < 0)
7141 +               return false;
7142 +       i += rtn;
7143 +       /* skip space */
7144 +       rtn = skip_blank(ptr+i, len-i);
7145 +       if (rtn < 0)
7146 +               return false;
7147 +       i += rtn;
7148 +       /* skip plan_name */
7149 +       rtn = skip_non_blank(ptr+i, len-i);
7150 +       if (rtn < 0)
7151 +               return false;
7152 +       i += rtn;
7153 +       /* skip space */
7154 +       rtn = skip_blank(ptr+i, len-i);
7155 +       if (rtn < 0)
7156 +               return false;
7157 +       i += rtn;
7158 +       /* skip args */
7159 +       args = 0;
7160 +       if (*(ptr+i) == '(')
7161 +       {
7162 +               args ++;
7163 +               i++;
7164 +               while(args > 0) /* runs until the matching ')' has been consumed */
7165 +               {
7166 +                       if (*(ptr+i) == ')')
7167 +                               args --;
7168 +                       else if (*(ptr+i) == '(')
7169 +                               args ++;
7170 +                       i++;
7171 +                       if (i >= len)   /* reached the end of the text: give up */
7172 +                               return false;
7173 +               }
7174 +               /* skip space */
7175 +               rtn = skip_blank(ptr+i, len-i);
7176 +               if (rtn < 0)
7177 +                       return false;
7178 +               i += rtn;
7179 +       }
7180 +       /* skip "AS" word */
7181 +       i += strlen("AS");      /* two characters are skipped without checking that they spell "AS" */
7182 +       if (i >= len) 
7183 +               return false;
7184 +       /* skip space */
7185 +       rtn = skip_blank(ptr+i, len-i);
7186 +       if (rtn < 0)
7187 +               return false;
7188 +       i += rtn;
7189 +       /* check "SELECT" word */
7190 +       if (len-i < strlen("SELECT"))
7191 +               return false;
7192 +       if (!strncasecmp(ptr+i,"SELECT",strlen("SELECT")))      /* case-insensitive prefix match */
7193 +       {
7194 +               return true;
7195 +       }
7196 +       return false;
7197 +       
7198 +}
7199 +
7200 +/* Copy the alphanumeric name that follows the first word of query_string */
7201 +/* and return what PGR_is_select_prepared_statement() reports for it. */
7202 +static bool
7203 +is_statement_as_select(char * query_string)
7204 +{
7205 +       char * ptr =NULL;
7206 +       int rtn = 0;
7207 +       int i = 0;
7208 +       int j = 0;
7209 +       int len = 0;
7210 +       bool result = false;
7211 +       PrepareStmt stmt;
7212 +       char * name = NULL;
7213 +       if (query_string == NULL)
7214 +               return false;
7215 +       ptr = (char *)query_string;
7216 +       len = strlen (query_string);
7217 +       i = 0;
7218 +       /* skip "EXECUTE" or "DEALLOCATE" word */
7219 +       rtn = skip_non_blank(ptr+i, len-i);
7220 +       if (rtn < 0)
7221 +               return false;
7222 +       i += rtn;
7223 +       /* skip space */
7224 +       rtn = skip_blank(ptr+i, len-i);
7225 +       if (rtn < 0)
7226 +               return false;
7227 +       i += rtn;
7228 +       if ((name = malloc(len + 1)) == NULL)
7229 +               return false;
7230 +       memset(name,0,len + 1);
7231 +       j = 0;
7232 +       /* the bound lives in the loop test, so no exit path can return without freeing name */
7233 +       while((i < len) && isalnum((unsigned char)*(ptr+i)))
7234 +       {
7235 +               *(name+j) = *(ptr+i);
7236 +               i++;
7237 +               j++;
7238 +       }
7239 +       memset(&stmt, 0, sizeof(stmt)); /* only name is filled in here; keep the remaining fields defined */
7240 +       stmt.name = name;
7241 +       result = PGR_is_select_prepared_statement(&stmt);
7242 +       free(name);
7243 +       return result;
7244 +}
7245 +
7246 +bool
7247 +PGR_is_select_prepare_query(void)
7248 +{
7249 +       /*
7250 +        * Run is_prepared_as_select() on the text in debug_query_string;
7251 +        * with no text available the answer is false.
7252 +        */
7253 +       return (debug_query_string != NULL) && is_prepared_as_select((char *)debug_query_string);
7254 +}
7255 +
7256 +char *
7257 +PGR_get_md5salt(char * md5Salt, char * string)
7258 +{
7259 +       char buf[24];
7260 +       char * ptr = NULL;
7261 +       int len = 0;
7262 +       int i = 0;
7263 +       int cnt = 0;
7264 +       int index = 0;
7265 +       bool set_flag = false;
7266 +       /* Each "(<digits>)" group in string becomes one byte of md5Salt (via atoi), in order; md5Salt is returned. */
7267 +       ptr = (char *)md5Salt;
7268 +       len = strlen(string);
7269 +       for ( i = 0 ; i < len ; i ++)
7270 +       {
7271 +               /* close a group only while one is open, so a stray ')' can no longer advance index */
7272 +               if (set_flag && (*(string+i) == ')'))
7273 +               {
7274 +                       buf[index++] = '\0';
7275 +                       *ptr = (char)atoi(buf);
7276 +                       set_flag = false;
7277 +               }
7278 +               /* keep one slot for the terminator: excess characters are dropped instead of written past buf */
7279 +               if (set_flag && (index < (int)sizeof(buf) - 1))
7280 +                       buf[index++] = *(string+i);
7281 +               if (*(string+i) == '(')
7282 +               {
7283 +                       set_flag = true;
7284 +                       index = 0;
7285 +                       ptr = (char *)(md5Salt + cnt);  /* NOTE(review): cnt is not bounded - md5Salt must hold one byte per group */
7286 +                       cnt++;
7287 +               }
7288 +       }
7289 +       return md5Salt;
7290 +}
7291 +
7292 +#endif /* USE_REPLICATION */
7293 diff -aruN postgresql-8.2.4/src/backend/libpq/replicate_com.c pgcluster-1.7.0rc7/src/backend/libpq/replicate_com.c
7294 --- postgresql-8.2.4/src/backend/libpq/replicate_com.c  1970-01-01 01:00:00.000000000 +0100
7295 +++ pgcluster-1.7.0rc7/src/backend/libpq/replicate_com.c        2007-02-18 22:52:16.000000000 +0100
7296 @@ -0,0 +1,675 @@
7297 +/*--------------------------------------------------------------------
7298 + * FILE:
7299 + *     replicate_com.c
7300 + *
7301 + * NOTE:
7302 + *     This file is composed of functions that the backend source calls
7303 + *     for replication.
7304 + *     The low-level I/O functions called by these functions are
7305 + *     contained in 'replicate_com.c'.
7306 + *
7307 + *--------------------------------------------------------------------
7308 + */
7309 +
7310 +/*--------------------------------------
7311 + * INTERFACE ROUTINES
7312 + *
7313 + * setup/teardown:
7314 + *      PGR_Close_Sock
7315 + *      PGR_Free_Conf_Data
7316 + * I/O call:
7317 + *      PGR_Create_Socket_Connect
7318 + *      PGR_Create_Socket_Bind
7319 + *      PGR_Create_Acception
7320 + * table handling:
7321 + *      PGR_Get_Conf_Data
7322 + *-------------------------------------
7323 + */
7324 +#ifdef USE_REPLICATION
7325 +
7326 +#include "postgres.h"
7327 +
7328 +#include <signal.h>
7329 +#include <errno.h>
7330 +#include <fcntl.h>
7331 +#include <grp.h>
7332 +#include <unistd.h>
7333 +#include <ctype.h>
7334 +#include <time.h>
7335 +#include <sys/types.h>
7336 +#include <sys/stat.h>
7337 +#include <sys/socket.h>
7338 +#include <sys/ipc.h>
7339 +#include <sys/shm.h>
7340 +#include <netdb.h>
7341 +#include <netinet/in.h>
7342 +#ifdef HAVE_NETINET_TCP_H
7343 +#include <netinet/tcp.h>
7344 +#endif
7345 +#include <arpa/inet.h>
7346 +#include <sys/file.h>
7347 +#include <netdb.h>
7348 +
7349 +#include "libpq/libpq.h"
7350 +#include "miscadmin.h"
7351 +#include "nodes/print.h"
7352 +#include "utils/guc.h"
7353 +#include "parser/parser.h"
7354 +#include "access/xact.h"
7355 +#include "replicate_com.h"
7356 +/* Externally visible routines defined in this file. */
7357 +int PGR_Create_Socket_Connect(int * fdP, char * hostName , unsigned short portNumber);
7358 +void PGR_Close_Sock(int * sock);
7359 +int PGR_Create_Socket_Bind(int * fdP, char * hostName , unsigned short portNumber);
7360 +int PGR_Create_Acception(int fd, int * sockP, char * hostName , unsigned short portNumber);
7361 +int PGR_Free_Conf_Data(void);
7362 +int PGR_Get_Conf_Data(char * dir , char * fname);
7363 +void PGRset_recovery_packet_no(RecoveryPacket * packet, int packet_no);
7364 +unsigned int PGRget_ip_by_name(char * host);
7365 +int PGRget_time_value(char *str);
7366 +/* File-local helpers used to parse the configuration file. */
7367 +static char * get_string(char * buf);
7368 +static bool is_start_tag(char * ptr);
7369 +static bool is_end_tag(char * ptr);
7370 +static void init_conf_data(ConfDataType *conf);
7371 +static int get_key(char * key, char * str);
7372 +static int get_conf_key_value(char * key, char * value , char * str);
7373 +static int add_conf_data(char *table,int rec_no, char *key,char * value);
7374 +static int get_table_data(FILE * fp,char * table, int rec_no);
7375 +static int get_single_data(char * str);
7376 +static int get_conf_file(char * fname);
7377 +
7378 +/*--------------------------------------------------------------------
7379 + * SYMBOL
7380 + *     PGR_Create_Socket_Connect()
7381 + * NOTES
7382 + *     create a TCP socket and connect it to hostName:portNumber
7383 + * ARGS
7384 + *    int * fdP: (out) connected socket; set to -1 on every failure
7385 + *    char * hostName: host to resolve (IPv4); NULL or "" is rejected
7386 + *    unsigned short portNumber: TCP port; values below 1000 are rejected
7387 + * RETURN
7388 + *    OK: STATUS_OK
7389 + *    NG: STATUS_ERROR
7390 + *--------------------------------------------------------------------
7391 + */
7392 +int
7393 +PGR_Create_Socket_Connect(int * fdP, char * hostName , unsigned short portNumber)
7394 +{
7395 +
7396 +       int sock;
7397 +       size_t  len = 0;
7398 +       struct sockaddr_in addr;
7399 +       int one = 1;
7400 +       memset(&addr, 0, sizeof(addr)); /* do not hand uninitialised padding to connect() */
7401 +       if ((hostName == NULL) || (*hostName == '\0') || (portNumber < 1000))
7402 +       {
7403 +               * fdP = -1;
7404 +               return STATUS_ERROR;
7405 +       }
7406 +       if ((*fdP = socket(AF_INET, SOCK_STREAM, 0)) < 0)
7407 +       {
7408 +               * fdP = -1;
7409 +               return STATUS_ERROR;
7410 +       }
7411 +       if ((setsockopt(*fdP, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one))) == -1)
7412 +       {
7413 +               PGR_Close_Sock(fdP);
7414 +               return STATUS_ERROR;
7415 +       }
7416 +       if (setsockopt(*fdP, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) < 0)
7417 +       {
7418 +               PGR_Close_Sock(fdP);
7419 +               return STATUS_ERROR;
7420 +       }
7421 +
7422 +       addr.sin_family = AF_INET;
7423 +       if ((hostName == NULL ) || (hostName[0] == '\0'))
7424 +               addr.sin_addr.s_addr = htonl(INADDR_ANY);       /* not reached: empty names are rejected above */
7425 +       else
7426 +       {
7427 +               struct hostent *hp;
7428 +
7429 +               hp = gethostbyname(hostName);
7430 +               if ((hp == NULL) || (hp->h_addrtype != AF_INET))
7431 +               {
7432 +                       PGR_Close_Sock(fdP);
7433 +                       return STATUS_ERROR;
7434 +               }
7435 +               memmove((char *) &(addr.sin_addr), (char *) hp->h_addr, hp->h_length);
7436 +       }
7437 +
7438 +       addr.sin_port = htons(portNumber);
7439 +       len = sizeof(struct sockaddr_in);
7440 +
7441 +       if ((sock = connect(*fdP,(struct sockaddr*)&addr,len)) < 0)     /* sock only holds connect()'s status */
7442 +       {
7443 +               PGR_Close_Sock(fdP);
7444 +               return STATUS_ERROR;
7445 +       }
7446 +
7447 +       return  STATUS_OK;
7448 +}
7449 +
7450 +int
7451 +PGR_Create_Socket_Bind(int * fdP, char * hostName , unsigned short portNumber)
7452 +{
7453 +       /* Create a listening TCP socket on hostName:portNumber (any local address if hostName is NULL or ""). */
7454 +       int err;
7455 +       size_t  len = 0;
7456 +       struct sockaddr_in addr;
7457 +       int one = 1;
7458 +       memset(&addr, 0, sizeof(addr)); /* do not hand uninitialised padding to bind() */
7459 +       if ((*fdP = socket(AF_INET, SOCK_STREAM, 0)) < 0)
7460 +       {
7461 +               return STATUS_ERROR;    /* *fdP already holds socket()'s negative result */
7462 +       }
7463 +       if ((setsockopt(*fdP, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one))) == -1)
7464 +       {
7465 +               PGR_Close_Sock(fdP);
7466 +               return STATUS_ERROR;
7467 +       }
7468 +       addr.sin_family = AF_INET;
7469 +       if ((hostName == NULL ) || (hostName[0] == '\0'))
7470 +               addr.sin_addr.s_addr = htonl(INADDR_ANY);
7471 +       else
7472 +       {
7473 +               struct hostent *hp;
7474 +
7475 +               hp = gethostbyname(hostName);
7476 +               if ((hp == NULL) || (hp->h_addrtype != AF_INET))
7477 +               {
7478 +                       PGR_Close_Sock(fdP);
7479 +                       return STATUS_ERROR;
7480 +               }
7481 +               memmove((char *) &(addr.sin_addr), (char *) hp->h_addr, hp->h_length);
7482 +       }
7483 +
7484 +       addr.sin_port = htons(portNumber);
7485 +       len = sizeof(struct sockaddr_in);
7486 +
7487 +       err = bind(*fdP, (struct sockaddr *) & addr, len);
7488 +       if (err < 0)
7489 +       {
7490 +               PGR_Close_Sock(fdP);
7491 +               return STATUS_ERROR;
7492 +       }
7493 +       err = listen(*fdP, MAX_SOCKET_QUEUE );
7494 +       if (err < 0)
7495 +       {
7496 +               PGR_Close_Sock(fdP);
7497 +               return STATUS_ERROR;
7498 +       }
7499 +       return  STATUS_OK;
7500 +}
7501 +
7502 +int
7503 +PGR_Create_Acception(int fd, int * sockP, char * hostName , unsigned short portNumber)
7504 +{
7505 +       int sock;
7506 +       struct sockaddr  addr;
7507 +       socklen_t  len = 0;     /* accept() takes a socklen_t *, not a size_t * */
7508 +       int one = 1;
7509 +       /* Accept one connection on listening socket fd; *sockP gets the new socket or -1. hostName/portNumber are unused. */
7510 +       len = sizeof(struct sockaddr);
7511 +       if ((sock = accept(fd, &addr, &len)) < 0)
7512 +       {
7513 +               *sockP = -1;
7514 +               return STATUS_ERROR;
7515 +       }
7516 +       /* Both options are required; on failure release the accepted socket instead of leaking it. */
7517 +       if ((setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) < 0) ||
7518 +               (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one)) < 0))
7519 +       {
7520 +               close(sock);
7521 +               *sockP = -1;
7522 +               return STATUS_ERROR;
7523 +       }
7524 +
7525 +       *sockP = sock;
7526 +
7527 +       return  STATUS_OK;
7528 +}
7529 +
7530 +void
7531 +PGR_Close_Sock(int * sock)
7532 +{      /* Close the descriptor stored in *sock and mark it as invalid. */
7533 +       close( (int)*sock);     /* the result of close() is not checked */
7534 +       *sock = -1;             /* callers see -1 afterwards */
7535 +}
7536 +
7537 +static char *
7538 +get_string(char * buf)
7539 +{
7540 +       int i,len1,len2,start_flag;
7541 +       char *readp, *writep;
7542 +       /* Trim buf in place: drop leading/trailing blanks, keep inner ones, cut at the first '#'. Returns buf. */
7543 +       writep = readp = buf;
7544 +       i = len1 = 0;
7545 +       while ((*(readp +i) != '\0') && (*(readp +i) != '#'))   /* count non-blank chars before any comment */
7546 +       {
7547 +               if (!isspace((unsigned char) *(readp+ i)))
7548 +               {
7549 +                       len1 ++;
7550 +               }
7551 +               i++;
7552 +       }
7553 +       start_flag = len2 = 0;
7554 +       while (*readp != '\0')
7555 +       {
7556 +               if (*readp == '#')
7557 +               {
7558 +                       *writep = '\0';
7559 +                       break;
7560 +               }
7561 +               if (isspace((unsigned char) *readp))
7562 +               {
7563 +                       if ((len2 >= len1) || (!start_flag))    /* trailing or leading blank: skip it */
7564 +                       {
7565 +                               readp++;
7566 +                               continue;
7567 +                       }
7568 +                       *writep = *readp;
7569 +               }
7570 +               else
7571 +               {
7572 +                       start_flag = 1;
7573 +                       *writep = *readp;
7574 +                       len2 ++;
7575 +               }
7576 +               readp ++;
7577 +               writep ++;
7578 +       }
7579 +       *writep = '\0';
7580 +       return buf;
7581 +}
7582 +
7583 +static bool
7584 +is_start_tag(char * ptr)
7585 +{
7586 +       /* An opening tag starts with '<' that is not immediately followed by '/'. */
7587 +       if (ptr[0] != '<')
7588 +               return false;
7589 +       /* "</" would be a closing tag */
7590 +       return (ptr[1] != '/') ? true : false;
7591 +}
7592 +
7593 +static bool
7594 +is_end_tag(char * ptr)
7595 +{
7596 +       /* A closing tag starts with the two characters "</". */
7597 +       if (ptr[0] != '<')
7598 +               return false;
7599 +       /* '<' followed by anything else is not a closing tag */
7600 +       return (ptr[1] == '/') ? true : false;
7601 +}
7602 +
7603 +static void
7604 +init_conf_data(ConfDataType *conf)
7605 +{
7606 +       /* Reset every field of *conf: unlinked, record 0, zero-filled buffers. */
7607 +       conf->rec_no = 0;
7608 +       conf->last = conf->next = NULL;
7609 +       memset(conf->value,0,sizeof(conf->value));
7610 +       memset(conf->key,0,sizeof(conf->key));
7611 +       memset(conf->table,0,sizeof(conf->table));
7612 +}
7613 +
7614 +static int
7615 +get_key(char * key, char * str)
7616 +{
7617 +       int offset = 1;
7618 +       char * ptr_s,*ptr_e;
7619 +       /* Copy the name of the first "<name>" or "</name>" tag in str into key (no bounds check; callers pass line-sized buffers). */
7620 +       ptr_s = strchr(str,'<');
7621 +       if (ptr_s == NULL)
7622 +       {
7623 +               return STATUS_ERROR;
7624 +       }
7625 +       if (*(ptr_s+1) == '/')
7626 +       {
7627 +               offset = 2;     /* skip the '/' of a closing tag as well */
7628 +       }
7629 +       ptr_e = strchr(ptr_s,'>');      /* search after '<' so a stray earlier '>' cannot be picked up */
7630 +       if (ptr_e == NULL)
7631 +       {
7632 +               return STATUS_ERROR;
7633 +       }
7634 +       *ptr_e = '\0';                  /* terminate temporarily so strcpy copies only the name */
7635 +       strcpy(key,ptr_s + offset);
7636 +       *ptr_e = '>';                   /* restore the caller's line */
7637 +       return STATUS_OK;
7638 +}
7639 +
7640 +static int
7641 +get_conf_key_value(char * key, char * value , char * str)
7642 +{
7643 +       int i;
7644 +       int len1,len2,start_flag;
7645 +       char * ptr_s,*ptr_e;
7646 +       /* Split "<key> value </key>" into key and the blank-trimmed value (inner blanks kept); buffers are not bounds-checked. */
7647 +       if(get_key(key,str) == STATUS_ERROR)
7648 +       {
7649 +               return STATUS_ERROR;
7650 +       }
7651 +       ptr_e = strchr(strchr(str,'<'),'>');    /* get_key succeeded, so '<' exists; take the '>' that closes it */
7652 +       if (ptr_e == NULL)
7653 +       {
7654 +               return STATUS_ERROR;
7655 +       }
7656 +       ptr_s = ptr_e + 1;
7657 +
7658 +       len1 = 0;       /* number of non-blank characters in the value */
7659 +       while ((*ptr_s != '<') && (*ptr_s != '\0'))
7660 +       {
7661 +               if (! isspace((unsigned char) *ptr_s))
7662 +               {
7663 +                       len1 ++;
7664 +               }
7665 +               ptr_s ++;
7666 +       }
7667 +       ptr_s = ptr_e + 1;
7668 +       i = len2 = start_flag = 0;
7669 +       while ((*ptr_s != '<') && (*ptr_s != '\0'))
7670 +       {
7671 +               if (isspace((unsigned char) *ptr_s))
7672 +               {
7673 +                       if ((len2 >= len1) || (!start_flag))    /* trailing or leading blank: skip it */
7674 +                       {
7675 +                               ptr_s ++;
7676 +                               continue;
7677 +                       }
7678 +                       *(value + i) = *ptr_s;
7679 +               }
7680 +               else
7681 +               {
7682 +                       start_flag = 1;
7683 +                       *(value + i) = *ptr_s;
7684 +                       len2 ++;
7685 +               }
7686 +               i++;
7687 +               ptr_s ++;
7688 +       }
7689 +       *(value + i) = '\0';
7690 +       return STATUS_OK;
7691 +}
7692 +
7693 +static int
7694 +add_conf_data(char *table,int rec_no, char *key,char * value)
7695 +{
7696 +       ConfDataType * conf_data;
7697 +       /* Append a new (table, rec_no, key, value) node to the ConfData_Top..ConfData_End list; table may be NULL. */
7698 +       conf_data = (ConfDataType *)malloc(sizeof(ConfDataType));
7699 +       if (conf_data == NULL)
7700 +       {
7701 +               return STATUS_ERROR;
7702 +       }
7703 +       init_conf_data(conf_data);      /* zero-fills the buffers, so the last byte of each stays NUL below */
7704 +       if (table != NULL)
7705 +       {
7706 +               strncpy(conf_data->table,table,sizeof(conf_data->table) - 1);
7707 +       }
7708 +       else
7709 +       {
7710 +               memset(conf_data->table,0,sizeof(conf_data->table));
7711 +       }
7712 +       strncpy(conf_data->key,key,sizeof(conf_data->key) - 1);         /* stop at the string's end; truncate if too long */
7713 +       strncpy(conf_data->value,value,sizeof(conf_data->value) - 1);   /* NOTE(review): assumes the fields are char arrays - confirm in replicate_com.h */
7714 +       conf_data->rec_no = rec_no;
7715 +       if (ConfData_Top == (ConfDataType *)NULL)
7716 +       {
7717 +               ConfData_Top = conf_data;
7718 +               conf_data->last = (char *)NULL;
7719 +       }
7720 +       if (ConfData_End == (ConfDataType *)NULL)
7721 +       {
7722 +               conf_data->last = (char *)NULL;
7723 +       }
7724 +       else
7725 +       {
7726 +               conf_data->last = (char *)ConfData_End;
7727 +               ConfData_End->next = (char *)conf_data;
7728 +       }
7729 +       ConfData_End = conf_data;
7730 +       conf_data->next = (char *)NULL;
7731 +       return STATUS_OK;
7732 +}
7733 +
7734 +static int
7735 +get_table_data(FILE * fp,char * table, int rec_no)
7736 +{
7737 +       char buf[1024];
7738 +       char key_buf[1024];
7739 +       char value_buf[1024];
7740 +       int len = 0;
7741 +       char * ptr;
7742 +       /* Read entries from fp until the closing tag of table; STATUS_ERROR on EOF, a malformed line or allocation failure. */
7743 +       while (fgets(buf,sizeof(buf),fp) != NULL)
7744 +       {
7745 +               /*
7746 +                * pick up a data string
7747 +                */
7748 +               ptr = get_string(buf);
7749 +               len = strlen(ptr);
7750 +               if (len == 0)
7751 +               {
7752 +                       continue;
7753 +               }
7754 +               if (is_end_tag(ptr))
7755 +               {
7756 +                       if(get_key(key_buf,ptr) == STATUS_ERROR)
7757 +                       {
7758 +                               return STATUS_ERROR;
7759 +                       }
7760 +                       if (!strcmp(key_buf,table))
7761 +                       {
7762 +                               return STATUS_OK;       /* reached "</table>" */
7763 +                       }
7764 +               }
7765 +               if (is_start_tag(ptr))
7766 +               {
7767 +                       if ((get_conf_key_value(key_buf,value_buf,ptr) == STATUS_ERROR) ||
7768 +                               (add_conf_data(table,rec_no,key_buf,value_buf) != STATUS_OK))
7769 +                       {
7770 +                               return STATUS_ERROR;
7771 +                       }
7772 +               }
7773 +       }
7774 +       return STATUS_ERROR;    /* end of file before the closing tag */
7775 +}
7776 +
7777 +static int
7778 +get_single_data(char * str)
7779 +{
7780 +       char key_buf[1024];
7781 +       char value_buf[1024];
7782 +       /* Parse a one-line "<key>value</key>" entry and append it to the list without a table name. */
7783 +       if(get_conf_key_value(key_buf,value_buf,str) == STATUS_ERROR)
7784 +       {
7785 +               return STATUS_ERROR;
7786 +       }
7787 +       return add_conf_data(NULL,0,key_buf,value_buf); /* report allocation failure instead of STATUS_OK */
7788 +}
7789 +
7790 +
7791 +static int
7792 +get_conf_file(char * fname)
7793 +{
7794 +       FILE * fp = NULL;
7795 +       int len;
7796 +       char buf[1024];
7797 +       char key_buf[1024];
7798 +       char last_key_buf[1024];
7799 +       char *ptr;
7800 +       int rec_no = 0;
7801 +
7802 +       /*
7803 +        * Open fname; every entry read below is appended to the ConfData list.
7804 +        */
7805 +       if ((fp = fopen(fname,"r")) == NULL)
7806 +       {
7807 +               return STATUS_ERROR;
7808 +       }
7809 +       /*
7810 +        * Read line by line: "<tag>" alone opens a table, "<tag>..</tag>" is a single entry.
7811 +        */
7812 +       memset(last_key_buf,0,sizeof(last_key_buf));
7813 +       memset(key_buf,0,sizeof(key_buf));
7814 +       while (fgets(buf,sizeof(buf),fp) != NULL)
7815 +       {
7816 +               /*
7817 +                * pick up a data string (blanks and '#' comments removed); skip empty lines
7818 +                */
7819 +               ptr = get_string(buf);
7820 +               len = strlen(ptr);
7821 +               if (len == 0)
7822 +               {
7823 +                       continue;
7824 +               }
7825 +               if (is_start_tag(ptr))
7826 +               {
7827 +                       if(get_key(key_buf,ptr) == STATUS_ERROR)
7828 +                       {
7829 +                               fclose(fp);
7830 +                               return STATUS_ERROR;
7831 +                       }
7832 +                       if (strstr(ptr,"</") == NULL)   /* no closing tag on this line: a table starts here */
7833 +                       {
7834 +                               if (strcmp(last_key_buf,key_buf))       /* different table name: restart record numbering */
7835 +                               {
7836 +                                       rec_no = 0;
7837 +                                       strcpy(last_key_buf,key_buf);
7838 +                               }
7839 +                               get_table_data(fp,key_buf,rec_no);      /* reads on from fp; its status is ignored */
7840 +                               rec_no ++;      /* consecutive tables with the same name get 0, 1, 2, ... */
7841 +                       }
7842 +                       else
7843 +                       {
7844 +                               get_single_data(ptr);   /* status is ignored */
7845 +                       }
7846 +               }
7847 +       }
7848 +       fclose(fp);
7849 +       return STATUS_OK;
7850 +}
7851 +
7852 +int
7853 +PGR_Free_Conf_Data(void)
7854 +{
7855 +       ConfDataType *node, *following;
7856 +
7857 +       /* An empty list is reported as an error. */
7858 +       if (ConfData_Top == (ConfDataType *)NULL)
7859 +               return STATUS_ERROR;
7860 +
7861 +       /* Release every node, remembering its successor before it is freed. */
7862 +       for (node = ConfData_Top; node != (ConfDataType *)NULL; node = following)
7863 +       {
7864 +               following = (ConfDataType *)node->next;
7865 +               free (node);
7866 +       }
7867 +       /* Leave both list anchors empty. */
7868 +       ConfData_End = (ConfDataType *)NULL;
7869 +       ConfData_Top = (ConfDataType *)NULL;
7870 +       return STATUS_OK;
7871 +}
7872 +
7873 +int
7874 +PGR_Get_Conf_Data(char * dir , char * fname)
7875 +{
7876 +       /* Load "dir/fname" into a fresh ConfData list; returns get_conf_file()'s status. */
7877 +       char   *path;
7878 +       int     rc;
7879 +       size_t  path_size;
7880 +
7881 +       if ((dir == NULL) || ( fname == NULL))
7882 +               return STATUS_ERROR;
7883 +
7884 +       /* room for dir, the '/' separator, fname and the terminating NUL */
7885 +       path_size = strlen(dir) + strlen(fname) + 2;
7886 +       path = malloc(path_size);
7887 +       if (path == NULL)
7888 +               return STATUS_ERROR;
7889 +       sprintf(path,"%s/%s",dir,fname);
7890 +
7891 +       /* the list anchors are reset without freeing a previously loaded list */
7892 +       ConfData_End = (ConfDataType * )NULL;
7893 +       ConfData_Top = (ConfDataType * )NULL;
7894 +       rc = get_conf_file(path);
7895 +       free (path);
7896 +       return rc;
7897 +}
7898 +
7899 +void
7900 +PGRset_recovery_packet_no(RecoveryPacket * packet, int packet_no)
7901 +{
7902 +       /* Store packet_no in the packet via htons(); a NULL packet is ignored. */
7903 +       if (packet != NULL)
7904 +       {
7905 +               /* htons() yields the 16-bit value in network byte order */
7906 +               packet->packet_no = htons(packet_no) ;
7907 +       }
7908 +}
7909 +
7910 +unsigned int
7911 +PGRget_ip_by_name(char * host)
7912 +{
7913 +       struct hostent *hp = NULL;
7914 +       unsigned int ip = 0;
7915 +       unsigned char uc = 0;
7916 +       int i;
7917 +       /* Resolve host and pack its first IPv4 address with address byte 0 in the low 8 bits; 0 if it cannot be resolved. */
7918 +       if ((host == NULL) || (*host == '\0'))
7919 +       {
7920 +               return 0;
7921 +       }
7922 +       hp = gethostbyname( host );
7923 +       if ((hp == NULL) || (hp->h_addrtype != AF_INET) || (hp->h_length < 4))  /* four address bytes are read below */
7924 +       {
7925 +               return 0;
7926 +       }
7927 +       for (i = 3 ; i>= 0 ; i --)
7928 +       {
7929 +               uc = (unsigned char)hp->h_addr_list[0][i];
7930 +               ip = ip | uc;
7931 +               if (i > 0)
7932 +                       ip = ip << 8;   /* make room for the next lower byte */
7933 +       }
7934 +       return ip;
7935 +}
7936 +
7937 +int
7938 +PGRget_time_value(char *str)
7939 +{
7940 +       int i,len;
7941 +       char * ptr;
7942 +       int unit = 1;
7943 +       /* Parse "<number>[unit]": m/M multiplies by 60, h/H by 3600, anything else by 1. Truncates str at the unit; -1 if str is NULL. */
7944 +       if (str == NULL)
7945 +               return -1;
7946 +
7947 +       len = strlen(str);
7948 +       ptr = str;
7949 +       for (i = 0; i < len ; i ++,ptr++)
7950 +       {
7951 +               if ((! isdigit((unsigned char) *ptr)) && (! isspace((unsigned char) *ptr)))
7952 +               {
7953 +                       switch (*ptr)
7954 +                       {
7955 +                               case 'm':
7956 +                               case 'M':
7957 +                                       unit = 60;
7958 +                                       break;
7959 +                               case 'h':
7960 +                               case 'H':
7961 +                                       unit = 60*60;
7962 +                                       break;
7963 +                       }
7964 +                       *ptr = '\0';    /* cut the caller's string at the first non-digit, non-blank character */
7965 +                       break;
7966 +               }
7967 +       }
7968 +       return (atoi(str) * unit);
7969 +}
7970 +
7971 +#endif /* USE_REPLICATION */
7972 diff -aruN postgresql-8.2.4/src/backend/main/main.c pgcluster-1.7.0rc7/src/backend/main/main.c
7973 --- postgresql-8.2.4/src/backend/main/main.c    2007-01-04 01:58:01.000000000 +0100
7974 +++ pgcluster-1.7.0rc7/src/backend/main/main.c  2007-02-18 22:52:16.000000000 +0100
7975 @@ -316,6 +316,13 @@
7976         printf(_("  -r FILENAME     send stdout and stderr to given file\n"));
7977         printf(_("  -x NUM          internal use\n"));
7978  
7979 +#ifdef USE_REPLICATION
7980 +       printf(_("\nOptions for PGCluster only:\n"));
7981 +       printf(_("  -R              recovery startup with rsync\n"));
7982 +       printf(_("  -u              recovery startup with rsync(it is not create backup files.\n"));
7983 +       printf(_("  -U              recovery startup with pg_dump\n"));
7984 +#endif /* USE_REPLICATION */
7985 +
7986         printf(_("\nPlease read the documentation for the complete list of run-time\n"
7987          "configuration settings and how to set them on the command line or in\n"
7988                          "the configuration file.\n\n"
7989 diff -aruN postgresql-8.2.4/src/backend/parser/gram.y pgcluster-1.7.0rc7/src/backend/parser/gram.y
7990 --- postgresql-8.2.4/src/backend/parser/gram.y  2006-11-05 23:42:09.000000000 +0100
7991 +++ pgcluster-1.7.0rc7/src/backend/parser/gram.y        2007-02-18 22:52:16.000000000 +0100
7992 @@ -412,10 +412,10 @@
7993         QUOTE
7994  
7995         READ REAL REASSIGN RECHECK REFERENCES REINDEX RELATIVE_P RELEASE RENAME
7996 -       REPEATABLE REPLACE RESET RESTART RESTRICT RETURNING RETURNS REVOKE RIGHT
7997 +       REPEATABLE REPLACE REPLICATION RESET RESTART RESTRICT RETURNING RETURNS REVOKE RIGHT
7998         ROLE ROLLBACK ROW ROWS RULE
7999  
8000 -       SAVEPOINT SCHEMA SCROLL SECOND_P SECURITY SELECT SEQUENCE
8001 +       SAVEPOINT SCHEMA SCROLL SECOND_P SECURITY SELECT SEQUENCE SERVER
8002         SERIALIZABLE SESSION SESSION_USER SET SETOF SHARE
8003         SHOW SIMILAR SIMPLE SMALLINT SOME STABLE START STATEMENT
8004         STATISTICS STDIN STDOUT STORAGE STRICT_P SUBSTRING SUPERUSER_P SYMMETRIC
8005 @@ -1224,6 +1224,12 @@
8006                                         n->name = $2;
8007                                         $$ = (Node *) n;
8008                                 }
8009 +                       | SHOW REPLICATION SERVER
8010 +                               {
8011 +                                       VariableShowStmt *n = makeNode(VariableShowStmt);
8012 +                                       n->name = "replication_server";
8013 +                                       $$ = (Node *) n;
8014 +                               }
8015                         | SHOW TIME ZONE
8016                                 {
8017                                         VariableShowStmt *n = makeNode(VariableShowStmt);
8018 @@ -8678,6 +8684,7 @@
8019                         | RENAME
8020                         | REPEATABLE
8021                         | REPLACE
8022 +                       | REPLICATION
8023                         | RESET
8024                         | RESTART
8025                         | RESTRICT
8026 @@ -8692,6 +8699,7 @@
8027                         | SCROLL
8028                         | SECOND_P
8029                         | SECURITY
8030 +                       | SERVER
8031                         | SEQUENCE
8032                         | SERIALIZABLE
8033                         | SESSION
8034 diff -aruN postgresql-8.2.4/src/backend/parser/keywords.c pgcluster-1.7.0rc7/src/backend/parser/keywords.c
8035 --- postgresql-8.2.4/src/backend/parser/keywords.c      2006-10-07 23:51:02.000000000 +0200
8036 +++ pgcluster-1.7.0rc7/src/backend/parser/keywords.c    2007-02-18 22:52:16.000000000 +0100
8037 @@ -281,6 +281,7 @@
8038         {"relative", RELATIVE_P},
8039         {"release", RELEASE},
8040         {"rename", RENAME},
8041         {"repeatable", REPEATABLE},
8042         {"replace", REPLACE},
8043 +       {"replication", REPLICATION},   /* list must stay in ASCII order */
8044         {"reset", RESET},
8045 diff -aruN postgresql-8.2.4/src/backend/parser/parse_clause.c pgcluster-1.7.0rc7/src/backend/parser/parse_clause.c
8046 --- postgresql-8.2.4/src/backend/parser/parse_clause.c  2006-11-28 13:54:41.000000000 +0100
8047 +++ pgcluster-1.7.0rc7/src/backend/parser/parse_clause.c        2007-02-18 22:52:16.000000000 +0100
8048 @@ -34,6 +34,9 @@
8049  #include "rewrite/rewriteManip.h"
8050  #include "utils/guc.h"
8051  
8052 +#ifdef USE_REPLICATION
8053 +#include "replicate.h"
8054 +#endif /* USE_REPLICATION */
8055  
8056  #define ORDER_CLAUSE 0
8057  #define GROUP_CLAUSE 1
8058 @@ -154,7 +157,18 @@
8059          * analyze.c will eventually do the corresponding heap_close(), but *not*
8060          * release the lock.
8061          */
8062 +#ifdef USE_REPLICATION
8063 +       if (PGRautoLockTable == true) 
8064 +       {
8065 +               pstate->p_target_relation = heap_openrv(relation, ShareRowExclusiveLock);
8066 +       }
8067 +       else
8068 +       {
8069 +               pstate->p_target_relation = heap_openrv(relation, RowExclusiveLock);
8070 +       }
8071 +#else
8072         pstate->p_target_relation = heap_openrv(relation, RowExclusiveLock);
8073 +#endif /* USE_REPLICATION */
8074  
8075         /*
8076          * Now build an RTE.
8077 diff -aruN postgresql-8.2.4/src/backend/parser/parse_relation.c pgcluster-1.7.0rc7/src/backend/parser/parse_relation.c
8078 --- postgresql-8.2.4/src/backend/parser/parse_relation.c        2006-10-04 02:29:56.000000000 +0200
8079 +++ pgcluster-1.7.0rc7/src/backend/parser/parse_relation.c      2007-02-18 22:52:16.000000000 +0100
8080 @@ -30,6 +30,9 @@
8081  #include "utils/lsyscache.h"
8082  #include "utils/syscache.h"
8083  
8084 +#ifdef USE_REPLICATION
8085 +#include "replicate.h"
8086 +#endif /* USE_REPLICATION */
8087  
8088  /* GUC parameter */
8089  bool           add_missing_from;
8090 @@ -636,7 +639,14 @@
8091          * to a rel in a statement, be careful to get the right access level
8092          * depending on whether we're doing SELECT FOR UPDATE/SHARE.
8093          */
8094 +#ifdef USE_REPLICATION
8095 +       if (PGRautoLockTable == true)
8096 +               lockmode = isLockedRel(pstate, refname) ? ShareRowExclusiveLock : AccessShareLock;
8097 +       else
8098 +               lockmode = isLockedRel(pstate, refname) ? RowShareLock : AccessShareLock;
8099 +#else
8100         lockmode = isLockedRel(pstate, refname) ? RowShareLock : AccessShareLock;
8101 +#endif /* USE_REPLICATION */
8102         rel = heap_openrv(relation, lockmode);
8103         rte->relid = RelationGetRelid(rel);
8104  
8105 diff -aruN postgresql-8.2.4/src/backend/postmaster/postmaster.c pgcluster-1.7.0rc7/src/backend/postmaster/postmaster.c
8106 --- postgresql-8.2.4/src/backend/postmaster/postmaster.c        2007-01-04 01:58:01.000000000 +0100
8107 +++ pgcluster-1.7.0rc7/src/backend/postmaster/postmaster.c      2007-02-18 22:52:16.000000000 +0100
8108 @@ -122,6 +122,9 @@
8109  #include "storage/spin.h"
8110  #endif
8111  
8112 +#ifdef USE_REPLICATION
8113 +#include "replicate.h"
8114 +#endif /* USE_REPLICATION */
8115  
8116  /*
8117   * List of active backends (or child processes anyway; we don't actually
8118 @@ -363,6 +366,61 @@
8119  #define EXIT_STATUS_0(st)  ((st) == 0)
8120  #define EXIT_STATUS_1(st)  (WIFEXITED(st) && WEXITSTATUS(st) == 1)
8121  
8122 +#ifdef USE_REPLICATION
8123 +char * Query_String = NULL;
8124 +ReplicateServerInfo * ReplicateServerData = NULL;
8125 +ReplicateServerInfo * CurrentReplicateServer = NULL;
8126 +ReplicateServerInfo * LastReplicateServer = NULL;
8127 +int ReplicateServerShmid = -1;
8128 +int TransactionQuery = 0;
8129 +int TransactionSock = -1;
8130 +int Transaction_Mode = 0;
8131 +bool PGR_Noticed_Abort = false;
8132 +bool Session_Authorization_Mode = false;
8133 +bool Create_Temp_Table_Mode = false;
8134 +ConfDataType * ConfData_Top = (ConfDataType *)NULL;
8135 +ConfDataType * ConfData_End = (ConfDataType *)NULL;
8136 +int RecoveryPortNumber = 0;
8137 +char * RsyncPath = NULL;
8138 +char * RsyncOption = NULL;
8139 +char * PgDumpPath = NULL;
8140 +bool RsyncCompress = true;
8141 +ReplicateNow * ReplicateCurrentTime = NULL;
8142 +CopyData * PGRCopyData = NULL;
8143 +bool PGR_Copy_Data_Need_Replicate = false;
8144 +PGR_Stand_Alone_Type * PGR_Stand_Alone = NULL;
8145 +PGR_Not_Replicate_Type * PGR_Not_Replicate = NULL;
8146 +int PGR_Not_Replicate_Rec_Num = 0;
8147 +bool PGR_Is_Replicated_Query = false;
8148 +PGR_Check_Lock_Type PGR_Check_Lock;
8149 +int PGR_Sock_To_Replication_Server = -1;
8150 +bool PGR_Need_Notice = false;
8151 +bool PGR_Lock_Noticed = false;
8152 +bool PGR_Recovery_Option = false;
8153 +int PGR_recovery_mode = 0;
8154 +char * PGRSelfHostName = NULL;
8155 +int PGR_Pending_Sem_Num = 0;
8156 +bool PGR_Reliable_Mode_Wait = true;
8157 +PGR_Retry_Query_Type PGR_Retry_Query;
8158 +int ClusterDBShmid = -1;
8159 +ClusterDBInfo * ClusterDBData = NULL;
8160 +PGR_Password_Info * PGR_password = NULL;
8161 +int PGR_Replication_Timeout = 60;
8162 +int PGR_Lifecheck_Timeout = 3;
8163 +int PGR_Lifecheck_Interval = 11;
8164 +
8165 +/* initialize in utils/misc/guc.c */
8166 +bool PGRforceLoadBalance = false;
8167 +bool PGRcheckConstraintWithLock = false;
8168 +bool PGRautoLockTable = true;
8169 +bool PGRnotReplicatePreparedSelect = false;
8170 +
8171 +bool needToUpdateReplicateIdOnNextQueryIsDone=false;
8172 +bool PGR_Is_Sync_OID = false;
8173 +
8174 +static int Master_Pid = 0;
8175 +static int Lifecheck_Pid = 0;
8176 +#endif /* USE_REPLICATION */
8177  
8178  /*
8179   * Postmaster main entry point
8180 @@ -375,6 +433,11 @@
8181         char       *userDoption = NULL;
8182         int                     i;
8183  
8184 +#ifdef USE_REPLICATION
8185 +       PGR_Check_Lock.check_lock_conflict = false;
8186 +       PGR_Check_Lock.status_lock_conflict = STATUS_OK;
8187 +#endif /* USE_REPLICATION */
8188 +
8189         MyProcPid = PostmasterPid = getpid();
8190  
8191         IsPostmasterEnvironment = true;
8192 @@ -420,10 +483,24 @@
8193          * tcop/postgres.c (the option sets should not conflict)
8194          * and with the common help() function in main/main.c.
8195          */
8196 -       while ((opt = getopt(argc, argv, "A:B:c:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:")) != -1)
8197 +       while ((opt = getopt(argc, argv, "A:B:c:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:URu")) != -1)
8198         {
8199                 switch (opt)
8200                 {
8201 +#ifdef USE_REPLICATION
8202 +                       case 'U':
8203 +                               PGR_Recovery_Option = true;
8204 +                               PGR_recovery_mode = PGR_HOT_RECOVERY;
8205 +                               break;
8206 +                       case 'R':
8207 +                               PGR_Recovery_Option = true;
8208 +                               PGR_recovery_mode = PGR_COLD_RECOVERY;
8209 +                               break;
8210 +                       case 'u':
8211 +                               PGR_Recovery_Option = true;
8212 +                               PGR_recovery_mode = PGR_WITHOUT_BACKUP;
8213 +                               break;
8214 +#endif /* USE_REPLICATION */
8215                         case 'A':
8216                                 SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
8217                                 break;
8218 @@ -696,6 +773,30 @@
8219          */
8220         CreateDataDirLockFile(true);
8221  
8222 +#ifdef USE_REPLICATION
8223 +       if (PGR_Get_Conf_Data( DataDir, CLUSTER_CONF_FILE ) == STATUS_OK)       /* replication set-up is attempted only when this returns STATUS_OK */
8224 +       {
8225 +               if (PGR_Init_Replicate_Server_Data() != STATUS_OK)
8226 +               {
8227 +                       fprintf(stderr,"PGR_Init_Replicate_Server_Data failed\n");
8228 +                       ExitPostmaster(1);      /* start-up failed: report a non-zero exit status */
8229 +               }
8230 +               PGR_Set_Replicate_Server_Socket();
8231 +               PGR_Free_Conf_Data();
8232 +               if ((PGR_Recovery_Option) &&
8233 +                       (PGR_recovery_mode != PGR_HOT_RECOVERY))        /* hot recovery is not run here; it is run later from a forked child */
8234 +               {
8235 +                       fprintf(stderr,"Start in recovery mode! \n");
8236 +                       fprintf(stderr,"Please wait until a data synchronization finishes from Master DB... \n");
8237 +                       if (PGR_Recovery_Main(PGR_recovery_mode) != STATUS_OK)
8238 +                       {
8239 +                               fprintf(stderr,"PGR_Recovery_Main() failed with cold recovery\n");
8240 +                               ExitPostmaster(1);      /* recovery failed: do not exit with a success status */
8241 +                       }
8242 +               }
8243 +       }
8244 +#endif /* USE_REPLICATION */
8245 +
8246         /*
8247          * If timezone is not set, determine what the OS uses.  (In theory this
8248          * should be done during GUC initialization, but because it can take as
8249 @@ -960,6 +1061,21 @@
8250          */
8251         StartupPID = StartupDataBase();
8252  
8253 +#ifdef USE_REPLICATION
8254 +       Master_Pid = PGR_Master_Main();
8255 +       if (Master_Pid < 0)
8256 +       {
8257 +               elog(DEBUG1,"PGR_Master_Main failed");
8258 +               ExitPostmaster(1);
8259 +       }
8260 +       Lifecheck_Pid = PGR_Lifecheck_Main();
8261 +       if (Lifecheck_Pid < 0)
8262 +       {
8263 +               elog(DEBUG1,"PGR_Lifecheck_Main failed");
8264 +               ExitPostmaster(1);
8265 +       }
8266 +#endif /* USE_REPLICATION */
8267 +
8268         status = ServerLoop();
8269  
8270         /*
8271 @@ -1133,6 +1249,60 @@
8272         last_touch_time = time(NULL);
8273  
8274         nSockets = initMasks(&readmask);
8275 +#ifdef USE_REPLICATION
8276 +       if (PGR_Recovery_Option)
8277 +       {
8278 +               int pid = 0;
8279 +               pid = fork_process();
8280 +               if (pid == 0)                           /* child */
8281 +               {
8282 +                       fprintf(stderr,"Start in recovery mode! \n");
8283 +                       fprintf(stderr,"Please wait until a data synchronization finishes from Master DB... \n");
8284 +                       IsUnderPostmaster = true;               /* we are a postmaster subprocess now */
8285 +
8286 +                       /* Close the postmaster's sockets */
8287 +                       ClosePostmasterPorts(false);
8288 +                       /* Lose the postmaster's on-exit routines and port connections */
8289 +                       on_exit_reset();
8290 +                       /* Release postmaster's working memory context */
8291 +                       MemoryContextSwitchTo(TopMemoryContext);
8292 +                       MemoryContextDelete(PostmasterContext);
8293 +                       PostmasterContext = NULL;
8294 +                       if (PGR_recovery_mode == PGR_HOT_RECOVERY)
8295 +                       {
8296 +                               if (PGR_Recovery_Main(PGR_recovery_mode) != STATUS_OK)
8297 +                               {
8298 +                                       elog(DEBUG1,"PGR_Recovery_Main() failed with hot recovery.");
8299 +                                       ExitPostmaster(1);
8300 +                               }
8301 +                       }
8302 +                       else
8303 +                       {
8304 +                               if (PGR_recovery_queue_data_req() != STATUS_OK)
8305 +                               {
8306 +                                       elog(DEBUG1,"PGR_recovery_queue_data_req failed");
8307 +                                       ExitPostmaster(1);
8308 +                               }
8309 +                       }
8310 +                       PGR_recovery_finish_send();
8311 +                       PGR_Recovery_Option = false;
8312 +                       fprintf(stderr,"OK!  The data synchronization with Master DB was finished. \n");
8313 +
8314 +                       ExitPostmaster(0);
8315 +               }
8316 +               else if (pid < 0)
8317 +               {
8318 +                       ExitPostmaster(1);
8319 +               }
8320 +       }
8321 +       if (PGR_password != NULL)
8322 +       {
8323 +               if(PGR_password->password != NULL)
8324 +                       memset(PGR_password->password,0,PASSWORD_MAX_LENGTH);
8325 +               memset(PGR_password->md5Salt,0,sizeof(PGR_password->md5Salt));
8326 +               memset(PGR_password->cryptSalt,0,sizeof(PGR_password->cryptSalt));
8327 +       }
8328 +#endif /* USE_REPLICATION */
8329  
8330         for (;;)
8331         {
8332 @@ -1591,6 +1761,9 @@
8333                         ereport(FATAL,
8334                                         (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
8335                                          errmsg("sorry, too many clients already")));
8336 +#ifdef USE_REPLICATION
8337 +                       return STATUS_ERROR;
8338 +#endif
8339                         break;
8340                 case CAC_OK:
8341                 default:
8342 @@ -1858,6 +2031,23 @@
8343                         (errmsg_internal("postmaster received signal %d",
8344                                                          postgres_signal_arg)));
8345  
8346 +#ifdef USE_REPLICATION
8347 +       if (PGR_Get_Cluster_Status() == STATUS_RECOVERY)
8348 +       {
8349 +               PGR_recovery_error_send();
8350 +               PGR_Recovery_Option = false;
8351 +       }
8352 +       if (Master_Pid > 0)
8353 +       {
8354 +               kill (Master_Pid,postgres_signal_arg);
8355 +       }
8356 +       if (Lifecheck_Pid > 0)
8357 +       {
8358 +               kill (Lifecheck_Pid,postgres_signal_arg);
8359 +       }
8360 +       PGR_delete_shm();
8361 +#endif /* USE_REPLICATION */
8362 +
8363         switch (postgres_signal_arg)
8364         {
8365                 case SIGTERM:
8366 @@ -3452,6 +3642,16 @@
8367          * MUST         -- vadim 05-10-1999
8368          */
8369  
8370 +#ifdef USE_REPLICATION
8371 +       if (PGR_Get_Cluster_Status() == STATUS_RECOVERY)
8372 +       {
8373 +               write_stderr("sorry, recovery failed.");
8374 +               PGR_recovery_error_send();
8375 +               PGR_Recovery_Option = false;
8376 +       }
8377 +       PGR_delete_shm();
8378 +#endif /* USE_REPLICATION */
8379 +
8380         proc_exit(status);
8381  }
8382  
8383 diff -aruN postgresql-8.2.4/src/backend/storage/large_object/inv_api.c pgcluster-1.7.0rc7/src/backend/storage/large_object/inv_api.c
8384 --- postgresql-8.2.4/src/backend/storage/large_object/inv_api.c 2006-09-07 17:37:25.000000000 +0200
8385 +++ pgcluster-1.7.0rc7/src/backend/storage/large_object/inv_api.c       2007-02-18 22:52:16.000000000 +0100
8386 @@ -36,6 +36,10 @@
8387  #include "utils/fmgroids.h"
8388  #include "utils/resowner.h"
8389  
8390 +#ifdef USE_REPLICATION
8391 +#include "replicate.h"
8392 +#endif /* USE_REPLICATION */
8393 +
8394  
8395  /*
8396   * All accesses to pg_largeobject and its index make use of a single Relation
8397 @@ -188,6 +192,9 @@
8398          * use.  We can use the index on pg_largeobject for checking OID
8399          * uniqueness, even though it has additional columns besides OID.
8400          */
8401 +#ifdef USE_REPLICATION
8402 +       PGR_Is_Sync_OID = true;
8403 +#endif /* USE_REPLICATION */
8404         if (!OidIsValid(lobjId))
8405         {
8406                 open_lo_relation();
8407 @@ -206,6 +213,9 @@
8408          */
8409         CommandCounterIncrement();
8410  
8411 +#ifdef USE_REPLICATION
8412 +       PGR_Is_Sync_OID = false;
8413 +#endif /* USE_REPLICATION */
8414         return lobjId;
8415  }
8416  
8417 diff -aruN postgresql-8.2.4/src/backend/storage/lmgr/deadlock.c pgcluster-1.7.0rc7/src/backend/storage/lmgr/deadlock.c
8418 --- postgresql-8.2.4/src/backend/storage/lmgr/deadlock.c        2006-09-23 01:20:13.000000000 +0200
8419 +++ pgcluster-1.7.0rc7/src/backend/storage/lmgr/deadlock.c      2007-02-18 22:52:16.000000000 +0100
8420 @@ -30,6 +30,9 @@
8421  #include "storage/proc.h"
8422  #include "utils/memutils.h"
8423  
8424 +#ifdef USE_REPLICATION
8425 +#include "replicate.h"
8426 +#endif /* USE_REPLICATION */
8427  
8428  /* One edge in the waits-for graph */
8429  typedef struct
8430 @@ -217,6 +220,13 @@
8431                 if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges))
8432                         elog(FATAL, "deadlock seems to have disappeared");
8433  
8434 +#ifdef USE_REPLICATION
8435 +               if (PGR_Notice_Conflict() == STATUS_ERROR)
8436 +               {
8437 +                       return FALSE;
8438 +               }
8439 +               PGR_Lock_Noticed =true;
8440 +#endif
8441                 return true;                    /* cannot find a non-deadlocked state */
8442         }
8443  
8444 @@ -426,6 +436,18 @@
8445         int                     numLockModes,
8446                                 lm;
8447  
8448 +#ifdef USE_REPLICATION
8449 +       /*
8450 +        * In PGCluster mode, conflicts with procs that have a younger rep-id do
8451 +        * not matter here: they are handled by the younger proc's CheckDeadLock().
8452 +        * This is necessary to make sure all nodes see the same deadlock order,
8453 +        * so only the youngest (by rep-id) process is rolled back on deadlock.
8454 +        */
8455 +       if ( MyProc->replicationId!=0 &&
8456 +            MyProc -> replicationId < checkProc->replicationId) 
8457 +               return false;
8458 +       
8459 +#endif
8460         /*
8461          * Have we already seen this proc?
8462          */
8463 diff -aruN postgresql-8.2.4/src/backend/storage/lmgr/lmgr.c pgcluster-1.7.0rc7/src/backend/storage/lmgr/lmgr.c
8464 --- postgresql-8.2.4/src/backend/storage/lmgr/lmgr.c    2006-10-04 02:29:57.000000000 +0200
8465 +++ pgcluster-1.7.0rc7/src/backend/storage/lmgr/lmgr.c  2007-02-18 22:52:16.000000000 +0100
8466 @@ -26,6 +26,9 @@
8467  #include "utils/inval.h"
8468  #include "utils/lsyscache.h"
8469  
8470 +#ifdef USE_REPLICATION
8471 +#include "replicate.h"
8472 +#endif /* USE_REPLICATION */
8473  
8474  /*
8475   * RelationInitLockInfo
8476 @@ -476,9 +479,16 @@
8477  
8478                 SET_LOCKTAG_TRANSACTION(tag, xid);
8479  
8480 +#ifdef USE_REPLICATION
8481 +               if (!LockAcquire(&tag, ExclusiveLock, false,false))
8482 +                       elog(ERROR, "XactLockTableWait: LockAcquire failed");
8483 +
8484 +               LockRelease(&tag, ExclusiveLock,false);
8485 +#else
8486                 (void) LockAcquire(&tag, ShareLock, false, false);
8487  
8488                 LockRelease(&tag, ShareLock, false);
8489 +#endif /* USE_REPLICATION */
8490  
8491                 if (!TransactionIdIsInProgress(xid))
8492                         break;
8493 @@ -635,3 +645,37 @@
8494         }
8495         return false;                           /* default case */
8496  }
8497 +
8498 +#ifdef USE_REPLICATION
8499 +/*
8500 + *             XactLockTableWaitForCluster
8501 + *
8502 + * Wait for the specified transaction to commit or abort.  The lock on 'buffer' is released before waiting and re-acquired in exclusive mode afterwards.
8503 + */
8504 +void
8505 +XactLockTableWaitForCluster(TransactionId xid,Buffer buffer)
8506 +{
8507 +       LOCKTAG         tag;
8508 +       TransactionId myxid = GetCurrentTransactionId();
8509 +
8510 +       Assert(!TransactionIdEquals( xid, myxid ));     /* must not be asked to wait for our own transaction */
8511 +
8512 +       LockBuffer(buffer, BUFFER_LOCK_UNLOCK); /* drop the buffer lock before acquiring the transaction lock */
8513 +
8514 +       SET_LOCKTAG_TRANSACTION(tag, xid);
8515 +
8516 +       if (!LockAcquire(&tag, ExclusiveLock, false,false))
8517 +               elog(ERROR, "XactLockTableWait: LockAcquire failed");   /* NOTE(review): message names XactLockTableWait, not this function */
8518 +
8519 +       LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);      /* retake the buffer lock while the transaction lock is still held */
8520 +
8521 +       LockRelease(&tag, ExclusiveLock,false);
8522 +       
8523 +       /*
8524 +        * Transaction was committed/aborted/crashed - we have to update
8525 +        * pg_clog if transaction is still marked as running.
8526 +        */
8527 +       if (!TransactionIdDidCommit(xid) && !TransactionIdDidAbort(xid))
8528 +               TransactionIdAbort(xid);
8529 +}
8530 +#endif /*USE_REPLICATION*/
8531 diff -aruN postgresql-8.2.4/src/backend/storage/lmgr/lock.c pgcluster-1.7.0rc7/src/backend/storage/lmgr/lock.c
8532 --- postgresql-8.2.4/src/backend/storage/lmgr/lock.c    2006-10-04 02:29:57.000000000 +0200
8533 +++ pgcluster-1.7.0rc7/src/backend/storage/lmgr/lock.c  2007-02-18 22:52:16.000000000 +0100
8534 @@ -42,6 +42,10 @@
8535  #include "utils/ps_status.h"
8536  #include "utils/resowner.h"
8537  
8538 +#ifdef USE_REPLICATION
8539 +#include "storage/lmgr.h"
8540 +#include "replicate.h"
8541 +#endif /* USE_REPLICATION */
8542  
8543  /* This configuration variable is used to set the lock table size */
8544  int                    max_locks_per_xact; /* set by guc.c */
8545 @@ -737,6 +741,10 @@
8546                 status = LockCheckConflicts(lockMethodTable, lockmode,
8547                                                                         lock, proclock, MyProc);
8548  
8549 +#ifdef USE_REPLICATION
8550 +       PGR_Check_Lock.status_lock_conflict = status;
8551 +       PGR_Check_Lock.deadlock = false;
8552 +#endif /* USE_REPLICATION */
8553         if (status == STATUS_OK)
8554         {
8555                 /* No conflict with held or previously requested locks */
8556 @@ -746,6 +754,17 @@
8557         else
8558         {
8559                 Assert(status == STATUS_FOUND);
8560 +#ifdef USE_REPLICATION
8561 +               if ((PGR_Need_Notice == true) &&
8562 +                       (PGR_Check_Lock.check_lock_conflict == true))
8563 +               {
8564 +                       if (!PGR_Lock_Noticed && PGR_Notice_Conflict() == STATUS_ERROR)
8565 +                       {
8566 +                               return FALSE;
8567 +                       }
8568 +                       PGR_Lock_Noticed = true;
8569 +               }
8570 +#endif /* USE_REPLICATION */
8571  
8572                 /*
8573                  * We can't acquire the lock immediately.  If caller specified no
8574 diff -aruN postgresql-8.2.4/src/backend/storage/lmgr/proc.c pgcluster-1.7.0rc7/src/backend/storage/lmgr/proc.c
8575 --- postgresql-8.2.4/src/backend/storage/lmgr/proc.c    2006-11-21 21:59:52.000000000 +0100
8576 +++ pgcluster-1.7.0rc7/src/backend/storage/lmgr/proc.c  2007-02-18 22:52:16.000000000 +0100
8577 @@ -43,6 +43,9 @@
8578  #include "storage/procarray.h"
8579  #include "storage/spin.h"
8580  
8581 +#ifdef USE_REPLICATION
8582 +#include "replicate.h"
8583 +#endif /* USE_REPLICATION */
8584  
8585  /* GUC variables */
8586  int                    DeadlockTimeout = 1000;
8587 @@ -263,6 +266,9 @@
8588         MyProc->lwWaitLink = NULL;
8589         MyProc->waitLock = NULL;
8590         MyProc->waitProcLock = NULL;
8591 +#ifdef USE_REPLICATION
8592 +       MyProc->replicationId = 0;
8593 +#endif
8594         for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
8595                 SHMQueueInit(&(MyProc->myProcLocks[i]));
8596  
8597 @@ -395,6 +401,9 @@
8598         MyProc->lwWaitLink = NULL;
8599         MyProc->waitLock = NULL;
8600         MyProc->waitProcLock = NULL;
8601 +#ifdef USE_REPLICATION
8602 +       MyProc->replicationId = 0;
8603 +#endif
8604         for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
8605                 SHMQueueInit(&(MyProc->myProcLocks[i]));
8606  
8607 @@ -737,6 +746,17 @@
8608                                         GrantAwaitedLock();
8609                                         return STATUS_OK;
8610                                 }
8611 +#ifdef USE_REPLICATION 
8612 +                               if(proc->replicationId == 0 ||
8613 +                                  (MyProc->replicationId > proc->replicationId &&
8614 +                                   proc->heldLocks & aheadRequests) ) {
8615 +                                       elog(DEBUG1,"origin's RId = %d , MyProc->RId = %d , skip",proc->replicationId,MyProc->replicationId);
8616 +                                       aheadRequests |= (1 << proc->waitLockMode);
8617 +                                       proc = (PGPROC *) MAKE_PTR(proc->links.next);
8618 +                                       continue;
8619 +                               }
8620 +
8621 +#endif
8622                                 /* Break out of loop to put myself before him */
8623                                 break;
8624                         }
8625 @@ -752,8 +772,21 @@
8626         }
8627         else
8628         {
8629 +#ifdef USE_REPLICATION
8630 +               proc = (PGPROC *) &(waitQueue->links);
8631 +               for (i = 0; i < waitQueue->size+1; i++){
8632 +                       elog(DEBUG1,"origin's RId = %d , MyProc->RId = %d",proc->replicationId,MyProc->replicationId);
8633 +                       if(proc->replicationId == 0 ||
8634 +                               MyProc->replicationId > proc->replicationId) {
8635 +                               proc= (PGPROC *) MAKE_PTR(proc->links.next);
8636 +                       }else {
8637 +                               break;
8638 +                       }
8639 +               }
8640 +#else
8641                 /* I hold no locks, so I can't push in front of anyone. */
8642                 proc = (PGPROC *) &(waitQueue->links);
8643 +#endif /* USE_REPLICATION */
8644         }
8645  
8646         /*
8647 @@ -776,7 +809,11 @@
8648          * CheckDeadLock's recovery code, except that we shouldn't release the
8649          * semaphore since we haven't tried to lock it yet.
8650          */
8651 +#ifdef USE_REPLICATION
8652 +       if (early_deadlock && proc->replicationId < MyProc->replicationId)
8653 +#else
8654         if (early_deadlock)
8655 +#endif
8656         {
8657                 RemoveFromWaitQueue(MyProc, hashcode);
8658                 return STATUS_ERROR;
8659 @@ -976,6 +1013,9 @@
8660  CheckDeadLock(void)
8661  {
8662         int                     i;
8663 +#ifdef USE_REPLICATION
8664 +       bool pgr_notice = false;
8665 +#endif /* USE_REPLICATION */
8666  
8667         /*
8668          * Acquire exclusive lock on the entire shared lock data structures. Must
8669 @@ -1047,6 +1087,10 @@
8670          * such processes.
8671          */
8672  
8673 +#ifdef USE_REPLICATION
8674 +       pgr_notice = true;
8675 +#endif
8676 +
8677         /*
8678          * Release locks acquired at head of routine.  Order is not critical, so
8679          * do it back-to-front to avoid waking another CheckDeadLock instance
8680 @@ -1055,6 +1099,12 @@
8681  check_done:
8682         for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
8683                 LWLockRelease(FirstLockMgrLock + i);
8684 +#ifdef USE_REPLICATION
8685 +       if (pgr_notice == true)
8686 +       {
8687 +               PGR_Notice_DeadLock();
8688 +       }
8689 +#endif
8690  }
8691  
8692  
8693 @@ -1110,6 +1160,15 @@
8694  {
8695         TimestampTz fin_time;
8696         struct itimerval timeval;
8697 +#ifdef USE_REPLICATION
8698 +       int useFlag = 0;
8699 +
8700 +       if (ReplicateCurrentTime != NULL)
8701 +       {
8702 +               useFlag = ReplicateCurrentTime->useFlag;
8703 +               ReplicateCurrentTime->useFlag = DATA_INIT;
8704 +       }
8705 +#endif /* USE_REPLICATION */
8706  
8707         if (is_statement_timeout)
8708         {
8709 @@ -1154,6 +1213,12 @@
8710                 fin_time = GetCurrentTimestamp();
8711                 fin_time = TimestampTzPlusMilliseconds(fin_time, delayms);
8712                 deadlock_timeout_active = true;
8713 +#ifdef USE_REPLICATION
8714 +       if (ReplicateCurrentTime != NULL)
8715 +       {
8716 +               ReplicateCurrentTime->useFlag = useFlag;
8717 +       }
8718 +#endif /* USE_REPLICATION */
8719                 if (fin_time >= statement_fin_time)
8720                         return true;
8721         }
8722 @@ -1167,6 +1232,12 @@
8723         MemSet(&timeval, 0, sizeof(struct itimerval));
8724         timeval.it_value.tv_sec = delayms / 1000;
8725         timeval.it_value.tv_usec = (delayms % 1000) * 1000;
8726 +#ifdef USE_REPLICATION
8727 +       if (ReplicateCurrentTime != NULL)
8728 +       {
8729 +               ReplicateCurrentTime->useFlag = useFlag;
8730 +       }
8731 +#endif /* USE_REPLICATION */
8732         if (setitimer(ITIMER_REAL, &timeval, NULL))
8733                 return false;
8734         return true;
8735 @@ -1232,12 +1303,30 @@
8736  CheckStatementTimeout(void)
8737  {
8738         TimestampTz now;
8739 +#ifdef USE_REPLICATION
8740 +       int useFlag = 0;
8741 +#endif /* USE_REPLICATION */
8742  
8743         if (!statement_timeout_active)
8744                 return true;                    /* do nothing if not active */
8745  
8746 +#ifdef USE_REPLICATION
8747 +       if (ReplicateCurrentTime != NULL)
8748 +       {
8749 +               useFlag = ReplicateCurrentTime->useFlag;
8750 +               ReplicateCurrentTime->useFlag = DATA_INIT;
8751 +       }
8752 +#endif /* USE_REPLICATION */
8753 +
8754         now = GetCurrentTimestamp();
8755  
8756 +#ifdef USE_REPLICATION
8757 +       if (ReplicateCurrentTime != NULL)
8758 +       {
8759 +               ReplicateCurrentTime->useFlag = useFlag;
8760 +       }
8761 +#endif /* USE_REPLICATION */
8762 +
8763         if (now >= statement_fin_time)
8764         {
8765                 /* Time to die */
8766 diff -aruN postgresql-8.2.4/src/backend/tcop/postgres.c pgcluster-1.7.0rc7/src/backend/tcop/postgres.c
8767 --- postgresql-8.2.4/src/backend/tcop/postgres.c        2007-01-04 01:58:01.000000000 +0100
8768 +++ pgcluster-1.7.0rc7/src/backend/tcop/postgres.c      2007-02-18 22:52:16.000000000 +0100
8769 @@ -68,6 +68,10 @@
8770  
8771  #include "pgstat.h"
8772  
8773 +#ifdef USE_REPLICATION
8774 +#include "replicate.h"
8775 +#endif /* USE_REPLICATION */
8776 +
8777  extern int     optind;
8778  extern char *optarg;
8779  
8780 @@ -91,7 +95,9 @@
8781  /* wait N seconds to allow attach from a debugger */
8782  int                    PostAuthDelay = 0;
8783  
8784 -
8785 +#ifdef USE_REPLICATION
8786 +bool PGR_Not_Replication_Query = false;
8787 +#endif /* USE_REPLICATION */
8788  
8789  /* ----------------
8790   *             private variables
8791 @@ -753,6 +759,24 @@
8792         bool            was_logged = false;
8793         char            msec_str[32];
8794  
8795 +#ifdef USE_REPLICATION
8796 +       char * query_ptr = NULL;
8797 +       char * null_ptr = NULL;
8798 +       int skip_cnt = 0;
8799 +       int status = 0;
8800 +
8801 +       PGR_Reliable_Mode_Wait = false;
8802 +       query_ptr = (char *)query_string;
8803 +       if (PGR_Is_Replicated_Query == false)
8804 +       {
8805 +               PGR_Is_Replicated_Query = PGR_Is_Replicated_Command(query_ptr);
8806 +       }
8807 +       PGR_Retry_Query.query_string = (char *)query_string;
8808 +       PGR_Retry_Query.query_len = strlen(query_string);
8809 +       PGR_Retry_Query.cmdSts = CMD_STS_OTHER;
8810 +       PGR_Retry_Query.cmdType = CMD_TYPE_OTHER;
8811 +#endif /* USE_REPLICATION */
8812 +
8813         /*
8814          * Report query to various monitoring facilities.
8815          */
8816 @@ -831,6 +855,18 @@
8817                 DestReceiver *receiver;
8818                 int16           format;
8819  
8820 +#ifdef USE_REPLICATION
8821 +               PGR_Not_Replication_Query = false;
8822 +               PGR_Reliable_Mode_Wait = false;
8823 +
8824 +               PGR_Retry_Query.query_string = NULL;
8825 +               PGR_Retry_Query.query_len = 0;
8826 +               PGR_Retry_Query.cmdSts = CMD_STS_OTHER;
8827 +               PGR_Retry_Query.cmdType = CMD_TYPE_OTHER;
8828 +               PGR_Retry_Query.useFlag = DATA_INIT;
8829 +               PGR_Lock_Noticed = false;
8830 +#endif /* USE_REPLICATION */
8831 +
8832                 /*
8833                  * Get the command name for use in status display (it also becomes the
8834                  * default completion tag, down inside PortalRun).      Set ps_status and
8835 @@ -853,10 +889,232 @@
8836                  */
8837                 if (IsAbortedTransactionBlockState() &&
8838                         !IsTransactionExitStmt(parsetree))
8839 +               {
8840 +#ifdef USE_REPLICATION
8841 +                       Transaction_Mode = 0;
8842 +#endif
8843                         ereport(ERROR,
8844                                         (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
8845                                          errmsg("current transaction is aborted, "
8846                                                 "commands ignored until end of transaction block")));
8847 +               }
8848 +
8849 +#ifdef USE_REPLICATION
8850 +               Query_String = NULL;
8851 +               query_ptr = PGR_Remove_Comment(query_ptr);
8852 +               PGR_Check_Lock.dest = TO_FRONTEND;
8853 +               PGR_Need_Notice = false;
8854 +               PGR_Check_Lock.check_lock_conflict = false;
8855 +
8856 +               /* skip replication while recovery mode is running */
8857 +               if (PGR_Get_Cluster_Status() == STATUS_RECOVERY)
8858 +               {
8859 +                       /*
8860 +                       PGR_Not_Replication_Query = true;
8861 +                       */
8862 +                       PGR_Is_Replicated_Query = true;
8863 +                       if (!strcmp(commandTag,"SELECT"))
8864 +                       {
8865 +                               if (PGR_Is_System_Command(query_ptr))
8866 +                               {
8867 +                                       status = PGR_Call_System_Command(query_ptr);
8868 +                                       if (status == STATUS_SKIP_QUERY)
8869 +                                       {
8870 +                                               EndCommand(PGR_ALREADY_REPLICATED_NOTICE_CMD,dest);
8871 +                                               break;
8872 +                                       }
8873 +                                       else
8874 +                                       {
8875 +                                               EndCommand("SYSTEM_COMMAND",dest);
8876 +                                               continue;
8877 +                                       }
8878 +                               }
8879 +                       }
8880 +                       Transaction_Mode = PGR_Set_Transaction_Mode(Transaction_Mode,commandTag);
8881 +                       if (Transaction_Mode > 0)
8882 +                       {
8883 +                               PGR_Need_Notice = true;
8884 +                               PGR_Check_Lock.check_lock_conflict = true;
8885 +                       }
8886 +                       goto Skip_Replication;
8887 +               }
8888 +
8889 +               /*
8890 +               if (!xact_started)
8891 +               {
8892 +                       start_xact_command();
8893 +                       xact_started = true;
8894 +               }
8895 +               */
8896 +               if (skip_cnt == 0)
8897 +               {
8898 +                       skip_cnt = PGR_Is_Skip_Replication(query_ptr);
8899 +               }
8900 +               null_ptr = PGR_scan_terminate (query_ptr);
8901 +               if(null_ptr != NULL)
8902 +               {
8903 +                       *null_ptr = '\0';
8904 +               }
8905 +               Transaction_Mode = PGR_Set_Transaction_Mode(Transaction_Mode,commandTag);
8906 +               if ((PGR_Is_Replicated_Query ) ||
8907 +                       (skip_cnt != 0))
8908 +               {
8909 +                       if (skip_cnt > 0)
8910 +                       {
8911 +                               skip_cnt --;
8912 +                       }
8913 +                       else
8914 +                       {
8915 +                               skip_cnt = 0;
8916 +                       }
8917 +                       PGR_Copy_Data_Need_Replicate = false;
8918 +                       if (!strncmp(commandTag,"SELECT",strlen("SELECT")))
8919 +                       {
8920 +                               if (PGR_Is_System_Command(query_ptr))
8921 +                               {
8922 +                                       status = PGR_Call_System_Command(query_ptr);
8923 +                                       if (status == STATUS_SKIP_QUERY)
8924 +                                       {
8925 +                                               EndCommand(PGR_ALREADY_REPLICATED_NOTICE_CMD,dest);
8926 +                                               break;
8927 +                                       }
8928 +                                       else
8929 +                                       {
8930 +                                               EndCommand("SYSTEM_COMMAND",dest);
8931 +                                               continue;
8932 +                                       }
8933 +                               }
8934 +                       }
8935 +                       PGR_Check_Lock.status_lock_conflict = STATUS_OK;
8936 +                       PGR_Check_Lock.dest = TO_FRONTEND;
8937 +               }
8938 +               else
8939 +               {
8940 +                       PGR_Copy_Data_Need_Replicate = false;
8941 +
8942 +                       /* check cluster db status */
8943 +                       /*
8944 +                       if ((PGR_Get_Cluster_Status() == STATUS_RECOVERY)       &&
8945 +                               (PGR_Not_Replication_Query == false)                    &&
8946 +                               (Transaction_Mode == 0 ) )
8947 +                       {
8948 +                               elog(WARNING, "This query is not permitted while recovery db ");
8949 +                               if(null_ptr != NULL)
8950 +                               {
8951 +                                       *null_ptr = ';';
8952 +                                       query_ptr = null_ptr +1;
8953 +                               }
8954 +                               continue;
8955 +                       }
8956 +                       */
8957 +                       if (PGR_Is_Stand_Alone() == true)
8958 +                       {
8959 +                               if (PGR_Stand_Alone->permit == PERMIT_READ_ONLY)
8960 +                               {
8961 +                                       if (!strcmp(commandTag, "SHOW")) {
8962 +                                               VariableShowStmt *stmt = (VariableShowStmt *)parsetree;
8963 +                                               if (!strcmp(stmt->name, "replication_server")) {
8964 +                                                       PGR_Not_Replication_Query = true;
8965 +                                               }
8966 +                                       }
8967 +                                       
8968 +                                       if (PGR_Not_Replication_Query == false)
8969 +                                               elog(ERROR, "This query is not permitted when all replication servers fell down ");
8970 +                               }
8971 +                       }
8972 +                       else if ((PGRforceLoadBalance == false) &&
8973 +                                       ((PGR_Not_Replication_Query == false ) ||
8974 +                                        (!strcmp(commandTag,"SELECT"))))
8975 +                       {
8976 +                               status = PGR_replication(query_ptr,dest,parsetree,commandTag);
8977 +                               if (status == STATUS_REPLICATED)
8978 +                               {
8979 +                                       if (xact_started)
8980 +                                       {
8981 +                                               finish_xact_command();
8982 +                                               xact_started = false;
8983 +                                       }
8984 +                                       CommandCounterIncrement();
8985 +                                       continue;
8986 +                               }
8987 +                               else if (status == STATUS_ERROR)
8988 +                               {
8989 +                                       if (!strcmp(commandTag, "SHOW")) {
8990 +                                               VariableShowStmt *stmt = (VariableShowStmt *)parsetree;
8991 +                                               if (!strcmp(stmt->name, "replication_server")) {
8992 +                                                       PGR_Not_Replication_Query = true;
8993 +                                               }
8994 +                                       } 
8995 +                                       else if (PGR_Stand_Alone->permit == PERMIT_READ_ONLY)
8996 +                                       {
8997 +                                               elog(ERROR, "This query is not permitted when all replication servers fell down ");
8998 +                                       }
8999 +                               }
9000 +                               else if (status == STATUS_DEADLOCK_DETECT)
9001 +                               {
9002 +                                       PGR_Need_Notice = false;
9003 +                                       elog(ERROR, "postmaster deadlock detected");
9004 +                                       continue;
9005 +                               }
9006 +                               else if (status == STATUS_REPLICATION_ABORT)
9007 +                               {
9008 +                                       PGR_Need_Notice = false;
9009 +                                       elog(ERROR, "replication server should be down, transaction aborted.");
9010 +                                       continue;
9011 +                               }
9012 +                               else if (status != STATUS_CONTINUE)
9013 +                               {
9014 +                                       PGR_Check_Lock.dest = TO_FRONTEND;
9015 +                               }
9016 +                               else
9017 +                               {
9018 +                                       PGR_Check_Lock.dest = TO_REPLICATION_SERVER;
9019 +                                       PGR_Reliable_Mode_Wait = true;
9020 +                               }
9021 +                       }
9022 +               }
9023 +               if(null_ptr != NULL)
9024 +               {
9025 +                       *null_ptr = ';';
9026 +                       query_ptr = null_ptr +1;
9027 +               }
9028 +               if (!PGR_Is_Replicated_Query )
9029 +               {
9030 +                       if ((!strcmp(commandTag,"BEGIN")) ||
9031 +                               (!strcmp(commandTag, "START TRANSACTION")) ||
9032 +                               (Transaction_Mode == 0 ) )
9033 +                       {
9034 +                               PGR_Reload_Start_Time();
9035 +                       }
9036 +               }
9037 +               if (((IsA(parsetree, TransactionStmt))    ||
9038 +                       (Transaction_Mode > 0)            ||
9039 +                       (Create_Temp_Table_Mode == true)      ||
9040 +                       (Session_Authorization_Mode == true)) ||
9041 +                       (!strcmp(commandTag,"COPY")))
9042 +               {
9043 +                       PGR_Need_Notice = true;
9044 +                       PGR_Check_Lock.check_lock_conflict = true;
9045 +               }
9046 +               else
9047 +               {
9048 +                       if (PGR_Not_Replication_Query == false)
9049 +                       {
9050 +                               PGR_Need_Notice = true;
9051 +                               PGR_Check_Lock.check_lock_conflict = true;
9052 +                       }
9053 +                       else
9054 +                       {
9055 +                               if ((PGR_Is_Replicated_Query ) &&
9056 +                                       (!strncmp(commandTag, "SELECT",strlen("SELECT"))))
9057 +                               {
9058 +                                       PGR_Need_Notice = true;
9059 +                                       PGR_Check_Lock.check_lock_conflict = true;
9060 +                               }
9061 +                       }
9062 +               }
9063 +Skip_Replication:
9064 +#endif /* USE_REPLICATION */
9065  
9066                 /* Make sure we are in a transaction command */
9067                 start_xact_command();
9068 @@ -983,7 +1241,44 @@
9069                  * command the client sent, regardless of rewriting. (But a command
9070                  * aborted by error will not send an EndCommand report at all.)
9071                  */
9072 +#ifdef USE_REPLICATION
9073 +               /* 
9074 +                * In Non-CONTROL LOCK CONFLICT mode, we *MUST NOT* send the command tag twice.
9075 +                * So, if it was already sent for lock notification, we do not send the
9076 +                * tag here. The same guard is applied to ReadyForQuery.
9077 +                */
9078 +               if(!(PGR_Is_Replicated_Query && PGR_Lock_Noticed))
9079 +#endif
9080                 EndCommand(completionTag, dest);
9081 +
9082 +#ifdef USE_REPLICATION
9083 +               if(PGR_Is_Replicated_Query &&
9084 +                  needToUpdateReplicateIdOnNextQueryIsDone) {
9085 +                       ++(ReplicationLog_Info.PGR_Replicate_ID);
9086 +
9087 +                       if (CurrentReplicateServer != NULL)
9088 +                       {
9089 +                               /* bump the per-server replicate id too; log only while the pointer is known non-NULL */
9090 +                               ++(CurrentReplicateServer->replicate_id);
9091 +                               elog(DEBUG1,"increased replicate_id to %d",CurrentReplicateServer->replicate_id);
9092 +                       }
9093 +                       needToUpdateReplicateIdOnNextQueryIsDone=false;
9094 +               }
9095 +
9096 +               if (PGR_Get_Cluster_Status() != STATUS_RECOVERY)
9097 +               {
9098 +                       if ((PGR_Need_Notice == true) &&
9099 +                               (PGRforceLoadBalance == false))
9100 +                       {
9101 +                               PGR_Notice_Transaction_Query_Done();
9102 +                       }
9103 +                       if ((Transaction_Mode == 0) &&
9104 +                               (ReplicateCurrentTime != NULL))
9105 +                       {
9106 +                               ReplicateCurrentTime->use_seed = 1;
9107 +                       }
9108 +               }
9109 +#endif
9110         }                                                       /* end loop over parsetrees */
9111  
9112         /*
9113 @@ -1144,11 +1439,15 @@
9114                  */
9115                 if (IsAbortedTransactionBlockState() &&
9116                         !IsTransactionExitStmt(parsetree))
9117 +               {
9118 +#ifdef USE_REPLICATION
9119 +                       Transaction_Mode = 0;
9120 +#endif
9121                         ereport(ERROR,
9122                                         (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
9123                                          errmsg("current transaction is aborted, "
9124                                                 "commands ignored until end of transaction block")));
9125 -
9126 +               }
9127                 /*
9128                  * OK to analyze, rewrite, and plan this query.  Note that the
9129                  * originally specified parameter set is not required to be complete,
9130 @@ -1382,11 +1681,15 @@
9131         if (IsAbortedTransactionBlockState() &&
9132                 (!IsTransactionExitStmtList(pstmt->query_list) ||
9133                  numParams != 0))
9134 +       {
9135 +#ifdef USE_REPLICATION
9136 +               Transaction_Mode = 0;
9137 +#endif
9138                 ereport(ERROR,
9139                                 (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
9140                                  errmsg("current transaction is aborted, "
9141                                                 "commands ignored until end of transaction block")));
9142 -
9143 +       }
9144         /*
9145          * Create the portal.  Allow silent replacement of an existing portal only
9146          * if the unnamed portal is specified.
9147 @@ -1769,11 +2072,15 @@
9148          */
9149         if (IsAbortedTransactionBlockState() &&
9150                 !IsTransactionExitStmtList(portal->parseTrees))
9151 +       {
9152 +#ifdef USE_REPLICATION
9153 +               Transaction_Mode = 0;
9154 +#endif
9155                 ereport(ERROR,
9156                                 (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
9157                                  errmsg("current transaction is aborted, "
9158                                                 "commands ignored until end of transaction block")));
9159 -
9160 +       }
9161         /* Check for cancel signal before we start execution */
9162         CHECK_FOR_INTERRUPTS();
9163  
9164 @@ -2101,11 +2408,15 @@
9165          */
9166         if (IsAbortedTransactionBlockState() &&
9167                 PreparedStatementReturnsTuples(pstmt))
9168 +       {
9169 +#ifdef USE_REPLICATION
9170 +               Transaction_Mode = 0;
9171 +#endif
9172                 ereport(ERROR,
9173                                 (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
9174                                  errmsg("current transaction is aborted, "
9175                                                 "commands ignored until end of transaction block")));
9176 -
9177 +       }
9178         if (whereToSendOutput != DestRemote)
9179                 return;                                 /* can't actually do anything... */
9180  
9181 @@ -2171,11 +2482,15 @@
9182          */
9183         if (IsAbortedTransactionBlockState() &&
9184                 portal->tupDesc)
9185 +       {
9186 +#ifdef USE_REPLICATION
9187 +               Transaction_Mode = 0;
9188 +#endif
9189                 ereport(ERROR,
9190                                 (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
9191                                  errmsg("current transaction is aborted, "
9192                                                 "commands ignored until end of transaction block")));
9193 -
9194 +       }
9195         if (whereToSendOutput != DestRemote)
9196                 return;                                 /* can't actually do anything... */
9197  
9198 @@ -2332,6 +2647,9 @@
9199          * backend.  This is necessary precisely because we don't clean up our
9200          * shared memory state.
9201          */
9202 +#ifdef USE_REPLICATION
9203 +       PGR_delete_shm();
9204 +#endif /* USE_REPLICATION */
9205         exit(2);
9206  }
9207  
9208 @@ -2369,6 +2687,9 @@
9209                 }
9210         }
9211  
9212 +#ifdef USE_REPLICATION
9213 +       PGR_delete_shm();
9214 +#endif /* USE_REPLICATION */
9215         errno = save_errno;
9216  }
9217  
9218 @@ -2383,6 +2704,9 @@
9219  void
9220  authdie(SIGNAL_ARGS)
9221  {
9222 +#ifdef USE_REPLICATION
9223 +       PGR_delete_shm();
9224 +#endif /* USE_REPLICATION */
9225         exit(1);
9226  }
9227  
9228 @@ -3369,6 +3693,14 @@
9229                                 pgstat_report_activity("<IDLE>");
9230                         }
9231  
9232 +#ifdef USE_REPLICATION
9233 +                       /* 
9234 +                        * In Non-CONTROL LOCK CONFLICT mode, we *MUST NOT* send the command tag twice.
9235 +                        * So, if it was already sent for lock notification, it is not sent again;
9236 +                        * ReadyForQuery is skipped here under the same condition.
9237 +                        */
9238 +                       if(!(PGR_Is_Replicated_Query && PGR_Lock_Noticed))
9239 +#endif
9240                         ReadyForQuery(whereToSendOutput);
9241                         send_ready_for_query = false;
9242                 }
9243 @@ -3409,6 +3741,26 @@
9244                 if (ignore_till_sync && firstchar != EOF)
9245                         continue;
9246  
9247 +#ifdef USE_REPLICATION
9248 +               if ((firstchar == CMD_TYPE_P_PARSE)             ||
9249 +                       (firstchar == CMD_TYPE_P_BIND)          ||
9250 +                       (firstchar == CMD_TYPE_P_DESCRIBE)      ||
9251 +                       (firstchar == CMD_TYPE_P_EXECUTE)       ||
9252 +                       (firstchar == CMD_TYPE_P_SYNC)          ||
9253 +                       (firstchar == CMD_TYPE_P_CLOSE))
9254 +               {
9255 +                       if (PGR_Send_Input_Message(firstchar, &input_message) != STATUS_OK)
9256 +                       {
9257 +                               if ((PGR_Is_Stand_Alone() == true) &&
9258 +                                       (PGR_Stand_Alone->permit == PERMIT_READ_ONLY))
9259 +                               {
9260 +                                       elog(WARNING, "This query is not permitted when all replication servers fell down ");
9261 +                                       break;
9262 +                               }
9263 +                       }
9264 +               }
9265 +#endif /* USE_REPLICATION */
9266 +
9267                 switch (firstchar)
9268                 {
9269                         case 'Q':                       /* simple query */
9270 @@ -3622,6 +3974,27 @@
9271                         case 'X':
9272                         case EOF:
9273  
9274 +#ifdef USE_REPLICATION
9275 +                               if (PGRforceLoadBalance == false)
9276 +                               {
9277 +                                       if (PGR_Is_Replicated_Query == false)
9278 +                                       {
9279 +                                               PGR_Noticed_Abort = true;
9280 +                                               PGRsend_system_command(CMD_STS_TRANSACTION_ABORT, CMD_TYPE_FRONTEND_CLOSED);
9281 +                                       }
9282 +                                       else if ((Transaction_Mode >= 1) && (PGR_Noticed_Abort == false))
9283 +                                       {
9284 +                                               if (PGR_Did_Commit_Transaction() == true)
9285 +                                               {
9286 +                                                       pgstat_report_activity("commit");
9287 +                                                       exec_simple_query("commit");
9288 +                                               }
9289 +                                       }
9290 +                               }
9291 +                               /*
9292 +                                 PGR_Notice_Transaction_Query_Aborted();
9293 +                               */
9294 +#endif /* USE_REPLICATION */
9295                                 /*
9296                                  * Reset whereToSendOutput to prevent ereport from attempting
9297                                  * to send any more messages to client.
9298 diff -aruN postgresql-8.2.4/src/backend/tcop/pquery.c pgcluster-1.7.0rc7/src/backend/tcop/pquery.c
9299 --- postgresql-8.2.4/src/backend/tcop/pquery.c  2006-10-04 02:29:58.000000000 +0200
9300 +++ pgcluster-1.7.0rc7/src/backend/tcop/pquery.c        2007-02-18 22:52:16.000000000 +0100
9301 @@ -24,6 +24,9 @@
9302  #include "tcop/utility.h"
9303  #include "utils/memutils.h"
9304  
9305 +#ifdef USE_REPLICATION
9306 +#include "replicate.h"
9307 +#endif /* USE_REPLICATION */
9308  
9309  /*
9310   * ActivePortal is the currently executing Portal (the most closely nested,
9311 @@ -188,6 +191,19 @@
9312                                 strcpy(completionTag, "???");
9313                                 break;
9314                 }
9315 +#ifdef USE_REPLICATION
9316 +               if ((PGR_Is_Replicated_Query == true  ) &&
9317 +                       (PGR_Get_Cluster_Status() != STATUS_RECOVERY))
9318 +               {
9319 +                       /*
9320 +                       * A replicated *SELECT* query is used ONLY to replicate
9321 +                       * locks and function execution. All of its results
9322 +                       * will be discarded by the pgrp processes,
9323 +                       * so we don't need to send them.
9324 +                       */
9325 +                       dest = None_Receiver;
9326 +               }
9327 +#endif /*USE_REPLICATION */
9328         }
9329  
9330         /* Now take care of any queued AFTER triggers */
9331 diff -aruN postgresql-8.2.4/src/backend/tcop/utility.c pgcluster-1.7.0rc7/src/backend/tcop/utility.c
9332 --- postgresql-8.2.4/src/backend/tcop/utility.c 2006-10-04 02:29:58.000000000 +0200
9333 +++ pgcluster-1.7.0rc7/src/backend/tcop/utility.c       2007-02-18 22:52:16.000000000 +0100
9334 @@ -54,6 +54,9 @@
9335  #include "utils/guc.h"
9336  #include "utils/syscache.h"
9337  
9338 +#ifdef USE_REPLICATION
9339 +#include "replicate.h"
9340 +#endif /* USE_REPLICATION */
9341  
9342  /*
9343   * Error-checking support for DROP commands
9344 @@ -1289,29 +1292,48 @@
9345  
9346                 case T_SelectStmt:
9347                         tag = "SELECT";
9348 +#ifdef USE_REPLICATION
9349 +                       PGR_Not_Replication_Query = true;
9350 +#endif /* USE_REPLICATION */
9351                         break;
9352  
9353                 case T_TransactionStmt:
9354                         {
9355                                 TransactionStmt *stmt = (TransactionStmt *) parsetree;
9356  
9357 +#ifdef USE_REPLICATION
9358 +                               bool isInTransaction=IsTransactionBlock();
9359 +#endif /* USE_REPLICATION */
9360 +
9361                                 switch (stmt->kind)
9362                                 {
9363                                         case TRANS_STMT_BEGIN:
9364                                                 tag = "BEGIN";
9365 +#ifdef USE_REPLICATION
9366 +                                               PGR_Not_Replication_Query=isInTransaction;
9367 +#endif /* USE_REPLICATION */
9368                                                 break;
9369  
9370                                         case TRANS_STMT_START:
9371                                                 tag = "START TRANSACTION";
9372 +#ifdef USE_REPLICATION
9373 +                                               PGR_Not_Replication_Query=isInTransaction;
9374 +#endif /* USE_REPLICATION */
9375                                                 break;
9376  
9377                                         case TRANS_STMT_COMMIT:
9378                                                 tag = "COMMIT";
9379 +#ifdef USE_REPLICATION
9380 +                                               PGR_Not_Replication_Query=!isInTransaction;
9381 +#endif /* USE_REPLICATION */
9382                                                 break;
9383  
9384                                         case TRANS_STMT_ROLLBACK:
9385                                         case TRANS_STMT_ROLLBACK_TO:
9386                                                 tag = "ROLLBACK";
9387 +#ifdef USE_REPLICATION
9388 +                                               PGR_Not_Replication_Query=!isInTransaction;
9389 +#endif /* USE_REPLICATION */
9390                                                 break;
9391  
9392                                         case TRANS_STMT_SAVEPOINT:
9393 @@ -1343,10 +1365,16 @@
9394  
9395                 case T_DeclareCursorStmt:
9396                         tag = "DECLARE CURSOR";
9397 +#ifdef USE_REPLICATION
9398 +                       PGR_Not_Replication_Query = true;
9399 +#endif /* USE_REPLICATION */
9400                         break;
9401  
9402                 case T_ClosePortalStmt:
9403                         tag = "CLOSE CURSOR";
9404 +#ifdef USE_REPLICATION
9405 +                       PGR_Not_Replication_Query = true;
9406 +#endif /* USE_REPLICATION */
9407                         break;
9408  
9409                 case T_FetchStmt:
9410 @@ -1355,6 +1383,9 @@
9411  
9412                                 tag = (stmt->ismove) ? "MOVE" : "FETCH";
9413                         }
9414 +#ifdef USE_REPLICATION
9415 +                       PGR_Not_Replication_Query = true;
9416 +#endif /* USE_REPLICATION */
9417                         break;
9418  
9419                 case T_CreateDomainStmt:
9420 @@ -1677,10 +1708,16 @@
9421                                 tag = "VACUUM";
9422                         else
9423                                 tag = "ANALYZE";
9424 +#ifdef USE_REPLICATION
9425 +                       PGR_Not_Replication_Query = true;
9426 +#endif /* USE_REPLICATION */
9427                         break;
9428  
9429                 case T_ExplainStmt:
9430                         tag = "EXPLAIN";
9431 +#ifdef USE_REPLICATION
9432 +                       PGR_Not_Replication_Query = true;
9433 +#endif /* USE_REPLICATION */
9434                         break;
9435  
9436                 case T_VariableSetStmt:
9437 @@ -1689,6 +1726,14 @@
9438  
9439                 case T_VariableShowStmt:
9440                         tag = "SHOW";
9441 +#ifdef USE_REPLICATION
9442 +                       {
9443 +                               VariableShowStmt *stmt = (VariableShowStmt *)parsetree;
9444 +                               if (strcasecmp(stmt->name, "replication_server")) {
9445 +                                       PGR_Not_Replication_Query = true;
9446 +                               }
9447 +                       }
9448 +#endif /* USE_REPLICATION */
9449                         break;
9450  
9451                 case T_VariableResetStmt:
9452 @@ -1755,10 +1800,16 @@
9453  
9454                 case T_CheckPointStmt:
9455                         tag = "CHECKPOINT";
9456 +#ifdef USE_REPLICATION
9457 +                       PGR_Not_Replication_Query = true;
9458 +#endif /* USE_REPLICATION */
9459                         break;
9460  
9461                 case T_ReindexStmt:
9462                         tag = "REINDEX";
9463 +#ifdef USE_REPLICATION
9464 +                       PGR_Not_Replication_Query = true;
9465 +#endif /* USE_REPLICATION */
9466                         break;
9467  
9468                 case T_CreateConversionStmt:
9469 @@ -1783,14 +1834,35 @@
9470  
9471                 case T_PrepareStmt:
9472                         tag = "PREPARE";
9473 +#ifdef USE_REPLICATION
9474 +                       if ((PGRnotReplicatePreparedSelect == true) &&
9475 +                               (PGR_is_select_prepare_query() == true))
9476 +                       {
9477 +                               PGR_Not_Replication_Query = true;
9478 +                       }
9479 +#endif /* USE_REPLICATION */
9480                         break;
9481  
9482                 case T_ExecuteStmt:
9483                         tag = "EXECUTE";
9484 +#ifdef USE_REPLICATION
9485 +                       if ((PGRnotReplicatePreparedSelect == true) &&
9486 +                               (PGR_is_select_prepared_statement((PrepareStmt *)parsetree) == true))
9487 +                       {
9488 +                               PGR_Not_Replication_Query = true;
9489 +                       }
9490 +#endif /* USE_REPLICATION */
9491                         break;
9492  
9493                 case T_DeallocateStmt:
9494                         tag = "DEALLOCATE";
9495 +#ifdef USE_REPLICATION
9496 +                       if ((PGRnotReplicatePreparedSelect == true) &&
9497 +                               (PGR_is_select_prepared_statement((PrepareStmt *)parsetree) == true))
9498 +                       {
9499 +                               PGR_Not_Replication_Query = true;
9500 +                       }
9501 +#endif /* USE_REPLICATION */
9502                         break;
9503  
9504                 default:
9505 @@ -1800,6 +1872,13 @@
9506                         break;
9507         }
9508  
9509 +#ifdef USE_REPLICATION
9510 +       if(PGRforceLoadBalance == true)
9511 +       {
9512 +               PGR_Not_Replication_Query = true; 
9513 +       }
9514 +#endif /* USE_REPLICATION */
9515 +
9516         return tag;
9517  }
9518  
9519 @@ -1835,7 +1914,12 @@
9520                                         tag = "SELECT FOR SHARE";
9521                         }
9522                         else
9523 +                       {
9524                                 tag = "SELECT";
9525 +#ifdef USE_REPLICATION
9526 +                               PGR_Not_Replication_Query = true;
9527 +#endif /* USE_REPLICATION */
9528 +                       }
9529                         break;
9530                 case CMD_UPDATE:
9531                         tag = "UPDATE";
9532 @@ -1853,6 +1937,9 @@
9533                         elog(WARNING, "unrecognized commandType: %d",
9534                                  (int) parsetree->commandType);
9535                         tag = "???";
9536 +#ifdef USE_REPLICATION
9537 +                       PGR_Not_Replication_Query = true;
9538 +#endif /* USE_REPLICATION */
9539                         break;
9540         }
9541  
9542 diff -aruN postgresql-8.2.4/src/backend/utils/adt/float.c pgcluster-1.7.0rc7/src/backend/utils/adt/float.c
9543 --- postgresql-8.2.4/src/backend/utils/adt/float.c      2006-10-05 03:40:45.000000000 +0200
9544 +++ pgcluster-1.7.0rc7/src/backend/utils/adt/float.c    2007-02-18 22:52:16.000000000 +0100
9545 @@ -66,6 +66,9 @@
9546  #include "utils/array.h"
9547  #include "utils/builtins.h"
9548  
9549 +#ifdef USE_REPLICATION
9550 +#include "replicate.h"
9551 +#endif /* USE_REPLICATION */
9552  
9553  #ifndef M_PI
9554  /* from my RH5.2 gcc math.h file - thomas 2000-04-03 */
9555 @@ -1886,7 +1889,11 @@
9556         float8          result;
9557  
9558         /* result [0.0 - 1.0) */
9559 +#ifdef USE_REPLICATION
9560 +       result = ((double) PGR_Random()) / ((double) MAX_RANDOM_VALUE + 1);
9561 +#else
9562         result = (double) random() / ((double) MAX_RANDOM_VALUE + 1);
9563 +#endif /* USE_REPLICATION */
9564  
9565         PG_RETURN_FLOAT8(result);
9566  }
9567 diff -aruN postgresql-8.2.4/src/backend/utils/adt/nabstime.c pgcluster-1.7.0rc7/src/backend/utils/adt/nabstime.c
9568 --- postgresql-8.2.4/src/backend/utils/adt/nabstime.c   2006-07-14 16:52:24.000000000 +0200
9569 +++ pgcluster-1.7.0rc7/src/backend/utils/adt/nabstime.c 2007-02-18 22:52:16.000000000 +0100
9570 @@ -27,6 +27,10 @@
9571  #include "utils/builtins.h"
9572  #include "utils/nabstime.h"
9573  
9574 +#ifdef USE_REPLICATION
9575 +#include "replicate.h"
9576 +#endif /* USE_REPLICATION */
9577 +
9578  #define MIN_DAYNUM (-24856)            /* December 13, 1901 */
9579  #define MAX_DAYNUM 24854               /* January 18, 2038 */
9580  
9581 @@ -92,7 +96,13 @@
9582  {
9583         time_t          now;
9584  
9585 +#ifdef USE_REPLICATION
9586 +       struct timeval tp;
9587 +       PGR_GetTimeOfDay(&tp,NULL);
9588 +       now = tp.tv_sec;
9589 +#else
9590         now = time(NULL);
9591 +#endif /* USE_REPLICATION */
9592         return (AbsoluteTime) now;
9593  }
9594  
9595 @@ -1031,9 +1041,14 @@
9596  {
9597         time_t          sec;
9598  
9599 +#ifdef USE_REPLICATION
9600 +       struct timeval tp;
9601 +       PGR_GetTimeOfDay(&tp,NULL);
9602 +       sec = tp.tv_sec;
9603 +#else
9604         if (time(&sec) < 0)
9605                 PG_RETURN_ABSOLUTETIME(INVALID_ABSTIME);
9606 -
9607 +#endif
9608         PG_RETURN_ABSOLUTETIME((AbsoluteTime) sec);
9609  }
9610  
9611 @@ -1588,7 +1603,11 @@
9612         int                     len;
9613         pg_time_t       tt;
9614  
9615 +#ifdef USE_REPLICATION
9616 +       PGR_GetTimeOfDay(&tp,NULL);
9617 +#else
9618         gettimeofday(&tp, NULL);
9619 +#endif /* USE_REPLICATION */
9620         tt = (pg_time_t) tp.tv_sec;
9621         pg_strftime(templ, sizeof(templ), "%a %b %d %H:%M:%S.%%06d %Y %Z",
9622                                 pg_localtime(&tt, global_timezone));
9623 diff -aruN postgresql-8.2.4/src/backend/utils/adt/ri_triggers.c pgcluster-1.7.0rc7/src/backend/utils/adt/ri_triggers.c
9624 --- postgresql-8.2.4/src/backend/utils/adt/ri_triggers.c        2006-10-04 02:29:59.000000000 +0200
9625 +++ pgcluster-1.7.0rc7/src/backend/utils/adt/ri_triggers.c      2007-02-18 22:52:16.000000000 +0100
9626 @@ -40,6 +40,9 @@
9627  #include "utils/typcache.h"
9628  #include "miscadmin.h"
9629  
9630 +#ifdef USE_REPLICATION
9631 +#include "replicate.h"
9632 +#endif /* USE_REPLICATION */
9633  
9634  /* ----------
9635   * Local definitions
9636 @@ -271,8 +274,18 @@
9637                          * ----------
9638                          */
9639                         quoteRelationName(pkrelname, pk_rel);
9640 +#ifdef USE_REPLICATION
9641 +                       if (PGRcheckConstraintWithLock)
9642 +                               snprintf(querystr, sizeof(querystr), "SELECT 1 FROM ONLY %s x FOR UPDATE OF x",
9643 +                                                pkrelname);
9644 +                       else
9645 +                               snprintf(querystr, sizeof(querystr), "SELECT 1 FROM ONLY %s x ",
9646 +                                                pkrelname);
9647 +
9648 +#else
9649                         snprintf(querystr, sizeof(querystr), "SELECT 1 FROM ONLY %s x FOR SHARE OF x",
9650                                          pkrelname);
9651 +#endif /* USE_REPLICATION */
9652  
9653                         /* Prepare and save the plan */
9654                         qplan = ri_PlanCheck(querystr, 0, NULL,
9655 @@ -416,6 +429,9 @@
9656                         queryoids[i] = SPI_gettypeid(fk_rel->rd_att,
9657                                                                                  qkey.keypair[i][RI_KEYPAIR_FK_IDX]);
9658                 }
9659 +#ifdef USE_REPLICATION
9660 +               if (PGRcheckConstraintWithLock)
9661 +#endif /* USE_REPLICATION */
9662                 strcat(querystr, " FOR SHARE OF x");
9663  
9664                 /* Prepare and save the plan */
9665 @@ -577,6 +593,9 @@
9666                         queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
9667                                                                                  qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
9668                 }
9669 +#ifdef USE_REPLICATION
9670 +               if (PGRcheckConstraintWithLock)
9671 +#endif /* USE_REPLICATION */
9672                 strcat(querystr, " FOR SHARE OF x");
9673  
9674                 /* Prepare and save the plan */
9675 @@ -733,6 +752,9 @@
9676                                         queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
9677                                                                                  qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
9678                                 }
9679 +#ifdef USE_REPLICATION
9680 +                               if (PGRcheckConstraintWithLock)
9681 +#endif /* USE_REPLICATION */
9682                                 strcat(querystr, " FOR SHARE OF x");
9683  
9684                                 /* Prepare and save the plan */
9685 @@ -922,6 +944,9 @@
9686                                         queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
9687                                                                                  qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
9688                                 }
9689 +#ifdef USE_REPLICATION
9690 +                               if (PGRcheckConstraintWithLock)
9691 +#endif /* USE_REPLICATION */
9692                                 strcat(querystr, " FOR SHARE OF x");
9693  
9694                                 /* Prepare and save the plan */
9695 @@ -1428,6 +1453,9 @@
9696                                         queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
9697                                                                                  qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
9698                                 }
9699 +#ifdef USE_REPLICATION
9700 +                               if (PGRcheckConstraintWithLock)
9701 +#endif /* USE_REPLICATION */
9702                                 strcat(querystr, " FOR SHARE OF x");
9703  
9704                                 /* Prepare and save the plan */
9705 @@ -1607,6 +1635,9 @@
9706                                         queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
9707                                                                                  qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
9708                                 }
9709 +#ifdef USE_REPLICATION
9710 +                               if (PGRcheckConstraintWithLock)
9711 +#endif /* USE_REPLICATION */
9712                                 strcat(querystr, " FOR SHARE OF x");
9713  
9714                                 /* Prepare and save the plan */
9715 diff -aruN postgresql-8.2.4/src/backend/utils/adt/timestamp.c pgcluster-1.7.0rc7/src/backend/utils/adt/timestamp.c
9716 --- postgresql-8.2.4/src/backend/utils/adt/timestamp.c  2006-11-11 02:14:19.000000000 +0100
9717 +++ pgcluster-1.7.0rc7/src/backend/utils/adt/timestamp.c        2007-02-18 22:52:16.000000000 +0100
9718 @@ -39,6 +39,9 @@
9719  #error -ffast-math is known to break this code
9720  #endif
9721  
9722 +#ifdef USE_REPLICATION
9723 +#include "replicate.h"
9724 +#endif /* USE_REPLICATION */
9725  
9726  /* Set at postmaster start */
9727  TimestampTz PgStartTime;
9728 @@ -948,7 +951,11 @@
9729         TimestampTz result;
9730         struct timeval tp;
9731  
9732 +#ifdef USE_REPLICATION
9733 +       PGR_GetTimeOfDay(&tp,NULL);
9734 +#else
9735         gettimeofday(&tp, NULL);
9736 +#endif
9737  
9738         result = (TimestampTz) tp.tv_sec -
9739                 ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY);
9740 diff -aruN postgresql-8.2.4/src/backend/utils/error/assert.c pgcluster-1.7.0rc7/src/backend/utils/error/assert.c
9741 --- postgresql-8.2.4/src/backend/utils/error/assert.c   2006-03-05 16:58:46.000000000 +0100
9742 +++ pgcluster-1.7.0rc7/src/backend/utils/error/assert.c 2007-02-18 22:52:16.000000000 +0100
9743 @@ -19,6 +19,10 @@
9744  
9745  #include <unistd.h>
9746  
9747 +#ifdef USE_REPLICATION
9748 +#include "replicate.h"
9749 +#endif /* USE_REPLICATION */
9750 +
9751  /*
9752   * ExceptionalCondition - Handles the failure of an Assert()
9753   */
9754 @@ -39,6 +43,18 @@
9755                                          fileName, lineNumber);
9756         }
9757  
9758 +#ifdef USE_REPLICATION
9759 +       if ((PGR_Check_Lock.dest == TO_REPLICATION_SERVER ) &&
9760 +               (PGR_Need_Notice == true))
9761 +       {
9762 +               PGR_Notice_Transaction_Query_Aborted();
9763 +       }
9764 +       if (PGR_Copy_Data_Need_Replicate)
9765 +       {
9766 +               PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
9767 +       }
9768 +#endif /* USE_REPLICATION */
9769 +
9770  #ifdef SLEEP_ON_ASSERT
9771  
9772         /*
9773 diff -aruN postgresql-8.2.4/src/backend/utils/error/elog.c pgcluster-1.7.0rc7/src/backend/utils/error/elog.c
9774 --- postgresql-8.2.4/src/backend/utils/error/elog.c     2006-11-28 13:54:42.000000000 +0100
9775 +++ pgcluster-1.7.0rc7/src/backend/utils/error/elog.c   2007-02-18 22:52:16.000000000 +0100
9776 @@ -70,6 +70,9 @@
9777  #include "utils/memutils.h"
9778  #include "utils/ps_status.h"
9779  
9780 +#ifdef USE_REPLICATION
9781 +#include "replicate.h"
9782 +#endif /* USE_REPLICATION */
9783  
9784  /* Global variables */
9785  ErrorContextCallback *error_context_stack = NULL;
9786 @@ -314,6 +317,16 @@
9787         MemoryContext oldcontext;
9788         ErrorContextCallback *econtext;
9789  
9790 +#ifdef USE_REPLICATION
9791 +       int status = 0;
9792 +       bool parse_error_flag = false;
9793 +
9794 +       if ((edata->message) && (strstr(edata->message,"parse error") != NULL))
9795 +       {
9796 +               parse_error_flag = true;
9797 +       }
9798 +#endif /* USE_REPLICATION */
9799 +
9800         recursion_depth++;
9801         CHECK_STACK_DEPTH();
9802  
9803 @@ -363,6 +376,24 @@
9804                  * handler should reset it to something else soon.
9805                  */
9806  
9807 +#ifdef USE_REPLICATION
9808 +               if (parse_error_flag)
9809 +               {
9810 +                       if ((PGR_Check_Lock.dest != TO_FRONTEND) &&
9811 +                               (Transaction_Mode > 0))
9812 +                       {
9813 +                               PGR_Force_Replicate_Query();
9814 +                       }
9815 +               }
9816 +               if (PGR_Copy_Data_Need_Replicate)
9817 +               {
9818 +                       PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
9819 +               }
9820 +               else if (PGR_Need_Notice == true)
9821 +               {
9822 +                       PGR_Notice_Transaction_Query_Done();
9823 +               }
9824 +#endif /* USE_REPLICATION */
9825                 recursion_depth--;
9826                 PG_RE_THROW();
9827         }
9828 @@ -377,7 +408,16 @@
9829          * client_min_messages above FATAL, so don't look at output_to_client.
9830          */
9831         if (elevel >= FATAL && whereToSendOutput == DestRemote)
9832 +       {
9833 +#ifdef USE_REPLICATION
9834 +               if (PGR_Copy_Data_Need_Replicate)
9835 +               {
9836 +                       PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
9837 +               }
9838 +#endif /* USE_REPLICATION */
9839                 pq_endcopyout(true);
9840 +       }
9841 +
9842  
9843         /* Emit the message to the right places */
9844         EmitErrorReport();
9845 @@ -417,6 +457,34 @@
9846                 if (PG_exception_stack == NULL && whereToSendOutput == DestRemote)
9847                         whereToSendOutput = DestNone;
9848  
9849 +#ifdef USE_REPLICATION
9850 +               if (CurrentReplicateServer != NULL)
9851 +               {
9852 +                       if (PGR_Need_Notice == true)
9853 +                       {
9854 +                               PGR_Notice_Transaction_Query_Aborted();
9855 +                       }
9856 +                       if (PGR_Copy_Data_Need_Replicate)
9857 +                       {
9858 +                               PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
9859 +                       }
9860 +                       else
9861 +                       {
9862 +                               if ((!PGR_Is_Replicated_Query ) &&
9863 +                                       (PGR_Check_Lock.dest != TO_FRONTEND) &&
9864 +                                       (PGR_Reliable_Mode_Wait == true) &&
9865 +                                       (CurrentReplicateServer->response_mode == PGR_RELIABLE_MODE))
9866 +                               {
9867 +                                       status = PGR_Recv_Trigger(0);
9868 +                               }
9869 +                       }
9870 +               }
9871 +               if (TransactionSock != -1)
9872 +               {
9873 +                       close (TransactionSock);
9874 +                       TransactionSock = -1;
9875 +               }
9876 +#endif /* USE_REPLICATION */
9877                 /*
9878                  * fflush here is just to improve the odds that we get to see the
9879                  * error message, in case things are so hosed that proc_exit crashes.
9880 @@ -436,6 +504,34 @@
9881  
9882         if (elevel >= PANIC)
9883         {
9884 +#ifdef USE_REPLICATION
9885 +               if (CurrentReplicateServer != NULL)
9886 +               {
9887 +                       if (PGR_Need_Notice == true)
9888 +                       {
9889 +                               PGR_Notice_Transaction_Query_Aborted();
9890 +                       }
9891 +                       if (PGR_Copy_Data_Need_Replicate)
9892 +                       {
9893 +                               PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
9894 +                       }
9895 +                       else
9896 +                       {
9897 +                               if ((!PGR_Is_Replicated_Query ) &&
9898 +                                       (PGR_Check_Lock.dest != TO_FRONTEND) &&
9899 +                                       (PGR_Reliable_Mode_Wait == true) &&
9900 +                                       (CurrentReplicateServer->response_mode == PGR_RELIABLE_MODE))
9901 +                               {
9902 +                                       status = PGR_Recv_Trigger(PGR_Replication_Timeout);
9903 +                               }
9904 +                       }
9905 +               }
9906 +               if (TransactionSock != -1)
9907 +               {
9908 +                       close (TransactionSock);
9909 +                       TransactionSock = -1;
9910 +               }
9911 +#endif /* USE_REPLICATION */
9912                 /*
9913                  * Serious crash time. Postmaster will observe SIGABRT process exit
9914                  * status and kill the other backends too.
9915 diff -aruN postgresql-8.2.4/src/backend/utils/fmgr/fmgr.c pgcluster-1.7.0rc7/src/backend/utils/fmgr/fmgr.c
9916 --- postgresql-8.2.4/src/backend/utils/fmgr/fmgr.c      2006-10-04 02:30:01.000000000 +0200
9917 +++ pgcluster-1.7.0rc7/src/backend/utils/fmgr/fmgr.c    2007-02-18 22:52:16.000000000 +0100
9918 @@ -25,6 +25,9 @@
9919  #include "utils/fmgrtab.h"
9920  #include "utils/lsyscache.h"
9921  #include "utils/syscache.h"
9922 +#ifdef USE_REPLICATION
9923 +#include "replicate.h"
9924 +#endif /* USE_REPLICATION */
9925  
9926  /*
9927   * Declaration for old-style function pointer type.  This is now used only
9928 @@ -218,7 +221,12 @@
9929                 ReleaseSysCache(procedureTuple);
9930                 return;
9931         }
9932 -
9933 +#ifdef USE_REPLICATION
9934 +       if (PGR_Replicate_Function_Call() != STATUS_OK) {
9935 +               ReleaseSysCache(procedureTuple);        /* drop the pg_proc syscache ref before bailing out, as the return above does */
9936 +               return;
9937 +       }
9938 +#endif /* USE_REPLICATION */
9939         switch (procedureStruct->prolang)
9940         {
9941                 case INTERNALlanguageId:
9942 diff -aruN postgresql-8.2.4/src/backend/utils/mb/mbutils.c pgcluster-1.7.0rc7/src/backend/utils/mb/mbutils.c
9943 --- postgresql-8.2.4/src/backend/utils/mb/mbutils.c     2006-10-04 02:30:02.000000000 +0200
9944 +++ pgcluster-1.7.0rc7/src/backend/utils/mb/mbutils.c   2007-02-18 22:52:16.000000000 +0100
9945 @@ -15,6 +15,9 @@
9946  #include "utils/memutils.h"
9947  #include "utils/syscache.h"
9948  
9949 +#ifdef USE_REPLICATION
9950 +#include "replicate.h"
9951 +#endif /* USE_REPLICATION */
9952  /*
9953   * We handle for actual FE and BE encoding setting encoding-identificator
9954   * and encoding-name too. It prevent searching and conversion from encoding
9955 @@ -442,6 +445,11 @@
9956                                 dest_encoding;
9957         FmgrInfo   *flinfo;
9958  
9959 +#ifdef USE_REPLICATION
9960 +       if (PGR_Is_Replicated_Query)
9961 +               return (char *)src;
9962 +#endif /* USE_REPLICATION */
9963 +
9964         if (is_client_to_server)
9965         {
9966                 src_encoding = ClientEncoding->encoding;
9967 diff -aruN postgresql-8.2.4/src/backend/utils/misc/guc.c pgcluster-1.7.0rc7/src/backend/utils/misc/guc.c
9968 --- postgresql-8.2.4/src/backend/utils/misc/guc.c       2006-11-29 15:50:07.000000000 +0100
9969 +++ pgcluster-1.7.0rc7/src/backend/utils/misc/guc.c     2007-02-18 22:52:16.000000000 +0100
9970 @@ -25,6 +25,9 @@
9971  #include <syslog.h>
9972  #endif
9973  
9974 +#ifdef USE_REPLICATION
9975 +#include "replicate.h"
9976 +#endif /* USE_REPLICATION */
9977  
9978  #include "access/gin.h"
9979  #include "access/twophase.h"
9980 @@ -236,6 +239,9 @@
9981  char      *role_string;
9982  char      *session_authorization_string;
9983  
9984 +#ifdef USE_REPLICATION
9985 +static void ShowReplicationServerConfig(DestReceiver *dest);
9986 +#endif /* USE_REPLICATION */
9987  
9988  /*
9989   * Displayable names for context types (enum GucContext)
9990 @@ -970,6 +976,40 @@
9991                 &pg_krb_caseins_users,
9992                 false, NULL, NULL
9993         },
9994 +#ifdef USE_REPLICATION
9995 +       {
9996 +               {"pgr_force_loadbalance", PGC_USERSET, CLIENT_CONN_STATEMENT,
9997 +                       gettext_noop("force loadbalance mode"),
9998 +                       NULL
9999 +               },
10000 +               &PGRforceLoadBalance,
10001 +               false, NULL, NULL
10002 +       },
10003 +       {
10004 +               {"check_constraint_with_lock", PGC_USERSET, CLIENT_CONN_STATEMENT,
10005 +                       gettext_noop("check constrain with lock"),
10006 +                       NULL
10007 +               }, 
10008 +               &PGRcheckConstraintWithLock,
10009 +               false, NULL, NULL
10010 +       },
10011 +       {
10012 +               {"auto_lock_table", PGC_USERSET, CLIENT_CONN_STATEMENT, 
10013 +                       gettext_noop("auto lock table"),
10014 +                       NULL
10015 +               }, 
10016 +               &PGRautoLockTable,
10017 +               true, NULL, NULL
10018 +       },
10019 +       {
10020 +               {"not_replicate_prepared_select", PGC_USERSET, CLIENT_CONN_STATEMENT, 
10021 +                       gettext_noop("not replicate the prepared as select"),
10022 +                       NULL
10023 +               }, 
10024 +               &PGRnotReplicatePreparedSelect,
10025 +               false, NULL, NULL
10026 +       },
10027 +#endif
10028  
10029         {
10030                 {"escape_string_warning", PGC_USERSET, COMPAT_OPTIONS_PREVIOUS,
10031 @@ -4830,6 +4870,10 @@
10032  {
10033         if (pg_strcasecmp(name, "all") == 0)
10034                 ShowAllGUCConfig(dest);
10035 +#ifdef USE_REPLICATION
10036 +       else if (pg_strcasecmp(name, "replication_server") == 0)        /* PGCluster pseudo-variable; same locale-independent compare as "all" */
10037 +               ShowReplicationServerConfig(dest);
10038 +#endif
10039         else
10040                 ShowGUCConfigOption(name, dest);
10041  }
10042 @@ -6512,5 +6556,72 @@
10043         return nbuf;
10044  }
10045  
10046 +#ifdef USE_REPLICATION
10047 +/*
10048 + * SHOW replication_server: emit one row (status, host_name, port_num, recovery_port_num) per ReplicateServerData entry
10049 + */
10050 +static void
10051 +ShowReplicationServerConfig(DestReceiver *dest)
10052 +{
10053 +       TupOutputState *tstate;
10054 +       TupleDesc       tupdesc;
10055 +       char       *values[4];
10056 +       char        buffer[256];
10057 +       ReplicateServerInfo  *sp;
10058 +       
10059 +       /* need a tuple descriptor representing four TEXT columns */
10060 +       tupdesc = CreateTemplateTupleDesc(4, false);
10061 +       TupleDescInitEntry(tupdesc, (AttrNumber) 1, "status",
10062 +                                          TEXTOID, -1, 0 );
10063 +       TupleDescInitEntry(tupdesc, (AttrNumber) 2, "host_name",
10064 +                                          TEXTOID, -1, 0 );
10065 +       TupleDescInitEntry(tupdesc, (AttrNumber) 3, "port_num",
10066 +                                          TEXTOID, -1, 0 );
10067 +       TupleDescInitEntry(tupdesc, (AttrNumber) 4, "recovery_port_num",
10068 +                                          TEXTOID, -1, 0 );
10069 +
10070 +       /* prepare for projection of tuples */
10071 +       tstate = begin_tup_output_tupdesc(dest, tupdesc);
10072 +
10073 +       sp = ReplicateServerData;       /* pass 1: flag entries whose status check returns STATUS_ERROR as DATA_ERR */
10074 +       while (sp->useFlag != DATA_END) {
10075 +               if (PGR_Check_Replicate_Server_Status(sp) == STATUS_ERROR) {
10076 +                       PGR_Set_Replication_Server_Status(sp, DATA_ERR);
10077 +               }
10078 +
10079 +               sp++;
10080 +       }
10081 +
10082 +       sp = ReplicateServerData;       /* pass 2: rewind and output one row per entry until one flagged DATA_END */
10083 +       while (sp->useFlag != DATA_END) {
10084 +               if (sp->useFlag == DATA_USE) {  /* map useFlag to the text shown in the "status" column */
10085 +                       values[0] = "ALIVE";
10086 +               } else if (sp->useFlag == DATA_ERR) {
10087 +                       values[0] = "DEAD";
10088 +               } else if (sp->useFlag == DATA_INIT) {
10089 +                       values[0] = "STANDBY";
10090 +               } else {
10091 +                       values[0] = "UNKNOWN";
10092 +               }
10093 +
10094 +               values[1] = (char *) sp->hostName;
10095 +
10096 +               snprintf(buffer, sizeof(buffer), "%d", sp->portNumber);
10097 +               values[2] = pstrdup(buffer);    /* copy needed: buffer is reused for the next column */
10098 +
10099 +               snprintf(buffer, sizeof(buffer), "%d", sp->recoveryPortNumber);
10100 +               values[3] = pstrdup(buffer);
10101 +
10102 +               do_tup_output(tstate, values);
10103 +
10104 +               pfree(values[2]);       /* release the per-row copies made with pstrdup above */
10105 +               pfree(values[3]);
10106 +
10107 +               sp++;
10108 +       }
10109 +
10110 +       end_tup_output(tstate);
10111 +}
10112 +#endif /* USE_REPLICATION */
10113  
10114  #include "guc-file.c"
10115 diff -aruN postgresql-8.2.4/src/backend/utils/misc/postgresql.conf.sample pgcluster-1.7.0rc7/src/backend/utils/misc/postgresql.conf.sample
10116 --- postgresql-8.2.4/src/backend/utils/misc/postgresql.conf.sample      2007-01-20 22:42:06.000000000 +0100
10117 +++ pgcluster-1.7.0rc7/src/backend/utils/misc/postgresql.conf.sample    2007-02-18 22:52:16.000000000 +0100
10118 @@ -469,3 +469,12 @@
10119  #---------------------------------------------------------------------------
10120  
10121  #custom_variable_classes = ''          # list of custom variable class names
10122 +
10123 +
10124 +#---------------------------------------------------------------------------
10125 +#  PGCluster
10126 +#---------------------------------------------------------------------------
10127 +
10128 +# auto_lock_table = true
10129 +# check_constraint_with_lock = false
10130 +# not_replicate_prepared_select = false
10131 diff -aruN postgresql-8.2.4/src/bin/initdb/initdb.c pgcluster-1.7.0rc7/src/bin/initdb/initdb.c
10132 --- postgresql-8.2.4/src/bin/initdb/initdb.c    2006-10-04 20:58:08.000000000 +0200
10133 +++ pgcluster-1.7.0rc7/src/bin/initdb/initdb.c  2007-02-18 22:52:16.000000000 +0100
10134 @@ -122,6 +122,11 @@
10135  static int     n_buffers = 50;
10136  static int     n_fsm_pages = 20000;
10137  
10138 +#ifdef USE_REPLICATION
10139 +static char *cluster_conf_file;
10140 +static char *pgreplicate_conf_file;
10141 +static char *pglb_conf_file;
10142 +#endif /* USE_REPLICATION */
10143  /*
10144   * Warning messages for authentication methods
10145   */
10146 @@ -1352,6 +1357,14 @@
10147  
10148         free(conflines);
10149  
10150 +#ifdef USE_REPLICATION
10151 +       /* cluster.conf */
10152 +       conflines = readfile(cluster_conf_file);
10153 +       snprintf(path, sizeof(path), "%s/cluster.conf", pg_data);
10154 +       writefile(path, conflines);
10155 +       chmod(path, 0600);
10156 +       free(conflines);
10157 +#endif /* USE_REPLICATION */
10158         check_ok();
10159  }
10160  
10161 @@ -2712,6 +2725,11 @@
10162         set_input(&info_schema_file, "information_schema.sql");
10163         set_input(&features_file, "sql_features.txt");
10164         set_input(&system_views_file, "system_views.sql");
10165 +#ifdef USE_REPLICATION
10166 +       set_input(&cluster_conf_file, "cluster.conf.sample");
10167 +       set_input(&pgreplicate_conf_file, "pgreplicate.conf.sample");
10168 +       set_input(&pglb_conf_file, "pglb.conf.sample");
10169 +#endif /* USE_REPLICATION */
10170  
10171         set_info_version();
10172  
10173 @@ -2730,6 +2748,16 @@
10174                                 desc_file, shdesc_file,
10175                                 conf_file,
10176                                 hba_file, ident_file);
10177 +#ifdef USE_REPLICATION
10178 +               fprintf(stderr,
10179 +                               "PGCLUSTER_VERSION=%s\n"
10180 +                               "CLUSTER_CONF_SAMPLE=%s\nPGREPLICATE_CONF_SAMPLE=%s\n"
10181 +                               "PGLB_CONF_SAMPLE=%s\n",
10182 +                               PGCLUSTER_VERSION,
10183 +                               cluster_conf_file,
10184 +                               pgreplicate_conf_file,
10185 +                               pglb_conf_file);
10186 +#endif /* USE_REPLICATION */
10187                 if (show_setting)
10188                         exit(0);
10189         }
10190 @@ -2744,6 +2772,11 @@
10191         check_input(info_schema_file);
10192         check_input(features_file);
10193         check_input(system_views_file);
10194 +#ifdef USE_REPLICATION
10195 +       check_input(cluster_conf_file);
10196 +       check_input(pgreplicate_conf_file);
10197 +       check_input(pglb_conf_file);
10198 +#endif /* USE_REPLICATION */
10199  
10200         setlocales();
10201  
10202 diff -aruN postgresql-8.2.4/src/bin/pg_dump/pg_dump.c pgcluster-1.7.0rc7/src/bin/pg_dump/pg_dump.c
10203 --- postgresql-8.2.4/src/bin/pg_dump/pg_dump.c  2006-10-10 01:36:59.000000000 +0200
10204 +++ pgcluster-1.7.0rc7/src/bin/pg_dump/pg_dump.c        2007-02-18 22:52:16.000000000 +0100
10205 @@ -119,6 +119,9 @@
10206  /* flag to turn on/off dollar quoting */
10207  static int     disable_dollar_quoting = 0;
10208  
10209 +#ifdef USE_REPLICATION
10210 +       bool            nonReplicate=true;
10211 +#endif
10212  
10213  static void help(const char *progname);
10214  static void expand_schema_name_patterns(SimpleStringList *patterns,
10215 @@ -235,6 +238,9 @@
10216                 {"column-inserts", no_argument, NULL, 'D'},
10217                 {"host", required_argument, NULL, 'h'},
10218                 {"ignore-version", no_argument, NULL, 'i'},
10219 +#ifdef USE_REPLICATION
10220 +               {"non-replicate", no_argument ,NULL, 'r'},
10221 +#endif
10222                 {"no-reconnect", no_argument, NULL, 'R'},
10223                 {"oids", no_argument, NULL, 'o'},
10224                 {"no-owner", no_argument, NULL, 'O'},
10225 @@ -368,6 +374,11 @@
10226                                 pgport = optarg;
10227                                 break;
10228  
10229 +#ifdef USE_REPLICATION
10230 +                       case 'r':
10231 +                               nonReplicate = true;
10232 +                               break;
10233 +#endif
10234                         case 'R':
10235                                 /* no-op, still accepted for backwards compatibility */
10236                                 break;
10237 @@ -553,6 +564,11 @@
10238         /*
10239          * Start serializable transaction to dump consistent data.
10240          */
10241 +#ifdef USE_REPLICATION
10242 +       if(nonReplicate) {
10243 +               do_sql_command(g_conn, "set pgr_force_loadbalance to on");
10244 +       }
10245 +#endif /* USE_REPLICATION */
10246         do_sql_command(g_conn, "BEGIN");
10247  
10248         do_sql_command(g_conn, "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE");
10249 @@ -751,6 +767,9 @@
10250         printf(_("  -o, --oids                  include OIDs in dump\n"));
10251         printf(_("  -O, --no-owner              skip restoration of object ownership\n"
10252                          "                              in plain text format\n"));
10253 +#ifdef USE_REPLICATION
10254 +       printf(_("  -r, --non-replicate      No queries replicate. Available only in pgcluster.\n"));
10255 +#endif
10256         printf(_("  -s, --schema-only           dump only the schema, no data\n"));
10257         printf(_("  -S, --superuser=NAME        specify the superuser user name to use in\n"
10258                          "                              plain text format\n"));
10259 diff -aruN postgresql-8.2.4/src/bin/pg_dump/pg_dumpall.c pgcluster-1.7.0rc7/src/bin/pg_dump/pg_dumpall.c
10260 --- postgresql-8.2.4/src/bin/pg_dump/pg_dumpall.c       2006-11-21 23:19:46.000000000 +0100
10261 +++ pgcluster-1.7.0rc7/src/bin/pg_dump/pg_dumpall.c     2007-02-18 22:52:16.000000000 +0100
10262 @@ -97,6 +97,9 @@
10263                 {"oids", no_argument, NULL, 'o'},
10264                 {"no-owner", no_argument, NULL, 'O'},
10265                 {"port", required_argument, NULL, 'p'},
10266 +#ifdef USE_REPLICATION
10267 +               {"non-replicate", no_argument ,NULL, 'r'},
10268 +#endif
10269                 {"password", no_argument, NULL, 'W'},
10270                 {"schema-only", no_argument, NULL, 's'},
10271                 {"superuser", required_argument, NULL, 'S'},
10272 @@ -161,7 +164,7 @@
10273  
10274         pgdumpopts = createPQExpBuffer();
10275  
10276 -       while ((c = getopt_long(argc, argv, "acdDgh:ioOp:sS:U:vWxX:", long_options, &optindex)) != -1)
10277 +       while ((c = getopt_long(argc, argv, "acdDgh:ioOp:rsS:U:vWxX:", long_options, &optindex)) != -1)
10278         {
10279                 switch (c)
10280                 {
10281 @@ -215,6 +218,11 @@
10282  #endif
10283                                 break;
10284  
10285 +#ifdef USE_REPLICATION
10286 +                       case 'r':
10287 +                               appendPQExpBuffer(pgdumpopts, " -r");
10288 +                               break;
10289 +#endif /* USE_REPLICATION */
10290                         case 's':
10291                                 schema_only = true;
10292                                 appendPQExpBuffer(pgdumpopts, " -s");
10293 @@ -397,6 +405,9 @@
10294         printf(_("\nConnection options:\n"));
10295         printf(_("  -h, --host=HOSTNAME      database server host or socket directory\n"));
10296         printf(_("  -p, --port=PORT          database server port number\n"));
10297 +#ifdef USE_REPLICATION
10298 +       printf(_("  -r, --non-replicate      No queries replicate. Available only in pgcluster.\n"));
10299 +#endif /* USE_REPLICATION */
10300         printf(_("  -U, --username=NAME      connect as specified database user\n"));
10301         printf(_("  -W, --password           force password prompt (should happen automatically)\n"));
10302  
10303 diff -aruN postgresql-8.2.4/src/include/commands/prepare.h pgcluster-1.7.0rc7/src/include/commands/prepare.h
10304 --- postgresql-8.2.4/src/include/commands/prepare.h     2006-10-04 02:30:08.000000000 +0200
10305 +++ pgcluster-1.7.0rc7/src/include/commands/prepare.h   2007-02-18 22:52:16.000000000 +0100
10306 @@ -64,4 +64,8 @@
10307  extern bool PreparedStatementReturnsTuples(PreparedStatement *stmt);
10308  extern List *FetchPreparedStatementTargetList(PreparedStatement *stmt);
10309  
10310 +#ifdef USE_REPLICATION
10311 +extern bool PGR_is_select_prepared_statement(PrepareStmt *stmt);
10312 +#endif /* USE_REPLICATION */
10313 +
10314  #endif   /* PREPARE_H */
10315 diff -aruN postgresql-8.2.4/src/include/pg_config.h.in pgcluster-1.7.0rc7/src/include/pg_config.h.in
10316 --- postgresql-8.2.4/src/include/pg_config.h.in 2006-11-06 04:44:38.000000000 +0100
10317 +++ pgcluster-1.7.0rc7/src/include/pg_config.h.in       2007-02-18 22:52:17.000000000 +0100
10318 @@ -673,3 +673,7 @@
10319  /* Define to empty if the keyword `volatile' does not work. Warning: valid
10320     code using `volatile' can become incorrect without. Disable with care. */
10321  #undef volatile
10322 +
10323 +/* PGCluster version */
10324 +#undef PGCLUSTER_VERSION
10325 +
10326 diff -aruN postgresql-8.2.4/src/include/replicate.h pgcluster-1.7.0rc7/src/include/replicate.h
10327 --- postgresql-8.2.4/src/include/replicate.h    1970-01-01 01:00:00.000000000 +0100
10328 +++ pgcluster-1.7.0rc7/src/include/replicate.h  2007-02-18 22:52:17.000000000 +0100
10329 @@ -0,0 +1,223 @@
10330 +/*-------------------------------------------------------------------------
10331 + *
10332 + * replicate.h
10333 + *       Primary include file for replicate server .c files
10334 + *
10335 + * This should be the first file included by replicate modules.
10336 + *
10337 + *-------------------------------------------------------------------------
10338 + */
10339 +#ifndef REPLICATE_H
10340 +#define        REPLICATE_H
10341 +
10342 +#ifndef _SYS_TIME_H
10343 +#include <sys/time.h>
10344 +#endif
10345 +#include "tcop/dest.h"
10346 +#include "storage/proc.h"
10347 +#include "lib/stringinfo.h"
10348 +#include "replicate_com.h"
10349 +
10350 +#define STAND_ALONE_TAG                        "When_Stand_Alone"
10351 +#define NOT_REPLICATE_INFO_TAG "Not_Replicate_Info"
10352 +#define DB_NAME_TAG                            "DB_Name"
10353 +#define TABLE_NAME_TAG                 "Table_Name"
10354 +#define RSYNC_PATH_TAG                 "Rsync_Path"
10355 +#define RSYNC_OPTION_TAG               "Rsync_Option"
10356 +#define RSYNC_COMPRESS_TAG             "Rsync_Compress"
10357 +#define PG_DUMP_PATH_TAG               "Pg_Dump_Path"
10358 +
10359 +#define CLUSTER_CONF_FILE              "cluster.conf"
10360 +#define DEFAULT_RSYNC                  "/usr/bin/rsync"
10361 +#define DEFAULT_PG_DUMP                        "/usr/local/pgsql/bin/pg_dump"
10362 +#define        NOT_SESSION_AUTHORIZATION       (0)
10363 +#define SESSION_AUTHORIZATION_BEGIN    (1)
10364 +#define SESSION_AUTHORIZATION_END      (2)
10365 +
10366 +#define READ_ONLY_IF_STAND_ALONE       "read_only"
10367 +#define READ_WRITE_IF_STAND_ALONE      "read_write"
10368 +#define PERMIT_READ_ONLY               (1)
10369 +#define PERMIT_READ_WRITE              (2)
10370 +#define STATUS_REPLICATED              (3)
10371 +#define STATUS_CONTINUE                        (4)
10372 +#define STATUS_CONTINUE_SELECT (5)
10373 +#define STATUS_NOT_REPLICATE   (6)
10374 +#define STATUS_SKIP_QUERY              (7)
10375 +#define STATUS_RECOVERY                        (11)
10376 +#define STATUS_REPLICATION_ABORT       (98)
10377 +#define STATUS_DEADLOCK_DETECT (99)
10378 +
10379 +#define TO_REPLICATION_SERVER  (0)
10380 +#define TO_FRONTEND                            (1)
10381 +
10382 +#define PGR_DEADLOCK_DETECTION_MSG "deadlock detected!"
10383 +#define PGR_REPLICATION_ABORT_MSG "replication aborted!"
10384 +#define SKIP_QUERY_1 "begin; select getdatabaseencoding(); commit"
10385 +#define SKIP_QUERY_2 "BEGIN; SELECT usesuper FROM pg_catalog.pg_user WHERE usename = '%s'; COMMIT"
10386 +#define SKIP_QUERY_3 "SET autocommit TO 'on'"
10387 +#define SKIP_QUERY_4 "SET search_path = public"
10388 +#define SYS_QUERY_1 "set pgr_force_loadbalance to on" 
10389 +
10390 +#define PGR_1ST_RECOVERY (1)
10391 +#define PGR_2ND_RECOVERY (2)
10392 +#define PGR_COLD_RECOVERY (1)
10393 +#define PGR_HOT_RECOVERY (2)
10394 +#define PGR_WITHOUT_BACKUP (3)
10395 +
10396 +#define PGR_MESSAGE_OTHER (0)
10397 +#define PGR_MESSAGE_SELECT (1)
10398 +#define PGR_MESSAGE_PREPARE (2)
10399 +#define PGR_MESSAGE_EXECUTE (3)
10400 +#define PGR_MESSAGE_DEALLOCATE (4)
10401 +
10402 +typedef struct
10403 +{
10404 +       bool is_stand_alone;
10405 +       int  permit;
10406 +} PGR_Stand_Alone_Type;
10407 +
10408 +typedef struct
10409 +{
10410 +       char db_name[DBNAME_MAX_LENGTH];
10411 +       char table_name[TABLENAME_MAX_LENGTH];
10412 +} PGR_Not_Replicate_Type;
10413 +
10414 +typedef struct
10415 +{
10416 +       bool check_lock_conflict;
10417 +       bool deadlock;
10418 +       int status_lock_conflict;
10419 +       int dest;
10420 +} PGR_Check_Lock_Type;
10421 +
10422 +typedef struct
10423 +{
10424 +       char * query_string;
10425 +       int query_len;
10426 +       char cmdSts;
10427 +       char cmdType;
10428 +       char useFlag;
10429 +} PGR_Retry_Query_Type;
10430 +
10431 +
10432 +/* replication log */
10433 +typedef struct {
10434 +       uint32_t PGR_Replicate_ID;
10435 +       uint32_t PGR_Request_ID;
10436 +} PGR_ReplicationLog_Info;
10437 +
10438 +typedef struct {
10439 +       char * password;
10440 +       char md5Salt[4];
10441 +       char cryptSalt[2];
10442 +} PGR_Password_Info;
10443 +
10444 +extern char * Query_String;
10445 +extern int TransactionQuery;
10446 +extern int Transaction_Mode;
10447 +extern bool PGR_Noticed_Abort;
10448 +extern bool Session_Authorization_Mode;
10449 +extern bool Create_Temp_Table_Mode;
10450 +extern int RecoveryPortNumber;
10451 +extern char * RsyncPath;
10452 +extern char * RsyncOption;
10453 +extern bool RsyncCompress;
10454 +extern char * PgDumpPath;
10455 +extern int TransactionSock;
10456 +extern ReplicateNow * ReplicateCurrentTime;
10457 +extern CopyData * PGRCopyData;
10458 +extern bool PGR_Copy_Data_Need_Replicate;
10459 +extern PGR_Stand_Alone_Type * PGR_Stand_Alone;
10460 +extern PGR_Not_Replicate_Type * PGR_Not_Replicate;
10461 +extern int PGR_Not_Replicate_Rec_Num;
10462 +extern bool autocommit;
10463 +extern bool PGR_Is_Replicated_Query;
10464 +extern PGR_Check_Lock_Type PGR_Check_Lock;
10465 +extern int PGR_Sock_To_Replication_Server;
10466 +extern bool PGR_Need_Notice;
10467 +extern bool PGR_Lock_Noticed;
10468 +extern bool PGR_Recovery_Option;
10469 +extern int PGR_recovery_mode;
10470 +extern ReplicateServerInfo * CurrentReplicateServer;
10471 +extern ReplicateServerInfo * LastReplicateServer;
10472 +extern char * PGRSelfHostName;
10473 +extern int PGR_Pending_Sem_Num;
10474 +extern int PGR_Response_Mode;
10475 +extern bool PGR_Reliable_Mode_Wait;
10476 +extern PGR_Retry_Query_Type PGR_Retry_Query;
10477 +extern bool needToUpdateReplicateIdOnNextQueryIsDone;
10478 +extern PGR_ReplicationLog_Info ReplicationLog_Info;
10479 +extern bool PGR_Not_Replication_Query;
10480 +extern bool PGR_Is_Sync_OID;
10481 +extern PGR_Password_Info * PGR_password;
10482 +
10483 +/* backend/utils/misc/guc.c */
10484 +extern bool PGRforceLoadBalance;
10485 +extern bool    PGRcheckConstraintWithLock;
10486 +extern bool    PGRautoLockTable;
10487 +extern bool    PGRnotReplicatePreparedSelect;
10488 +
10489 +/* in backend/libpq/replicate.c */
10490 +extern int PGR_Init_Replicate_Server_Data(void);
10491 +extern int PGR_Set_Replicate_Server_Socket(void);
10492 +extern int PGR_get_replicate_server_socket ( ReplicateServerInfo * sp , int socket_type );
10493 +extern ReplicateServerInfo * PGR_get_replicate_server_info(void);
10494 +extern ReplicateServerInfo * PGR_check_replicate_server_info(void);
10495 +extern char * PGR_Send_Replicate_Command(char * query_string, int query_len, char cmdSts ,char cmdType);
10496 +extern bool PGR_Is_Replicated_Command(char * query);
10497 +extern int Xlog_Check_Replicate(int operation);
10498 +extern int PGR_Replicate_Function_Call(void);
10499 +extern void PGR_delete_shm(void);
10500 +extern int PGR_replication(char * query_string, CommandDest dest, Node *parsetree, const char * commandTag);
10501 +extern bool PGR_Is_System_Command(char * query);
10502 +extern int PGR_Call_System_Command(char * command);
10503 +extern int PGR_GetTimeOfDay(struct timeval *tp,struct timezone *tpz);
10504 +extern long PGR_Random(void);
10505 +extern int PGR_Set_Current_Time(char * sec, char * usec);
10506 +extern int PGR_Send_Copy(CopyData * copy, int end);
10507 +extern CopyData * PGR_Set_Copy_Data(CopyData * copy, char *str, int len, int end);
10508 +extern char * PGR_scan_terminate( char * str);
10509 +extern bool PGR_Is_Stand_Alone(void);
10510 +extern void PGR_Send_Message_To_Frontend(char * msg);
10511 +extern void PGR_Notice_Transaction_Query_Done(void);
10512 +extern void PGR_Notice_Transaction_Query_Aborted(void);
10513 +extern int PGRsend_system_command(char cmdSts, char cmdType);
10514 +extern int PGR_Notice_Conflict(void);
10515 +extern int PGR_Recv_Trigger (int user_timeout);
10516 +extern void PGR_Set_Replication_Server_Status( ReplicateServerInfo * sp, int status);
10517 +extern int PGR_Is_Skip_Replication(char * query);
10518 +extern bool PGR_Did_Commit_Transaction(void);
10519 +extern int PGR_Set_Transaction_Mode(int mode,const char * commandTag);
10520 +extern char * PGR_Remove_Comment(char * str);
10521 +extern void PGR_Force_Replicate_Query(void);
10522 +extern void PGR_Notice_DeadLock(void);
10523 +extern void PGR_Set_Cluster_Status(int status);
10524 +extern int PGR_Get_Cluster_Status(void);
10525 +extern int PGR_Check_Replicate_Server_Status(ReplicateServerInfo * sp);
10526 +extern int PGR_lo_import(char * filename);
10527 +extern int PGR_lo_create(int flags);
10528 +extern int PGR_lo_open(Oid lobjId,int32 mode);
10529 +extern int PGR_lo_close(int32 fd);
10530 +extern int PGR_lo_write(int fd, char *buf, int len);
10531 +extern int PGR_lo_lseek(int32 fd, int32 offset, int32 whence);
10532 +extern int PGR_lo_unlink(Oid lobjId);
10533 +extern uint32_t PGRget_replication_id(void);
10534 +extern Oid PGRGetNewObjectId(Oid last_id);
10535 +extern int PGR_Send_Input_Message(char cmdType,StringInfo input_message);
10536 +extern bool PGR_is_select_prepare_query(void);
10537 +extern char * PGR_get_md5salt(char * md5Salt, char * string);
10538 +extern int PGR_recv_replicate_result(int sock,char * result,int user_timeout);
10539 +
10540 +/* in backend/libpq/recovery.c */
10541 +extern int PGR_Master_Main(void);
10542 +extern int PGR_Recovery_Main(int mode);
10543 +extern int PGR_recovery_error_send(void);
10544 +extern int PGR_recovery_finish_send(void);
10545 +extern int PGR_recovery_queue_data_req(void);
10546 +
10547 +/* in backend/libpq/lifecheck.c */
10548 +extern int PGR_Lifecheck_Main(void);
10549 +
10550 +/* in backend/access/transam/xact.c */
10551 +extern void PGR_Reload_Start_Time(void);
10552 +#endif /* REPLICATE_H */
10553 diff -aruN postgresql-8.2.4/src/include/replicate_com.h pgcluster-1.7.0rc7/src/include/replicate_com.h
10554 --- postgresql-8.2.4/src/include/replicate_com.h        1970-01-01 01:00:00.000000000 +0100
10555 +++ pgcluster-1.7.0rc7/src/include/replicate_com.h      2007-03-01 16:27:15.000000000 +0100
10556 @@ -0,0 +1,432 @@
10557 +/*-------------------------------------------------------------------------
10558 + *
10559 + * replicate_com.h
10560 + *       Primary include file for replicate server .c files
10561 + *
10562 + * This should be the first file included by replicate modules.
10563 + *
10564 + *-------------------------------------------------------------------------
10565 + */
10566 +#ifndef REPLICATE_COM_H
10567 +#define        REPLICATE_COM_H 1
10568 +
10569 +#ifndef _SYS_TYPES_H
10570 +#include <sys/types.h>
10571 +#endif
10572 +#ifndef _INTTYPES_H
10573 +#include <inttypes.h>
10574 +#endif
10575 +#ifndef _NETINET_IN_H
10576 +#include <netinet/in.h>
10577 +#endif
10578 +
10579 +#include "c.h"
10580 +#include "pg_config.h"
10581 +
10582 +/* default values */
10583 +#define DEFAULT_PGLB_PORT      (6001)
10584 +#define DEFAULT_PGLB_RECOVERY_PORT     (6101)
10585 +#define DEFAULT_PGLB_LIFECHECK_PORT    (6201)
10586 +#define DEFAULT_CLUSTER_PORT   (5432)
10587 +#define DEFAULT_CLUSTER_RECOVERY_PORT  (7101)
10588 +#define DEFAULT_CLUSTER_LIFECHECK_PORT (7201)
10589 +#define DEFAULT_PGRP_PORT      (8001)
10590 +#define DEFAULT_PGRP_RECOVERY_PORT     (8101)
10591 +#define DEFAULT_PGRP_LIFECHECK_PORT    (8201)
10592 +#define DEFAULT_PGRP_RLOG_PORT (8301)
10593 +#define MAX_DB_SERVER  (32)
10594 +
10595 +/**************************
10596 +*                         *
10597 +*   Packet ID definition  *
10598 +*                         *
10599 +***************************/
10600 +/*=========================
10601 +       Replication packet id
10602 +===========================*/
10603 +#define        CMD_SYS_REPLICATE       'R'
10604 +/*-------------------------
10605 +       Simple Query
10606 +--------------------------*/
10607 +#define CMD_STS_SET_SESSION_AUTHORIZATION      'S'
10608 +#define        CMD_STS_TRANSACTION     'T'
10609 +#define        CMD_STS_TEMP_TABLE      'E'
10610 +#define        CMD_STS_QUERY   'Q'
10611 +#define        CMD_STS_OTHER   'O'
10612 +
10613 +#define CMD_TYPE_VACUUM        'V'
10614 +#define CMD_TYPE_ANALYZE       'A'
10615 +#define CMD_TYPE_REINDEX       'N'
10616 +#define CMD_TYPE_SELECT        'S'
10617 +#define CMD_TYPE_EXPLAIN       'X'
10618 +#define CMD_TYPE_SET   'T'
10619 +#define CMD_TYPE_RESET 't'
10620 +#define CMD_TYPE_INSERT        'I'
10621 +#define CMD_TYPE_DELETE        'D'
10622 +#define CMD_TYPE_EXECUTE       'U'      /* NOTE(review): same value as CMD_TYPE_UPDATE; confirm this is intended */
10623 +#define CMD_TYPE_UPDATE        'U'
10624 +#define CMD_TYPE_BEGIN 'B'
10625 +#define CMD_TYPE_COMMIT        'E'
10626 +#define CMD_TYPE_ROLLBACK      'R'
10627 +#define CMD_TYPE_CONNECTION_CLOSE      'x'
10628 +#define CMD_TYPE_SESSION_AUTHORIZATION_BEGIN   'a'
10629 +#define CMD_TYPE_SESSION_AUTHORIZATION_END     'b'
10630 +#define CMD_TYPE_SAVEPOINT     's'
10631 +#define CMD_TYPE_ROLLBACK_TO_SAVEPOINT 'r'
10632 +#define CMD_TYPE_RELEASE_SAVEPOINT     'l'
10633 +#define CMD_TYPE_OTHER 'O'
10634 +
10635 +/*=========================
10636 +       System call packet id
10637 +===========================*/
10638 +#define CMD_SYS_CALL           'S'
10639 +#define CMD_SYS_PREREPLICATE           'Z'
10640 +
10641 +#define        CMD_STS_NOTICE  'N'
10642 +#define        CMD_STS_RESPONSE        'R'
10643 +#define        CMD_STS_TRANSACTION_ABORT       'A'
10644 +#define        CMD_STS_QUERY_SUSPEND   'P'
10645 +#define        CMD_STS_QUERY_DONE      'D'
10646 +
10647 +#define CMD_TYPE_COMMIT_CONFIRM        'c'
10648 +#define CMD_TYPE_QUERY_CONFIRM 'q'
10649 +#define CMD_TYPE_DEADLOCK_DETECT       'd'
10650 +#define CMD_TYPE_FRONTEND_CLOSED       'x'
10651 +
10652 +/*----------------------------
10653 +       Copy Command
10654 +------------------------------*/
10655 +#define        CMD_STS_COPY    'C'
10656 +
10657 +#define CMD_TYPE_COPY  'C'
10658 +#define CMD_TYPE_COPY_DATA     'd'
10659 +#define CMD_TYPE_COPY_DATA_END 'e'
10660 +
10661 +/*----------------------------
10662 +       Large Object
10663 +------------------------------*/
10664 +#define        CMD_STS_LARGE_OBJECT    'L'
10665 +
10666 +#define CMD_TYPE_LO_IMPORT     'I'
10667 +#define CMD_TYPE_LO_CREATE     'C'
10668 +#define CMD_TYPE_LO_OPEN       'O'
10669 +#define CMD_TYPE_LO_WRITE      'W'
10670 +#define CMD_TYPE_LO_LSEEK      'S'
10671 +#define CMD_TYPE_LO_CLOSE      'X'
10672 +#define CMD_TYPE_LO_UNLINK     'U'
10673 +
10674 +/*-------------------------
10675 +       Prepare/Params Query
10676 +--------------------------*/
10677 +#define CMD_STS_PREPARE        'P'
10678 +
10679 +#define CMD_TYPE_P_PARSE       'P'
10680 +#define CMD_TYPE_P_BIND                'B'
10681 +#define CMD_TYPE_P_EXECUTE     'E'
10682 +#define CMD_TYPE_P_FASTPATH    'F'
10683 +#define CMD_TYPE_P_CLOSE       'C'
10684 +#define CMD_TYPE_P_DESCRIBE    'D'
10685 +#define CMD_TYPE_P_FLUSH       'H'
10686 +#define CMD_TYPE_P_SYNC                'S'
10687 +
10688 +/*=========================
10689 +       Lifecheck packet id
10690 +===========================*/
10691 +#define CMD_SYS_LIFECHECK              'W'
10692 +#define        CMD_STS_LOADBALANCER    'A'
10693 +#define        CMD_STS_CLUSTER                 'B'
10694 +#define        CMD_STS_REPLICATOR              'C'
10695 +
10696 +#define PGR_TRANSACTION_SOCKET (0)
10697 +#define PGR_QUERY_SOCKET       (1)
10698 +
10699 +#define        DATA_FREE       (0)
10700 +#define        DATA_INIT       (1)
10701 +#define        DATA_USE        (2)
10702 +#define        DATA_ERR        (90)
10703 +#define        DATA_END        (-1)
10704 +#define HOSTNAME_MAX_LENGTH     (128)
10705 +#define DBNAME_MAX_LENGTH       (128)
10706 +#define USERNAME_MAX_LENGTH     (128)
10707 +#define PASSWORD_MAX_LENGTH            (128)
10708 +#define TABLENAME_MAX_LENGTH     (128)
10709 +#define PATH_MAX_LENGTH        (256)
10710 +#define MAX_SERVER_NUM         (128)
10711 +#define MAX_RETRY_TIMES        (3)
10712 +#define MAX_SOCKET_QUEUE       (100000)
10713 +#define TRANSACTION_ERROR_RESULT       "TRANSACTION_ERROR"
10714 +#define REPLICATE_SERVER_SHM_KEY (1020)
10715 +/* target -> replicate */
10716 +#define RECOVERY_PREPARE_REQ   (1)
10717 +/* replicate  -> master */
10718 +#define RECOVERY_PGDATA_REQ    (2)
10719 +/* master -> replicate */
10720 +#define RECOVERY_PGDATA_ANS    (3)
10721 +/* replicate -> target */
10722 +#define RECOVERY_PREPARE_ANS   (4)
10723 +/* target -> replicate */
10724 +#define RECOVERY_START_REQ     (5)
10725 +/* replicate  -> master */
10726 +#define RECOVERY_FSYNC_REQ     (6)
10727 +/* master -> replicate */
10728 +#define RECOVERY_FSYNC_ANS     (7)
10729 +/* replicate -> target */
10730 +#define RECOVERY_START_ANS     (8)
10731 +/* target -> replicate */
10732 +#define RECOVERY_QUEUE_DATA_REQ        (9)
10733 +/* replicate -> target */
10734 +#define RECOVERY_QUEUE_DATA_ANS        (10)
10735 +/* target -> replicate */
10736 +#define RECOVERY_FINISH        (11)
10737 +
10738 +#define RECOVERY_ERROR_OCCUPIED        (100)
10739 +#define RECOVERY_ERROR_CONNECTION      (101)
10740 +#define RECOVERY_ERROR_TARGET_ONLY     (102)
10741 +#define RECOVERY_ERROR_ANS     (200)
10742 +
10743 +/* lifecheck ask from cluster db */
10744 +#define LIFECHECK_ASK_FROM_CLUSTER     (1)
10745 +/* lifecheck response from replication server */
10746 +#define LIFECHECK_RES_FROM_REPLICATOR  (2)
10747 +/* lifecheck ask from replication server */
10748 +#define LIFECHECK_ASK_FROM_REPLICATOR  (3)
10749 +/* lifecheck response from cluster db */
10750 +#define LIFECHECK_RES_FROM_CLUSTER     (4)
10751 +
10752 +#define REPLICATION_SERVER_INFO_TAG "Replicate_Server_Info"
10753 +#define HOST_NAME_TAG  "Host_Name"
10754 +#define PORT_TAG       "Port"
10755 +#define RECOVERY_PORT_TAG      "Recovery_Port"
10756 +#define LIFECHECK_PORT_TAG  "LifeCheck_Port"
10757 +#define TIMEOUT_TAG  "Replication_Timeout"
10758 +#define LIFECHECK_TIMEOUT_TAG  "LifeCheck_Timeout"
10759 +#define LIFECHECK_INTERVAL_TAG  "LifeCheck_Interval"
10760 +
10761 +#define RECOVERY_INIT  (0)
10762 +#define RECOVERY_PREPARE_START (1)
10763 +#define RECOVERY_START_1       (2)
10764 +#define RECOVERY_CLEARED       (3)
10765 +#define RECOVERY_WAIT_CLEAN (10)
10766 +#define RECOVERY_ERROR (99)
10767 +
10768 +/* response mode */
10769 +#define PGR_FAST_MODE  (0)
10770 +#define PGR_NORMAL_MODE        (1)
10771 +#define PGR_RELIABLE_MODE      (2)
10772 +
10773 +#define RECOVERY_TIMEOUT       (600)
10774 +#ifndef COMPLETION_TAG_BUFSIZE
10775 +#define        COMPLETION_TAG_BUFSIZE (128)
10776 +#endif
10777 +
10778 +/* replicate log type */
10779 +#define FROM_R_LOG_TYPE        (1)
10780 +#define FROM_C_DB_TYPE (2)
10781 +#define CONNECTION_SUSPENDED_TYPE      (3)
10782 +
10783 +#define PGR_SYSTEM_COMMAND_FUNC        "PGR_SYSTEM_COMMAND_FUNCTION"
10784 +#define PGR_STARTUP_REPLICATION_SERVER_FUNC_NO (1)
10785 +#define PGR_CHANGE_REPLICATION_SERVER_FUNC_NO  (2)
10786 +#define PGR_SET_CURRENT_TIME_FUNC_NO   (3)
10787 +#define PGR_NOTICE_DEADLOCK_DETECTION_FUNC_NO  (4)
10788 +#define PGR_TRANSACTION_CONFIRM_ANSWER_FUNC_NO (5)
10789 +#define PGR_RELIABLE_MODE_DONE_FUNC_NO         (6)
10790 +#define PGR_NOTICE_ABORT_FUNC_NO               (7)
10791 +#define PGR_SET_CURRENT_REPLICATION_QUERY_ID_NO (8)
10792 +#define PGR_QUERY_CONFIRM_ANSWER_FUNC_NO       (9)
10793 +#define PGR_GET_OID_FUNC_NO            (10)
10794 +#define PGR_SET_OID_FUNC_NO            (11)
10795 +
10796 +#define PGR_CMD_ARG_NUM        (10)
10797 +#define PGR_LOCK_CONFLICT_NOTICE_CMD   "PGR_LOCK_CONFLICT_NOTICE_CMD"
10798 +#define PGR_DEADLOCK_DETECT_NOTICE_CMD "PGR_DEADLOCK_DETECT_NOTICE_CMD"
10799 +#define PGR_QUERY_DONE_NOTICE_CMD              "PGR_QUERY_DONE_NOTICE_CMD"
10800 +#define PGR_QUERY_ABORTED_NOTICE_CMD   "PGR_QUERY_ABORTED_NOTICE_CMD"
10801 +#define PGR_RETRY_LOCK_QUERY_CMD       "PGR_RETRY_LOCK_QUERY_CMD"
10802 +#define PGR_NOT_YET_REPLICATE_NOTICE_CMD       "PGR_NOT_YET_REPLICATE_NOTICE_CMD"
10803 +#define PGR_ALREADY_REPLICATED_NOTICE_CMD      "PGR_ALREADY_REPLICATED_NOTICE_CMD"
10804 +#define PGR_NOT_YET_COMMIT             (0)
10805 +#define PGR_ALREADY_COMMITTED  (1)
10806 +
10807 +#define COPYBUFSIZ     (8192)
10808 +#define MAX_WORDS      (24)
10809 +#define MAX_WORD_LETTERS       (48)
10810 +#define PGR_MESSAGE_BUFSIZE    (128)
10811 +#define INT_LENGTH     (12)
10812 +#define PGR_MAX_COUNTER        (0x0FFFFFFF)
10813 +#define PGR_GET_OVER_FLOW_FILTER       (0xF0000000)
10814 +#define PGR_GET_DATA_FILTER    (0x0FFFFFFF)
10815 +#define PGR_SET_OVER_FLOW      (0x10000000)
10816 +#define PGR_MIN_COUNTER (0x0000000F)
10817 +
10818 +#define STRCMP(x,y)    (strncmp(x,y,strlen(y)))
10819 +
10820 +/* life check target */
10821 +#define SYN_TO_LOAD_BALANCER   (0)
10822 +#define SYN_TO_CLUSTER_DB              (1)
10823 +#define SYN_TO_REPLICATION_SERVER      (2)
10824 +#define LIFE_CHECK_TRY_COUNT   (2)
10825 +#define LIFE_CHECK_STOP                (0)
10826 +#define LIFE_CHECK_START       (1)
10827 +
10828 +#ifndef HAVE_UNION_SEMUN
10829 +union semun {
10830 +       int val;
10831 +       struct semid_ds *buf;
10832 +       unsigned short int *array;
10833 +       struct seminfo *__buf;
10834 +};
10835 +#endif
10836 +
10837 +typedef struct ReplicateHeaderType
10838 +{
10839 +       char cmdSys;
10840 +       char cmdSts;    /*
10841 +                                               Q:query 
10842 +                                               T:transaction
10843 +                                       */
10844 +       char cmdType;   /*
10845 +                                               S:select
10846 +                                               I:insert
10847 +                                               D:delete
10848 +                                               U:update
10849 +                                               B:begin
10850 +                                               E:commit/rollback/end
10851 +                                               O:others
10852 +                                       */
10853 +       char rlog;              /*
10854 +                                       -- kind of replication log --
10855 +                                               1: send from replication log
10856 +                                               2: send from cluster db (should be retry)
10857 +                                               3: connection suspended
10858 +                                       */
10859 +       uint16_t port;
10860 +       uint16_t pid;
10861 +       uint32_t query_size;
10862 +       char from_host[HOSTNAME_MAX_LENGTH];
10863 +       char dbName[DBNAME_MAX_LENGTH];
10864 +       char userName[USERNAME_MAX_LENGTH];
10865 +       struct timeval tv;
10866 +       uint32_t query_id;
10867 +    int isAutoCommit; /* 0 if autocommit is off. 1 if autocommit is on */
10868 +       uint32_t request_id;
10869 +       uint32_t replicate_id;
10870 +       char password[PASSWORD_MAX_LENGTH];
10871 +       char md5Salt[4];
10872 +       char cryptSalt[2];
10873 +       char dummySalt[2];
10874 +} ReplicateHeader;
10875 +
10876 +typedef struct RecoveryPacketType
10877 +{
10878 +       uint16_t packet_no;     /*      
10879 +                                       1:start recovery prepare
10880 +                                       2:ask pgdata
10881 +                                       3:ans pgdata
10882 +                                       4:send master info
10883 +                                       5:start queueing query
10884 +                                       6:request fsync
10885 +                                       7:ready to fsync
10886 +                                       8:prepared master
10887 +                                       9:finished rsync
10888 +                                       */
10889 +       uint16_t max_connect;
10890 +       uint16_t port;
10891 +       uint16_t recoveryPort;
10892 +       char hostName[HOSTNAME_MAX_LENGTH];
10893 +       char pg_data[PATH_MAX_LENGTH]; 
10894 +       char userName[USERNAME_MAX_LENGTH];
10895 +} RecoveryPacket;
10896 +
10897 +typedef struct
10898 +{
10899 +       char table[128];
10900 +       int rec_no;
10901 +       char key[128];
10902 +       char value[128];
10903 +       char * last;
10904 +       char * next;
10905 +} ConfDataType;
10906 +
10907 +
10908 +typedef struct ReplicateServerInfoType
10909 +{
10910 +       uint32_t useFlag;
10911 +       char hostName[HOSTNAME_MAX_LENGTH];
10912 +       uint16_t portNumber;
10913 +       uint16_t recoveryPortNumber;
10914 +       uint16_t lifecheckPortNumber;
10915 +       uint16_t RLogPortNumber;
10916 +       uint32_t sock;
10917 +       uint32_t rlog_sock;
10918 +       uint32_t replicate_id;
10919 +       uint16_t response_mode;
10920 +       uint16_t retry_count;
10921 +} ReplicateServerInfo;
10922 +
10923 +
10924 +typedef struct ReplicateNowType
10925 +{
10926 +       uint32_t replicate_id;
10927 +       int useFlag;
10928 +       int use_seed;
10929 +       int use_time;
10930 +       int offset_sec;
10931 +       int offset_usec;
10932 +       struct timeval tp;
10933 +} ReplicateNow;
10934 +
10935 +typedef struct CopyDataType
10936 +{
10937 +       int cnt;
10938 +       char copy_data[COPYBUFSIZ];
10939 +} CopyData;
10940 +
10941 +typedef struct ClusterDBInfoType
10942 +{
10943 +       int status;
10944 +} ClusterDBInfo;
10945 +
10946 +typedef struct
10947 +{
10948 +       uint32_t arg1;
10949 +       uint32_t arg2;
10950 +       uint32_t arg3;
10951 +       char buf[1];
10952 +} LOArgs;
10953 +
10954 +typedef struct
10955 +{
10956 +       int length;
10957 +       char data[1];
10958 +} ArrayData;
10959 +
10960 +extern ConfDataType * ConfData_Top;
10961 +extern ConfDataType * ConfData_End;
10962 +extern ReplicateServerInfo * ReplicateServerData;
10963 +extern ClusterDBInfo * ClusterDBData;
10964 +extern int ReplicateServerShmid;
10965 +extern int ClusterDBShmid;
10966 +extern bool PGR_Under_Replication_Server;
10967 +extern int PGR_Replication_Timeout;
10968 +extern int PGR_Lifecheck_Timeout;
10969 +extern int PGR_Lifecheck_Interval;
10970 +
10971 +/* in backend/libpq/replicate_com.c */
10972 +extern int PGR_Create_Socket_Connect(int * fdP, char * hostName , unsigned short portNumber);
10973 +extern void PGR_Close_Sock(int * sock);
10974 +extern int PGR_Create_Socket_Bind(int * fdP, char * hostName , unsigned short portNumber);
10975 +extern int PGR_Create_Acception(int fd, int * sockP, char * hostName , unsigned short portNumber);
10976 +extern int PGR_Free_Conf_Data(void);
10977 +extern int PGR_Get_Conf_Data(char * dir , char * fname);
10978 +extern void PGRset_recovery_packet_no(RecoveryPacket * packet, int packet_no);
10979 +extern unsigned int PGRget_ip_by_name(char * host);
10980 +extern int PGRget_time_value(char *str);
10981 +
10982 +extern void PGRwrite_log_file(FILE * fp, const char * fmt,...);
10983 +extern void show_debug(const char * fmt,...);
10984 +extern void show_error(const char * fmt,...);
10985 +
10986 +
10987 +
10988 +#endif /* REPLICATE_COM_H */
10989 diff -aruN postgresql-8.2.4/src/include/storage/lmgr.h pgcluster-1.7.0rc7/src/include/storage/lmgr.h
10990 --- postgresql-8.2.4/src/include/storage/lmgr.h 2006-08-18 18:09:13.000000000 +0200
10991 +++ pgcluster-1.7.0rc7/src/include/storage/lmgr.h       2007-02-18 22:52:17.000000000 +0100
10992 @@ -15,6 +15,7 @@
10993  #define LMGR_H
10994  
10995  #include "storage/lock.h"
10996 +#include "storage/bufmgr.h"
10997  #include "utils/rel.h"
10998  
10999  
11000 @@ -69,4 +70,5 @@
11001  /* Knowledge about which locktags describe temp objects */
11002  extern bool LockTagIsTemp(const LOCKTAG *tag);
11003  
11004 +extern void XactLockTableWaitForCluster(TransactionId xid,Buffer buffer);
11005  #endif   /* LMGR_H */
11006 diff -aruN postgresql-8.2.4/src/include/storage/proc.h pgcluster-1.7.0rc7/src/include/storage/proc.h
11007 --- postgresql-8.2.4/src/include/storage/proc.h 2006-10-04 02:30:10.000000000 +0200
11008 +++ pgcluster-1.7.0rc7/src/include/storage/proc.h       2007-02-18 22:52:17.000000000 +0100
11009 @@ -97,6 +97,9 @@
11010         SHM_QUEUE       myProcLocks[NUM_LOCK_PARTITIONS];
11011  
11012         struct XidCache subxids;        /* cache for subtransaction XIDs */
11013 +#ifdef USE_REPLICATION
11014 +       unsigned int           replicationId; /* id for replication. */
11015 +#endif
11016  };
11017  
11018  /* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
11019 diff -aruN postgresql-8.2.4/src/interfaces/libpq/Makefile pgcluster-1.7.0rc7/src/interfaces/libpq/Makefile
11020 --- postgresql-8.2.4/src/interfaces/libpq/Makefile      2006-12-28 01:01:12.000000000 +0100
11021 +++ pgcluster-1.7.0rc7/src/interfaces/libpq/Makefile    2007-02-18 22:52:17.000000000 +0100
11022 @@ -33,7 +33,7 @@
11023  
11024  OBJS=  fe-auth.o fe-connect.o fe-exec.o fe-misc.o fe-print.o fe-lobj.o \
11025         fe-protocol2.o fe-protocol3.o pqexpbuffer.o pqsignal.o fe-secure.o \
11026 -       md5.o ip.o wchar.o encnames.o noblock.o pgstrcasecmp.o thread.o \
11027 +       dllist.o md5.o ip.o wchar.o encnames.o noblock.o pgstrcasecmp.o thread.o \
11028         $(filter crypt.o getaddrinfo.o inet_aton.o open.o snprintf.o strerror.o strlcpy.o, $(LIBOBJS))
11029  
11030  ifeq ($(PORTNAME), cygwin)
11031 @@ -89,6 +89,9 @@
11032  encnames.c wchar.c : % : $(backend_src)/utils/mb/%
11033         rm -f $@ && $(LN_S) $< .
11034  
11035 +dllist.c  : % : $(backend_src)/lib/dllist.c
11036 +       rm -f $@ && $(LN_S) $< .
11037 +
11038  
11039  # We need several not-quite-identical variants of .DEF files to build libpq
11040  # DLLs for Windows.  These are made from the single source file exports.txt.
11041 @@ -169,7 +172,7 @@
11042         rm -f '$(DESTDIR)$(includedir)/libpq-fe.h' '$(DESTDIR)$(includedir_internal)/libpq-int.h' '$(DESTDIR)$(includedir_internal)/pqexpbuffer.h' '$(DESTDIR)$(datadir)/pg_service.conf.sample'
11043  
11044  clean distclean: clean-lib
11045 -       rm -f $(OBJS) pg_config_paths.h crypt.c getaddrinfo.c inet_aton.c noblock.c open.c pgstrcasecmp.c snprintf.c strerror.c strlcpy.c thread.c md5.c ip.c encnames.c wchar.c pthread.h exports.list
11046 +       rm -f $(OBJS) pg_config_paths.h crypt.c getaddrinfo.c inet_aton.c noblock.c open.c pgstrcasecmp.c snprintf.c strerror.c strlcpy.c thread.c md5.c ip.c encnames.c wchar.c pthread.h exports.list dllist.c
11047         rm -f pg_config_paths.h # Might be left over from a Win32 client-only build
11048  
11049  maintainer-clean: distclean
11050 diff -aruN postgresql-8.2.4/src/interfaces/libpq/fe-auth.c pgcluster-1.7.0rc7/src/interfaces/libpq/fe-auth.c
11051 --- postgresql-8.2.4/src/interfaces/libpq/fe-auth.c     2006-10-04 02:30:12.000000000 +0200
11052 +++ pgcluster-1.7.0rc7/src/interfaces/libpq/fe-auth.c   2007-02-18 22:52:17.000000000 +0100
11053 @@ -51,6 +51,10 @@
11054  #include "fe-auth.h"
11055  #include "libpq/md5.h"
11056  
11057 +#ifdef USE_REPLICATION
11058 +#include "replicate_com.h"
11059 +bool PGR_Under_Replication_Server = false;
11060 +#endif /* USE_REPLICATION */
11061  
11062  #ifdef KRB5
11063  /*
11064 @@ -412,6 +416,19 @@
11065                                         free(crypt_pwd);
11066                                         return STATUS_ERROR;
11067                                 }
11068 +#ifdef USE_REPLICATION
11069 +                               if (PGR_Under_Replication_Server)
11070 +                               {
11071 +                                       /*
11072 +                                        * Called from the replication server: the password was
11073 +                                        * already encrypted at the Cluster DB, so pass it on as-is.
11074 +                                        * Copy at most size-1 bytes so the zeroed buffer stays terminated.
11075 +                                        */
11076 +                                       int size = 2 * (MD5_PASSWD_LEN + 1);
11077 +                                       memset(crypt_pwd,0, size);
11078 +                                       strncpy(crypt_pwd,password, size - 1);
11079 +                               }
11080 +#endif /* USE_REPLICATION */
11081                                 break;
11082                         }
11083                 case AUTH_REQ_CRYPT:
11084 diff -aruN postgresql-8.2.4/src/makefiles/Makefile.aix pgcluster-1.7.0rc7/src/makefiles/Makefile.aix
11085 --- postgresql-8.2.4/src/makefiles/Makefile.aix 2006-09-19 17:36:08.000000000 +0200
11086 +++ pgcluster-1.7.0rc7/src/makefiles/Makefile.aix       2007-02-18 22:52:17.000000000 +0100
11087 @@ -44,3 +44,5 @@
11088         $(CC) $(LDFLAGS) $(LDFLAGS_SL) -o $@ $*.o -Wl,-bE:$*$(EXPSUFF) $(SHLIB_LINK)
11089  
11090  sqlmansect = 7
11091 +CFLAGS += -pthread
11092 +LDFLAGS += -L/usr/lib/threads 
11093 diff -aruN postgresql-8.2.4/src/makefiles/Makefile.freebsd pgcluster-1.7.0rc7/src/makefiles/Makefile.freebsd
11094 --- postgresql-8.2.4/src/makefiles/Makefile.freebsd     2006-04-19 18:32:08.000000000 +0200
11095 +++ pgcluster-1.7.0rc7/src/makefiles/Makefile.freebsd   2007-02-18 22:52:17.000000000 +0100
11096 @@ -28,3 +28,5 @@
11097  endif
11098  
11099  sqlmansect = 7
11100 +
11101 +LIBS += -lc_r
11102 diff -aruN postgresql-8.2.4/src/makefiles/Makefile.hpux pgcluster-1.7.0rc7/src/makefiles/Makefile.hpux
11103 --- postgresql-8.2.4/src/makefiles/Makefile.hpux        2006-02-07 18:36:13.000000000 +0100
11104 +++ pgcluster-1.7.0rc7/src/makefiles/Makefile.hpux      2007-02-18 22:52:17.000000000 +0100
11105 @@ -10,6 +10,9 @@
11106  # correctly in the LP64 data model.
11107  LIBS := -lxnet $(LIBS)
11108  
11109 +# add thread lib for PGCluster
11110 +LIBS := -lpthread $(LIBS)
11111 +
11112  # Set up rpath so that the executables don't need SHLIB_PATH to be set.
11113  # (Note: --disable-rpath is a really bad idea on this platform...)
11114  ifeq ($(with_gnu_ld), yes)
11115 diff -aruN postgresql-8.2.4/src/makefiles/Makefile.linux pgcluster-1.7.0rc7/src/makefiles/Makefile.linux
11116 --- postgresql-8.2.4/src/makefiles/Makefile.linux       2005-12-09 22:19:36.000000000 +0100
11117 +++ pgcluster-1.7.0rc7/src/makefiles/Makefile.linux     2007-02-18 22:52:17.000000000 +0100
11118 @@ -14,3 +14,4 @@
11119         $(CC) -shared -o $@ $<
11120  
11121  sqlmansect = 7
11122 +LIBS += -lpthread
11123 diff -aruN postgresql-8.2.4/src/makefiles/Makefile.netbsd pgcluster-1.7.0rc7/src/makefiles/Makefile.netbsd
11124 --- postgresql-8.2.4/src/makefiles/Makefile.netbsd      2006-04-19 18:32:08.000000000 +0200
11125 +++ pgcluster-1.7.0rc7/src/makefiles/Makefile.netbsd    2007-02-18 22:52:17.000000000 +0100
11126 @@ -30,3 +30,4 @@
11127  endif
11128  
11129  sqlmansect = 7
11130 +LIBS += -lpthread
11131 diff -aruN postgresql-8.2.4/src/makefiles/Makefile.openbsd pgcluster-1.7.0rc7/src/makefiles/Makefile.openbsd
11132 --- postgresql-8.2.4/src/makefiles/Makefile.openbsd     2006-04-19 18:32:08.000000000 +0200
11133 +++ pgcluster-1.7.0rc7/src/makefiles/Makefile.openbsd   2007-02-18 22:52:17.000000000 +0100
11134 @@ -28,3 +28,4 @@
11135  endif
11136  
11137  sqlmansect = 7
11138 +LIBS += -lc_r
11139 diff -aruN postgresql-8.2.4/src/makefiles/Makefile.solaris pgcluster-1.7.0rc7/src/makefiles/Makefile.solaris
11140 --- postgresql-8.2.4/src/makefiles/Makefile.solaris     2005-12-09 22:19:36.000000000 +0100
11141 +++ pgcluster-1.7.0rc7/src/makefiles/Makefile.solaris   2007-02-18 22:52:17.000000000 +0100
11142 @@ -20,3 +20,4 @@
11143         $(LD) -G -Bdynamic -o $@ $<
11144  
11145  sqlmansect = 5sql
11146 +LIBS += -lpthread
11147 diff -aruN postgresql-8.2.4/src/makefiles/Makefile.sunos4 pgcluster-1.7.0rc7/src/makefiles/Makefile.sunos4
11148 --- postgresql-8.2.4/src/makefiles/Makefile.sunos4      2002-09-05 00:54:18.000000000 +0200
11149 +++ pgcluster-1.7.0rc7/src/makefiles/Makefile.sunos4    2007-02-18 22:52:17.000000000 +0100
11150 @@ -11,3 +11,4 @@
11151         $(LD) -assert pure-text -Bdynamic -o $@ $<
11152  
11153  sqlmansect = 7
11154 +LIBS += -lpthread
11155 diff -aruN postgresql-8.2.4/src/pgcluster/Makefile pgcluster-1.7.0rc7/src/pgcluster/Makefile
11156 --- postgresql-8.2.4/src/pgcluster/Makefile     1970-01-01 01:00:00.000000000 +0100
11157 +++ pgcluster-1.7.0rc7/src/pgcluster/Makefile   2007-02-18 22:52:17.000000000 +0100
11158 @@ -0,0 +1,17 @@
11159 +#-------------------------------------------------------------------------
11160 +#
11161 +# Makefile for src/pgcluster (server programs)
11162 +#
11163 +#-------------------------------------------------------------------------
11164 +
11165 +subdir = src/pgcluster
11166 +top_builddir = ../..
11167 +include $(top_builddir)/src/Makefile.global
11168 +
11169 +DIRS := libpgc pgrp pglb tool
11170 +
11171 +all install installdirs uninstall depend distprep:
11172 +       @for dir in $(DIRS); do $(MAKE) -C $$dir $@ || exit; done
11173 +
11174 +clean distclean maintainer-clean:
11175 +       -@for dir in $(DIRS); do $(MAKE) -C $$dir $@; done
11176 diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/Makefile pgcluster-1.7.0rc7/src/pgcluster/libpgc/Makefile
11177 --- postgresql-8.2.4/src/pgcluster/libpgc/Makefile      1970-01-01 01:00:00.000000000 +0100
11178 +++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/Makefile    2007-02-18 22:52:17.000000000 +0100
11179 @@ -0,0 +1,29 @@
11180 +#-------------------------------------------------------------------------
11181 +#
11182 +# Makefile--
11183 +#    Makefile for libpgc subsystem (common library for replication server)
11184 +#    Links sem.o, show.o and signal.o into the single object SUBSYS.o.
11185 +#-------------------------------------------------------------------------
11186 +
11187 +subdir = src/pgcluster/libpgc
11188 +top_builddir = ../../..
11189 +include $(top_builddir)/src/Makefile.global
11190 +
11191 +OBJS = sem.o show.o signal.o
11192 +
11193 +all: SUBSYS.o
11194 +
11195 +SUBSYS.o: $(OBJS)
11196 +       $(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS)
11197 +
11198 +depend dep:
11199 +       $(CC) -MM $(CFLAGS) *.c >depend
11200 +
11201 +distclean: clean
11202 +
11203 +clean: 
11204 +       rm -f SUBSYS.o $(OBJS) 
11205 +
11206 +ifeq (depend,$(wildcard depend))
11207 +include depend
11208 +endif
11209 diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/libpgc.h pgcluster-1.7.0rc7/src/pgcluster/libpgc/libpgc.h
11210 --- postgresql-8.2.4/src/pgcluster/libpgc/libpgc.h      1970-01-01 01:00:00.000000000 +0100
11211 +++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/libpgc.h    2007-02-18 22:52:17.000000000 +0100
11212 @@ -0,0 +1,47 @@
11213 +/*-------------------------------------------------------------------------
11214 + *
11215 + * libpgc.h
11216 + *       external definition of the function for pgreplicate and pglb
11217 + *
11218 + * This should be the first file included by replicate modules.
11219 + *
11220 + *-------------------------------------------------------------------------
11221 + */
11222 +#ifndef LIBPGC_H
11223 +#define        LIBPGC_H
11224 +
11225 +#include <stdio.h>
11226 +
11227 +/* character length of IP address */
11228 +#define ADDRESS_LENGTH (24)
11229 +
11230 +/* logging file data tag in configuration file */
11231 +#define        LOG_INFO_TAG    "Log_File_Info"
11232 +#define        FILE_NAME_TAG   "File_Name"
11233 +#define        FILE_SIZE_TAG   "File_Size"
11234 +#define        LOG_ROTATION_TAG        "Rotate"
11235 +
11236 +typedef struct {
11237 +       char file_name[256];
11238 +       FILE * fp;
11239 +       int max_size;
11240 +       int rotation;
11241 +} LogFileInf;
11242 +
11243 +extern LogFileInf * LogFileData;
11244 +/* external definition of the function in sem.c */
11245 +extern void PGRsem_unlock( int semid, short sem_num );
11246 +extern void PGRsem_lock( int semid, short sem_num );
11247 +
11248 +/* external definition of the function in show.c */
11249 +extern FILE * PGRopen_log_file(char * fname, int max_size, int rotation);
11250 +extern void PGRclose_log_file(FILE * fp);
11251 +extern void show_debug(const char * fmt,...);
11252 +extern void show_error(const char * fmt,...);
11253 +extern void PGRwrite_log_file(FILE * fp, const char * fmt,...);
11254 +
11255 +/* external definition of the function in signal.c */
11256 +typedef void (*PGRsighandler)(int);
11257 +extern PGRsighandler PGRsignal(int signo, PGRsighandler sighandler);
11258 +
11259 +#endif /* LIBPGC_H */
11260 diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/sem.c pgcluster-1.7.0rc7/src/pgcluster/libpgc/sem.c
11261 --- postgresql-8.2.4/src/pgcluster/libpgc/sem.c 1970-01-01 01:00:00.000000000 +0100
11262 +++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/sem.c       2007-02-18 22:52:17.000000000 +0100
11263 @@ -0,0 +1,67 @@
11264 +/*--------------------------------------------------------------------
11265 + * FILE:
11266 + *     sem.c
11267 + *
11268 + * NOTE:
11269 + *     This file contains the semaphore lock/unlock functions that are
11270 + *     called from the pgreplicate sources.
11271 + *
11272 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
11273 + *--------------------------------------------------------------------
11274 + */
11275 +#include <stdio.h>
11276 +#include <unistd.h>
11277 +#include <sys/types.h>
11278 +#include <errno.h>
11279 +#include <sys/ipc.h>
11280 +#include <sys/sem.h>
11281 +#include <signal.h>
11282 +
11283 +extern void show_debug(const char * fmt,...);
11284 +
11285 +void PGRsem_unlock( int semid, short sem_num );
11286 +void PGRsem_lock( int semid, short sem_num );
11287 +
11288 +#define PGR_SEM_UNLOCK_WAIT_MSEC (100)
11289 +#define PGR_SEM_LOCK_WAIT_MSEC (500)
11290 +/* Release (increment) semaphore sem_num of set semid; retries until semop() succeeds. */
11291 +void
11292 +PGRsem_unlock( int semid, short sem_num )
11293 +{
11294 +       int     status = 0;
11295 +       struct sembuf sops;
11296 +
11297 +       sops.sem_num = sem_num;
11298 +       sops.sem_op = 1;
11299 +       /* sem_flg = 0: plain blocking call (IPC_NOWAIT deliberately not set) */
11300 +       sops.sem_flg = 0;
11301 +       do
11302 +       {
11303 +               status = semop(semid, &sops, 1);
11304 +               if ((status == -1) && (errno != EINTR))  /* EINTR: retry at once */
11305 +               {
11306 +                       usleep(PGR_SEM_UNLOCK_WAIT_MSEC * 1000);        /* msec -> usec */
11307 +               }
11308 +       } while (status == -1);
11309 +}
11310 +/* Acquire (decrement) semaphore sem_num of set semid; retries until semop() succeeds. */
11311 +void
11312 +PGRsem_lock( int semid, short sem_num )
11313 +{
11314 +       int     status = 0;
11315 +       struct sembuf sops;
11316 +
11317 +       sops.sem_num = sem_num;
11318 +       sops.sem_op = -1;
11319 +       /* sem_flg = 0: plain blocking call (IPC_NOWAIT deliberately not set) */
11320 +       sops.sem_flg = 0;
11321 +       do
11322 +       {
11323 +               status = semop(semid, &sops, 1);
11324 +               if ((status == -1) && (errno != EINTR))  /* EINTR: retry at once */
11325 +               {
11326 +                       usleep(PGR_SEM_LOCK_WAIT_MSEC * 1000);  /* msec -> usec */
11327 +               }
11328 +       } while (status == -1);
11329 +}
11330 +
11331 diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/show.c pgcluster-1.7.0rc7/src/pgcluster/libpgc/show.c
11332 --- postgresql-8.2.4/src/pgcluster/libpgc/show.c        1970-01-01 01:00:00.000000000 +0100
11333 +++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/show.c      2007-02-18 22:52:17.000000000 +0100
11334 @@ -0,0 +1,226 @@
11335 +/*--------------------------------------------------------------------
11336 + * FILE:
11337 + *     show.c
11338 + *
11339 + * NOTE:
11340 + *     This file is composed of the logging and debug functions
11341 + *
11342 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
11343 + *--------------------------------------------------------------------
11344 + */
11345 +#include <sys/time.h>
11346 +#include <sys/types.h>
11347 +#include <sys/stat.h>
11348 +#include <stdio.h>
11349 +#include <stdarg.h>
11350 +#include <stdlib.h>
11351 +#include <errno.h>
11352 +#include <string.h>
11353 +#include <time.h>
11354 +#include <unistd.h>
11355 +#include "libpgc.h"
11356 +
11357 +#define TIMESTAMP_SIZE 19              /* format `YYYY-MM-DD HH:MM:SS' */
11358 +
11359 +/*--------------------------------------
11360 + * PROTOTYPE DECLARATION
11361 + *--------------------------------------
11362 + */
11363 +static char* get_current_timestamp(void);
11364 +static int file_rotation(char * fname, int max_rotation);
11365 +
11366 +FILE * PGRopen_log_file(char * fname, int max_size, int rotation);
11367 +void PGRclose_log_file(FILE * fp);
11368 +void show_debug(const char * fmt,...);
11369 +void show_error(const char * fmt,...);
11370 +void PGRwrite_log_file(FILE * fp, const char * fmt,...);
11371 +
11372 +extern int Debug_Print;
11373 +extern int Log_Print;
11374 +
11375 +LogFileInf * LogFileData = NULL;
11376 +/* Return the local time as "YYYY-MM-DD HH:MM:SS" in a static buffer that every call overwrites. */
11377 +static char*
11378 +get_current_timestamp(void)
11379 +{
11380 +       time_t now;
11381 +       static char buf[TIMESTAMP_SIZE + 1];
11382 +
11383 +       now = time(NULL);
11384 +       strftime(buf, sizeof(buf),
11385 +                "%Y-%m-%d %H:%M:%S", localtime(&now));
11386 +       return buf;
11387 +}
11388 +/* When Debug_Print is set, print a DEBUG line to stdout and, if Log_Print is set, to the log file. */
11389 +void
11390 +show_debug(const char * fmt,...)
11391 +{
11392 +       va_list ap;
11393 +       char *timestamp;
11394 +       char buf[256];
11395 +
11396 +       if (Debug_Print)
11397 +       {
11398 +               timestamp = get_current_timestamp();
11399 +               fprintf(stdout,"%s [%d] DEBUG:",timestamp, getpid());
11400 +               va_start(ap,fmt);
11401 +               vfprintf(stdout,fmt,ap);
11402 +               va_end(ap);
11403 +               fprintf(stdout,"\n");
11404 +               fflush(stdout);
11405 +               if ((Log_Print) && (LogFileData != NULL))
11406 +               {
11407 +                       FILE * fp = PGRopen_log_file(LogFileData->file_name, LogFileData->max_size, LogFileData->rotation);
11408 +                       va_start(ap,fmt);
11409 +                       vsnprintf(buf,sizeof(buf),fmt,ap);
11410 +                       va_end(ap);
11411 +                       /* buf is finished text, not a format: it may contain '%' */
11412 +                       PGRwrite_log_file(fp, "%s", buf);
11413 +                       PGRclose_log_file(fp);
11414 +               }
11415 +       }
11416 +}
11417 +
11418 +void
11419 +show_error(const char * fmt,...)
11420 +{
11421 +       va_list ap;
11422 +       char buf[256], *timestamp;
11423 +
11424 +       if (Debug_Print)
11425 +       {
11426 +               timestamp = get_current_timestamp();
11427 +               fprintf(stderr,"%s [%d] ERROR:",timestamp, getpid());
11428 +               va_start(ap,fmt);
11429 +               vfprintf(stderr,fmt,ap);
11430 +               va_end(ap);
11431 +               fprintf(stderr,"\n");
11432 +               fflush(stderr);
11433 +       }
11434 +       if ((Log_Print) && (LogFileData != NULL))
11435 +       {
11436 +               FILE * fp = NULL;
11437 +               fp = PGRopen_log_file(LogFileData->file_name, LogFileData->max_size, LogFileData->rotation);
11438 +               va_start(ap,fmt);
11439 +               vsnprintf(buf,sizeof(buf),fmt,ap);
11440 +               va_end(ap);
11441 +               PGRwrite_log_file(fp, buf);
11442 +               PGRclose_log_file(fp);
11443 +       }
11444 +}
11445 +/* Append "<ctime() timestamp> <formatted message>\n" to fp; does nothing when fp is NULL. */
11446 +void
11447 +PGRwrite_log_file(FILE * fp, const char * fmt,...)
11448 +{
11449 +       char buf[256];  /* formatted message, truncated by vsnprintf() to fit */
11450 +       char log[288];  /* timestamp prefix + message + newline */
11451 +       char * p;
11452 +       va_list ap;
11453 +       time_t t;
11454 +
11455 +       if (fp == NULL)
11456 +       {
11457 +               return;
11458 +       }
11459 +       if (time(&t) < 0)
11460 +       {
11461 +               return;
11462 +       }
11463 +       snprintf(log,sizeof(log),"%s ",ctime(&t));
11464 +       p = strchr(log,'\n');   /* ctime() text ends with a newline */
11465 +       if (p != NULL)
11466 +       {
11467 +               *p = ' ';       /* keep the whole record on one line */
11468 +       }
11469 +       va_start(ap,fmt);
11470 +       vsnprintf(buf,sizeof(buf),fmt,ap);
11471 +       va_end(ap);
11472 +       strcat(log,buf);
11473 +       strcat(log,"\n");
11474 +       if (fputs(log,fp) >= 0)
11475 +       {
11476 +               fflush(fp);
11477 +       }
11478 +}
11479 +/* Open fname for append, rotating it first if max_size > 0 and it is larger; NULL on failure. */
11480 +FILE *
11481 +PGRopen_log_file(char * fname, int max_size, int rotation)
11482 +{
11483 +       int rtn;
11484 +       struct stat st;
11485 +
11486 +       if (fname == NULL)
11487 +       {
11488 +               return (FILE *)NULL;
11489 +       }
11490 +
11491 +       if (max_size > 0)       /* a non-positive max_size disables rotation */
11492 +       {
11493 +               rtn = stat(fname,&st);
11494 +               if (rtn == 0)   /* a file that cannot be stat()ed is simply opened below */
11495 +               {
11496 +                       if (st.st_size > max_size)
11497 +                       {
11498 +                               if (file_rotation(fname, rotation) < 0)
11499 +                               {
11500 +                                       return (FILE *)NULL;
11501 +                               }
11502 +                       }
11503 +               }
11504 +       }
11505 +       return (fopen(fname,"a"));
11506 +}
11507 +/* Flush and close fp; a NULL fp is ignored. */
11508 +void
11509 +PGRclose_log_file(FILE * fp)
11510 +{
11511 +       if (fp != NULL)
11512 +       {
11513 +               fflush(fp);
11514 +               fclose(fp);
11515 +       }
11516 +}
11517 +
11518 +static int
11519 +file_rotation(char * fname, int max_rotation)
11520 +{
11521 +       char * func = "file_rotation()";
11522 +       int i;
11523 +       int rtn;
11524 +       struct stat st;
11525 +       char old_fname[256];
11526 +       char new_fname[256];
11527 +
11528 +       if ((fname == NULL) || (max_rotation < 0))
11529 +       {
11530 +               return -1;
11531 +       }
11532 +
11533 +       for ( i = max_rotation ; i > 1 ; i -- )
11534 +       {
11535 +               sprintf(old_fname,"%s.%d",fname,i-1);
11536 +               rtn = stat(old_fname,&st);
11537 +               if (rtn == 0)
11538 +               {
11539 +                       sprintf(new_fname,"%s.%d",fname,i);
11540 +                       rtn = rename(old_fname, new_fname);
11541 +                       if (rtn < 0)
11542 +                       {
11543 +                               show_error("%s:rotate failed: (%s)",func,strerror(errno));
11544 +                               return rtn;
11545 +                       }
11546 +               }
11547 +       }
11548 +       if (max_rotation > 0)
11549 +       {
11550 +               sprintf(new_fname,"%s.1",fname);
11551 +               rtn = rename(fname, new_fname);
11552 +       }
11553 +       else
11554 +       {
11555 +               rtn = unlink(fname);
11556 +       }
11557 +
11558 +       return rtn;
11559 +}
11560 +
11561 diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/signal.c pgcluster-1.7.0rc7/src/pgcluster/libpgc/signal.c
11562 --- postgresql-8.2.4/src/pgcluster/libpgc/signal.c      1970-01-01 01:00:00.000000000 +0100
11563 +++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/signal.c    2007-02-18 22:52:17.000000000 +0100
11564 @@ -0,0 +1,35 @@
11565 +/*--------------------------------------------------------------------
11566 + * FILE:
11567 + *     signal.c
11568 + *
11569 + * NOTE:
11570 + *     This file is composed of the functions to set signal handler
11571 + *
11572 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
11573 + *--------------------------------------------------------------------
11574 + */
11575 +
11576 +#include <signal.h>
11577 +#include "pg_config.h"
11578 +#include "libpgc.h"
11579 +
11580 +/* Install sighandler for signo, using sigaction() when HAVE_POSIX_SIGNALS is
11581 + * defined and signal() otherwise.  Returns the previous handler, or SIG_ERR.
11582 + */
11583 +PGRsighandler
11584 +PGRsignal(int signo, PGRsighandler sighandler)
11585 +{
11586 +#if !defined(HAVE_POSIX_SIGNALS)
11587 +       return signal(signo, sighandler);
11588 +#else
11589 +       struct sigaction act, oact;
11590 +
11591 +       /* sa_flags = 0 below: SA_RESTART is not requested for this handler */
11592 +       act.sa_handler = sighandler;
11593 +       sigemptyset(&act.sa_mask);
11594 +       act.sa_flags = 0;
11595 +       if (sigaction(signo, &act, &oact) < 0)
11596 +               return SIG_ERR;
11597 +       return oact.sa_handler;
11598 +#endif   /* !HAVE_POSIX_SIGNALS */
11599 +}
11600 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/AUTHORS pgcluster-1.7.0rc7/src/pgcluster/pglb/AUTHORS
11601 --- postgresql-8.2.4/src/pgcluster/pglb/AUTHORS 1970-01-01 01:00:00.000000000 +0100
11602 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/AUTHORS       2007-02-18 22:52:17.000000000 +0100
11603 @@ -0,0 +1,4 @@
11604 +Authors of pglb
11605 +
11606 +pglb was written by Atsushi Mitani.
11607 +pglb is based on pg_pool which is written by Tatsuo Ishii.
11608 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/COPYING pgcluster-1.7.0rc7/src/pgcluster/pglb/COPYING
11609 --- postgresql-8.2.4/src/pgcluster/pglb/COPYING 1970-01-01 01:00:00.000000000 +0100
11610 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/COPYING       2007-02-18 22:52:17.000000000 +0100
11611 @@ -0,0 +1,14 @@
11612 +Copyright (c) 2003-2006        Atsushi Mitani
11613 +
11614 +Permission to use, copy, modify, and distribute this software and
11615 +its documentation for any purpose and without fee is hereby
11616 +granted, provided that the above copyright notice appear in all
11617 +copies and that both that copyright notice and this permission
11618 +notice appear in supporting documentation, and that the name of the
11619 +author not be used in advertising or publicity pertaining to
11620 +distribution of the software without specific, written prior
11621 +permission. The author makes no representations about the
11622 +suitability of this software for any purpose.  It is provided "as
11623 +is" without express or implied warranty.
11624 +
11625 +Portions copyright (c) 2003-2006, Tatsuo Ishii
11626 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/Makefile pgcluster-1.7.0rc7/src/pgcluster/pglb/Makefile
11627 --- postgresql-8.2.4/src/pgcluster/pglb/Makefile        1970-01-01 01:00:00.000000000 +0100
11628 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/Makefile      2007-02-18 22:52:17.000000000 +0100
11629 @@ -0,0 +1,38 @@
11630 +#-------------------------------------------------------------------------
11631 +# Makefile for src/pgcluster/pglb
11632 +# pglb is linked from $(OBJS) plus the prebuilt $(EXTRA_OBJS); the latter are
11633 +# not prerequisites of the pglb rule, so they must already exist at link time.
11634 +#-------------------------------------------------------------------------
11635 +
11636 +subdir = src/pgcluster/pglb
11637 +top_builddir = ../../..
11638 +include $(top_builddir)/src/Makefile.global
11639 +
11640 +OBJS= child.o cluster_table.o load_balance.o main.o pool_auth.o \
11641 +       pool_connection_pool.o pool_process_query.o pool_stream.o \
11642 +       pool_params.o recovery.o socket.o lifecheck.o
11643 +
11644 +EXTRA_OBJS = $(top_builddir)/src/backend/libpq/replicate_com.o ../libpgc/SUBSYS.o
11645 +
11646 +CFLAGS += -DPRINT_DEBUG
11647 +override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) -DBINDIR=\"$(bindir)\"
11648 +
11649 +all: pglb
11650 +
11651 +pglb: $(OBJS) $(libpq_builddir)/libpq.a 
11652 +       $(CC) $(CFLAGS) $(OBJS) $(EXTRA_OBJS) $(libpq) $(LDFLAGS) $(LIBS) -o $@
11653 +
11654 +install: all installdirs
11655 +       $(INSTALL_PROGRAM) pglb$(X) $(DESTDIR)$(bindir)/pglb$(X)
11656 +       $(INSTALL_DATA) pglb.conf.sample  $(DESTDIR)$(datadir)/pglb.conf.sample
11657 +
11658 +installdirs:
11659 +       $(mkinstalldirs) $(DESTDIR)$(bindir)
11660 +       $(mkinstalldirs) $(DESTDIR)$(datadir)
11661 +
11662 +uninstall:
11663 +       rm -f $(addprefix $(DESTDIR)$(bindir)/, pglb$(X))
11664 +       rm -f $(DESTDIR)$(datadir)/pglb.conf.sample
11665 +
11666 +clean distclean maintainer-clean:
11667 +       rm -f pglb$(X) $(OBJS) 
11668 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/child.c pgcluster-1.7.0rc7/src/pgcluster/pglb/child.c
11669 --- postgresql-8.2.4/src/pgcluster/pglb/child.c 1970-01-01 01:00:00.000000000 +0100
11670 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/child.c       2007-02-18 22:52:17.000000000 +0100
11671 @@ -0,0 +1,1194 @@
11672 +/*--------------------------------------------------------------------
11673 + * FILE:
11674 + *     child.c
11675 + *
11676 + * NOTE:
11677 + *     This file contains the functions that are executed by the
11678 + *     child processes of pglb.
11679 + *
11680 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
11681 + * Portions Copyright (c) 2003-2006, Tatsuo Ishii
11682 + *--------------------------------------------------------------------
11683 + */
11684 +/*
11685 + * Permission to use, copy, modify, and distribute this software and
11686 + * its documentation for any purpose and without fee is hereby
11687 + * granted, provided that the above copyright notice appear in all
11688 + * copies and that both that copyright notice and this permission
11689 + * notice appear in supporting documentation, and that the name of the
11690 + * author not be used in advertising or publicity pertaining to
11691 + * distribution of the software without specific, written prior
11692 + * permission. The author makes no representations about the
11693 + * suitability of this software for any purpose.  It is provided "as
11694 + * is" without express or implied warranty.
11695 + *
11696 +*/
11697 +#include "postgres.h"
11698 +
11699 +#include <stdio.h>
11700 +#include <string.h>
11701 +#include <unistd.h>
11702 +#include <signal.h>
11703 +#include <sys/wait.h>
11704 +#include <sys/time.h>
11705 +#include <ctype.h>
11706 +#include <sys/types.h>
11707 +#include <sys/stat.h>
11708 +#include <sys/socket.h>
11709 +#include <sys/ipc.h>
11710 +#include <netdb.h>
11711 +#include <netinet/in.h>
11712 +#include <errno.h>
11713 +#include <fcntl.h>
11714 +#include <time.h>
11715 +#include <sys/param.h>
11716 +#include <arpa/inet.h>
11717 +#include <sys/file.h>
11718 +
11719 +#ifdef HAVE_NETINET_TCP_H
11720 +#include <netinet/tcp.h>
11721 +#endif
11722 +
11723 +#ifdef HAVE_CRYPT_H
11724 +#include <crypt.h>
11725 +#endif
11726 +
11727 +#include "postgres_fe.h"
11728 +#include "libpq/pqcomm.h"
11729 +
11730 +#include "replicate_com.h"
11731 +#include "pglb.h"
11732 +
11733 +/*--------------------------------------
11734 + * GLOBAL VARIABLE DECLARATION
11735 + *--------------------------------------
11736 + */
11737 +POOL_CONNECTION * Frontend = NULL;
11738 +
11739 +/*--------------------------------------
11740 + * PROTOTYPE DECLARATION
11741 + *--------------------------------------
11742 + */
11743 +int PGRpre_fork_children(ClusterTbl * ptr);
11744 +int PGRpre_fork_child(ClusterTbl * ptr);
11745 +int PGRdo_child( int use_pool);
11746 +int PGRcreate_child(ClusterTbl * cluster_p);
11747 +pid_t PGRscan_child_tbl(ClusterTbl * cluster_p);
11748 +void notice_backend_error(void);
11749 +void do_pooling_child(int sig);
11750 +int PGRset_status_to_child_tbl(pid_t pid, int status);
11751 +int PGRadd_child_tbl(ClusterTbl * cluster_p, pid_t pid, int status);
11752 +int PGRget_child_status(pid_t pid);
11753 +void PGRreturn_connection_full_error(void);
11754 +void PGRreturn_no_connection_error(void);
11755 +void PGRquit_children_on_cluster(int rec_no);
11756 +
11757 +#ifdef NONE_BLOCK
11758 +static void set_nonblock(int fd);
11759 +#endif
11760 +static void unset_nonblock(int fd);
11761 +static POOL_CONNECTION *do_accept(int unix_fd, int inet_fd);
11762 +static PGR_StartupPacket *read_startup_packet(POOL_CONNECTION *cp);
11763 +static int send_startup_packet(POOL_CONNECTION_POOL_SLOT *cp);
11764 +static void cancel_request(CancelPacket *sp, int secondary_backend);
11765 +static POOL_CONNECTION_POOL *connect_backend(PGR_StartupPacket *sp, POOL_CONNECTION *frontend);
11766 +static int send_params(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
11767 +static void child_end(int sig);
11768 +static void PGRreturn_with_error(char *msg);
11769 +
11770 +
11771 +/*--------------------------------------------------------------------
11772 + * SYMBOL
11773 + *    PGRpre_fork_children()
11774 + * NOTES
11775 + *    pre-fork the child processes of every cluster server in the table
11776 + * ARGS
11777 + *    ClusterTbl * ptr: pointer of cluster server table (I)
11778 + * RETURN
11779 + *    OK: STATUS_OK
11780 + *    NG: STATUS_ERROR (ptr is NULL)
11781 + *--------------------------------------------------------------------
11782 + */
11783 +int
11784 +PGRpre_fork_children(ClusterTbl * ptr)
11785 +{
11786 +       int visited;
11787 +
11788 +       if (ptr == NULL)
11789 +       {
11790 +               return STATUS_ERROR;
11791 +       }
11792 +
11793 +       /* walk the table up to its end marker, at most ClusterNum entries */
11794 +       for (visited = 0; (ptr->useFlag != TBL_END) && (visited < ClusterNum); visited ++, ptr ++)
11795 +       {
11796 +               (void) PGRpre_fork_child(ptr);
11797 +       }
11798 +
11799 +       return STATUS_OK;
11800 +}
11801 +
11802 +/*--------------------------------------------------------------------
11803 + * SYMBOL
11804 + *    PGRpre_fork_child()
11805 + * NOTES
11806 + *    pre-fork max_connect * Max_Pool child processes for one cluster server
11807 + * ARGS
11808 + *    ClusterTbl * ptr: pointer of cluster server table (I)
11809 + * RETURN
11810 + *    OK: STATUS_OK
11811 + *    NG: STATUS_ERROR (ptr is NULL or points at the table end marker)
11812 + *--------------------------------------------------------------------
11813 + */
11814 +int
11815 +PGRpre_fork_child(ClusterTbl * ptr)
11816 +{
11817 +       int forked = 0;
11818 +
11819 +       /* nothing to fork for a missing entry or the table terminator */
11820 +       if ((ptr == NULL) || (ptr->useFlag == TBL_END))
11821 +       {
11822 +               return STATUS_ERROR;
11823 +       }
11824 +
11825 +       /* the bound is re-read on every pass, exactly like a for-loop condition */
11826 +       while (forked < ptr->max_connect * Max_Pool)
11827 +       {
11828 +               /* the pid returned for the new child is not used here */
11829 +               (void) PGRcreate_child(ptr);
11830 +               forked ++;
11831 +       }
11832 +       return STATUS_OK;
11833 +}
11834 +/*--------------------------------------------------------------------
11835 + * SYMBOL
11836 + *    PGRdo_child()
11837 + * NOTES
11838 + *    execute child process
11839 + * ARGS
11840 + *    int use_pool: usage flag of connection pooling (I)
11841 + * RETURN
11842 + *    OK: STATUS_OK
11843 + *    NG: STATUS_ERROR
11844 + *--------------------------------------------------------------------
11845 + */
11846 +int
11847 +PGRdo_child( int use_pool)
11848 +{
11849 +       char * func = "PGRdo_child()";
11850 +       pid_t pid = 0;
11851 +       PGR_StartupPacket *sp = NULL;
11852 +       POOL_CONNECTION *frontend = NULL;
11853 +       POOL_CONNECTION_POOL *backend = NULL;
11854 +       int status = 0;
11855 +       int connection_reuse = 1;
11856 +       int ssl_request = 0;
11857 +       int count = 0;
11858 +
11859 +       pid = getpid();
11860 +#ifdef PRINT_DEBUG
11861 +       show_debug("%s:I am %d",func, pid);
11862 +#endif                 
11863 +
11864 +       /* set up signal handlers */
11865 +       PGRsignal(SIGALRM, SIG_DFL);
11866 +       PGRsignal(SIGTERM, child_end);
11867 +       PGRsignal(SIGHUP, child_end);
11868 +       PGRsignal(SIGINT, child_end);
11869 +       PGRsignal(SIGUSR1, SIG_IGN);
11870 +       PGRsignal(SIGUSR2, SIG_IGN);
11871 +
11872 +#ifdef NONE_BLOCK
11873 +       /* set listen fds to none block */
11874 +       set_nonblock(Frontend_FD.unix_fd);
11875 +       set_nonblock(Frontend_FD.inet_fd);
11876 +#endif
11877 +
11878 +retry_accept:
11879 +       /* perform accept() */
11880 +       frontend = do_accept(Frontend_FD.unix_fd,Frontend_FD.inet_fd);
11881 +       if (frontend == NULL)
11882 +       {
11883 +               /* accept() failed. return to the accept() loop */
11884 +               PGRset_status_to_child_tbl(pid,TBL_FREE);
11885 +               return STATUS_ERROR;
11886 +       }
11887 +
11888 +       /* unset frontend fd tp none block */
11889 +       unset_nonblock(frontend->fd);
11890 +
11891 +       /* read the startup packet */
11892 +       sp = 0;
11893 +retry_startup:
11894 +       if (sp)
11895 +       {
11896 +               free(sp->startup_packet);
11897 +               free(sp->database);
11898 +               free(sp->user);
11899 +               free(sp);
11900 +       }
11901 +
11902 +       sp = read_startup_packet(frontend);
11903 +       if (sp == NULL)
11904 +       {
11905 +               /* failed to read the startup packet. return to the
11906 +                  accept() loop */
11907 +               pool_close(frontend);
11908 +               PGRset_status_to_child_tbl(pid,TBL_FREE);
11909 +               return STATUS_ERROR;
11910 +       }
11911 +       PGRset_status_to_child_tbl(pid,TBL_ACCEPT);
11912 +
11913 +       /* cancel request? */
11914 +       if (sp->major == 1234 && sp->minor == 5678)
11915 +       {
11916 +               cancel_request((CancelPacket *)sp->startup_packet, 0);
11917 +               pool_close(frontend);
11918 +               return STATUS_ERROR;
11919 +       }
11920 +
11921 +       /* SSL? */
11922 +       if (sp->major == 1234 && sp->minor == 5679)
11923 +       {
11924 +               /* SSL not supported */
11925 +#ifdef PRINT_DEBUG
11926 +               show_debug("%s:SSLRequest: sent N; retry startup",func);
11927 +#endif                 
11928 +               if (ssl_request && use_pool)
11929 +               {
11930 +                       pool_close(frontend);
11931 +                       return STATUS_ERROR;
11932 +               }
11933 +
11934 +               /*
11935 +                * say to the frontend "we do not suppport SSL"
11936 +                * note that this is not a NOTICE response despite it's an 'N'!
11937 +                */
11938 +               pool_write_and_flush(frontend, "N", 1);
11939 +               ssl_request = 1;
11940 +               goto retry_startup;
11941 +       }
11942 +
11943 +       /*
11944 +        * Ok, negotiaton with frontend has been done. Let's go to the next step.
11945 +        */
11946 +       /*
11947 +        * if there's no connection associated with user and database,
11948 +        * we need to connect to the backend and send the startup packet.
11949 +        */
11950 +       count = 0;
11951 +       if ((backend = pool_get_cp(sp->user, sp->database, sp->major)) == NULL)
11952 +       {
11953 +               connection_reuse = 0;
11954 +
11955 +               if ((backend = connect_backend(sp, frontend)) == NULL)
11956 +               {
11957 +                       /*
11958 +                       PGRset_status_on_cluster_tbl(TBL_ERROR,CurrentCluster);
11959 +                       return STATUS_ERROR;
11960 +                       */
11961 +                       goto retry_accept;
11962 +               }
11963 +       }
11964 +       else
11965 +       {
11966 +               /* reuse existing connection to backend */
11967 +
11968 +               if (pool_do_reauth(frontend, backend))
11969 +               {
11970 +                       pool_close(frontend);
11971 +                       return STATUS_ERROR;
11972 +               }
11973 +
11974 +               if (MAJOR(backend) == 3)
11975 +               {
11976 +                       if (send_params(frontend, backend))
11977 +                       {
11978 +                               pool_close(frontend);
11979 +                               return STATUS_ERROR;
11980 +                       }
11981 +               }
11982 +
11983 +               /* send ReadyForQuery to frontend */
11984 +               pool_write(frontend, "Z", 1);
11985 +
11986 +               if (MAJOR(backend) == 3)
11987 +               {
11988 +                       int len;
11989 +                       char tstate;
11990 +
11991 +                       len = htonl(5);
11992 +                       pool_write(frontend, &len, sizeof(len));
11993 +                       tstate = TSTATE(backend);
11994 +                       pool_write(frontend, &tstate, 1);
11995 +               }
11996 +
11997 +               if (pool_flush(frontend) < 0)
11998 +               {
11999 +                       pool_close(frontend);
12000 +                       return STATUS_ERROR;
12001 +               }
12002 +
12003 +       }
12004 +
12005 +       /* query process loop */
12006 +       for (;;)
12007 +       {
12008 +               POOL_STATUS status;
12009 +
12010 +               status = pool_process_query(frontend, backend, 0);
12011 +
12012 +               switch (status)
12013 +               {
12014 +                       /* client exits */
12015 +                       case POOL_END:
12016 +                               /* do not cache connection to template0, template1, regression */
12017 +                               if (!strcmp(sp->database, "template0") || !strcmp(sp->database, "template1") ||
12018 +                                   !strcmp(sp->database, "regression") || use_pool == NOT_USE_CONNECTION_POOL)
12019 +                               {
12020 +                                       pool_close(frontend);
12021 +                                       pool_send_frontend_exits(backend);
12022 +                                       pool_discard_cp(sp->user, sp->database, sp->major);
12023 +                               }
12024 +                               else
12025 +                               {
12026 +                                       POOL_STATUS status1;
12027 +
12028 +                                       /* send reset request to backend */
12029 +                                       status1 = pool_process_query(frontend, backend, 1);
12030 +                                       pool_close(frontend);
12031 +
12032 +                                       /* if we detect errors on resetting connection, we need to discard
12033 +                                        * this connection since it might be in unknown status
12034 +                                        */
12035 +                                       if (status1 != POOL_CONTINUE)
12036 +                                               pool_discard_cp(sp->user, sp->database, sp->major);
12037 +                                       else
12038 +                                               pool_connection_pool_timer(backend);
12039 +                               }
12040 +                               break;
12041 +                       
12042 +                       /* error occured. discard backend connection pool
12043 +                          and disconnect connection to the frontend */
12044 +                       case POOL_ERROR:
12045 +                               show_error("%s:do_child: exits with status 1 due to error",func);
12046 +                               break;
12047 +
12048 +                       /* fatal error occured. just exit myself... */
12049 +                       case POOL_FATAL:
12050 +                               show_error("%s:do_child: fatal error occured",func);
12051 +                               notice_backend_error();
12052 +                               break;
12053 +
12054 +                       /* not implemented yet */
12055 +                       case POOL_IDLE:
12056 +                               do_accept(Frontend_FD.unix_fd,Frontend_FD.inet_fd);
12057 +#ifdef PRINT_DEBUG
12058 +                               show_debug("%s:accept while idle",func);
12059 +#endif                 
12060 +                               break;
12061 +
12062 +                       default:
12063 +                               break;
12064 +               }
12065 +
12066 +               if (status != POOL_CONTINUE)
12067 +                       break;
12068 +       }
12069 +       if ((status == POOL_ERROR) || 
12070 +               (status == POOL_FATAL))
12071 +       {
12072 +               PGRset_status_to_child_tbl(pid,TBL_FREE);
12073 +               return STATUS_ERROR;
12074 +       }
12075 +       PGRset_status_to_child_tbl(pid,TBL_INIT);
12076 +       return STATUS_OK;
12077 +}
12078 +
12079 +/*--------------------------------------------------------------------
12080 + * SYMBOL
12081 + *    PGRcreate_child()
12082 + * NOTES
12083 + *    create child process: the child installs its signal handlers and then sleeps in pause() until SIGUSR1 runs do_pooling_child()
12084 + * ARGS
12085 + *    ClusterTbl * cluster_p: pointer of cluster server table (I)
12086 + * RETURN
12087 + *    OK: pid of the forked child (parent side; the child side never returns from here)
12088 + *    NG: STATUS_ERROR (cluster_p is NULL or fork() failed)
12089 + *--------------------------------------------------------------------
12090 + */
12091 +int 
12092 +PGRcreate_child(ClusterTbl * cluster_p)
12093 +{
12094 +       char * func = "PGRcreate_child()";
12095 +       pid_t pid,pgid;
12096 +
12097 +       if (cluster_p == NULL)
12098 +               return STATUS_ERROR;
12099 +
12100 +#ifdef PRINT_DEBUG
12101 +       show_debug("%s:create child [%d@%s]",func,cluster_p->port,cluster_p->hostName);
12102 +#endif                 
12103 +       PGRsignal(SIGCHLD,PGRrecreate_child);   /* SIGCHLD handler of the calling (parent) process */
12104 +       pgid = getpgid((pid_t)0);               /* process group of the caller, applied to the child below */
12105 +       pid = fork();
12106 +       if (pid < 0)
12107 +       {
12108 +               show_error("%s:fork() failed. (%s)",func,strerror(errno));
12109 +               return STATUS_ERROR;
12110 +       }
12111 +       if (pid == 0)
12112 +       {
12113 +               CurrentCluster = cluster_p;     /* child: remember which cluster server it serves */
12114 +               if (pool_init_cp())
12115 +               {
12116 +                       show_error("%s:pool_init_cp failed",func);
12117 +                       exit(1);
12118 +               }
12119 +               PGRsignal(SIGCHLD,PGRchild_wait);
12120 +               PGRsignal(SIGTERM, child_end);
12121 +               PGRsignal(SIGHUP, child_end);
12122 +               PGRsignal(SIGINT, child_end);
12123 +               PGRsignal(SIGUSR1,do_pooling_child);
12124 +               setpgid((pid_t)0,pgid);
12125 +               for (;;)
12126 +               {
12127 +                       pause();                /* sleep until a signal handler has run */
12128 +                       PGRsignal(SIGUSR1,do_pooling_child);    /* re-install the handler after every wake-up */
12129 +               }
12130 +#ifdef PRINT_DEBUG
12131 +               show_debug("%s:create child end [%d@%s]",func,cluster_p->port,cluster_p->hostName);
12132 +#endif                 
12133 +               child_end(SIGTERM);             /* not reached: the for (;;) loop above has no exit */
12134 +       }
12135 +       else
12136 +       {
12137 +               PGRadd_child_tbl(cluster_p,pid,TBL_INIT);       /* parent: register the child as TBL_INIT; the result is not checked */
12138 +       }
12139 +       return pid;
12140 +}
12141 +
12142 +/*--------------------------------------------------------------------
12143 + * SYMBOL
12144 + *    PGRscan_child_tbl()
12145 + * NOTES
12146 + *    claim a child process that is waiting (TBL_INIT) for the given
12147 + *    cluster server: its entry is switched to TBL_USE before returning
12148 + * ARGS
12149 + *    ClusterTbl * cluster_p: pointer of cluster server table (I)
12150 + * RETURN
12151 + *    OK: child process id (always > 0)
12152 + *    NG: 0 (no waiting child, or a table is not initialized)
12153 + *--------------------------------------------------------------------
12154 + */
12155 +pid_t
12156 +PGRscan_child_tbl(ClusterTbl * cluster_p)
12157 +{
12158 +       char * func = "PGRscan_child_tbl()";
12159 +       ChildTbl * p;
12160 +
12161 +       /*
12162 +        * Every failure is reported as 0, the documented "NG" value.
12163 +        * STATUS_ERROR must not leak out as a pid_t: a caller handing a
12164 +        * negative pid to kill() would signal a whole process group.
12165 +        */
12166 +       if ( cluster_p == NULL)
12167 +       {
12168 +               show_error("%s:Cluster_Tbl is not initialize",func);
12169 +               return 0;
12170 +       }
12171 +       if (Child_Tbl == NULL)
12172 +       {
12173 +               show_error("%s:Child_Tbl is not initialize",func);
12174 +               return 0;
12175 +       }
12176 +
12177 +       for (p = Child_Tbl; p->useFlag != TBL_END; p++)
12178 +       {
12179 +               /* entries without a live pid are never handed out */
12180 +               if ((p->pid > 0) && (p->useFlag == TBL_INIT) &&
12181 +                       (p->rec_no == cluster_p->rec_no))
12182 +               {
12183 +                       p->useFlag = TBL_USE;
12184 +                       return (p->pid);
12185 +               }
12186 +       }
12187 +       return 0;
12188 +}
12189 +
12190 +/*
12191 + * Record a backend connection error for this process: its child table
12192 + * entry becomes TBL_ERROR and CurrentCluster is flagged TBL_ERROR_NOTICE.
12193 + * The SIGUSR2 notification of the parent (kill(parent, SIGUSR2);
12194 + * sleep(1);) is not performed; that code was commented out.
12195 + */
12196 +void
12197 +notice_backend_error(void)
12198 +{
12199 +       const pid_t self = getpid();
12200 +
12201 +       PGRset_status_to_child_tbl(self, TBL_ERROR);
12202 +       PGRset_status_on_cluster_tbl(TBL_ERROR_NOTICE, CurrentCluster);
12203 +}
12204 +
12205 +
12206 +/*
12207 + * SIGUSR1 handler of a pooling child (installed by PGRcreate_child()):
12208 + * run PGRdo_child() with connection pooling, release the connection of
12209 + * CurrentCluster, and end this child when PGRdo_child() did not succeed.
12210 + */
12211 +void do_pooling_child(int sig)
12212 +{
12213 +       char * func = "do_pooling_child()";
12214 +       pid_t self = getpid();          /* fetched as before; the value is not used */
12215 +       int session;
12216 +
12217 +       session = PGRdo_child(USE_CONNECTION_POOL);
12218 +       PGRrelease_connection(CurrentCluster);
12219 +
12220 +       if (session == STATUS_OK)
12221 +               return;
12222 +
12223 +       show_error("%s:PGRdo_child failed",func);
12224 +       child_end(SIGTERM);
12225 +}
12226 +
12227 +/*
12228 + * Store `status` as the useFlag of the child table entry owned by `pid`.
12229 + * Returns STATUS_OK when an entry was updated, STATUS_ERROR when
12230 + * Child_Tbl is NULL or no entry before the TBL_END marker has that pid.
12231 + */
12232 +int
12233 +PGRset_status_to_child_tbl(pid_t pid, int status)
12234 +{
12235 +       char * func = "PGRset_status_to_child_tbl()";
12236 +       ChildTbl * entry;
12237 +
12238 +       if (Child_Tbl == NULL)
12239 +       {
12240 +               show_error("%s:Child_Tbl is not initialize",func);
12241 +               return STATUS_ERROR;
12242 +       }
12243 +       /* the table ends at the entry whose useFlag is TBL_END */
12244 +       for (entry = Child_Tbl; entry->useFlag != TBL_END; entry++)
12245 +       {
12246 +               if (entry->pid != pid)
12247 +                       continue;
12248 +               entry->useFlag = status;
12249 +               return STATUS_OK;
12250 +       }
12251 +       return STATUS_ERROR;
12252 +}
12253 +
12254 +/*
12255 + * Register a child process in the child process table.
12256 + * The first entry whose useFlag is TBL_FREE or TBL_ERROR is overwritten
12257 + * with `status`, the rec_no of `cluster_p` and `pid`.
12258 + * Returns STATUS_OK on success; STATUS_ERROR when `cluster_p` or
12259 + * Child_Tbl is NULL or no reusable entry precedes the TBL_END marker.
12260 + */
12261 +int
12262 +PGRadd_child_tbl(ClusterTbl * cluster_p, pid_t pid, int status)
12263 +{
12264 +       char * func = "PGRadd_child_tbl()";
12265 +       ChildTbl * slot;
12266 +
12267 +       if (cluster_p == NULL)
12268 +       {
12269 +               show_error("%s:Cluster_Tbl is not initialize",func);
12270 +               return STATUS_ERROR;
12271 +       }
12272 +       if (Child_Tbl == NULL)
12273 +       {
12274 +               show_error("%s:Child_Tbl is not initialize",func);
12275 +               return STATUS_ERROR;
12276 +       }
12277 +       for (slot = Child_Tbl; slot->useFlag != TBL_END; slot++)
12278 +       {
12279 +               if ((slot->useFlag != TBL_FREE) && (slot->useFlag != TBL_ERROR))
12280 +                       continue;
12281 +               slot->useFlag = status;
12282 +               slot->rec_no = cluster_p->rec_no;
12283 +               slot->pid = pid;
12284 +               return STATUS_OK;
12285 +       }
12286 +       return STATUS_ERROR;
12287 +}
12288 +
12289 +/*
12290 + * Look up the useFlag of the child table entry owned by `pid`.
12291 + * Returns STATUS_ERROR when Child_Tbl is NULL or the pid is not found
12292 + * before the TBL_END marker.
12293 + */
12294 +int
12295 +PGRget_child_status(pid_t pid)
12296 +{
12297 +       char * func = "PGRget_child_status()";
12298 +       ChildTbl * entry = Child_Tbl;
12299 +
12300 +       if (entry == NULL)
12301 +       {
12302 +               show_error("%s:Child_Tbl is not initialize",func);
12303 +               return STATUS_ERROR;
12304 +       }
12305 +       for (; entry->useFlag != TBL_END; entry++)
12306 +       {
12307 +               if (entry->pid == pid)
12308 +                       return entry->useFlag;
12309 +       }
12310 +       return STATUS_ERROR;
12311 +}
12312 +
12313 +void 
12314 +PGRreturn_connection_full_error(void)
12315 +{
12316 +  PGRreturn_with_error( "Sorry, backend connection is full\n");
12317 +}
12318 +
12319 +void PGRreturn_no_connection_error(void)        /* answers the next pending client */
12320 +{
12321 +       PGRreturn_with_error("pglb could not connect to server: no cluster available.\n");
12322 +}
12323 +
12324 +/* Accept the next client, read its startup packet, reply 'E' + msg and close. */
12325 +static void 
12326 +PGRreturn_with_error (char *msg) 
12327 +{
12328 +       PGR_StartupPacket *sp;
12329 +       POOL_CONNECTION *frontend;
12330 +
12331 +       frontend = do_accept(Frontend_FD.unix_fd,Frontend_FD.inet_fd);
12332 +       if (frontend == NULL)
12333 +               return;         /* accept() failed. return to the accept() loop */
12334 +
12335 +       sp = read_startup_packet(frontend);
12336 +       if (sp != NULL)         /* after a failed read nothing is sent, the client is only closed */
12337 +       {
12338 +               pool_write_and_flush(frontend, "E", 1);
12339 +               pool_write_and_flush(frontend, msg, strlen(msg)+1);
12340 +               /*
12341 +                * The packet is not referenced after this point: release it so a
12342 +                * rejected client no longer leaks it.  NOTE(review): sp->user and
12343 +                * sp->database are left alone here; free them too once it is
12344 +                * confirmed that read_startup_packet() sets both on every success.
12345 +                */
12346 +               free(sp->startup_packet);
12347 +               free(sp);
12348 +       }
12349 +       pool_close(frontend);
12350 +}
12351 +
12352 +/*
12353 + * Stop the child processes registered for cluster `rec_no`: each one is
12354 + * sent SIGTERM, PGRchild_wait() is called and its entry is marked free.
12355 + * SIGCHLD is ignored meanwhile; the regular handler is installed again
12356 + * before returning, now also after a kill() failure.
12357 + */
12358 +void
12359 +PGRquit_children_on_cluster(int rec_no)
12360 +{
12361 +       char * func = "PGRquit_children_on_cluster()";
12362 +       ChildTbl * p;
12363 +
12364 +       if (Child_Tbl == NULL)
12365 +               return;
12366 +
12367 +       PGRsignal(SIGCHLD,SIG_IGN);
12368 +       for (p = Child_Tbl; p->useFlag != TBL_END; p++)
12369 +       {
12370 +               /* no live pid: kill(0,...) / kill(-1,...) would hit whole process groups */
12371 +               if ((p->rec_no != rec_no) || (p->pid <= 0))
12372 +                       continue;
12373 +               if (kill (p->pid,SIGTERM) == -1)
12374 +               {
12375 +                       show_error("%s:could not stop pid: %d (%s)",func,p->pid,strerror(errno));
12376 +                       break;          /* give up, but restore the SIGCHLD handler below */
12377 +               }
12378 +               PGRchild_wait(SIGTERM);
12379 +               p->useFlag = DATA_FREE; /* NOTE(review): other writers use TBL_FREE -- confirm both are equal */
12380 +       }
12381 +
12382 +       if (Use_Connection_Pool)
12383 +               PGRsignal(SIGCHLD,PGRrecreate_child);
12384 +       else
12385 +               PGRsignal(SIGCHLD,PGRchild_wait);
12386 +}
12387 +
12388 +/* -------------------------------------------------------------------
12389 + * private functions
12390 + * -------------------------------------------------------------------
12391 + */
12392 +
12393 +#ifdef NONE_BLOCK
12394 +/*
12395 + * Switch `fd` to non-blocking mode by adding O_NONBLOCK to its file
12396 + * status flags.  Either fcntl() failure is logged and followed by
12397 + * child_end(SIGTERM).
12398 + */
12399 +static void set_nonblock(int fd)
12400 +{
12401 +       char* func = "set_nonblock()";
12402 +       int flags = fcntl(fd, F_GETFL, 0);      /* current file status flags */
12403 +
12404 +       if (flags == -1)
12405 +       {
12406 +               show_error("%s:fcntl failed. %s", func,strerror(errno));
12407 +               child_end(SIGTERM);
12408 +       }
12409 +       if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1)
12410 +       {
12411 +               show_error("%s:fcntl failed. %s", func,strerror(errno));
12412 +               child_end(SIGTERM);
12413 +       }
12414 +}
12415 +#endif
12416 +
12417 +/*
12418 + * Put `fd` back into blocking mode by clearing O_NONBLOCK from its file
12419 + * status flags.  Either fcntl() failure is logged and followed by
12420 + * child_end(SIGTERM).
12421 + */
12422 +static void unset_nonblock(int fd)
12423 +{
12424 +       char * func = "unset_nonblock()";
12425 +       int flags = fcntl(fd, F_GETFL, 0);      /* current file status flags */
12426 +
12427 +       if (flags == -1)
12428 +       {
12429 +               show_error("%s,fcntl failed. %s", func,strerror(errno));
12430 +               child_end(SIGTERM);
12431 +       }
12432 +       if (fcntl(fd, F_SETFL, flags & ~O_NONBLOCK) == -1)
12433 +       {
12434 +               show_error("%s,fcntl failed. %s", func,strerror(errno));
12435 +               child_end(SIGTERM);
12436 +       }
12437 +}
12438 +
12439 +
12440 +/*
12441 +* wait in select() for a client on unix_fd / inet_fd, accept() it and return it wrapped by pool_open(); returns NULL when nothing was accepted
12442 +*/
12443 +static POOL_CONNECTION *do_accept(int unix_fd, int inet_fd)
12444 +{
12445 +       char * func = "do_accept()";
12446 +    fd_set     readmask;
12447 +    int fds;
12448 +       struct sockaddr addr;
12449 +       socklen_t addrlen;
12450 +       int fd = 0;
12451 +       int afd;
12452 +       int inet = 0;   /* becomes non-zero when the client is taken from inet_fd */
12453 +       POOL_CONNECTION *cp;
12454 +#ifdef ACCEPT_PERFORMANCE
12455 +       struct timeval now1, now2;
12456 +       static long atime;      /* accumulated accept() time in microseconds */
12457 +       static int cnt;         /* number of successful accept() calls */
12458 +#endif
12459 +       /* the unix socket is always watched, the inet socket only when inet_fd is non-zero */
12460 +       FD_ZERO(&readmask);
12461 +       FD_SET(unix_fd, &readmask);
12462 +       if (inet_fd)
12463 +               FD_SET(inet_fd, &readmask);
12464 +       /* no timeout is passed: select() blocks until a socket is readable or a signal arrives */
12465 +       fds = select(Max(unix_fd, inet_fd)+1, &readmask, NULL, NULL, NULL);
12466 +       if (fds == -1)
12467 +       {
12468 +               if (errno == EAGAIN || errno == EINTR)  /* these two are not logged */
12469 +                       return NULL;
12470 +
12471 +               show_error("%s:select() failed. reason %s",func, strerror(errno));
12472 +               return NULL;
12473 +       }
12474 +
12475 +       if (fds == 0)
12476 +               return NULL;
12477 +       /* when both sockets are readable the inet socket wins: fd is overwritten below */
12478 +       if (FD_ISSET(unix_fd, &readmask))
12479 +       {
12480 +               fd = unix_fd;
12481 +       }
12482 +
12483 +       if (FD_ISSET(inet_fd, &readmask))
12484 +       {
12485 +               fd = inet_fd;
12486 +               inet++;
12487 +       }
12488 +
12489 +       /*
12490 +        * Note that some SysV systems do not work here. For those
12491 +        * systems, we need some locking mechanism for the fd.
12492 +        */
12493 +       addrlen = sizeof(addr);
12494 +
12495 +#ifdef ACCEPT_PERFORMANCE
12496 +       gettimeofday(&now1,0);
12497 +#endif
12498 +       afd = accept(fd, &addr, &addrlen);
12499 +       if (afd < 0)
12500 +       {
12501 +               /*
12502 +                * "Resource temporarily unavailable" (EAGAIN or EWOULDBLOCK)
12503 +                * can be silently ignored.
12504 +                */
12505 +               if (errno != EAGAIN && errno != EWOULDBLOCK)
12506 +                       show_error("%s:accept() failed. reason: %s",func, strerror(errno));
12507 +               return NULL;
12508 +       }
12509 +#ifdef ACCEPT_PERFORMANCE
12510 +       gettimeofday(&now2,0);
12511 +       atime += (now2.tv_sec - now1.tv_sec)*1000000 + (now2.tv_usec - now1.tv_usec);
12512 +       cnt++;
12513 +       if (cnt % 100 == 0)     /* report the totals once per 100 accepted connections */
12514 +       {
12515 +               show_error("%s:cnt: %d atime: %ld",func, cnt, atime);
12516 +       }
12517 +#endif
12518 +#ifdef PRINT_DEBUG
12519 +       show_debug("%s:I am %d accept fd %d",func, getpid(), afd);
12520 +#endif                 
12521 +
12522 +       /* set NODELAY and KEEPALIVE options if INET connection */
12523 +       if (inet)
12524 +       {
12525 +               int on = 1;
12526 +
12527 +               if (setsockopt(afd, IPPROTO_TCP, TCP_NODELAY,
12528 +                                          (char *) &on,
12529 +                                          sizeof(on)) < 0)
12530 +               {
12531 +                       show_error("%s:do_accept: setsockopt() failed: %s",func, strerror(errno));
12532 +                       close(afd);
12533 +                       return NULL;
12534 +               }
12535 +               if (setsockopt(afd, SOL_SOCKET, SO_KEEPALIVE,
12536 +                                          (char *) &on,
12537 +                                          sizeof(on)) < 0)
12538 +               {
12539 +                       show_error("%s:do_accept: setsockopt() failed: %s", func,strerror(errno));
12540 +                       close(afd);
12541 +                       return NULL;
12542 +               }
12543 +       }
12544 +       /* wrap the accepted descriptor; it is closed again when pool_open() fails */
12545 +       if ((cp = pool_open(afd)) == NULL)
12546 +       {
12547 +               close(afd);
12548 +               return NULL;
12549 +       }
12550 +       return cp;
12551 +}
12552 +
12553 +/*
12554 +* read startup packet
12555 +*/
12556 +static PGR_StartupPacket *read_startup_packet(POOL_CONNECTION *cp)
12557 +{
12558 +       char * func = "read_startup_packet()";
12559 +       PGR_StartupPacket *sp;
12560 +       PGR_StartupPacket_v2 *sp2;
12561 +       int protov;
12562 +       int len;
12563 +       char *p;
12564 +
12565 +       sp = (PGR_StartupPacket *)malloc(sizeof(PGR_StartupPacket));
12566 +       if (!sp)
12567 +       {
12568 +               show_error("%s:read_startup_packet: out of memory",func);
12569 +               return NULL;
12570 +       }
12571 +
12572 +       /* read startup packet length */
12573 +       if (pool_read(cp, &len, sizeof(len)))
12574 +       {
12575 +               free(sp);
12576 +               return NULL;
12577 +       }
12578 +       len = ntohl(len);
12579 +       len -= sizeof(len);
12580 +
12581 +       if (len <= 0)
12582 +       {
12583 +               show_error("%s:read_startup_packet: incorrect packet length (%d)", func,len);
12584 +               free(sp);
12585 +               return NULL;
12586 +       }
12587 +
12588 +       sp->startup_packet = calloc(len, 1);
12589 +       if (!sp->startup_packet)
12590 +       {
12591 +               show_error("%s:read_startup_packet: out of memory",func);
12592 +               free(sp);
12593 +               return NULL;
12594 +       }
12595 +
12596 +       /* read startup packet */
12597 +       if (pool_read(cp, sp->startup_packet, len))
12598 +       {
12599 +               free(sp);
12600 +               return NULL;
12601 +       }
12602 +
12603 +       sp->len = len;
12604 +       memcpy(&protov, sp->startup_packet, sizeof(protov));
12605 +       sp->major = ntohl(protov)>>16;
12606 +       sp->minor = ntohl(protov) & 0x0000ffff;
12607 +       p = sp->startup_packet;
12608 +
12609 +       switch(sp->major)
12610 +       {
12611 +               case PROTO_MAJOR_V2: /* V2 */
12612 +                       sp2 = (PGR_StartupPacket_v2 *)(sp->startup_packet);
12613 +
12614 +                       sp->database = calloc(SM_DATABASE+1, 1);
12615 +                       if (!sp->database)
12616 +                       {
12617 +                               show_error("%s:read_startup_packet: out of memory",func);
12618 +                               free(sp);
12619 +                               return NULL;
12620 +                       }
12621 +                       strncpy(sp->database, sp2->database, SM_DATABASE);
12622 +
12623 +                       sp->user = calloc(SM_USER+1, 1);
12624 +                       if (!sp->user)
12625 +                       {
12626 +                               show_error("%s:read_startup_packet: out of memory",func);
12627 +                               free(sp);
12628 +                               return NULL;
12629 +                       }
12630 +                       strncpy(sp->user, sp2->user, SM_USER);
12631 +
12632 +                       break;
12633 +
12634 +               case PROTO_MAJOR_V3: /* V3 */
12635 +                       p += sizeof(int);       /* skip protocol version info */
12636 +
12637 +                       while(*p)
12638 +                       {
12639 +                               if (!strcmp("user", p))
12640 +                               {
12641 +                                       p += (strlen(p) + 1);
12642 +                                       sp->user = strdup(p);
12643 +                                       if (!sp->user)
12644 +                                       {
12645 +                                               show_error("%s:read_startup_packet: out of memory",func);
12646 +                                               free(sp);
12647 +                                               return NULL;
12648 +                                       }
12649 +                               }
12650 +                               else if (!strcmp("database", p))
12651 +                               {
12652 +                                       p += (strlen(p) + 1);
12653 +                                       sp->database = strdup(p);
12654 +                                       if (!sp->database)
12655 +                                       {
12656 +                                               show_error("%s:read_startup_packet: out of memory",func);
12657 +                                               free(sp);
12658 +                                               return NULL;
12659 +                                       }
12660 +                               }
12661 +                               p += (strlen(p) + 1);
12662 +                       }
12663 +                       break;
12664 +
12665 +               case 1234:              /* cancel or SSL request */
12666 +                       /* set dummy database, user info */
12667 +                       sp->database = calloc(1, 1);
12668 +                       if (!sp->database)
12669 +                       {
12670 +                               show_error("%s:read_startup_packet: out of memory",func);
12671 +                               free(sp);
12672 +                               return NULL;
12673 +                       }
12674 +                       sp->user = calloc(1, 1);
12675 +                       if (!sp->user)
12676 +                       {
12677 +                               show_error("%s:read_startup_packet: out of memory",func);
12678 +                               free(sp);
12679 +                               return NULL;
12680 +                       }
12681 +                       break;
12682 +
12683 +               default:
12684 +                       show_error("%s:read_startup_packet: invalid major no: %d",func, sp->major);
12685 +                       free(sp);
12686 +                       return NULL;
12687 +       }
12688 +
12689 +#ifdef PRINT_DEBUG
12690 +       show_debug("%s:Protocol Major: %d Minor: %d database: %s user: %s", 
12691 +                          func,sp->major, sp->minor, sp->database, sp->user);
12692 +#endif                 
12693 +
12694 +       return sp;
12695 +}
12696 +
12697 +/*
12698 + * Send the saved startup packet of slot `cp` over cp->con: the length
12699 + * word (body length plus itself, network byte order), then the body.
12700 + */
12701 +static int send_startup_packet(POOL_CONNECTION_POOL_SLOT *cp)
12702 +{
12703 +       int wire_len = htonl(cp->sp->len + sizeof(wire_len));
12704 +
12705 +       pool_write(cp->con, &wire_len, sizeof(wire_len));
12706 +       return pool_write_and_flush(cp->con, cp->sp->startup_packet, cp->sp->len);
12707 +}
12708 +
12709 +/*
12710 + * Relay the cancel request `sp` to a backend of CurrentCluster over a
12711 + * fresh connection: the unix domain socket is used when
12712 + * PGRis_same_host() matches CurrentCluster->hostName against this
12713 + * host's name, the inet domain socket otherwise.  The packet follows a
12714 + * length word; the connection is closed afterwards.
12715 + */
12716 +static void cancel_request(CancelPacket *sp, int secondary_backend)
12717 +{
12718 +       char * func = "cancel_request()";
12719 +       char hostName[128];
12720 +       POOL_CONNECTION *con;
12721 +       int target;
12722 +       int fd;
12723 +       int len;
12724 +
12725 +#ifdef PRINT_DEBUG
12726 +       show_debug("%s:Cancel request received",func);
12727 +#endif                 
12728 +
12729 +       if (CurrentCluster == NULL)
12730 +               return;
12731 +       if (gethostname(hostName,sizeof(hostName)) < 0)
12732 +       {
12733 +               show_error("%s:gethostname() failed. (%s)",func,strerror(errno));
12734 +               return ;
12735 +       }
12736 +
12737 +       /* the connect helpers take 1 for the secondary backend, 0 otherwise */
12738 +       target = secondary_backend ? 1 : 0;
12739 +       if (PGRis_same_host(hostName,CurrentCluster->hostName))
12740 +               fd = connect_unix_domain_socket(target);
12741 +       else
12742 +               fd = connect_inet_domain_socket(target);
12743 +
12744 +       if (fd < 0)
12745 +       {
12746 +               show_error("%s:Could not create socket for sending cancel request",func);
12747 +               return;
12748 +       }
12749 +
12750 +       con = pool_open(fd);
12751 +       if (con == NULL)
12752 +       {
12753 +               close(fd);      /* was leaked; do_accept() closes its fd the same way */
12754 +               return;
12755 +       }
12756 +
12757 +       /* length word: itself plus the packet, in network byte order */
12758 +       len = htonl(sizeof(len) + sizeof(CancelPacket));
12759 +       pool_write(con, &len, sizeof(len));
12760 +
12761 +       if (pool_write_and_flush(con, sp, sizeof(CancelPacket)) < 0)
12762 +               show_error("%s:Could not send cancel request packet",func);
12763 +       pool_close(con);
12764 +}
12765 +
12766 +/*
12767 + * Create a connection pool entry for the client described by `sp`, send
12768 + * the startup packet to the backend (and to the secondary backend when
12769 + * replication is enabled) and run the authentication exchange.
12770 + *
12771 + * Returns the pool entry, or NULL on failure.  `frontend` is closed on
12772 + * every failure path.  Once the entry exists, a failure also discards
12773 + * it: the send failures used to skip pool_discard_cp(), unlike the
12774 + * authentication failure.
12775 + */
12776 +static POOL_CONNECTION_POOL *connect_backend(PGR_StartupPacket *sp, POOL_CONNECTION *frontend)
12777 +{
12778 +       char * func ="connect_backend()";
12779 +       POOL_CONNECTION_POOL *backend;
12780 +
12781 +       /* connect to the backend */
12782 +       backend = pool_create_cp();
12783 +       if (backend == NULL)
12784 +       {
12785 +               pool_send_error_message(frontend, sp->major, "XX000", "connection cache is full", "",
12786 +                                                               "increace max_pool", __FILE__, __LINE__);
12787 +               pool_close(frontend);
12788 +               return NULL;
12789 +       }
12790 +
12791 +       /* mark this is a backend connection and save startup packet info */
12792 +       backend->slots[0]->con->isbackend = 1;
12793 +       backend->slots[0]->sp = sp;
12794 +
12795 +       if (pool_config_replication_enabled)
12796 +       {
12797 +               /* the second slot shares the same startup packet */
12798 +               backend->slots[1]->con->isbackend = 1;
12799 +               backend->slots[1]->con->issecondary_backend = 1;
12800 +               backend->slots[1]->sp = sp;
12801 +       }
12802 +
12803 +       /* send startup packet */
12804 +       if (send_startup_packet(backend->slots[0]) < 0)
12805 +       {
12806 +               show_error("%s:do_child: fails to send startup packet to the backend",func);
12807 +               goto fail;
12808 +       }
12809 +       if (pool_config_replication_enabled &&
12810 +               send_startup_packet(backend->slots[1]) < 0)
12811 +       {
12812 +               show_error("%s:do_child: fails to send startup packet to the secondary backend",func);
12813 +               goto fail;
12814 +       }
12815 +
12816 +       /*
12817 +        * do authentication stuff
12818 +        */
12819 +       if (pool_do_auth(frontend, backend))
12820 +               goto fail;
12821 +       return backend;
12822 +
12823 +fail:
12824 +       pool_close(frontend);
12825 +       pool_discard_cp(sp->user, sp->database, sp->major);
12826 +       return NULL;
12827 +}
12828 +
12829 +/*
12830 + * Write one 'S' message (length word, name, value) to `frontend` for each
12831 + * parameter of the master backend connection, then flush.
12832 + * Returns 0 on success and -1 when pool_flush() reports a failure.
12833 + */
12834 +static int send_params(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend)
12835 +{
12836 +       char * func = "send_params()";
12837 +       char *name, *value;
12838 +       int sendlen, index;
12839 +
12840 +       for (index = 0; pool_get_param(&MASTER(backend)->params, index, &name, &value) == 0; index++)
12841 +       {
12842 +               int len = sizeof(sendlen) + strlen(name) + 1 + strlen(value) + 1;
12843 +               sendlen = htonl(len);           /* the length word counts itself */
12844 +               pool_write(frontend, "S", 1);
12845 +               pool_write(frontend, &sendlen, sizeof(sendlen));
12846 +               pool_write(frontend, name, strlen(name) + 1);
12847 +               pool_write(frontend, value, strlen(value) + 1);
12848 +       }
12849 +       if (pool_flush(frontend) == 0)
12850 +               return 0;
12851 +       show_error("%s:pool_send_params: pool_flush() failed",func);
12852 +       return -1;
12853 +}
12854 +
12855 +/*
12856 + * Terminate this child process: ignore further deliveries of `sig`,
12857 + * run pool_finish() and exit with status 0.  PGRcreate_child() installs
12858 + * it as the handler for SIGTERM, SIGHUP and SIGINT.
12859 + */
12860 +static void child_end(int sig)
12861 +{
12862 +       PGRsignal(sig, SIG_IGN);
12863 +       pool_finish();
12864 +       exit(0);
12865 +}
12866 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/cluster_table.c pgcluster-1.7.0rc7/src/pgcluster/pglb/cluster_table.c
12867 --- postgresql-8.2.4/src/pgcluster/pglb/cluster_table.c 1970-01-01 01:00:00.000000000 +0100
12868 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/cluster_table.c       2007-02-18 22:52:17.000000000 +0100
12869 @@ -0,0 +1,343 @@
12870 +/*--------------------------------------------------------------------
12871 + * FILE:
12872 + *     cluster_table.c
12873 + *
12874 + * NOTE:
12875 + *     This file is composed of the functions to use a cluster table.
12876 + *
12877 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
12878 + *--------------------------------------------------------------------
12879 + */
12880 +/*
12881 + * Permission to use, copy, modify, and distribute this software and
12882 + * its documentation for any purpose and without fee is hereby
12883 + * granted, provided that the above copyright notice appear in all
12884 + * copies and that both that copyright notice and this permission
12885 + * notice appear in supporting documentation, and that the name of the
12886 + * author not be used in advertising or publicity pertaining to
12887 + * distribution of the software without specific, written prior
12888 + * permission. The author makes no representations about the
12889 + * suitability of this software for any purpose.  It is provided "as
12890 + * is" without express or implied warranty.
12891 + *
12892 +*/
12893 +#include <stdio.h>
12894 +#include <stdarg.h>
12895 +#include <string.h>
12896 +#include <stdlib.h>
12897 +#include <unistd.h>
12898 +#include <signal.h>
12899 +#include <sys/wait.h>
12900 +#include <sys/time.h>
12901 +#include <ctype.h>
12902 +#include <sys/types.h>
12903 +#include <sys/stat.h>
12904 +#include <sys/socket.h>
12905 +#include <sys/ipc.h>
12906 +#include <sys/shm.h>
12907 +#include <sys/sem.h>
12908 +#include <sys/msg.h>
12909 +#include <netdb.h>
12910 +#include <netinet/in.h>
12911 +#include <errno.h>
12912 +#include <fcntl.h>
12913 +#include <time.h>
12914 +#include <sys/param.h>
12915 +#include <sys/select.h>
12916 +#include <arpa/inet.h>
12917 +#include <sys/file.h>
12918 +
12919 +#ifdef HAVE_NETINET_TCP_H
12920 +#include <netinet/tcp.h>
12921 +#endif
12922 +
12923 +#include "replicate_com.h"
12924 +#include "pglb.h"
12925 +
12926 +
12927 +/*--------------------------------------
12928 + * PROTOTYPE DECLARATION (functions with external linkage first, static helpers last)
12929 + *--------------------------------------
12930 + */
12931 +int PGRis_cluster_alive(void) ;
12932 +ClusterTbl * PGRscan_cluster(void);
12933 +void PGRset_key_of_cluster(ClusterTbl * ptr, RecoveryPacket * packet);
12934 +ClusterTbl * PGRadd_cluster_tbl (ClusterTbl * conf_data);
12935 +ClusterTbl * PGRset_status_on_cluster_tbl (int status, ClusterTbl * ptr);
12936 +ClusterTbl * PGRsearch_cluster_tbl(ClusterTbl * conf_data);
12937 +
12938 +static int set_cluster_tbl(ClusterTbl * ptr , ClusterTbl * conf_data);
12939 +static ClusterTbl * search_free_cluster_tbl(void );
12940 +static void write_cluster_status_file(ClusterTbl * ptr);
12941 +
12942 +int PGRis_cluster_alive(void) /* STATUS_OK when at least one entry is TBL_USE or TBL_INIT, else STATUS_ERROR */
12943 +{
12944 +       ClusterTbl * ptr = Cluster_Tbl;
12945 +       int use = 0;
12946 +
12947 +       /* without a cluster table no cluster DB can be alive */
12948 +       if (ptr == NULL)
12949 +               return STATUS_ERROR;
12950 +       /* count the USE/INIT entries up to the TBL_END terminator while holding semaphore MAX_DB_SERVER */
12951 +       PGRsem_lock(ClusterSemid,MAX_DB_SERVER);
12952 +       for (; ptr->useFlag != TBL_END; ptr++)
12953 +       {
12954 +               if ((ptr->useFlag == TBL_USE) || (ptr->useFlag == TBL_INIT))
12955 +                       use++;
12956 +       }
12957 +       PGRsem_unlock(ClusterSemid,MAX_DB_SERVER);
12958 +       return use==0 ? STATUS_ERROR : STATUS_OK;
12959 +}
12960 +
12961 +ClusterTbl * 
12962 +PGRscan_cluster(void)
12963 +{
12964 +       char * func = "PGRscan_cluster";
12965 +       ClusterTbl * ptr = NULL;
12966 +       ClusterTbl * rtn = NULL;
12967 +       int min_use_rate = 100;
12968 +       int use_rate = 0;
12969 +       int cnt = 0;
12970 +       /* Choose the TBL_USE/TBL_INIT entry with the lowest use_num*100/max_connect, reserve one connection on it (use_num++) and return it. */
12971 +       /* Returns NULL when the table is missing or no entry has a free connection. */
12972 +       ptr = Cluster_Tbl;
12973 +       if (ptr == NULL)
12974 +       {
12975 +               show_error("%s:Cluster Table is not initialize",func);
12976 +               return (ClusterTbl *)NULL;
12977 +       }
12978 +#ifdef PRINT_DEBUG
12979 +       show_debug("%s:%d ClusterDB can be used",func,ClusterNum);
12980 +#endif                 
12981 +       PGRsem_lock(ClusterSemid,MAX_DB_SERVER);
12982 +       while ((cnt <= ClusterNum) && (ptr->useFlag != TBL_END)) /* NOTE(review): visits at most ClusterNum+1 slots; confirm no usable entry can sit beyond that */
12983 +       {
12984 +#ifdef PRINT_DEBUG
12985 +               show_debug("%s:%s [%d],useFlag->%d max->%d use_num->%d\n",
12986 +                       func, ptr->hostName,ptr->port,ptr->useFlag,ptr->max_connect,ptr->use_num);
12987 +#endif                 
12988 +               cnt ++; /* counts every visited slot, usable or not */
12989 +               if ((ptr->useFlag != TBL_USE) && (ptr->useFlag != TBL_INIT))
12990 +               {
12991 +                       ptr ++;
12992 +                       continue;
12993 +               }
12994 +               if (ptr->max_connect <= ptr->use_num) /* already at its connection limit */
12995 +               {
12996 +                       ptr ++;
12997 +                       continue;
12998 +               }
12999 +               if (ptr->use_num > 0)
13000 +               {
13001 +                       use_rate = ptr->use_num * 100 / ptr->max_connect ; /* max_connect > use_num > 0 here, so no division by zero */
13002 +               }
13003 +               else
13004 +               {
13005 +                       use_rate = 0;
13006 +                       rtn = ptr; /* an entry with no connections in use is taken immediately */
13007 +                       break;
13008 +               }
13009 +               if (min_use_rate > use_rate)
13010 +               {
13011 +                       min_use_rate = use_rate;
13012 +                       rtn = ptr;
13013 +               }
13014 +               ptr ++;
13015 +       }
13016 +       if (rtn != NULL)
13017 +       {
13018 +               rtn->use_num ++; /* reserve the connection for the caller */
13019 +               if (rtn->useFlag == TBL_INIT)
13020 +               {
13021 +                       PGRset_status_on_cluster_tbl (TBL_USE,rtn); /* NOTE(review): that call decrements ClusterNum for TBL_USE, shrinking the scan bound above; confirm intended */
13022 +               }
13023 +       }
13024 +       PGRsem_unlock(ClusterSemid,MAX_DB_SERVER);
13025 +       return rtn;
13026 +}
13027 +
13028 +void
13029 +PGRset_key_of_cluster(ClusterTbl * ptr, RecoveryPacket * packet) /* reset *ptr and fill in host name, max_connect and port from *packet */
13030 +{
13031 +       int max_connect = 0;
13032 +       int port = 0;
13033 +
13034 +       memset(ptr,0,sizeof(ClusterTbl)); /* every other field of the entry starts out zeroed */
13035 +       memcpy(ptr->hostName,packet->hostName,sizeof(ptr->hostName)); /* assumes packet->hostName is at least sizeof(ptr->hostName) bytes -- TODO confirm */
13036 +       max_connect = ntohs(packet->max_connect); /* packet value is converted from network byte order */
13037 +       if (max_connect >= 0) /* NOTE(review): ntohs() returns an unsigned 16-bit value, so this is always true and the default below is never used */
13038 +       {
13039 +               ptr->max_connect = max_connect;
13040 +       }
13041 +       else
13042 +       {
13043 +               ptr->max_connect = DEFAULT_CONNECT_NUM;
13044 +       }
13045 +       port = ntohs(packet->port);
13046 +       if ( port >= 0) /* NOTE(review): always true for the same reason; DEFAULT_PORT is never used */
13047 +       {
13048 +               ptr->port = port;
13049 +       }
13050 +       else
13051 +       {
13052 +               ptr->port = DEFAULT_PORT;
13053 +       }
13054 +}
13055 +
13056 +ClusterTbl *
13057 +PGRadd_cluster_tbl (ClusterTbl * conf_data) /* register conf_data in the cluster table; returns its entry, or NULL when it cannot be stored */
13058 +{
13059 +       char * func = "PGRadd_cluster_tbl()";
13060 +       ClusterTbl * ptr;
13061 +
13062 +       ptr = PGRsearch_cluster_tbl(conf_data); /* same host name and port already known? */
13063 +       if ((ptr != NULL) && 
13064 +               ((ptr->useFlag == TBL_USE ) || ((ptr->useFlag == TBL_INIT))))
13065 +       {
13066 +               ptr->max_connect = conf_data->max_connect; /* live entry: refresh the limit and reset its counters */
13067 +               ptr->use_num = 0;
13068 +               ptr->rate = 0;
13069 +               return ptr;
13070 +       }
13071 +       ptr = search_free_cluster_tbl(); /* otherwise take a TBL_FREE or TBL_ERROR slot */
13072 +       if (ptr == (ClusterTbl *) NULL)
13073 +       {
13074 +               show_error("%s:no more free space in cluster table",func);
13075 +               return (ClusterTbl *)NULL;
13076 +       }
13077 +       if (ClusterNum < Max_DB_Server)
13078 +       {
13079 +               set_cluster_tbl( ptr, conf_data); /* copies the data and puts the slot into TBL_INIT */
13080 +               return ptr;
13081 +       }
13082 +       return (ClusterTbl *)NULL; /* table already holds Max_DB_Server clusters */
13083 +}
13084 +
13085 +ClusterTbl *
13086 +PGRset_status_on_cluster_tbl (int status, ClusterTbl * ptr) /* set ptr->useFlag to status, log the change and adjust ClusterNum; returns ptr */
13087 +{
13088 +#ifdef PRINT_DEBUG
13089 +       char * func = "PGRset_status_on_cluster_tbl()";
13090 +#endif                 
13091 +
13092 +       if (ptr != (ClusterTbl*)NULL)
13093 +       {
13094 +               if (ptr->useFlag != status) /* nothing is logged or counted when the status is unchanged */
13095 +               {
13096 +#ifdef PRINT_DEBUG
13097 +                       show_debug("%s:host:%s port:%d max:%d use:%d status%d",
13098 +                               func, ptr->hostName,ptr->port,ptr->max_connect,ptr->useFlag,status);
13099 +#endif                 
13100 +                       ptr->useFlag = status;
13101 +                       write_cluster_status_file(ptr);
13102 +                       if (status == TBL_INIT)
13103 +                       {
13104 +                               if (ClusterNum < Max_DB_Server)
13105 +                                       ClusterNum ++ ;
13106 +                       }
13107 +                       else if (status != TBL_STOP) /* NOTE(review): every status except TBL_INIT/TBL_STOP, TBL_USE included, decrements ClusterNum; confirm intended */
13108 +                       {
13109 +                               if (ClusterNum > 0)
13110 +                                       ClusterNum -- ;
13111 +                       }
13112 +               }
13113 +       }
13114 +       return ptr;
13115 +}
13116 +
13117 +static void
13118 +write_cluster_status_file(ClusterTbl * ptr) /* write one line to StatusFp describing ptr's current useFlag */
13119 +{
13120 +       switch( ptr->useFlag) /* values other than the five cases below are not logged */
13121 +       {
13122 +               case TBL_FREE:
13123 +                       PGRwrite_log_file(StatusFp,"port(%d) host:%s free",
13124 +                                       ptr->port,
13125 +                                       ptr->hostName);
13126 +                       break;
13127 +               case TBL_INIT:
13128 +                       PGRwrite_log_file(StatusFp,"port(%d) host:%s initialize",
13129 +                                       ptr->port,
13130 +                                       ptr->hostName);
13131 +                       break;
13132 +               case TBL_USE:
13133 +                       PGRwrite_log_file(StatusFp,"port(%d) host:%s start use",
13134 +                                       ptr->port,
13135 +                                       ptr->hostName);
13136 +                       break;
13137 +               case TBL_ERROR:
13138 +                       PGRwrite_log_file(StatusFp,"port(%d) host:%s error",
13139 +                                       ptr->port,
13140 +                                       ptr->hostName);
13141 +                       break;
13142 +               case TBL_END:
13143 +                       PGRwrite_log_file(StatusFp,"port(%d) host:%s end",
13144 +                                       ptr->port,
13145 +                                       ptr->hostName);
13146 +                       break;
13147 +       }
13148 +}
13149 +
13150 +ClusterTbl *
13151 +PGRsearch_cluster_tbl(ClusterTbl * conf_data) /* find the entry whose hostName and port equal conf_data's; NULL when not found */
13152 +{
13153 +       ClusterTbl *ptr;
13154 +       int cnt = 0; /* non-matching TBL_USE/TBL_INIT entries seen so far */
13155 +       int rec_num = 0; /* slots examined so far */
13156 +
13157 +       ptr = Cluster_Tbl; /* NOTE(review): not checked for NULL here; presumably callers guarantee the table exists */
13158 +       while ((cnt <= ClusterNum) && (rec_num < Max_DB_Server))
13159 +       {
13160 +               if (ptr->port > 0) /* slots without a port are skipped */
13161 +               {
13162 +                       if ((!strcmp(ptr->hostName,conf_data->hostName)) &&
13163 +                               (ptr->port == conf_data->port))
13164 +                       {
13165 +                               return ptr; /* returned whatever its useFlag is */
13166 +                       }
13167 +                       if ((ptr->useFlag == TBL_USE) || (ptr->useFlag == TBL_INIT))
13168 +                       {
13169 +                               cnt ++;
13170 +                       }
13171 +               }
13172 +               ptr ++;
13173 +               rec_num ++;
13174 +       }
13175 +       return (ClusterTbl *)NULL;
13176 +}
13177 +
13178 +static int
13179 +set_cluster_tbl(ClusterTbl * ptr , ClusterTbl * conf_data) /* copy conf_data into slot ptr and switch the slot to TBL_INIT; always returns STATUS_OK */
13180 +{
13181 +       int rec_no; /* assigned below but never read */
13182 +
13183 +       rec_no = ptr->rec_no;
13184 +       memcpy(ptr->hostName,conf_data->hostName,sizeof(ptr->hostName));
13185 +       ptr->max_connect = conf_data->max_connect;
13186 +       ptr->port = conf_data->port;
13187 +       ptr->use_num = conf_data->use_num;
13188 +       ptr->rate = conf_data->rate;
13189 +       PGRset_status_on_cluster_tbl (TBL_INIT, ptr); /* also logs the change and increments ClusterNum when the slot was not already TBL_INIT */
13190 +
13191 +       return STATUS_OK;
13192 +}
13193 +
13194 +static ClusterTbl *
13195 +search_free_cluster_tbl(void ) /* return the first TBL_FREE or TBL_ERROR slot, or NULL when none is found */
13196 +{
13197 +       ClusterTbl *ptr;
13198 +       int cnt = 0;
13199 +
13200 +       ptr = Cluster_Tbl;
13201 +       while ((cnt <= ClusterNum ) && (cnt < Max_DB_Server)) /* only the first min(ClusterNum+1, Max_DB_Server) slots are examined */
13202 +       {
13203 +               if ((ptr->useFlag == TBL_FREE) || (ptr->useFlag == TBL_ERROR))
13204 +               {
13205 +                       return ptr;
13206 +               }
13207 +               cnt ++;
13208 +               ptr ++;
13209 +       }
13210 +       return (ClusterTbl *)NULL;
13211 +}
13212 +
13213 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/lifecheck.c pgcluster-1.7.0rc7/src/pgcluster/pglb/lifecheck.c
13214 --- postgresql-8.2.4/src/pgcluster/pglb/lifecheck.c     1970-01-01 01:00:00.000000000 +0100
13215 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/lifecheck.c   2007-03-01 16:27:15.000000000 +0100
13216 @@ -0,0 +1,329 @@
13217 +/*--------------------------------------------------------------------
13218 + * FILE:
13219 + *     lifecheck.c
13220 + *
13221 + * NOTE:
13222 + *     This file is composed of the functions to call with the source
13223 + *     at pgreplicate for the lifecheck.
13224 + *
13225 + * Portions Copyright (c) 2003-2007, Atsushi Mitani
13226 + *--------------------------------------------------------------------
13227 + */
13228 +#include "postgres.h"
13229 +#include "postgres_fe.h"
13230 +
13231 +#include <pthread.h>
13232 +#include <stdio.h>
13233 +#include <stdarg.h>
13234 +#include <sys/types.h>
13235 +#include <fcntl.h>
13236 +#include <errno.h>
13237 +#include <ctype.h>
13238 +#include <time.h>
13239 +#include <sys/ipc.h>
13240 +#include <sys/shm.h>
13241 +#include <sys/sem.h>
13242 +#include <sys/msg.h>
13243 +#include <signal.h>
13244 +
13245 +#include "libpq-fe.h"
13246 +#include "libpq-int.h"
13247 +#include "fe-auth.h"
13248 +
13249 +#include <sys/socket.h>
13250 +#include <unistd.h>
13251 +#include <netdb.h>
13252 +#include <arpa/inet.h>
13253 +
13254 +#ifdef HAVE_NETINET_TCP_H
13255 +#include <netinet/tcp.h>
13256 +#endif
13257 +
13258 +#ifdef HAVE_SYS_SELECT_H
13259 +#include <sys/select.h>
13260 +#endif
13261 +
13262 +
13263 +#ifdef HAVE_CRYPT_H
13264 +#include <crypt.h>
13265 +#endif
13266 +
13267 +
13268 +#ifdef MULTIBYTE
13269 +#include "mb/pg_wchar.h"
13270 +#endif
13271 +
13272 +#include "access/xact.h"
13273 +#include "lib/dllist.h"
13274 +#include "libpq/pqformat.h"
13275 +#include "replicate_com.h"
13276 +#include "pglb.h"
13277 +
13278 +#define PING_DB                "template1"
13279 +#define PING_QUERY     "SELECT 1"
13280 +
13281 +static ClusterTbl * PGR_Cluster_DB_4_Lifecheck = (ClusterTbl*)NULL;
13282 +
13283 +/*--------------------------------------
13284 + * PROTOTYPE DECLARATION
13285 + *--------------------------------------
13286 + */
13287 +int PGRlifecheck_main(int fork_wait_time);
13288 +PGconn * PGRcreateConn( char * host, char * port,char * database, char * userName, char * password, char * md5Salt, char * cryptSalt );
13289 +
13290 +static bool is_started_loadbalance(void);
13291 +static void set_timeout(SIGNAL_ARGS);
13292 +static int lifecheck_loop(void);
13293 +static int ping_cluster(PGconn * conn);
13294 +static void set_cluster_status(ClusterTbl * host_ptr, int status);
13295 +
13296 +int
13297 +PGRlifecheck_main(int fork_wait_time)
13298 +{
13299 +       bool started = false;
13300 +       pid_t pgid = 0;
13301 +       pid_t pid = 0;
13302 +       /* Fork the lifecheck child. The parent returns at once; the child never returns and checks the cluster DBs every PGR_Lifecheck_Interval seconds. */
13303 +       pgid = getpgid(0);
13304 +       pid = fork();
13305 +       if (pid != 0)
13306 +       {
13307 +               return (pid < 0) ? STATUS_ERROR : STATUS_OK; /* parent: a failed fork() is reported, not passed off as success */
13308 +       }
13309 +
13310 +       /*
13311 +        * in child process:
13312 +        * stay in the parent's process group and run the lifecheck loop
13313 +        */
13314 +       setpgid(0,pgid);
13315 +
13316 +       PGRsignal(SIGHUP, PGRexit_subprocess);
13317 +       PGRsignal(SIGTERM, PGRexit_subprocess);
13318 +       PGRsignal(SIGINT, PGRexit_subprocess);
13319 +       PGRsignal(SIGQUIT, PGRexit_subprocess);
13320 +       PGRsignal(SIGALRM, set_timeout); /* alarm() in lifecheck_loop() bounds each ping */
13321 +
13322 +       if (fork_wait_time > 0) { /* optional start-up delay, in seconds */
13323 +               sleep(fork_wait_time);
13324 +       }
13325 +
13326 +       if (PGRuserName == NULL) /* fall back to $LOGNAME, then $USER, then "postgres" */
13327 +       {
13328 +               PGRuserName = getenv("LOGNAME");
13329 +               if (PGRuserName == NULL)
13330 +               {
13331 +                       PGRuserName = getenv("USER");
13332 +                       if (PGRuserName == NULL)
13333 +                               PGRuserName = "postgres";
13334 +               }
13335 +       }
13336 +
13337 +       for (;;)
13338 +       {
13339 +               started = is_started_loadbalance();
13340 +               if (!started)
13341 +               {
13342 +                       /* no cluster DB in use yet: wait one interval and look again */
13343 +                       sleep(PGR_Lifecheck_Interval);
13344 +                       continue;
13345 +               }
13346 +
13347 +               /* life check to all cluster dbs */
13348 +               lifecheck_loop();
13349 +
13350 +               /* wait next lifecheck as interval */
13351 +               sleep(PGR_Lifecheck_Interval);
13352 +       }
13353 +       return STATUS_OK; /* not reached */
13354 +}
13355 +
13356 +static bool
13357 +is_started_loadbalance(void) /* true when at least one entry before the TBL_END terminator is TBL_USE */
13358 +{
13359 +       ClusterTbl * host_ptr = (ClusterTbl*)NULL;
13360 +
13361 +       host_ptr = Cluster_Tbl;
13362 +       if (host_ptr == NULL) /* no cluster table: nothing has started */
13363 +       {
13364 +               return false;
13365 +       }
13366 +       while(host_ptr->useFlag != TBL_END)
13367 +       {
13368 +               if (host_ptr->useFlag == TBL_USE)
13369 +               {
13370 +                       return true;
13371 +               }
13372 +               host_ptr ++;
13373 +       }
13374 +       return false;
13375 +}
13376 +
13377 +static void 
13378 +set_timeout(SIGNAL_ARGS) /* SIGALRM handler: records a TBL_ERROR result for the host currently being checked */
13379 +{
13380 +       if (PGR_Cluster_DB_4_Lifecheck != NULL) /* set by lifecheck_loop() before each connection attempt */
13381 +       {
13382 +               set_cluster_status( PGR_Cluster_DB_4_Lifecheck, TBL_ERROR); /* NOTE(review): this path can end in PGRwrite_log_file(); confirm that is safe inside a signal handler */
13383 +       }
13384 +       PGRsignal(SIGALRM, set_timeout); /* install the handler again for the next alarm */
13385 +}
13386 +
13387 +static int
13388 +lifecheck_loop(void)
13389 +{
13390 +       ClusterTbl * host_ptr = (ClusterTbl*)NULL;
13391 +       char       port[8];
13392 +       char * host = NULL;
13393 +       PGconn * conn = NULL;
13394 +       /* One lifecheck pass: ping every TBL_USE/TBL_INIT entry and record the result. Returns STATUS_ERROR only when there is no cluster table. */
13395 +       host_ptr = Cluster_Tbl;
13396 +       if (host_ptr == NULL)
13397 +       {
13398 +               return STATUS_ERROR;
13399 +       }
13400 +       alarm(0); /* drop any alarm left pending */
13401 +       while(host_ptr->useFlag != TBL_END)
13402 +       {
13403 +               /*
13404 +                * skip entries that are neither in use nor initializing ('||' here was always true and skipped every host)
13405 +                */
13406 +               if ((host_ptr->useFlag != TBL_USE) && (host_ptr->useFlag != TBL_INIT))
13407 +               {
13408 +                       host_ptr ++;
13409 +                       continue;
13410 +               }
13411 +               snprintf(port,sizeof(port),"%d", host_ptr->port);
13412 +               host = (char *)(host_ptr->hostName);
13413 +               /* remember the host so the SIGALRM handler knows which entry timed out */
13414 +               PGR_Cluster_DB_4_Lifecheck = host_ptr;
13415 +
13416 +               /* set alarm as lifecheck timeout */
13417 +               alarm(PGR_Lifecheck_Timeout);
13418 +
13419 +               /* connect DB */
13420 +               conn = PGRcreateConn(host,port, PING_DB ,PGRuserName,"","","");
13421 +               if ((conn != NULL) &&
13422 +                       (ping_cluster(conn) == STATUS_OK))
13423 +               {
13424 +                       set_cluster_status(host_ptr,TBL_USE);
13425 +               }
13426 +               else
13427 +               {
13428 +                       set_cluster_status(host_ptr,TBL_ERROR);
13429 +               }
13430 +               /* reset alarm */
13431 +               alarm(0);
13432 +
13433 +               PQfinish(conn);
13434 +               conn = NULL;
13435 +               host_ptr ++;
13436 +       }
13437 +
13438 +       return STATUS_OK;
13439 +}
13440 +
13441 +static int
13442 +ping_cluster(PGconn * conn) /* run PING_QUERY on conn; STATUS_ERROR when the result status is a fatal or non-fatal error, else STATUS_OK */
13443 +{
13444 +       int status = 0;
13445 +       PGresult * res = (PGresult *)NULL;
13446 +
13447 +       res = PQexec(conn, PING_QUERY );
13448 +
13449 +       status = PQresultStatus(res); /* read the status before the result is freed */
13450 +       if (res != NULL)
13451 +       {
13452 +               PQclear(res);
13453 +       }
13454 +       if ((status == PGRES_NONFATAL_ERROR ) ||
13455 +               (status == PGRES_FATAL_ERROR ))
13456 +       {
13457 +               return STATUS_ERROR;
13458 +       }
13459 +       return STATUS_OK;
13460 +}
13461 +
13462 +PGconn *
13463 +PGRcreateConn( char * host, char * port,char * database, char * userName, char * password, char * md5Salt, char * cryptSalt )
13464 +{
13465 +       int cnt = 0;
13466 +       PGconn * conn = NULL;
13467 +       char pwd[256];
13468 +       /* Open a libpq connection, retrying while it is CONNECTION_BAD. Returns the connection, or NULL once more than PGLB_CONNECT_RETRY_TIME retries were counted. cryptSalt is unused. */
13469 +       memset(pwd,0,sizeof(pwd));
13470 +       if ((password != NULL) && (*password != '\0'))
13471 +       {
13472 +               if ((strncmp(password,"md5",3) == 0) && (md5Salt != NULL))
13473 +               {
13474 +                       snprintf(pwd,sizeof(pwd),"%s(%d)(%d)(%d)(%d)",password, /* bounded: a long password can no longer overrun pwd; reads 4 salt bytes */
13475 +                               *md5Salt,*(md5Salt+1),*(md5Salt+2),*(md5Salt+3));
13476 +               }
13477 +               else
13478 +               {
13479 +                       strncpy(pwd,password,sizeof(pwd) - 1); /* last byte stays '\0' from the memset above */
13480 +               }
13481 +       }
13482 +       conn = PQsetdbLogin(host, port, NULL, NULL, database, userName, pwd);
13483 +       /* check to see that the backend Connection was successfully made */
13484 +       cnt = 0;
13485 +       while (PQstatus(conn) == CONNECTION_BAD)
13486 +       {
13487 +               if (conn != NULL)
13488 +               {
13489 +                       PQfinish(conn);
13490 +                       conn = NULL;
13491 +               }
13492 +               conn = PQsetdbLogin(host, port, NULL, NULL, database, userName, pwd);
13493 +               if (cnt > PGLB_CONNECT_RETRY_TIME )
13494 +               {
13495 +                       if (conn != NULL)
13496 +                       {
13497 +                               PQfinish(conn);
13498 +                               conn = NULL;
13499 +                       }
13500 +                       return (PGconn *)NULL;
13501 +               }
13502 +               /* every branch below waits before the next attempt; only "starting up" is not counted as a retry */
13503 +               if(PQstatus(conn) == CONNECTION_BAD && h_errno==2)
13504 +               {
13505 +                       usleep(PGR_SEND_WAIT_MSEC);
13506 +                       cnt ++;
13507 +               }
13508 +               else if(!strncasecmp(PQerrorMessage(conn),"FATAL:  Sorry, too many clients already",30) ||
13509 +                       !strncasecmp(PQerrorMessage(conn),"FATAL:  Non-superuser connection limit",30) ) 
13510 +               {
13511 +                       usleep(PGR_SEND_WAIT_MSEC);
13512 +                       cnt ++;
13513 +               }
13514 +               else if(!strncasecmp(PQerrorMessage(conn),"FATAL:  The database system is starting up",40)   )
13515 +               {
13516 +                       usleep(PGR_SEND_WAIT_MSEC); /* NOTE(review): cnt is not advanced here, so this case retries without limit */
13517 +               }
13518 +               else
13519 +               {
13520 +                       usleep(PGR_SEND_WAIT_MSEC);
13521 +                       cnt ++;
13522 +               }
13523 +       }
13524 +       return conn;
13525 +}
13526 +
13527 +static void
13528 +set_cluster_status(ClusterTbl * host_ptr, int status) /* record one lifecheck result for host_ptr; NULL is ignored */
13529 +{
13530 +       if (host_ptr == NULL)
13531 +               return;
13532 +       if (status == TBL_ERROR)
13533 +       {
13534 +               host_ptr->retry_count ++; /* one more failure counted */
13535 +               if (host_ptr->retry_count > PGLB_CONNECT_RETRY_TIME ) /* marked TBL_ERROR only after more than PGLB_CONNECT_RETRY_TIME failures */
13536 +               {
13537 +                       PGRset_status_on_cluster_tbl(status, host_ptr);
13538 +               }
13539 +       }
13540 +       else
13541 +       {
13542 +               host_ptr->retry_count = 0; /* any other status clears the failure count */
13543 +               PGRset_status_on_cluster_tbl(status, host_ptr);
13544 +       }
13545 +}
13546 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/load_balance.c pgcluster-1.7.0rc7/src/pgcluster/pglb/load_balance.c
13547 --- postgresql-8.2.4/src/pgcluster/pglb/load_balance.c  1970-01-01 01:00:00.000000000 +0100
13548 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/load_balance.c        2007-02-18 22:52:17.000000000 +0100
13549 @@ -0,0 +1,252 @@
13550 +/*--------------------------------------------------------------------
13551 + * FILE:
13552 + *     load_balance.c
13553 + *
13554 + * NOTE:
13555 + *     This file is composed of the functions of load balance modules
13556 + *     with connection pooling or not
13557 + *
13558 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
13559 + *--------------------------------------------------------------------
13560 + */
13561 +/*
13562 + * Permission to use, copy, modify, and distribute this software and
13563 + * its documentation for any purpose and without fee is hereby
13564 + * granted, provided that the above copyright notice appear in all
13565 + * copies and that both that copyright notice and this permission
13566 + * notice appear in supporting documentation, and that the name of the
13567 + * author not be used in advertising or publicity pertaining to
13568 + * distribution of the software without specific, written prior
13569 + * permission. The author makes no representations about the
13570 + * suitability of this software for any purpose.  It is provided "as
13571 + * is" without express or implied warranty.
13572 + *
13573 +*/
13574 +#include "postgres.h"
13575 +#include <stdio.h>
13576 +#include <stdlib.h>
13577 +#include <string.h>
13578 +#include <unistd.h>
13579 +#include <signal.h>
13580 +#include <sys/wait.h>
13581 +#include <sys/time.h>
13582 +#include <ctype.h>
13583 +#include <sys/types.h>
13584 +#include <sys/stat.h>
13585 +#include <sys/socket.h>
13586 +#include <sys/ipc.h>
13587 +#include <sys/sem.h>
13588 +#include <netdb.h>
13589 +#include <netinet/in.h>
13590 +#include <errno.h>
13591 +#include <fcntl.h>
13592 +#include <time.h>
13593 +#include <sys/param.h>
13594 +#include <sys/select.h>
13595 +#include <arpa/inet.h>
13596 +#include <sys/file.h>
13597 +
13598 +#ifdef HAVE_NETINET_TCP_H
13599 +#include <netinet/tcp.h>
13600 +#endif
13601 +
13602 +#include "replicate_com.h"
13603 +#include "pglb.h"
13604 +
13605 +/*--------------------------------------
13606 + * PROTOTYPE DECLARATION
13607 + *--------------------------------------
13608 + */
13609 +int PGRload_balance(void);
13610 +int PGRload_balance_with_pool(void);
13611 +char PGRis_connection_full(ClusterTbl * ptr);
13612 +void PGRrelease_connection(ClusterTbl * ptr);
13613 +void PGRchild_wait(int sig);
13614 +
13615 +/*--------------------------------------------------------------------
13616 + * SYMBOL
13617 + *    PGRload_balance()
13618 + * NOTES
13619 + *    load balance module that normal connection is used
13620 + * ARGS
13621 + *    void
13622 + * RETURN
13623 + *    OK: STATUS_OK
13624 + *    NG: STATUS_ERROR
13625 + *--------------------------------------------------------------------
13626 + */
13627 +int
13628 +PGRload_balance(void)
13629 +{
13630 +       char * func = "PGRload_balance()";
13631 +       pid_t pid,pgid;
13632 +       int count;
13633 +       int status;
13634 +       ClusterTbl * cluster_p = NULL;
13635 +
13636 +       PGRsignal(SIGCHLD, PGRchild_wait); /* finished children are reaped by PGRchild_wait() */
13637 +       /* get the least loaded cluster server info */
13638 +       cluster_p = PGRscan_cluster();
13639 +       count = 0;
13640 +       while (cluster_p == NULL ) /* rescan, without waiting in between, until count exceeds PGLB_CONNECT_RETRY_TIME */
13641 +       {
13642 +               if ( count > PGLB_CONNECT_RETRY_TIME)
13643 +               {
13644 +                       show_error("%s:no cluster available",func);
13645 +                       return STATUS_ERROR;
13646 +               }
13647 +               cluster_p = PGRscan_cluster();
13648 +               count ++;
13649 +       }
13650 +
13651 +       pgid = getpgid((pid_t)0);
13652 +       pid = fork();
13653 +       if (pid < 0)
13654 +       {
13655 +               show_error("%s:fork() failed. (%s)",func,strerror(errno));
13656 +               exit(1); /* a failed fork() ends the calling process */
13657 +       }
13658 +       if (pid == 0)
13659 +       {
13660 +               setpgid((pid_t)0,pgid); /* child: stay in the parent's process group */
13661 +               CurrentCluster = cluster_p;
13662 +
13663 +               if (pool_init_cp())
13664 +               {
13665 +                       show_error("%s:pool_init_cp failed",func);
13666 +                       exit(1);
13667 +               }
13668 +               PGRsem_lock(ClusterSemid,cluster_p->rec_no);
13669 +               if (PGRget_child_status(getpid()) == STATUS_ERROR) /* presumably STATUS_ERROR means "pid not in the child table yet"; the parent does the same check -- TODO confirm */
13670 +               {
13671 +                       PGRadd_child_tbl(cluster_p, getpid(), TBL_USE);
13672 +               }
13673 +               PGRsem_unlock(ClusterSemid,cluster_p->rec_no);
13674 +               PGRdo_child(NOT_USE_CONNECTION_POOL );
13675 +               PGRrelease_connection(cluster_p); /* give back the slot PGRscan_cluster() reserved */
13676 +               PGRset_status_to_child_tbl(getpid(), TBL_FREE);
13677 +               exit(0);
13678 +       }
13679 +       else if (pid > 0)
13680 +       {
13681 +               PGRsem_lock(ClusterSemid,cluster_p->rec_no);
13682 +               if (PGRget_child_status(pid) == STATUS_ERROR)
13683 +               {
13684 +                       PGRadd_child_tbl(cluster_p, pid, TBL_USE);
13685 +               }
13686 +               PGRsem_unlock(ClusterSemid,cluster_p->rec_no);
13687 +               status = PGRget_child_status(pid);
13688 +               while (status == TBL_USE) /* parent polls every 20 microseconds until the child's status is no longer TBL_USE */
13689 +               {
13690 +                       status = PGRget_child_status(pid);
13691 +                       usleep(20);
13692 +               }
13693 +               return STATUS_OK;
13694 +       }
13695 +       else /* unreachable: pid < 0 already exited above */
13696 +       {
13697 +               return STATUS_ERROR;
13698 +       }
13699 +}
13700 +
13701 +/*--------------------------------------------------------------------
13702 + * SYMBOL
13703 + *    PGRload_balance_with_pool()
13704 + * NOTES
13705 + *    load balance module that connection pooling system is used
13706 + * ARGS
13707 + *    void
13708 + * RETURN
13709 + *    OK: STATUS_OK
13710 + *    NG: STATUS_ERROR
13711 + *--------------------------------------------------------------------
13712 + */
13713 +int
13714 +PGRload_balance_with_pool(void)
13715 +{
13716 +       char * func = "PGRload_balance_with_pool()";
13717 +       int count;
13718 +       pid_t pid;
13719 +       ClusterTbl * cluster_p = NULL;
13720 +       int status = TBL_USE;
13721 +
13722 +       /* get the least loaded cluster server info (this reserves one connection slot on it) */
13723 +       cluster_p = PGRscan_cluster();
13724 +       count = 0;
13725 +       while (cluster_p == NULL )
13726 +       {
13727 +               if ( count > PGLB_CONNECT_RETRY_TIME)
13728 +               {
13729 +                       show_error("%s:no cluster available",func);
13730 +                       PGRreturn_no_connection_error();
13731 +                       return STATUS_ERROR;
13732 +               }
13733 +               cluster_p = PGRscan_cluster();
13734 +               count ++;
13735 +       }
13736 +       pid = PGRscan_child_tbl(cluster_p);
13737 +       if ((pid == 0) || (pid == STATUS_ERROR))
13738 +       {
13739 +               show_error("%s:no child process available",func);
13740 +               PGRrelease_connection(cluster_p); /* nobody will serve this request: give the reserved slot back */
13741 +               return STATUS_ERROR;
13742 +       }
13743 +       kill(pid,SIGUSR1); /* signal the chosen child with SIGUSR1 */
13744 +
13745 +       status = PGRget_child_status(pid);
13746 +       while (status == TBL_USE) /* poll every 20 microseconds until the child's status is no longer TBL_USE */
13747 +       {
13748 +               status = PGRget_child_status(pid);
13749 +               usleep(20);
13750 +       }
13751 +
13752 +       return STATUS_OK;
13753 +}
13754 +
13755 +char
13756 +PGRis_connection_full(ClusterTbl * ptr) /* returns 0 when ptr still has a free connection (max_connect > use_num), 1 otherwise */
13757 +{
13758 +       char rtn = 1;
13759 +
13760 +       if (ptr == NULL) /* a missing entry is reported as full */
13761 +       {
13762 +               return rtn;
13763 +       }
13764 +       PGRsem_lock(ClusterSemid,ptr->rec_no); /* NOTE(review): use_num is modified elsewhere under semaphore MAX_DB_SERVER, not rec_no; confirm this lock is the right one */
13765 +       if (ptr->max_connect > ptr->use_num)
13766 +       {
13767 +               rtn = 0;
13768 +       }
13769 +       PGRsem_unlock(ClusterSemid,ptr->rec_no);
13770 +       return rtn;
13771 +}
13772 +
13773 +void
13774 +PGRrelease_connection(ClusterTbl * ptr) /* give one connection back to ptr by decrementing use_num; NULL is ignored */
13775 +{
13776 +       if (ptr == NULL)
13777 +       {
13778 +               return;
13779 +       }
13780 +       PGRsem_lock(ClusterSemid,MAX_DB_SERVER); /* same semaphore index PGRscan_cluster() holds while incrementing use_num */
13781 +       if (ptr->use_num > 0) /* never goes below zero */
13782 +       {
13783 +               ptr->use_num --;
13784 +       }
13785 +       PGRsem_unlock(ClusterSemid,MAX_DB_SERVER);
13786 +}
13787 +
13788 +void
13789 +PGRchild_wait(int sig) /* SIGCHLD handler: reaps every child that has already exited, without blocking */
13790 +{
13791 +       int saved_errno = errno; /* waitpid() may set errno; restore it for the interrupted code */
13792 +       int ret = 0;
13793 +
13794 +       /*
13795 +        * waitpid() leaves 'ret' untouched when it returns 0 or -1, so only its return value may decide
13796 +        * when to stop; testing a stale WTERMSIG(ret) spun forever after a signal-killed child was reaped.
13797 +        */
13798 +       while (waitpid(-1,&ret,WNOHANG) > 0)
13799 +               ;
13800 +       errno = saved_errno;
13801 +}
13802 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/main.c pgcluster-1.7.0rc7/src/pgcluster/pglb/main.c
13803 --- postgresql-8.2.4/src/pgcluster/pglb/main.c  1970-01-01 01:00:00.000000000 +0100
13804 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/main.c        2007-03-01 16:27:15.000000000 +0100
13805 @@ -0,0 +1,1137 @@
13806 +/*--------------------------------------------------------------------
13807 + * FILE:
13808 + *              main.c
13809 + *
13810 + * NOTE:
13811 + *              This file is composed of the main function of pglb.
13812 + *
13813 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
13814 + *--------------------------------------------------------------------
13815 + */
13816 +/*
13817 + * Permission to use, copy, modify, and distribute this software and
13818 + * its documentation for any purpose and without fee is hereby
13819 + * granted, provided that the above copyright notice appear in all
13820 + * copies and that both that copyright notice and this permission
13821 + * notice appear in supporting documentation, and that the name of the
13822 + * author not be used in advertising or publicity pertaining to
13823 + * distribution of the software without specific, written prior
13824 + * permission. The author makes no representations about the
13825 + * suitability of this software for any purpose.       It is provided "as
13826 + * is" without express or implied warranty.
13827 + *
13828 +*/
13829 +#include "postgres.h"
13830 +#include <stdio.h>
13831 +#include <stdarg.h>
13832 +#include <string.h>
13833 +#include <stdlib.h>
13834 +#include <unistd.h>
13835 +#include <signal.h>
13836 +#include <sys/wait.h>
13837 +#include <sys/time.h>
13838 +#include <sys/types.h>
13839 +#include <sys/stat.h>
13840 +#include <sys/socket.h>
13841 +#include <sys/ipc.h>
13842 +#include <sys/shm.h>
13843 +#include <sys/sem.h>
13844 +#include <sys/msg.h>
13845 +#include <netdb.h>
13846 +#include <errno.h>
13847 +#include <fcntl.h>
13848 +#include <time.h>
13849 +#include <sys/param.h>
13850 +#include <sys/select.h>
13851 +#include <netinet/in.h>
13852 +#include <arpa/inet.h>
13853 +#include <sys/file.h>
13854 +#include <arpa/inet.h>
13855 +
13856 +#ifdef HAVE_NETINET_TCP_H
13857 +#include <netinet/tcp.h>
13858 +#endif
13859 +
13860 +#ifdef HAVE_CRYPT_H
13861 +#include <crypt.h>
13862 +#endif
13863 +
13864 +#ifdef HAVE_GETOPT_H
13865 +#include <getopt.h>
13866 +#endif
13867 +
13868 +#include "replicate_com.h"
13869 +#include "pglb.h"
13870 +
13871 +
13872 +
13873 +#define IPC_NMAXSEM (32)
13874 +/*--------------------------------------
13875 + * GLOBAL VARIABLE DECLARATION
13876 + *--------------------------------------
13877 + */
13878 +/* for replicate_com.h */
13879 +ConfDataType * ConfData_Top = (ConfDataType *)NULL;     /* head of the configuration list walked by init_pglb() */
13880 +ConfDataType * ConfData_End = (ConfDataType *)NULL;
13881 +int MapTableShmid = -1;
13882 +int LifeCheckStartShmid = -1;
13883 +char * LifeCheckStartFlag = NULL;
13884 +int LifeCheckTimeOut = 10;
13885 +FILE * StatusFp = (FILE *)NULL; /* status file, opened in append mode by init_pglb() */
13886 +char * PGRStatusFileName = NULL;        /* when set, used by init_pglb() instead of PGR_Write_Path/PGLB_STATUS_FILE */
13887 +char * PGRLogFileName = NULL;
13888 +char * PGRuserName = NULL;
13889 +int Log_Print = 0;
13890 +int Debug_Print = 0;
13891 +
13892 +char * ResolvedName = NULL;     /* dotted-quad text built by init_pglb() from the HOST_NAME_TAG entry */
13893 +int Recv_Port_Number = 0;       /* from RECV_PORT_TAG */
13894 +int Recovery_Port_Number = 0;   /* from RECOVERY_PORT_TAG */
13895 +uint16_t LifeCheck_Port_Number = 0;
13896 +int Use_Connection_Pool = 0;    /* 1 when USE_CONNECTION_POOL_TAG is "yes" */
13897 +int Max_Pool = 1;       /* from MAX_POOL_TAG; negative values fall back to 1 */
13898 +int Connection_Life_Time = 0;   /* from CONNECTION_LIFE_TIME; negative values become 0 */
13899 +int Max_DB_Server = 0;  /* from MAX_CLUSTER_TAG; init_pglb() exits when it is <= 0 */
13900 +int MaxBackends = 0;
13901 +ClusterTbl * Cluster_Tbl = (ClusterTbl *)NULL;  /* shared memory segment attached in init_pglb() */
13902 +int ClusterNum = 0;
13903 +int ClusterShmid = 0;   /* shmget() id backing Cluster_Tbl */
13904 +int ClusterSemid = 0;   /* semget() id; the set holds MAX_DB_SERVER+1 semaphores */
13905 +ChildTbl * Child_Tbl = (ChildTbl *)NULL;        /* shared memory segment attached in init_pglb() */
13906 +int ChildShmid = 0;     /* shmget() id backing Child_Tbl */
13907 +char * PGR_Data_Path = NULL;
13908 +char * PGR_Write_Path = NULL;   /* directory of the pid file and the default status file */
13909 +char * Backend_Socket_Dir = NULL;       /* 128-byte buffer allocated in init_pglb(), default "/tmp" */
13910 +FrontSocket Frontend_FD;        /* listening sockets (unix_fd, inet_fd) created in load_balance_main() */
13911 +ClusterTbl * CurrentCluster = NULL;
13912 +int PGR_Lifecheck_Timeout = 3;  /* from LIFECHECK_TIMEOUT_TAG; init_pglb() accepts 1..3600 */
13913 +int PGR_Lifecheck_Interval = 15;        /* from LIFECHECK_INTERVAL_TAG; init_pglb() accepts 1..3600 */
13914 +
13915 +int fork_wait_time = 0;
13916 +
13917 +extern char *optarg;
13918 +
13919 +/*--------------------------------------
13920 + * PROTOTYPE DECLARATION
13921 + *--------------------------------------
13922 + */
13923 +static int init_pglb(char * path);
13924 +static void pglb_exit(int signal_args);
13925 +static void load_balance_main(void);
13926 +static void daemonize(void);
13927 +static void write_pid_file(void);
13928 +static void stop_pglb(void);
13929 +static int is_exist_pid_file(void);
13930 +static ClusterTbl * scan_cluster_by_pid(pid_t pid);
13931 +static void usage(void);
13932 +static void close_child(int signal_args);
13933 +
13934 +void PGRrecreate_child(int signal_args);
13935 +void PGRexit_subprocess(int sig);
13936 +
13937 +/*--------------------------------------------------------------------
13938 + * SYMBOL
13939 + *             init_pglb()
13940 + * NOTES
13941 + *             Reading of the setup file
13942 + *             and the initialization of the memory area.
13943 + * ARGS
13944 + *             char * path: path of the setup file (I)
13945 + * RETURN
13946 + *             OK: STATUS_OK
13947 + *             NG: STATUS_ERROR
13948 + *--------------------------------------------------------------------
13949 + */
13950 +static int
13951 +init_pglb(char * path)
13952 +{
13953 +       char * func = "init_pglb()";
13954 +
13955 +       ConfDataType * conf;
13956 +       ClusterTbl cluster_tbl[MAX_DB_SERVER];
13957 +       int size = 0;
13958 +       int rec_no = 0;
13959 +       int i;
13960 +       int max_connect = 0;
13961 +       union semun sem_arg;
13962 +       char fname[256];
13963 +
13964 +       /*
13965 +        * read configuration file
13966 +        */
13967 +       if (path == NULL)
13968 +       {
13969 +               path = ".";
13970 +       }
13971 +       if (PGR_Get_Conf_Data(path,PGLB_CONF_FILE) != STATUS_OK)
13972 +       {
13973 +               show_error("%s:PGR_Get_Conf_Data failed",func);
13974 +               return STATUS_ERROR;
13975 +       }
13976 +       
13977 +       size = sizeof(LogFileInf);
13978 +       LogFileData = (LogFileInf *) malloc(size);
13979 +       if (LogFileData == NULL)
13980 +       {
13981 +               show_error("%s:malloc() failed. reason: %s", func,strerror(errno));
13982 +               return STATUS_ERROR;
13983 +       }
13984 +       memset(LogFileData,0,size);
13985 +
13986 +       /* cluster db status file open */
13987 +       if (PGRStatusFileName == NULL)
13988 +       {
13989 +               snprintf(fname,sizeof(fname),"%s/%s",PGR_Write_Path,PGLB_STATUS_FILE);
13990 +       }
13991 +       else
13992 +       {
13993 +               memcpy(fname,PGRStatusFileName,sizeof(fname));
13994 +       }
13995 +       StatusFp = fopen(fname, "a");
13996 +       if (StatusFp == NULL)
13997 +       {
13998 +               show_error("%s:open() %s file failed. (%s)",
13999 +                                        func,fname, strerror(errno));
14000 +               exit(1);
14001 +       }
14002 +
14003 +       Backend_Socket_Dir = malloc(128);
14004 +       if (Backend_Socket_Dir == NULL)
14005 +       {
14006 +               show_error("%s:malloc() failed. (%s)",func,strerror(errno));
14007 +               return STATUS_ERROR;
14008 +       }
14009 +       memset(Backend_Socket_Dir,0,128);
14010 +       /* set initiarize data */
14011 +       strcpy(Backend_Socket_Dir,"/tmp");
14012 +       Max_Pool = 1;
14013 +       Connection_Life_Time = 0;
14014 +       Use_Connection_Pool = 0;
14015 +
14016 +       conf = ConfData_Top;
14017 +       while (conf != (ConfDataType *)NULL) 
14018 +       {
14019 +               /* get cluster db servers name */
14020 +               if (!strcmp(conf->table,CLUSTER_SERVER_TAG))
14021 +               {
14022 +                       rec_no = conf->rec_no;
14023 +                       if (!strcmp(conf->key,HOST_NAME_TAG))
14024 +                       {
14025 +                               memcpy(cluster_tbl[rec_no].hostName,conf->value,sizeof(cluster_tbl[rec_no].hostName));
14026 +                               conf = (ConfDataType*)conf->next;
14027 +                               continue;
14028 +                       }
14029 +                       if (!strcmp(conf->key,PORT_TAG))
14030 +                       {
14031 +                               cluster_tbl[rec_no].port = atoi(conf->value);
14032 +                               conf = (ConfDataType*)conf->next;
14033 +                               continue;
14034 +                       }
14035 +                       if (!strcmp(conf->key,MAX_CONNECT_TAG))
14036 +                       {
14037 +                               cluster_tbl[rec_no].max_connect = atoi(conf->value);
14038 +                               conf = (ConfDataType*)conf->next;
14039 +                               continue;
14040 +                       }
14041 +               }
14042 +               /* get logging file data */
14043 +               else if (!strcmp(conf->table, LOG_INFO_TAG))
14044 +               {
14045 +                       if (!strcmp(conf->key, FILE_NAME_TAG))
14046 +                       {
14047 +                               strncpy(LogFileData->file_name, conf->value ,sizeof(LogFileData->file_name));
14048 +                               LogFileData->fp = NULL;
14049 +                               conf = (ConfDataType*)conf->next;
14050 +                               continue;
14051 +                       }
14052 +                       if (!strcmp(conf->key, FILE_SIZE_TAG))
14053 +                       {
14054 +                               int i,len;
14055 +                               char * ptr;
14056 +                               int unit = 1;
14057 +                               len = strlen(conf->value);
14058 +                               ptr = conf->value;
14059 +                               for (i = 0; i < len ; i ++,ptr++)
14060 +                               {
14061 +                                       if ((! isdigit(*ptr)) && (! isspace(*ptr)))
14062 +                                       {
14063 +                                               switch (*ptr)
14064 +                                               {
14065 +                                                       case 'K':
14066 +                                                       case 'k':
14067 +                                                               unit = 1024;
14068 +                                                               break;
14069 +                                                       case 'M':
14070 +                                                       case 'm':
14071 +                                                               unit = 1024*1024;
14072 +                                                               break;
14073 +                                                       case 'G':
14074 +                                                       case 'g':
14075 +                                                               unit = 1024*1024*1024;
14076 +                                                               break;
14077 +                                               }
14078 +                                               *ptr = '\0';
14079 +                                               break;
14080 +                                       }
14081 +                               }
14082 +                               LogFileData->max_size = atoi(conf->value) * unit;
14083 +                               conf = (ConfDataType*)conf->next;
14084 +                               continue;
14085 +                       }
14086 +                       if (!strcmp(conf->key, LOG_ROTATION_TAG))
14087 +                       {
14088 +                               LogFileData->rotation = atoi(conf->value);
14089 +                               conf = (ConfDataType*)conf->next;
14090 +                               continue;
14091 +                       }
14092 +               }
14093 +               else
14094 +               {
14095 +                       if (!strcmp(conf->key,HOST_NAME_TAG))
14096 +                       {
14097 +                                       int ip;
14098 +                               ip=PGRget_ip_by_name(conf->value);
14099 +                               if (ResolvedName == NULL)
14100 +                               {
14101 +                                       ResolvedName = malloc(ADDRESS_LENGTH);
14102 +                               }
14103 +                               if (ResolvedName == NULL)
14104 +                               {
14105 +                                       continue;
14106 +                               }
14107 +                               else
14108 +                               {
14109 +                                       memset(ResolvedName,0,ADDRESS_LENGTH);
14110 +                               }
14111 +
14112 +                               sprintf(ResolvedName,
14113 +                                        "%d.%d.%d.%d",
14114 +                                        (ip                    ) & 0xff ,
14115 +                                        (ip >> 8) & 0xff ,
14116 +                                        (ip >> 16) & 0xff ,
14117 +                                        (ip >> 24) & 0xff );
14118 +                               conf = (ConfDataType*)conf->next;
14119 +                               continue;
14120 +                       }
14121 +                       /* get port number for receive querys */
14122 +                       else if (!strcmp(conf->key,RECV_PORT_TAG))
14123 +                       {
14124 +                               Recv_Port_Number = atoi(conf->value);
14125 +                               conf = (ConfDataType*)conf->next;
14126 +                               continue;
14127 +                       }
14128 +                       /* get port number for recovery session */
14129 +                       else if (!strcmp(conf->key,RECOVERY_PORT_TAG))
14130 +                       {
14131 +                               Recovery_Port_Number = atoi(conf->value);
14132 +                               conf = (ConfDataType*)conf->next;
14133 +                               continue;
14134 +                       }
14135 +                       else if (!strcmp(conf->key,MAX_CLUSTER_TAG))
14136 +                       {
14137 +                               Max_DB_Server = atoi(conf->value);
14138 +                               conf = (ConfDataType*)conf->next;
14139 +                               continue;
14140 +                       }
14141 +                       else if (!strcmp(conf->key,USE_CONNECTION_POOL_TAG))
14142 +                       {
14143 +                               if (!strcmp(conf->value,"yes"))
14144 +                               {
14145 +                                       Use_Connection_Pool = 1;
14146 +                               }
14147 +                               conf = (ConfDataType*)conf->next;
14148 +                               continue;
14149 +                       }
14150 +                       else if (!strcmp(conf->key,MAX_POOL_TAG))
14151 +                       {
14152 +                               Max_Pool = atoi(conf->value);
14153 +                               if (Max_Pool < 0)
14154 +                                       Max_Pool = 1;
14155 +                               conf = (ConfDataType*)conf->next;
14156 +                               continue;
14157 +                       }
14158 +                       else if (!strcmp(conf->key,CONNECTION_LIFE_TIME))
14159 +                       {
14160 +                               Connection_Life_Time = atoi(conf->value);
14161 +                               if (Connection_Life_Time < 0)
14162 +                                       Connection_Life_Time = 0;
14163 +                               conf = (ConfDataType*)conf->next;
14164 +                               continue;
14165 +                       }
14166 +                       else if (!strcmp(conf->key,BACKEND_SOCKET_DIR_TAG))
14167 +                       {
14168 +                               strncpy(Backend_Socket_Dir,conf->value,128);
14169 +                               conf = (ConfDataType*)conf->next;
14170 +                               continue;
14171 +                       }
14172 +                       else if (!STRCMP(conf->key,LIFECHECK_TIMEOUT_TAG))
14173 +                       {
14174 +                               /* get lifecheck timeout */
14175 +                               PGR_Lifecheck_Timeout = PGRget_time_value(conf->value);
14176 +                               if ((PGR_Lifecheck_Timeout < 1) || (PGR_Lifecheck_Timeout > 3600))
14177 +                               {
14178 +                                       show_error("%s is out of range. It should be between 1sec-1hr.\n",LIFECHECK_TIMEOUT_TAG);
14179 +                                       return STATUS_ERROR;
14180 +                               }
14181 +                               conf = (ConfDataType*)conf->next;
14182 +                               continue;
14183 +                       }
14184 +                       else if (!STRCMP(conf->key,LIFECHECK_INTERVAL_TAG))
14185 +                       {
14186 +                               /* get lifecheck interval */
14187 +                               PGR_Lifecheck_Interval = PGRget_time_value(conf->value);
14188 +                               if ((PGR_Lifecheck_Interval < 1) || (PGR_Lifecheck_Interval > 3600))
14189 +                               {
14190 +                                       show_error("%s is out of range. It should between 1sec-1hr.\n",LIFECHECK_INTERVAL_TAG);
14191 +                                       return STATUS_ERROR;
14192 +                               }
14193 +                               conf = (ConfDataType*)conf->next;
14194 +                               continue;
14195 +                       }
14196 +               }
14197 +               conf = (ConfDataType*)conf->next;
14198 +       }
14199 +       if (Max_DB_Server <= 0)
14200 +       {
14201 +               show_error("%s:Max_DB_Server is wrong value. %s/%s file should be broken",func, path, PGLB_CONF_FILE);
14202 +               exit(1);
14203 +       }
14204 +       /* shared memory allocation for cluster table */
14205 +       size = sizeof(ClusterTbl) * Max_DB_Server;
14206 +
14207 +       ClusterShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
14208 +       if (ClusterShmid < 0)
14209 +       {
14210 +               show_error("%s:ClusterShm shmget() failed. (%s)", func,strerror(errno));
14211 +               return STATUS_ERROR;
14212 +       }
14213 +       Cluster_Tbl = (ClusterTbl *)shmat(ClusterShmid,0,0);
14214 +       if (Cluster_Tbl == (ClusterTbl *)-1)
14215 +       {
14216 +               show_error("%s:shmat() failed. (%s)", func,strerror(errno));
14217 +               return STATUS_ERROR;
14218 +       }
14219 +       memset(Cluster_Tbl,0,size);
14220 +
14221 +       if ((ClusterSemid = semget(IPC_PRIVATE,MAX_DB_SERVER+1,IPC_CREAT | IPC_EXCL | 0600)) < 0)
14222 +       {
14223 +               show_error("%s:semget() failed. (%s)",func,strerror(errno));
14224 +               return STATUS_ERROR;
14225 +       }
14226 +       for ( i = 0 ; i <= MAX_DB_SERVER ; i ++)
14227 +       {
14228 +               semctl(ClusterSemid, i, GETVAL, sem_arg);
14229 +               sem_arg.val = 1;
14230 +               semctl(ClusterSemid, i, SETVAL, sem_arg);
14231 +       }
14232 +       ClusterNum = 0;
14233 +       /* set cluster db server name into cluster db server table */
14234 +       for ( i = 0 ; i < Max_DB_Server ; i ++)
14235 +       {
14236 +               (Cluster_Tbl + i)->rec_no = i;
14237 +       }
14238 +       (Cluster_Tbl + i)->useFlag = TBL_END;
14239 +       max_connect = 0;
14240 +       for ( i = 0 ; i <= rec_no ; i ++)
14241 +       {
14242 +               cluster_tbl[i].use_num = 0;
14243 +               cluster_tbl[i].rate = 0;
14244 +               if (cluster_tbl[i].max_connect < 0)
14245 +               {
14246 +                       cluster_tbl[i].max_connect = 0;
14247 +               }
14248 +               if (max_connect < cluster_tbl[i].max_connect)
14249 +               {
14250 +                       max_connect = cluster_tbl[i].max_connect;
14251 +               }
14252 +               PGRadd_cluster_tbl(&cluster_tbl[i]);
14253 +       }
14254 +
14255 +       /* shared memory allocation for children table */
14256 +       size = sizeof(ChildTbl) * (Max_DB_Server + 1) * max_connect * Max_Pool;
14257 +#ifdef PRINT_DEBUG
14258 +       show_debug("%s:Child_Tbl size is[%d]",func,size);
14259 +#endif                 
14260 +
14261 +       ChildShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
14262 +       if (ChildShmid < 0)
14263 +       {
14264 +               show_error("%s:ChildShm shmget() failed. (%s)",func, strerror(errno));
14265 +               return STATUS_ERROR;
14266 +       }
14267 +       Child_Tbl = (ChildTbl *)shmat(ChildShmid,0,0);
14268 +       if (Child_Tbl == (ChildTbl *)-1)
14269 +       {
14270 +               show_error("%s:shmat() failed. (%s)", func,strerror(errno));
14271 +               return STATUS_ERROR;
14272 +       }
14273 +       memset(Child_Tbl, 0, size);
14274 +       (Child_Tbl + ( Max_DB_Server * max_connect * Max_Pool) -1)->useFlag = TBL_END;
14275 +
14276 +       PGR_Free_Conf_Data();
14277 +
14278 +       return STATUS_OK;
14279 +}
14280 +
14281 +/*--------------------------------------------------------------------
14282 + * SYMBOL
14283 + *             pglb_exit()
14284 + * NOTES
14285 + *             Closing of pglb process
14286 + * ARGS
14287 + *             int signal_args: signal number (I)
14288 + * RETURN
14289 + *             none
14290 + *--------------------------------------------------------------------
14291 + */
14292 +static void
14293 +pglb_exit(int signal_args)
14294 +{
14295 +       char fname[256];
14296 +
14297 +       if ((Child_Tbl != NULL) && (Child_Tbl != (ChildTbl *)-1))       /* only once the table is attached */
14298 +               Child_Tbl->useFlag = TBL_END;
14299 +       PGRsignal(SIGCHLD,SIG_IGN);
14300 +       PGRsignal(signal_args,SIG_IGN);
14301 +       kill (0,signal_args);
14302 +       while (wait(NULL) > 0 )
14303 +               ;
14304 +
14305 +       if (ClusterShmid > 0)
14306 +       {
14307 +               (void) shmdt((char *)Cluster_Tbl);
14308 +               shmctl(ClusterShmid,IPC_RMID,(struct shmid_ds *)NULL);
14309 +               ClusterShmid = 0;
14310 +               Cluster_Tbl = NULL;
14311 +       }
14312 +       if (ChildShmid > 0)
14313 +       {
14314 +               (void) shmdt((char *)Child_Tbl);
14315 +               shmctl(ChildShmid,IPC_RMID,(struct shmid_ds *)NULL);
14316 +               ChildShmid = 0;
14317 +               Child_Tbl = NULL;
14318 +       }
14319 +       if (ClusterSemid > 0)
14320 +       {
14321 +               semctl(ClusterSemid, 0, IPC_RMID);
14322 +               ClusterSemid = 0;
14323 +       }
14324 +
14325 +       if (StatusFp != NULL)
14326 +       {
14327 +               fflush(StatusFp);
14328 +               fclose(StatusFp);
14329 +       }
14330 +       if (Frontend_FD.unix_fd > 0)    /* a negative fd means creation failed: nothing of ours to close or unlink */
14331 +       {
14332 +               close(Frontend_FD.unix_fd);
14333 +               Frontend_FD.unix_fd = 0;
14334 +               snprintf(fname, sizeof(fname), "%s/.s.PGSQL.%d", Backend_Socket_Dir,Recv_Port_Number);
14335 +               unlink(fname);
14336 +       }
14337 +       if (Frontend_FD.inet_fd > 0)    /* skip close() on a failed (-1) descriptor */
14338 +       {
14339 +               close(Frontend_FD.inet_fd);
14340 +               Frontend_FD.inet_fd = 0;
14341 +       }
14342 +       /*
14343 +       PGRsyn_quit();
14344 +       */
14345 +       snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGLB_PID_FILE);
14346 +       unlink(fname);
14347 +
14348 +       if (ResolvedName != NULL)
14349 +       {
14350 +               free(ResolvedName);
14351 +               ResolvedName = NULL;
14352 +       }
14353 +       exit(0);
14354 +}
14355 +
14356 +/*--------------------------------------------------------------------
14357 + * SYMBOL
14358 + *             load_balance_main()
14359 + * NOTES
14360 + *             This is a main module of load balance function
14361 + * ARGS
14362 + *             void
14363 + * RETURN
14364 + *             none
14365 + *--------------------------------------------------------------------
14366 + */
14367 +static void
14368 +load_balance_main(void)
14369 +{
14370 +       char * func = "load_balance_main()";
14371 +       int failures = 0;
14372 +       int balanced;
14373 +       int rtn;
14374 +
14375 +       /* listening sockets: unix domain first, then inet; either failure ends pglb */
14376 +       Frontend_FD.unix_fd = PGRcreate_unix_domain_socket(Backend_Socket_Dir, Recv_Port_Number);
14377 +       if (Frontend_FD.unix_fd < 0)
14378 +       {
14379 +               show_error("%s:PGRcreate_unix_domain_socket failed",func);
14380 +               pglb_exit(SIGTERM);
14381 +       }
14382 +       Frontend_FD.inet_fd = PGRcreate_recv_socket(ResolvedName, Recv_Port_Number);
14383 +       if (Frontend_FD.inet_fd < 0)
14384 +       {
14385 +               show_error("%s:PGRcreate_recv_socket failed",func);
14386 +               pglb_exit(SIGTERM);
14387 +       }
14388 +
14389 +       /* pooling: install the SIGCHLD handler, then fork the children up front */
14390 +       if (Use_Connection_Pool)
14391 +       {
14392 +               PGRsignal(SIGCHLD,PGRrecreate_child);
14393 +               if (PGRpre_fork_children(Cluster_Tbl) != STATUS_OK)
14394 +               {
14395 +                       show_error("%s:PGRpre_fork_children failed",func);
14396 +                       pglb_exit(SIGTERM);
14397 +               }
14398 +       }
14399 +
14400 +       for (;;)
14401 +       {
14402 +               struct timeval timeout;
14403 +               fd_set rmask;
14404 +
14405 +               /* select() gives up after 60 seconds without activity */
14406 +               timeout.tv_sec = 60;
14407 +               timeout.tv_usec = 0;
14408 +
14409 +               /* always watch the unix socket; the inet socket only when it is set */
14410 +               FD_ZERO(&rmask);
14411 +               FD_SET(Frontend_FD.unix_fd,&rmask);
14412 +               if (Frontend_FD.inet_fd != 0)
14413 +                       FD_SET(Frontend_FD.inet_fd,&rmask);
14414 +               rtn = select(Max(Frontend_FD.unix_fd, Frontend_FD.inet_fd) + 1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
14415 +               if (rtn <= 0)
14416 +                       continue;               /* timeout or error: simply wait again */
14417 +
14418 +               /* nothing can be served while every cluster is down */
14419 +               if (PGRis_cluster_alive() == STATUS_ERROR)
14420 +               {
14421 +                       show_error("%s:all clusters were dead.",func);
14422 +                       PGRreturn_no_connection_error();
14423 +                       failures = 0;
14424 +                       continue;
14425 +               }
14426 +
14427 +               /* dispatch through the pooled or the plain balancer */
14428 +               if (Use_Connection_Pool)
14429 +                       balanced = PGRload_balance_with_pool();
14430 +               else
14431 +                       balanced = PGRload_balance();
14432 +               if (balanced == STATUS_OK)
14433 +               {
14434 +                       failures = 0;
14435 +                       continue;
14436 +               }
14437 +
14438 +               /* failed: report "connection full" once the retry budget is exceeded */
14439 +               show_error("%s:load balance process failed",func);
14440 +               if (failures > PGLB_CONNECT_RETRY_TIME)
14441 +               {
14442 +                       show_error("%s:no cluster available",func);
14443 +                       PGRreturn_connection_full_error();
14444 +                       failures = 0;
14445 +               }
14446 +               /* counted after the check, so a reset leaves failures at 1 */
14447 +               failures ++;
14448 +       }
14449 +}
14450 +
14451 +/*--------------------------------------------------------------------
14452 + * SYMBOL
14453 + *             daemonize()
14454 + * NOTES
14455 + *             Daemonize this process
14456 + * ARGS
14457 + *             void
14458 + * RETURN
14459 + *             none
14460 + *--------------------------------------------------------------------
14461 + */
14462 +static void 
14463 +daemonize(void)
14464 +{
14465 +       char *  func = "daemonize()";
14466 +       int             i;
14467 +       pid_t           pid;
14468 +
14469 +       pid = fork();
14470 +       if (pid == (pid_t) -1)
14471 +       {
14472 +               show_error("%s:fork() failed. (%s)",func, strerror(errno));
14473 +               exit(1);
14474 +               return;                                 /* not reached */
14475 +       }
14476 +       else if (pid > 0)
14477 +       {                       /* parent */
14478 +               exit(0);
14479 +       }
14480 +
14481 +#ifdef HAVE_SETSID
14482 +       if (setsid() < 0)
14483 +       {
14484 +               show_error("%s:setsid() failed. (%s)", func,strerror(errno));
14485 +               exit(1);
14486 +       }
14487 +#endif
14488 +
14489 +       i = open("/dev/null", O_RDWR);
14490 +       dup2(i, 0);
14491 +       dup2(i, 1);
14492 +       dup2(i, 2);
14493 +       close(i);
14494 +}
14495 +
14496 +
14497 +/*--------------------------------------------------------------------
14498 + * SYMBOL
14499 + *             write_pid_file()
14500 + * NOTES
14501 + *             The process ID is written in the file.
14502 + *             This process ID is used when stopping pglb.
14503 + * ARGS
14504 + *             void
14505 + * RETURN
14506 + *             none
14507 + *--------------------------------------------------------------------
14508 + */
14509 +static void
14510 +write_pid_file(void)
14511 +{
14512 +       char * func = "write_pid_file()";
14513 +       char pid_path[256];
14514 +       FILE * fp;
14515 +
14516 +       /* the pid file is PGLB_PID_FILE directly under PGR_Write_Path */
14517 +       snprintf(pid_path, sizeof(pid_path), "%s/%s", PGR_Write_Path, PGLB_PID_FILE);
14518 +       fp = fopen(pid_path, "w");
14519 +       if (fp == NULL)
14520 +       {
14521 +               show_error("%s:open() %s file failed. (%s)",func,pid_path, strerror(errno));
14522 +               exit(1);
14523 +       }
14524 +
14525 +       /*
14526 +        * Decimal pid without a trailing newline.  Only the fclose() result
14527 +        * is checked, and a failure there is reported as a write failure.
14528 +        */
14529 +       fprintf(fp, "%d", getpid());
14530 +       if (fclose(fp) != 0)
14531 +       {
14532 +               show_error("%s:fwrite() %s file failed. (%s)",func,pid_path, strerror(errno));
14533 +               exit(1);
14534 +       }
14535 +}
14534 +
14535 +
14536 +/*--------------------------------------------------------------------
14537 + * SYMBOL
14538 + *             stop_pglb()
14539 + * NOTES
14540 + *             Stop the pglb process
14541 + * ARGS
14542 + *             void
14543 + * RETURN
14544 + *             none
14545 + *--------------------------------------------------------------------
14546 + */
14547 +static void
14548 +stop_pglb(void)
14549 +{
14550 +       char * func = "stop_pglb()";
14551 +       FILE *fd;
14552 +       char fname[256];
14553 +       char pidbuf[128];
14554 +       pid_t pid;
14555 +
14556 +       if (PGR_Write_Path == NULL)
14557 +               PGR_Write_Path = ".";
14558 +       snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGLB_PID_FILE);
14559 +       fd = fopen(fname, "r");
14560 +       if (!fd)
14561 +       {
14562 +               show_error("%s:open() %s file failed. (%s)",func,fname, strerror(errno));
14563 +               exit(1);
14564 +       }
14565 +       memset(pidbuf,0,sizeof(pidbuf));
14566 +       (void) fread(pidbuf, 1, sizeof(pidbuf) - 1, fd);        /* last byte stays NUL */
14567 +       fclose(fd);
14568 +       pid = atoi(pidbuf);
14569 +       /* kill(0,...) or kill(-n,...) would signal whole process groups: refuse */
14570 +       if (pid <= 0)
14571 +               errno = EINVAL;
14572 +       if ((pid <= 0) || (kill (pid,SIGTERM) == -1))
14573 +       {
14574 +               show_error("%s:could not stop pid: %d (%s)",func,pid,strerror(errno));
14575 +               exit(1);
14576 +       }
14577 +}
14578 +
14579 +
14580 +/*--------------------------------------------------------------------
14581 + * SYMBOL
14582 + *             is_exist_pid_file()
14583 + * NOTES
14584 + *             Check existence of pid file.
14585 + * ARGS
14586 + *             void
14587 + * RETURN
14588 + *             1: the pid file exists
14589 + *             0: the pid file does not exist
14590 + *--------------------------------------------------------------------
14591 + */
14592 +static int
14593 +is_exist_pid_file(void)
14594 +{
14595 +       struct stat st;
14596 +       char pid_path[256];
14597 +       int found;
14598 +
14599 +       snprintf(pid_path, sizeof(pid_path), "%s/%s", PGR_Write_Path, PGLB_PID_FILE);
14600 +
14601 +       /*
14602 +        * The file counts as existing only when stat() succeeds on it;
14603 +        * every stat() failure is reported as "not exist".
14604 +        */
14605 +       found = (stat(pid_path,&st) == 0);
14606 +
14607 +       /* 1: the pid file exists, 0: it does not */
14608 +       return found ? 1 : 0;
14609 +}
14610 +
14611 +
14612 +/*--------------------------------------------------------------------
14613 + * SYMBOL
14614 + *             PGRrecreate_child()
14615 + * NOTES
14616 + *             reap every exited child process and create a new child for its cluster record
14617 + * ARGS
14618 + *             int signal_args: signal number (expecting the SIGCHLD); not read in the body
14619 + * RETURN
14620 + *             none
14621 + *--------------------------------------------------------------------
14622 + */
14623 +void
14624 +PGRrecreate_child(int signal_args)
14625 +{
14626 +       pid_t pid = 0;
14627 +       int status;
14628 +       ClusterTbl * cluster_p;
14629 +
14630 +ReWait:
14631 +
14632 +       errno = 0;      /* cleared before each wait pass so the EINTR test below is not stale */
14633 +#ifdef HAVE_WAITPID
14634 +       while ((pid = waitpid(-1, &status, WNOHANG)) > 0)       /* WNOHANG: never block here */
14635 +       {
14636 +#else
14637 +       while ((pid = wait3(&status, WNOHANG, NULL)) > 0)       /* same loop for systems without waitpid() */
14638 +       {
14639 +#endif
14640 +               cluster_p = scan_cluster_by_pid(pid);   /* NULL when the pid is not in the child table */
14641 +               pid = PGRcreate_child(cluster_p);       /* NOTE(review): cluster_p may be NULL here -- confirm PGRcreate_child() copes */
14642 +       }
14643 +       if ((pid < 0) && (errno == EINTR))      /* wait call was interrupted: start over */
14644 +               goto ReWait;
14645 +}
14646 +
14647 +/*--------------------------------------------------------------------
14648 + * SYMBOL
14649 + *             close_child()
14650 + * NOTES
14651 + *             send SIGTERM to the children of the first cluster flagged TBL_ERROR_NOTICE
14652 + * ARGS
14653 + *             int signal_args: signal number (expecting the SIGUSR2); passed on to PGRchild_wait()
14654 + * RETURN
14655 + *             none
14656 + *--------------------------------------------------------------------
14657 + */
14658 +static void
14659 +close_child(int signal_args)
14660 +{
14661 +       char * func = "close_child()";
14662 +       ChildTbl * child;
14663 +       ClusterTbl * cluster;
14664 +       int rec_no = -1;        /* stays negative while no flagged cluster has been found */
14665 +
14666 +       if (( Cluster_Tbl == NULL) || (Child_Tbl == NULL))
14667 +       {
14668 +               show_error("%s:Cluster_Tbl or Child_Tbl is not initialize",func);
14669 +               return ;
14670 +       }
14671 +       cluster = Cluster_Tbl;
14672 +       while(cluster->useFlag != TBL_END)      /* TBL_END terminates the table */
14673 +       {
14674 +               if (cluster->useFlag == TBL_ERROR_NOTICE) 
14675 +               {
14676 +                       rec_no = cluster->rec_no;
14677 +                       PGRset_status_on_cluster_tbl(TBL_ERROR,cluster);
14678 +                       break;  /* only the first flagged cluster is handled per call */
14679 +               }
14680 +               cluster++;
14681 +       }
14682 +       if (rec_no < 0)
14683 +       {
14684 +               return; /* nothing flagged; NOTE(review): returns without the PGRsignal() call below */
14685 +       }
14686 +       child = Child_Tbl;
14687 +       while(child->useFlag != TBL_END)
14688 +       {
14689 +               if (child->rec_no == rec_no)    /* child belongs to the flagged cluster */
14690 +               {
14691 +                       if (kill (child->pid,SIGTERM) == -1)
14692 +                       {
14693 +                               show_error("%s:could not stop pid: %d (%s)",func,child->pid,strerror(errno));
14694 +                               return; /* gives up on the remaining children as well */
14695 +                       }
14696 +                       PGRchild_wait(signal_args);
14697 +                       child->useFlag = DATA_FREE;
14698 +               }
14699 +               child++;
14700 +       }
14701 +       PGRsignal(SIGUSR2, close_child);        /* register this handler for SIGUSR2 again */
14702 +}
14703 +
14704 +/*--------------------------------------------------------------------
14705 + * SYMBOL
14706 + *             scan_cluster_by_pid()
14707 + * NOTES
14708 + *             look up the cluster record that a child process belongs to
14709 + * ARGS
14710 + *             pid_t pid: child process id (I)
14711 + * RETURN
14712 + *             OK: pointer to the matching cluster table record
14713 + *             NG: NULL (table missing, unknown pid, or no matching record)
14714 + *--------------------------------------------------------------------
14715 + */
14716 +static ClusterTbl *
14717 +scan_cluster_by_pid(pid_t pid)
14718 +{
14719 +       char * func = "scan_cluster_by_pid()";
14720 +       ChildTbl * child;
14721 +       ClusterTbl * cluster;
14722 +       int idx;
14723 +
14724 +       /* both tables must be present before any lookup can work */
14725 +       if (Child_Tbl == NULL)
14726 +       {
14727 +               show_error("%s:Child Table is not initialize",func);
14728 +               return NULL;
14729 +       }
14730 +       if (Cluster_Tbl == NULL)
14731 +       {
14732 +               show_error("%s:Cluster Table is not initialize",func);
14733 +               return NULL;
14734 +       }
14735 +
14736 +       /* step 1: find the child record owning this pid (TBL_END ends the table) */
14737 +       for (child = Child_Tbl; child->useFlag != TBL_END; child++)
14738 +       {
14739 +               if (child->pid == pid)
14740 +               {
14741 +                       break;
14742 +               }
14743 +       }
14744 +       if (child->useFlag == TBL_END)
14745 +       {
14746 +               show_error("%s:pid:%d not found in child table",func,pid);
14747 +               return NULL;
14748 +       }
14749 +
14750 +       /*
14751 +        * step 2: match the child's rec_no against at most ClusterNum cluster records
14752 +        */
14753 +       cluster = Cluster_Tbl;
14754 +       for (idx = 0; (cluster->useFlag != TBL_END) && (idx < ClusterNum); idx++, cluster++)
14755 +       {
14756 +               if (cluster->rec_no == child->rec_no)
14757 +               {
14758 +                       return cluster;
14759 +               }
14760 +       }
14761 +       return NULL;
14762 +}
14763 +
14764 +/*--------------------------------------------------------------------
14765 + * SYMBOL
14766 + *             usage()
14767 + * NOTES
14768 + *             print the pglb version and command line help to stderr
14769 + * ARGS
14770 + *             void
14771 + * RETURN
14772 + *             none
14773 + *--------------------------------------------------------------------
14774 + */
14775 +static void
14776 +usage(void)
14777 +{
14778 +       char * path = getenv("PGDATA"); /* directory shown as the default config location */
14779 +
14780 +       if (path == NULL)
14781 +               path = ".";
14782 +       fprintf(stderr,"pglb version [%s]\n",PGLB_VERSION);
14783 +       fprintf(stderr,"A load balancer for PostgreSQL\n\n");
14784 +       /* the synopsis lists every option main() hands to getopt(): "U:D:W:w:lvnh" */
14785 +       fprintf(stderr,"usage: pglb [-D path_of_config_file] [-W path_of_work_files] [-U user_name] [-w fork_wait_time] [-l][-n][-v][-h][stop | restart]\n");
14786 +       fprintf(stderr,"                config file default path: %s/%s\n",path, PGLB_CONF_FILE);
14787 +       fprintf(stderr,"                -l: print error logs in the log file.\n");
14788 +       fprintf(stderr,"                -n: don't run in daemon mode.\n");
14789 +       fprintf(stderr,"                -v: debug mode. need '-n' flag\n");
14790 +       fprintf(stderr,"                -h: print this help\n");
14791 +       fprintf(stderr,"                stop: stop pglb\n");
14792 +       fprintf(stderr,"                restart: restart pglb\n");
14793 +}
14794 +
14795 +/*--------------------------------------------------------------------
14796 + * SYMBOL
14797 + *             main()
14798 + * NOTES
14799 + *             main module of pglb: parse options, handle stop/restart, start the services
14800 + * ARGS
14801 + *             int argc: number of parameter
14802 + *             char ** argv: value of parameter
14803 + * RETURN
14804 + *             STATUS_OK (the return statement follows the final pglb_exit(0) call)
14805 + *--------------------------------------------------------------------
14806 + */
14807 +int
14808 +main(int argc, char ** argv)
14809 +{
14810 +       int opt = 0;
14811 +       char * r_path = NULL;   /* directory of the config file (-D, else $PGDATA, else ".") */
14812 +       char * w_path = NULL;   /* directory of the work files (-W, else same as r_path) */
14813 +       int detach = 1;         /* cleared by -n: do not daemonize */
14814 +
14815 +       PGRsignal(SIGHUP, pglb_exit);
14816 +       PGRsignal(SIGINT, pglb_exit);
14817 +       PGRsignal(SIGQUIT, pglb_exit);
14818 +       PGRsignal(SIGTERM, pglb_exit);
14819 +       PGRsignal(SIGALRM, SIG_IGN); /* ignored */
14820 +       PGRsignal(SIGPIPE, SIG_IGN); /* ignored */
14821 +       PGRsignal(SIGTTIN, SIG_IGN); /* ignored */
14822 +       PGRsignal(SIGTTOU, SIG_IGN); /* ignored */
14823 +       PGRsignal(SIGCHLD,PGRchild_wait);
14824 +       PGRsignal(SIGUSR1, SIG_IGN); /* ignored */
14825 +       PGRsignal(SIGUSR2, close_child); /* close child process */
14826 +       r_path = getenv("PGDATA");
14827 +       if (r_path == NULL)
14828 +               r_path = ".";
14829 +
14830 +       while ((opt = getopt(argc, argv, "U:D:W:w:lvnh")) != -1)
14831 +       {
14832 +               switch (opt)
14833 +               {
14834 +                       case 'U':
14835 +                               if (!optarg)
14836 +                               {
14837 +                                       usage();
14838 +                                       exit(1);
14839 +                               }
14840 +                               PGRuserName = strdup(optarg);
14841 +                               break;
14842 +                       case 'D':
14843 +                               if (!optarg)
14844 +                               {
14845 +                                       usage();
14846 +                                       exit(1);
14847 +                               }
14848 +                               r_path = optarg;
14849 +                               break;
14850 +                       case 'W':
14851 +                               if (!optarg)
14852 +                               {
14853 +                                       usage();
14854 +                                       exit(1);
14855 +                               }
14856 +                               w_path = optarg;
14857 +                               break;
14858 +                       case 'w':
14859 +                               fork_wait_time = atoi(optarg);
14860 +                               if (fork_wait_time < 0) /* negative values are clamped to 0 */
14861 +                                       fork_wait_time = 0;
14862 +                               break;
14863 +                       case 'l':
14864 +                               Log_Print = 1;
14865 +                               break;
14866 +                       case 'v':
14867 +                               Debug_Print = 1;
14868 +                               break;
14869 +                       case 'n':
14870 +                               detach = 0;
14871 +                               break;
14872 +                       case 'h':
14873 +                               usage();
14874 +                               exit(0);
14875 +                               break;
14876 +                       default:
14877 +                               usage();
14878 +                               exit(1);
14879 +               }
14880 +       }
14881 +       PGR_Data_Path = r_path;
14882 +       if (w_path == NULL)
14883 +       {
14884 +               PGR_Write_Path = PGR_Data_Path;
14885 +       }
14886 +       else
14887 +       {
14888 +               PGR_Write_Path = w_path;
14889 +       }
14890 +
14891 +       if (optind == (argc-1) &&       /* exactly one word follows the options */
14892 +                       ((!strcmp(argv[optind],"stop")) ||
14893 +                       (!strcmp(argv[optind],"restart"))))
14894 +       {
14895 +               stop_pglb();    /* NOTE(review): only sends SIGTERM; nothing here waits for the old process to exit */
14896 +               if (!strcmp(argv[optind],"stop"))
14897 +               {
14898 +                       exit(0);
14899 +               }
14900 +       }       /* "restart" falls through and starts a new instance below */
14901 +       else if (optind == argc)
14902 +       {
14903 +               if (is_exist_pid_file())
14904 +               {
14905 +                       fprintf(stderr,"pid file %s/%s found. is another pglb running?\n", PGR_Write_Path, PGLB_PID_FILE);
14906 +                       exit(1);
14907 +               }
14908 +       }
14909 +       else if (optind < argc) /* any other trailing argument is a usage error */
14910 +       {
14911 +               usage();
14912 +               exit(1);
14913 +       }
14914 +
14915 +       if (detach)
14916 +       {
14917 +               daemonize();
14918 +       }
14919 +       write_pid_file();
14920 +       
14921 +       if (init_pglb(PGR_Data_Path) != STATUS_OK)
14922 +       {
14923 +               exit(1);        /* initialization failed: report it through a non-zero status */
14924 +       }
14925 +
14926 +       /* call recovery process */
14927 +       PGRrecovery_main(fork_wait_time);
14928 +
14929 +       /* call lifecheck process */
14930 +       PGRlifecheck_main(fork_wait_time);
14931 +
14932 +       /* start loadbalance module */
14933 +       load_balance_main();
14934 +       pglb_exit(0);
14935 +       return STATUS_OK;
14936 +}
14937 +
14938 +void
14939 +PGRexit_subprocess(int sig)
14940 +{
14941 +       pglb_exit(sig); /* thin wrapper declared extern in pglb.h: forwards sig to pglb_exit() */
14942 +}
14943 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pglb.conf.sample pgcluster-1.7.0rc7/src/pgcluster/pglb/pglb.conf.sample
14944 --- postgresql-8.2.4/src/pgcluster/pglb/pglb.conf.sample        1970-01-01 01:00:00.000000000 +0100
14945 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pglb.conf.sample      2007-02-18 22:52:17.000000000 +0100
14946 @@ -0,0 +1,73 @@
14947 +#============================================================
14948 +#          Load Balance Server configuration file
14949 +#-------------------------------------------------------------
14950 +# file: pglb.conf
14951 +#-------------------------------------------------------------
14952 +# This file controls:
14953 +#       o which hosts are db cluster server
14954 +#       o which port is used to connect to each db cluster server
14955 +#       o how many connections are allowed on each DB server
14956 +#============================================================
14957 +#-------------------------------------------------------------
14958 +# set cluster DB server information
14959 +#              o Host_Name :           Hostname of Cluster 
14960 +#                                      Please write a host name by FQDN or IP address.
14961 +#              o Port :                Connection port for postmaster
14962 +#              o Max_Connect :         Maximum number of connections to postmaster
14963 +#-------------------------------------------------------------
14964 +#<Cluster_Server_Info>
14965 +#    <Host_Name>               master.pgcluster.org    </Host_Name>
14966 +#    <Port>                    5432                    </Port>
14967 +#    <Max_Connect>             32                      </Max_Connect>
14968 +#</Cluster_Server_Info>
14969 +#<Cluster_Server_Info>
14970 +#    <Host_Name>               post2.pgcluster.org     </Host_Name>
14971 +#    <Port>                    5432                    </Port>
14972 +#    <Max_Connect>             32                      </Max_Connect>
14973 +#</Cluster_Server_Info>
14974 +#<Cluster_Server_Info>
14975 +#    <Host_Name>               post3.pgcluster.org     </Host_Name>
14976 +#    <Port>                    5432                    </Port>
14977 +#    <Max_Connect>             32                      </Max_Connect>
14978 +#</Cluster_Server_Info>
14979 +#-------------------------------------------------------------
14980 +# set Load Balance server information
14981 +#              o Host_Name :           The host name of this load balance server
14982 +#                                      Please write a host name by FQDN or IP address.
14983 +#              o Backend_Socket_Dir :  Unix domain socket path for the backend
14984 +#              o Receive_Port :        Connection port from client
14985 +#              o Recovery_Port :       Connection port for recovery process
14986 +#              o Max_Cluster_Num :     Maximum number of cluster DB servers
14987 +#              o Use_Connection_Pooling : Use connection pool [yes/no] 
14988 +#              o LifeCheck_Timeout :   Timeout of the lifecheck response
14989 +#              o LifeCheck_Interval :  Interval time of the lifecheck
14990 +#                              (range 1s - 1h)
14991 +#                              10s   -- 10 seconds
14992 +#                              10min -- 10 minutes
14993 +#                              1h    -- 1 hour
14994 +#-------------------------------------------------------------
14995 +<Host_Name>                    loadbalancer.pgcluster.org      </Host_Name>
14996 +<Backend_Socket_Dir>           /tmp                            </Backend_Socket_Dir>
14997 +<Receive_Port>                 5432                            </Receive_Port>
14998 +<Recovery_Port>                6001                            </Recovery_Port>
14999 +<Max_Cluster_Num>              128                             </Max_Cluster_Num>
15000 +<Use_Connection_Pooling>       no                              </Use_Connection_Pooling>
15001 +<LifeCheck_Timeout>            3s                              </LifeCheck_Timeout>
15002 +<LifeCheck_Interval>           15s                             </LifeCheck_Interval>
15003 +#-------------------------------------------------------------
15004 +# Log file settings
15005 +#
15006 +#              o File_Name :   Log file name with full path
15007 +#              o File_Size :   Maximum size of each log files
15008 +#                              Please specify in a number and unit(K or M)
15009 +#                               10   -- 10 Byte
15010 +#                               10K  -- 10 KByte
15011 +#                               10M  -- 10 MByte
15012 +#              o Rotate :      Rotation times
15013 +#                              If specified 0, old versions are removed.
15014 +#-------------------------------------------------------------
15015 +<Log_File_Info>
15016 +       <File_Name>             /tmp/pglb.log   </File_Name>
15017 +       <File_Size>             1M              </File_Size>
15018 +       <Rotate>                3               </Rotate>
15019 +</Log_File_Info>
15020 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pglb.h pgcluster-1.7.0rc7/src/pgcluster/pglb/pglb.h
15021 --- postgresql-8.2.4/src/pgcluster/pglb/pglb.h  1970-01-01 01:00:00.000000000 +0100
15022 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pglb.h        2007-03-01 16:27:49.000000000 +0100
15023 @@ -0,0 +1,472 @@
15024 +/*--------------------------------------------------------------------
15025 + * FILE:
15026 + *     pglb.h
15027 + *
15028 + * Portions Copyright (c) 2003-2006  Atsushi Mitani
15029 + *--------------------------------------------------------------------
15030 + */
15031 +#ifndef PGLB_H
15032 +#define PGLB_H
15033 +
15034 +#define PGLB_VERSION   "1.7.0rc7"
15035 +
15036 +#include "../libpgc/libpgc.h"
15037 +
15038 +/*
15039 + * from pool.h
15040 + */
15041 +
15042 +/* 
15043 + * define this if you do not want to issue RESET ALL at each new
15044 + * connection.  Also you need to define this for 7.1 or prior
15045 + * PostgreSQL since they do not support RESET ALL
15046 + */
15047 +#undef NO_RESET_ALL
15048 +
15049 +/* undef this if you have problems with non blocking accept() */
15050 +#define NONE_BLOCK
15051 +
15052 +#define POOLMAXPATHLEN 8192
15053 +
15054 +/* configuration file name */
15055 +#define POOL_CONF_FILE_NAME "pgpool.conf"
15056 +
15057 +/* pid file directory */
15058 +#define DEFAULT_LOGDIR "/tmp"
15059 +
15060 +/* Unix domain socket directory */
15061 +#define DEFAULT_SOCKET_DIR "/tmp"
15062 +
15063 +/* pid file name */
15064 +#define PID_FILE_NAME "pgpool.pid"
15065 +
15066 +/* strict mode comment in SQL */
15067 +#define STRICT_MODE_STR "/*STRICT*/"
15068 +#define STRICT_MODE(s) (strncasecmp((s), STRICT_MODE_STR, strlen(STRICT_MODE_STR)) == 0)
15069 +
15070 +typedef enum {
15071 +    POOL_CONTINUE = 0,
15072 +       POOL_IDLE,
15073 +    POOL_END,
15074 +    POOL_ERROR,
15075 +    POOL_FATAL
15076 +} POOL_STATUS;
15077 +
15078 +/* protocol major version numbers */
15079 +#define PROTO_MAJOR_V2 2
15080 +#define PROTO_MAJOR_V3 3
15081 +
15082 +/*
15083 + * startup packet definitions (v2) stolen from PostgreSQL
15084 + */
15085 +#define SM_DATABASE            64
15086 +#define SM_USER                        32
15087 +#define SM_OPTIONS             64
15088 +#define SM_UNUSED              64
15089 +#define SM_TTY                 64
15090 +
15091 +typedef struct PGR_StartupPacket_v2
15092 +{
15093 +       int                     protoVersion;           /* Protocol version */
15094 +       char            database[SM_DATABASE];  /* Database name */
15095 +       char            user[SM_USER];  /* User name */
15096 +       char            options[SM_OPTIONS];    /* Optional additional args */
15097 +       char            unused[SM_UNUSED];              /* Unused */
15098 +       char            tty[SM_TTY];    /* Tty for debug output */
15099 +} PGR_StartupPacket_v2;
15100 +
15101 +/* startup packet info */
15102 +typedef struct
15103 +{
15104 +       char *startup_packet;           /* raw startup packet without packet length (malloced area) */
15105 +       int len;                                        /* raw startup packet length */
15106 +       int major;      /* protocol major version */
15107 +       int minor;      /* protocol minor version */
15108 +       char *database; /* database name in startup_packet (malloced area) */
15109 +       char *user;     /* user name in startup_packet (malloced area) */
15110 +} PGR_StartupPacket;
15111 +
15112 +typedef struct CancelPacket
15113 +{
15114 +       int                     protoVersion;           /* Protocol version */
15115 +       int                     pid;    /* backend process id */
15116 +       int                     key;    /* cancel key */
15117 +} CancelPacket;
15118 +
15119 +/*
15120 + * configuration parameters
15121 + */
15122 +typedef struct {
15123 +    int        inetdomain;     /* should we make an INET domain socket too? */
15124 +    int        port;   /* port # to bind */
15125 +       char *socket_dir;               /* pgpool socket directory */
15126 +    char       *backend_host_name;     /* backend host name */
15127 +    int        backend_port;   /* backend port # */
15128 +    char       *secondary_backend_host_name;   /* secondary backend host name */
15129 +    int        secondary_backend_port; /* secondary backend port # */
15130 +    int        num_init_children;      /* # of children initially pre-forked */
15131 +    int        child_life_time;        /* if idle for this many seconds, child exits */
15132 +    int        connection_life_time;   /* if idle for this many seconds, connection closes */
15133 +    int        max_pool;       /* max # of connection pool per child */
15134 +    char *logdir;              /* logging directory */
15135 +    char *backend_socket_dir;  /* Unix domain socket directory for the PostgreSQL server */
15136 +       int replication_mode;           /* replication mode */
15137 +       int replication_strict; /* if non 0, wait for completion of the
15138 +                               query sent to master to avoid deadlock */
15139 +       /*
15140 +        * if secondary does not respond within this many milliseconds, abort this session.
15141 +        * this is not compatible with replication_strict = 1. 0 means no timeout.
15142 +        */
15143 +       int replication_timeout;
15144 +
15145 +       int load_balance_mode;          /* load balance mode */
15146 +
15147 +       /* the following do not exist in the configuration file */
15148 +    char       *current_backend_host_name;     /* current backend host name */
15149 +    int        current_backend_port;   /* current backend port # */
15150 +       int replication_enabled;                /* replication mode enabled */
15151 +
15152 +       int replication_stop_on_mismatch;               /* if there's a data mismatch between master and secondary
15153 +                                                                                        * start degeneration to stop replication mode
15154 +                                                                                        */
15155 +} POOL_CONFIG;
15156 +
15157 +#define MAX_PASSWORD_SIZE              (1024)
15158 +
15159 +typedef struct {
15160 +       int num;        /* number of entries */
15161 +       char **names;           /* parameter names */
15162 +       char **values;          /* values */
15163 +} ParamStatus;
15164 +
15165 +/*
15166 + * stream connection structure
15167 + */
15168 +typedef struct {
15169 +       int fd;         /* fd for connection */
15170 +       FILE *write_fd; /* stream write connection */
15171 +
15172 +       char *hp;       /* pending data buffer head address */
15173 +       int po;         /* pending data offset */
15174 +       int bufsz;      /* pending data buffer size */
15175 +       int len;        /* pending data length */
15176 +
15177 +       char *sbuf;     /* buffer for pool_read_string */
15178 +       int sbufsz;     /* its size in bytes */
15179 +
15180 +       char *buf2;     /* buffer for pool_read2 */
15181 +       int bufsz2;     /* its size in bytes */
15182 +
15183 +       int isbackend;          /* this connection is for backend if non 0 */
15184 +       int issecondary_backend;                /* this connection is for secondary backend if non 0 */
15185 +
15186 +       char tstate;            /* transaction state (V3 only) */
15187 +
15188 +       /*
15189 +        * the following are remembered so an authenticated connection can be re-used
15190 +        */
15191 +       int auth_kind;          /* 3: clear text password, 4: crypt password, 5: md5 password */
15192 +       int pwd_size;           /* password (sent back from frontend) size in host order */
15193 +       char password[MAX_PASSWORD_SIZE];               /* password (sent back from frontend) */
15194 +       char salt[4];           /* password salt */
15195 +
15196 +       /*
15197 +        * the following are used to remember the current session parameter status.
15198 +        * re-used connection will need them (V3 only)
15199 +        */
15200 +       ParamStatus params;
15201 +
15202 +       int no_forward;         /* if non 0, do not write to frontend */
15203 +
15204 +} POOL_CONNECTION;
15205 +
15206 +/*
15207 + * connection pool structure
15208 + */
15209 +typedef struct {
15210 +       PGR_StartupPacket *sp;  /* startup packet info */
15211 +    int pid;   /* backend pid */
15212 +    int key;   /* cancel key */
15213 +    POOL_CONNECTION    *con;
15214 +       time_t closetime;       /* absolute time in second when the connection closed
15215 +                                                * if 0, that means the connection is under use.
15216 +                                                */
15217 +} POOL_CONNECTION_POOL_SLOT;
15218 +
15219 +#define MAX_CONNECTION_SLOTS 2
15220 +
15221 +typedef struct {
15222 +    int num;   /* number of slots */
15223 +    POOL_CONNECTION_POOL_SLOT  *slots[MAX_CONNECTION_SLOTS];
15224 +} POOL_CONNECTION_POOL;
15225 +
15226 +#define MASTER_CONNECTION(p) ((p)->slots[0])   /* slot 0 of the pool that p points to */
15227 +#define SECONDARY_CONNECTION(p) ((p)->slots[1])        /* slot 1 of the pool that p points to */
15228 +#define MASTER(p) MASTER_CONNECTION(p)->con    /* connection held by slot 0 */
15229 +#define SECONDARY(p) SECONDARY_CONNECTION(p)->con      /* connection held by slot 1 */
15230 +#define MAJOR(p) MASTER_CONNECTION(p)->sp->major       /* major field of slot 0's startup packet */
15231 +#define TSTATE(p) MASTER(p)->tstate    /* tstate field of slot 0's connection */
15232 +
15233 +#define Max(x, y)              ((x) > (y) ? (x) : (y))
15234 +#define Min(x, y)              ((x) < (y) ? (x) : (y))
15235 +
15236 +/*
15237 + * pglb
15238 + */
15239 +
15240 +typedef struct {
15241 +       int useFlag;
15242 +       int sock;
15243 +}SocketTbl;
15244 +
15245 +typedef struct {
15246 +       int useFlag;    /* record state, compared against TBL_* values; TBL_END ends table scans */
15247 +       char hostName[HOSTNAME_MAX_LENGTH];     /* host name; pool_config_backend_host_name expands to this field */
15248 +       unsigned short port;    /* port; pool_config_backend_port expands to this field */
15249 +       short max_connect;      /* pool_config_num_init_children expands to this; presumably the Max_Connect setting -- confirm */
15250 +       int use_num;
15251 +       int rate;
15252 +       int rec_no;     /* record number; ChildTbl.rec_no is matched against it */
15253 +       int retry_count;
15254 +}ClusterTbl;
15255 +
15256 +typedef struct {
15257 +       long mtype;
15258 +       char mdata[1];
15259 +}MsgData;
15260 +
15261 +typedef struct {
15262 +       int useFlag;    /* record state; TBL_END ends table scans */
15263 +       int rec_no;     /* rec_no of the ClusterTbl record this child belongs to */
15264 +       pid_t pid;      /* process id of the child */
15265 +}ChildTbl;
15266 +
15267 +#define UNIX_DOMAIN_FD (0)
15268 +#define INET_DOMAIN_FD (1)
15269 +typedef struct {
15270 +       int unix_fd;
15271 +       int inet_fd;
15272 +}FrontSocket;
15273 +
15274 +#define pool_config_inetdomain (0)
15275 +#define pool_config_replication_mode   (0)
15276 +#define pool_config_replication_strict (0)
15277 +#define pool_config_replication_timeout        (0)
15278 +#define pool_config_replication_enabled        (0)
15279 +#define pool_config_load_balance_mode  (0)
15280 +#define pool_config_replication_stop_on_mismatch       (0)
15281 +#define pool_config_port       (Recv_Port_Number)
15282 +#define pool_config_socket_dir (Backend_Socket_Dir)
15283 +#define pool_config_backend_host_name  (CurrentCluster->hostName)
15284 +#define pool_config_backend_port       (CurrentCluster->port)
15285 +#define pool_config_secondary_backend_host_name        (CurrentCluster->hostName)
15286 +#define pool_config_secondary_backend_port     (CurrentCluster->port)
15287 +#define pool_config_num_init_children  (CurrentCluster->max_connect)
15288 +#define pool_config_child_life_time    (Connection_Life_Time)
15289 +#define pool_config_connection_life_time       (Connection_Life_Time)
15290 +#define pool_config_max_pool   (Max_Pool)
15291 +#define pool_config_logdir     "./"
15292 +#define pool_config_backend_socket_dir (Backend_Socket_Dir)
15293 +#define pool_config_current_backend_host_name  (CurrentCluster->hostName)
15294 +#define pool_config_current_backend_port       (CurrentCluster->port)
15295 +#define REPLICATION (0)
15296 +#define IN_LOAD_BALANCE (0)
15297 +
15298 +/*
15299 + * for pglb
15300 + */
15301 +#define MAX_DB_SERVER  (32)
15302 +#define PGLB_MAX_SOCKET_QUEUE (10000)
15303 +#define        CLUSTER_TBL_SHM_KEY     (1010)
15304 +#define PGLB_CONNECT_RETRY_TIME  (3)
15305 +#define DEFAULT_CONNECT_NUM    (32)
15306 +#define DEFAULT_PORT   (5432)
15307 +#define BUF_SIZE       (16384)
15308 +#define TBL_FREE       (0)
15309 +#define TBL_INIT       (1)
15310 +#define TBL_USE                (2)
15311 +#define TBL_STOP       (3)
15312 +#define TBL_ACCEPT     (10)
15313 +#define TBL_ERROR_NOTICE       (98)    /* close_child() looks for this and then sets TBL_ERROR via PGRset_status_on_cluster_tbl() */
15314 +#define TBL_ERROR      (99)
15315 +#define TBL_END        (-1)    /* useFlag value that terminates scans of Cluster_Tbl and Child_Tbl */
15316 +#define STATUS_OK      (0)
15317 +#define STATUS_ERROR   (-1)
15318 +#ifdef RECOVERY_PREPARE_REQ
15319 +#define ADD_DB         RECOVERY_PREPARE_REQ
15320 +#else
15321 +#define ADD_DB         (1)
15322 +#endif
15323 +#ifdef RECOVERY_PGDATA_ANS
15324 +#define STOP_DB                RECOVERY_PGDATA_ANS
15325 +#else
15326 +#define STOP_DB                (3)
15327 +#endif
15328 +#ifdef RECOVERY_FINISH
15329 +#define START_DB       RECOVERY_FINISH
15330 +#else
15331 +#define START_DB       (9)
15332 +#endif
15333 +#define DELETE_DB      (99)
15334 +#define QUERY_TERMINATE        (0x00)
15335 +#define RESPONSE_TERMINATE     (0x5a)
15336 +#define PGLB_CONF_FILE "pglb.conf"
15337 +#define PGLB_PID_FILE  "pglb.pid"
15338 +#define PGLB_STATUS_FILE "pglb.sts"
15339 +#define PGLB_LOG_FILE "pglb.log"
15340 +#define CLUSTER_SERVER_TAG     "Cluster_Server_Info"
15341 +#define MAX_CONNECT_TAG        "Max_Connect"
15342 +#define RECOVERY_PORT_TAG      "Recovery_Port"
15343 +#define RECV_PORT_TAG  "Receive_Port"
15344 +#define MAX_CLUSTER_TAG        "Max_Cluster_Num"
15345 +#define USE_CONNECTION_POOL_TAG "Use_Connection_Pooling"
15346 +#define MAX_POOL_TAG   "Max_Pool_Each_Server"
15347 +#define BACKEND_SOCKET_DIR_TAG "Backend_Socket_Dir"
15348 +#define CONNECTION_LIFE_TIME   "Connection_Life_Time"
15349 +#define NOT_USE_CONNECTION_POOL        (0)
15350 +#define USE_CONNECTION_POOL    (1)
15351 +
15352 +#define PGR_SEND_RETRY_CNT (100)
15353 +#define PGR_SEND_WAIT_MSEC (500)
15354 +#define PGR_RECV_RETRY_CNT (100)
15355 +#define PGR_RECV_WAIT_MSEC (500)
15356 +
15357 +extern int Recv_Port_Number; /* global variables shared between the pglb source files */
15358 +extern int Recovery_Port_Number;
15359 +extern uint16_t LifeCheck_Port_Number; /* the only port declared as uint16_t; the two above are int */
15360 +extern int Use_Connection_Pool;
15361 +extern int Max_Pool;
15362 +extern int Connection_Life_Time;
15363 +extern int Msg_Id;
15364 +extern ClusterTbl * Cluster_Tbl;
15365 +extern int Max_DB_Server;
15366 +extern int MaxBackends;
15367 +extern char * Backend_Socket_Dir;
15368 +extern int ClusterShmid;
15369 +extern int ClusterSemid;
15370 +extern int ChildShmid;
15371 +extern int ClusterNum;
15372 +extern ChildTbl * Child_Tbl;
15373 +extern char * PGR_Data_Path;
15374 +extern char * PGR_Write_Path;
15375 +extern char * Backend_Socket_Dir; /* duplicate of the identical declaration a few lines above */
15376 +extern FrontSocket Frontend_FD;
15377 +extern FILE * StatusFp;
15378 +extern char * ResolvedName;
15379 +extern char * PGRuserName;
15380 +
15381 +/* for child.c */
15382 +extern POOL_CONNECTION * Frontend;
15383 +extern ClusterTbl * CurrentCluster;
15384 +
15385 +extern char * Function;
15386 +
15387 +extern POOL_CONNECTION_POOL *pool_connection_pool;     /* connection pool */
15388 +
15389 +/* functions exported by main.c */
15390 +extern void PGRrecreate_child(int signal_args);
15391 +extern void PGRexit_subprocess(int sig);
15392 +
15393 +/* functions exported by child.c */
15394 +extern int PGRpre_fork_children(ClusterTbl * ptr);
15395 +extern int PGRpre_fork_child(ClusterTbl * ptr);
15396 +extern int PGRdo_child( int use_pool);
15397 +extern int PGRcreate_child(ClusterTbl * cluster_p);
15398 +extern pid_t PGRscan_child_tbl(ClusterTbl * cluster_p);
15399 +extern void notice_backend_error(void);
15400 +extern void do_pooling_child(int sig);
15401 +extern int PGRset_status_to_child_tbl(pid_t pid, int status);
15402 +extern int PGRadd_child_tbl(ClusterTbl * cluster_p, pid_t pid, int status);
15403 +extern int PGRget_child_status(pid_t pid);
15404 +extern void PGRreturn_connection_full_error(void);
15405 +extern void PGRreturn_no_connection_error(void);
15406 +extern void PGRquit_children_on_cluster(int rec_no);
15407 +
15408 +/* functions exported by cluster_table.c */
15409 +extern int PGRis_cluster_alive(void) ;
15410 +extern ClusterTbl * PGRscan_cluster(void);
15411 +extern void PGRset_key_of_cluster(ClusterTbl * ptr, RecoveryPacket * packet);
15412 +extern ClusterTbl * PGRadd_cluster_tbl (ClusterTbl * conf_data);
15413 +extern ClusterTbl * PGRset_status_on_cluster_tbl (int status, ClusterTbl * ptr);
15414 +extern ClusterTbl * PGRsearch_cluster_tbl(ClusterTbl * conf_data);
15415 +
15416 +/* functions exported by load_balance.c */
15417 +extern int PGRload_balance(void);
15418 +extern int PGRload_balance_with_pool(void);
15419 +extern char PGRis_connection_full(ClusterTbl * ptr);
15420 +extern void PGRuse_connection(ClusterTbl * ptr);
15421 +extern void PGRrelease_connection(ClusterTbl * ptr);
15422 +extern void PGRchild_wait(int sig);
15423 +
15424 +/* functions exported by recovery.c */
15425 +extern void PGRrecovery_main(int fork_wait_fime); /* NOTE(review): "fork_wait_fime" looks like a typo of fork_wait_time (cf. PGRlifecheck_main) */
15426 +
15427 +/* functions exported by socket.c */
15428 +extern int PGRcreate_unix_domain_socket(char * sock_dir, unsigned short port);
15429 +extern int PGRcreate_recv_socket(char * hostName , unsigned short portNumber);
15430 +extern int PGRcreate_acception(int fd, char * hostName , unsigned short portNumber);
15431 +extern void PGRclose_sock(int * sock);
15432 +extern int PGRread_byte(int sock,char * buf,int len, int flag);
15433 +extern int PGRcreate_cluster_socket( int * sock, ClusterTbl * ptr );
15434 +
15435 +/* functions exported by pool_auth.c */
15436 +extern int pool_do_auth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp); /* 0 on success, non 0 on failure */
15437 +extern int pool_do_reauth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp); /* 0 on success, non 0 on failure */
15438 +extern int pool_read_message_length(POOL_CONNECTION_POOL *cp); /* pool_do_auth() treats a negative result as failure */
15439 +extern signed char pool_read_kind(POOL_CONNECTION_POOL *cp); /* pool_do_auth() treats a negative result as failure */
15440 +
15441 +/* functions exported by pool_connection_pool.c */
15442 +extern int pool_init_cp(void);
15443 +extern POOL_CONNECTION_POOL *pool_get_cp(char *user, char *database, int protoMajor);
15444 +extern void pool_discard_cp(char *user, char *database, int protoMajor);
15445 +extern POOL_CONNECTION_POOL *pool_create_cp(void);
15446 +extern void pool_connection_pool_timer(POOL_CONNECTION_POOL *backend);
15447 +extern void pool_backend_timer_handler(int sig);
15448 +extern int connect_inet_domain_socket(int secondary_backend);
15449 +extern int connect_unix_domain_socket(int secondary_backend);
15450 +extern char PGRis_same_host(char * host1, char * host2);
15451 +extern void pool_finish(void);
15452 +
15453 +/* functions exported by pool_process_query.c */
15454 +extern POOL_STATUS pool_process_query(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, int connection_reuse);
15455 +extern POOL_STATUS ErrorResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
15456 +extern void pool_enable_timeout(); /* NOTE(review): empty parameter list; the other no-argument prototypes here use (void) */
15457 +extern void pool_disable_timeout(); /* NOTE(review): same as above */
15458 +extern int pool_check_fd(POOL_CONNECTION *cp, int notimeout);
15459 +extern void pool_send_frontend_exits(POOL_CONNECTION_POOL *backend);
15460 +extern POOL_STATUS SimpleForwardToFrontend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
15461 +extern POOL_STATUS SimpleForwardToBackend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
15462 +extern POOL_STATUS ParameterStatus(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
15463 +extern void pool_send_error_message(POOL_CONNECTION *frontend, int protoMajor, char *code, char *message, char *detail, char *hint, char *file, int line);
15464 +
15465 +/* functions exported by pool_params.c */
15466 +extern int pool_init_params(ParamStatus *params);
15467 +extern void pool_discard_params(ParamStatus *params);
15468 +extern char *pool_find_name(ParamStatus *params, char *name, int *pos);
15469 +extern int pool_get_param(ParamStatus *params, int index, char **name, char **value);
15470 +extern int pool_add_param(ParamStatus *params, char *name, char *value);
15471 +extern void pool_param_debug_print(ParamStatus *params);
15472 +
15473 +/* functions exported by pool_stream.c */
15474 +extern POOL_CONNECTION *pool_open(int fd);
15475 +extern void pool_close(POOL_CONNECTION *cp);
15476 +extern int pool_read(POOL_CONNECTION *cp, void *buf, int len); /* callers in pool_auth.c treat a non-zero result as failure */
15477 +extern char *pool_read2(POOL_CONNECTION *cp, int len);
15478 +extern int pool_write(POOL_CONNECTION *cp, void *buf, int len);
15479 +extern int pool_flush(POOL_CONNECTION *cp);
15480 +extern int pool_write_and_flush(POOL_CONNECTION *cp, void *buf, int len); /* callers in pool_auth.c treat a negative result as failure */
15481 +extern char *pool_read_string(POOL_CONNECTION *cp, int *len, int line);
15482 +
15483 +/*
15484 + * logging functions exported by show.c (callers pass printf-style format strings)
15485 + */
15486 +extern void show_error(const char * fmt,...);
15487 +extern void show_debug(const char * fmt,...);
15488 +extern void PGRwrite_log_file(FILE * fp, const char * fmt,...);
15489 +
15490 +/*
15491 + * function exported by lifecheck.c
15492 + */
15493 +extern int PGRlifecheck_main(int fork_wait_time);
15494 +
15495 +#endif /* PGLB_H */
15496 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pool_auth.c pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_auth.c
15497 --- postgresql-8.2.4/src/pgcluster/pglb/pool_auth.c     1970-01-01 01:00:00.000000000 +0100
15498 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_auth.c   2007-02-18 22:52:17.000000000 +0100
15499 @@ -0,0 +1,959 @@
15500 +/*--------------------------------------------------------------------
15501 + * FILE:
15502 + *     pool_auth.c
15503 + *
15504 + * NOTE:
15505 + *     authentication stuff
15506 + *
15507 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
15508 + * Portions Copyright (c) 2003-2006, Tatsuo Ishii
15509 + *--------------------------------------------------------------------
15510 + */
15511 +/*
15512 + * Permission to use, copy, modify, and distribute this software and
15513 + * its documentation for any purpose and without fee is hereby
15514 + * granted, provided that the above copyright notice appear in all
15515 + * copies and that both that copyright notice and this permission
15516 + * notice appear in supporting documentation, and that the name of the
15517 + * author not be used in advertising or publicity pertaining to
15518 + * distribution of the software without specific, written prior
15519 + * permission. The author makes no representations about the
15520 + * suitability of this software for any purpose.  It is provided "as
15521 + * is" without express or implied warranty.
15522 + *
15523 +*/
15524 +#include <sys/types.h>
15525 +#include <netinet/in.h>
15526 +#include <sys/param.h>
15527 +#include <arpa/inet.h>
15528 +#include <errno.h>
15529 +#include <string.h>
15530 +#include <sys/time.h>
15531 +#include <stdio.h>
15532 +#include "replicate_com.h"
15533 +#include "pglb.h"
15534 +
15535 +int pool_do_auth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp); /* public functions; pglb.h carries the same four prototypes */
15536 +int pool_do_reauth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp);
15537 +int pool_read_message_length(POOL_CONNECTION_POOL *cp);
15538 +signed char pool_read_kind(POOL_CONNECTION_POOL *cp);
15539 +
15540 +static POOL_STATUS pool_send_auth_ok(POOL_CONNECTION *frontend, int pid, int key, int protoMajor); /* file-local helpers */
15541 +static int do_clear_text_password(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor);
15542 +static int do_crypt(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor);
15543 +static int do_md5(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor);
15544 +
15545 +/*
15546 +* do authentication of a new frontend against the backend(s) in cp, then send the backend key data to the frontend. if success return 0 otherwise non 0 (-1).
15547 +*/
15548 +int pool_do_auth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp)
15549 +{
15550 +       char * func = "pool_do_auth()";
15551 +       int status;
15552 +       signed char kind;
15553 +       int pid, pid1; /* 'pid' first holds the authentication request code, later the backend pid */
15554 +       int key, key1;
15555 +       int protoMajor;
15556 +       int length;
15557 +
15558 +       protoMajor = MAJOR(cp);
15559 +
15560 +       kind = pool_read_kind(cp); /* first message kind from the backend; negative means failure */
15561 +       if (kind < 0)
15562 +       {
15563 +               return -1;
15564 +       }
15565 +
15566 +       /* error response? */
15567 +       if (kind == 'E')
15568 +       {
15569 +               /* we assume error response at this stage is likely version
15570 +                * protocol mismatch (v3 frontend vs. v2 backend). So we throw
15571 +                * a V2 protocol error response in the hope that v3 frontend
15572 +                * will negotiate again using v2 protocol.
15573 +                */
15574 +               show_error("%s:pool_do_auth: maybe protocol version mismatch (current version %d)",func, protoMajor);
15575 +               ErrorResponse(frontend, cp);
15576 +               return -1;
15577 +       }
15578 +       else if (kind != 'R')
15579 +       {
15580 +               show_error("%s:pool_do_auth: expect \"R\" got %c",func, kind);
15581 +               return -1;
15582 +       }
15583 +
15584 +       /*
15585 +        * message length (v3 only); only checked for being non-negative */
15586 +       if (protoMajor == PROTO_MAJOR_V3 && pool_read_message_length(cp) < 0)
15587 +       {
15588 +               return -1;
15589 +       }
15590 +
15591 +       /*
15592 +        * read authentication request kind.
15593 +        *
15594 +        * 0: authentication ok
15595 +        * 1: kerberos v4
15596 +        * 2: kerberos v5
15597 +        * 3: clear text password
15598 +        * 4: crypt password
15599 +        * 5: md5 password
15600 +        * 6: scm credential
15601 +        *
15602 +        * in replication mode, we only support kind = 0, 3. this is because the "salt"
15603 +        * cannot be replicated among master and secondary.
15604 +        * in non replication mode, we support kind = 0, 3, 4, 5
15605 +        */
15606 +
15607 +       status = pool_read(MASTER(cp), &pid, sizeof(pid)); /* request code from the master, stored in 'pid' */
15608 +       if (status < 0)
15609 +       {
15610 +               show_error("%s:pool_do_auth: read authentication kind failed",func);
15611 +               return -1;
15612 +       }
15613 +
15614 +       if (REPLICATION)
15615 +       {
15616 +               status = pool_read(SECONDARY(cp), &pid1, sizeof(pid1)); /* consumed from the secondary; pid1 is never converted or inspected afterwards */
15617 +
15618 +               if (status < 0)
15619 +               {
15620 +                       show_error("%s:pool_do_auth: read authentication kind from secondary failed",func);
15621 +                       return -1;
15622 +               }
15623 +       }
15624 +
15625 +       pid = ntohl(pid); /* request code arrives in network byte order */
15626 +
15627 +       /* trust? */
15628 +       if (pid == 0)
15629 +       {
15630 +               if (protoMajor == PROTO_MAJOR_V3)
15631 +               {
15632 +                       int msglen;
15633 +
15634 +                       pool_write(frontend, "R", 1); /* 'R', length 8, code 0 is forwarded to the frontend */
15635 +                       msglen = htonl(8);
15636 +                       pool_write(frontend, &msglen, sizeof(msglen));
15637 +                       msglen = htonl(0);
15638 +                       if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
15639 +                       {
15640 +                               return -1;
15641 +                       }
15642 +               }
15643 +               MASTER(cp)->auth_kind = 0; /* remembered for pool_do_reauth(), which switches on auth_kind */
15644 +       }
15645 +
15646 +       /* clear text password authentication? */
15647 +       else if (pid == 3)
15648 +       {
15649 +#ifdef PRINT_DEBUG
15650 +               show_debug("%s:trying clear text password authentication",func);
15651 +#endif                 
15652 +
15653 +               pid = do_clear_text_password(MASTER(cp), frontend, 0, protoMajor); /* helper result (0 = ok) replaces the request code */
15654 +
15655 +               if (pid >= 0 && REPLICATION)
15656 +               {
15657 +                       pid = do_clear_text_password(SECONDARY(cp), frontend, 0, protoMajor);
15658 +               }
15659 +       }
15660 +
15661 +       /* crypt authentication? */
15662 +       else if (pid == 4)
15663 +       {
15664 +#ifdef PRINT_DEBUG
15665 +               show_debug("%s:trying crypt authentication",func);
15666 +#endif                 
15667 +
15668 +               pid = do_crypt(MASTER(cp), frontend, 0, protoMajor);
15669 +
15670 +               if (pid >= 0 && REPLICATION)
15671 +               {
15672 +                       pid = do_crypt(SECONDARY(cp), frontend, 0, protoMajor);
15673 +               }
15674 +       }
15675 +
15676 +       /* md5 authentication? */
15677 +       else if (pid == 5)
15678 +       {
15679 +#ifdef PRINT_DEBUG
15680 +               show_debug("%s:trying md5 authentication",func);
15681 +#endif                 
15682 +
15683 +               pid = do_md5(MASTER(cp), frontend, 0, protoMajor);
15684 +
15685 +               if (pid >= 0 && REPLICATION)
15686 +               {
15687 +                       pid = do_md5(SECONDARY(cp), frontend, 0, protoMajor);
15688 +               }
15689 +       }
15690 +
15691 +       if (pid != 0) /* helper failure, or a request code not handled above (pid left unchanged) */
15692 +       {
15693 +               show_error("%s:pool_do_auth: backend does not return authenticaton ok",func);
15694 +               return -1;
15695 +       } 
15696 +
15697 +       /*
15698 +        * authentication ok. now read pid and secret key from the
15699 +        * backend
15700 +        */
15701 +       kind = pool_read_kind(cp);
15702 +       if (kind < 0)
15703 +       {
15704 +               return -1;
15705 +       }
15706 +
15707 +       /* error response? */
15708 +       if (kind == 'E')
15709 +       {
15710 +               if (protoMajor == PROTO_MAJOR_V2)
15711 +                       ErrorResponse(frontend, cp);
15712 +               else
15713 +                       SimpleForwardToFrontend(kind, frontend, cp);
15714 +               return -1;
15715 +       }
15716 +       else if (kind != 'K')
15717 +       {
15718 +               if (protoMajor == PROTO_MAJOR_V3)
15719 +               {
15720 +                       /* process parameter status */
15721 +                       while (kind == 'S') /* NOTE(review): after this loop 'kind' is not re-checked to be 'K' */
15722 +                       {
15723 +                               if (ParameterStatus(frontend, cp) != POOL_CONTINUE)
15724 +                                               return -1;
15725 +
15726 +                               pool_flush(frontend);
15727 +
15728 +                               kind = pool_read_kind(cp);
15729 +                               if (kind < 0)
15730 +                               {
15731 +                                       show_error("%s:pool_do_auth: failed to read kind while processing ParamterStatus",func);
15732 +                                       return -1;
15733 +                               }
15734 +                       }
15735 +               }
15736 +               else
15737 +               {
15738 +                       show_error("%s:pool_do_auth: expect \"K\" got %c",func, kind);
15739 +                       return -1;
15740 +               }
15741 +       }
15742 +
15743 +       /*
15744 +        * message length (V3 only); must be exactly 12
15745 +        */
15746 +       if (protoMajor == PROTO_MAJOR_V3 && (length = pool_read_message_length(cp)) != 12)
15747 +       {
15748 +               show_error("%s:pool_do_auth: invalid messages length(%d) for BackendKeyData",func, length);
15749 +               return -1;
15750 +       }
15751 +
15752 +       /*
15753 +        * OK, read pid and secret key
15754 +        */
15755 +
15756 +       /* pid */
15757 +       pool_read(MASTER(cp), &pid, sizeof(pid)); /* result not checked; value is kept exactly as read (no ntohl) */
15758 +       MASTER_CONNECTION(cp)->pid = pid;
15759 +
15760 +       /* key */
15761 +       pool_read(MASTER(cp), &key, sizeof(key)); /* result not checked; value is kept exactly as read (no ntohl) */
15762 +       MASTER_CONNECTION(cp)->key = key;
15763 +
15764 +       if (REPLICATION)
15765 +       {
15766 +               pool_read(SECONDARY(cp), &pid1, sizeof(pid1));
15767 +               SECONDARY_CONNECTION(cp)->pid = pid; /* NOTE(review): stores the master's pid; pid1 read just above is unused -- confirm intent */
15768 +
15769 +               /* key */
15770 +               pool_read(SECONDARY(cp), &key1, sizeof(key1));
15771 +               SECONDARY_CONNECTION(cp)->key = key; /* NOTE(review): stores the master's key; key1 read just above is unused -- confirm intent */
15772 +       }
15773 +
15774 +       return (pool_send_auth_ok(frontend, pid, key, protoMajor)); /* the frontend is given the master's pid and key */
15775 +}
15776 +
15777 +/*
15778 +* do re-authentication for reused connection, using the auth_kind saved on MASTER(cp). if success return 0 otherwise non 0.
15779 +*/
15780 +int pool_do_reauth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp)
15781 +{
15782 +       char * func = "pool_do_reauth()";
15783 +       int status;
15784 +       int protoMajor;
15785 +
15786 +       protoMajor = MAJOR(cp);
15787 +
15788 +       switch(MASTER(cp)->auth_kind) /* only the master connection is consulted in this function */
15789 +       {
15790 +               case 0:
15791 +                       /* trust */
15792 +                       status = 0;
15793 +                       break;
15794 +
15795 +               case 3:
15796 +                       /* clear text password */
15797 +                       status = do_clear_text_password(MASTER(cp), frontend, 1, protoMajor); /* third argument 1 = reauth */
15798 +                       break;
15799 +                       
15800 +               case 4:
15801 +                       /* crypt password */
15802 +                       status = do_crypt(MASTER(cp), frontend, 1, protoMajor);
15803 +                       break;
15804 +
15805 +               case 5:
15806 +                       /* md5 password */
15807 +                       status = do_md5(MASTER(cp), frontend, 1, protoMajor);
15808 +                       break;
15809 +
15810 +               default:
15811 +                       show_error("%s: unknown authentication request code %d", 
15812 +                                          func,MASTER(cp)->auth_kind);
15813 +                       return -1;
15814 +       }
15815 +
15816 +       if (status == 0)
15817 +       {
15818 +               if (protoMajor == PROTO_MAJOR_V3)
15819 +               {
15820 +                       int msglen;
15821 +
15822 +                       pool_write(frontend, "R", 1); /* 'R', length 8, code 0 is sent to the frontend */
15823 +                       msglen = htonl(8);
15824 +                       pool_write(frontend, &msglen, sizeof(msglen));
15825 +                       msglen = htonl(0);
15826 +                       if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
15827 +                       {
15828 +                               return -1;
15829 +                       }
15830 +               }
15831 +       }
15832 +       else
15833 +       {
15834 +#ifdef PRINT_DEBUG
15835 +               show_debug("%s: authentication failed",func);
15836 +#endif                 
15837 +               return -1;
15838 +       }
15839 +
15840 +       return (pool_send_auth_ok(frontend, MASTER_CONNECTION(cp)->pid, MASTER_CONNECTION(cp)->key, protoMajor) != POOL_CONTINUE); /* 0 when pool_send_auth_ok() returns POOL_CONTINUE, 1 otherwise */
15841 +}
15842 +
15843 +/*
15844 +* send authentication ok (V2 only) and the backend key data ('K', pid, key) to frontend. if success return 0 otherwise non 0 (-1).
15845 +*/
15846 +static POOL_STATUS pool_send_auth_ok(POOL_CONNECTION *frontend, int pid, int key, int protoMajor)
15847 +{
15848 +       char kind;
15849 +       int len;
15850 +
15851 +       if (protoMajor == PROTO_MAJOR_V2)
15852 +       {
15853 +               /* return "Authentication OK" to the frontend */
15854 +               kind = 'R';
15855 +               pool_write(frontend, &kind, 1);
15856 +               len = htonl(0);
15857 +               if (pool_write_and_flush(frontend, &len, sizeof(len)) < 0)
15858 +               {
15859 +                       return -1;
15860 +               }
15861 +       }
15862 +
15863 +       /* send backend key data */
15864 +       kind = 'K';
15865 +       pool_write(frontend, &kind, 1);
15866 +       if (protoMajor == PROTO_MAJOR_V3)
15867 +       {
15868 +               len = htonl(12); /* V3 only: message length field */
15869 +               pool_write(frontend, &len, sizeof(len));
15870 +       }
15871 +       pool_write(frontend, &pid, sizeof(pid)); /* pid and key are written exactly as passed in (no byte-order conversion here) */
15872 +       if (pool_write_and_flush(frontend, &key, sizeof(key)) < 0)
15873 +       {
15874 +               return -1;
15875 +       }
15876 +
15877 +       return 0;
15878 +}
15879 +
15880 +/*
15881 + * perform clear text password authentication. returns 0 on success, otherwise -1 or the non-zero reply code read from the backend.
15882 + */
15883 +static int do_clear_text_password(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor)
15884 +{
15885 +       char * func = "do_clear_text_password()";
15886 +       static int size; /* static: the value read during a master call is still there in a later secondary call, which skips the frontend read */
15887 +       static char password[MAX_PASSWORD_SIZE]; /* static for the same reason as 'size' */
15888 +       char response;
15889 +       int kind;
15890 +       int len;
15891 +
15892 +       /* master? */
15893 +       if (!backend->issecondary_backend)
15894 +       {
15895 +               pool_write(frontend, "R", 1);   /* authentication */
15896 +               if (protoMajor == PROTO_MAJOR_V3)
15897 +               {
15898 +                       len = htonl(8);
15899 +                       pool_write(frontend, &len, sizeof(len));
15900 +               }
15901 +               kind = htonl(3);                /* clear text password authentication */
15902 +               pool_write_and_flush(frontend, &kind, sizeof(kind));    /* indicating clear text password authentication */
15903 +
15904 +               /* read password packet */
15905 +               if (protoMajor == PROTO_MAJOR_V2)
15906 +               {
15907 +                       if (pool_read(frontend, &size, sizeof(size))) /* a non-zero result from pool_read is treated as failure */
15908 +                       {
15909 +                               show_error("%s: failed to read password packet size",func);
15910 +                               return -1;
15911 +                       }
15912 +               }
15913 +               else
15914 +               {
15915 +                       char k;
15916 +
15917 +                       if (pool_read(frontend, &k, sizeof(k)))
15918 +                       {
15919 +                               show_error("%s: failed to read password packet \"p\"",func);
15920 +                               return -1;
15921 +                       }
15922 +                       if (k != 'p')
15923 +                       {
15924 +                               show_error("%s:packet does not start with \"p\"",func);
15925 +                               return -1;
15926 +                       }
15927 +                       if (pool_read(frontend, &size, sizeof(size)))
15928 +                       {
15929 +                               show_error("%s: failed to read password packet size",func);
15930 +                               return -1;
15931 +                       }
15932 +               }
15933 +
15934 +               if ((ntohl(size) - 4) > sizeof(password)) /* 'size' stays in network byte order; payload length is ntohl(size) - 4 */
15935 +               {
15936 +                       show_error("%s: password is too long (size: %d)",func, ntohl(size) - 4);
15937 +                       return -1;
15938 +               }
15939 +
15940 +               if (pool_read(frontend, password, ntohl(size) - 4))
15941 +               {
15942 +                       show_error("%s: failed to read password (size: %d)",func, ntohl(size) - 4);
15943 +                       return -1;
15944 +               }
15945 +       }
15946 +
15947 +       /* connection reusing? compare with the password saved at first authentication; nothing is sent to the backend */
15948 +       if (reauth)
15949 +       {
15950 +               if ((ntohl(size) - 4) != backend->pwd_size)
15951 +               {
15952 +#ifdef PRINT_DEBUG
15953 +                       show_debug("%s; password size does not match in re-authetication",func);
15954 +#endif                 
15955 +                       return -1;
15956 +               }
15957 +
15958 +               if (memcmp(password, backend->password, backend->pwd_size) != 0)
15959 +               {
15960 +#ifdef PRINT_DEBUG
15961 +                       show_debug("%s; password does not match in re-authetication",func);
15962 +#endif                 
15963 +                       return -1;
15964 +               }
15965 +
15966 +               return 0;
15967 +       }
15968 +
15969 +       /* send password packet to backend */
15970 +       if (protoMajor == PROTO_MAJOR_V3)
15971 +               pool_write(backend, "p", 1);
15972 +       pool_write(backend, &size, sizeof(size)); /* forwarded in the byte order it was received in */
15973 +       pool_write_and_flush(backend, password, ntohl(size) -4);
15974 +       if (pool_read(backend, &response, sizeof(response)))
15975 +       {
15976 +               show_error("%s: failed to read authentication response",func);
15977 +               return -1;
15978 +       }
15979 +
15980 +       if (response != 'R')
15981 +       {
15982 +#ifdef PRINT_DEBUG
15983 +               show_debug("%s: backend does not return R while processing clear text password authentication",func);
15984 +#endif                 
15985 +               return -1;
15986 +       }
15987 +
15988 +       if (protoMajor == PROTO_MAJOR_V3)
15989 +       {
15990 +               if (pool_read(backend, &len, sizeof(len)))
15991 +               {
15992 +                       show_error("%s: failed to read authentication packet size",func);
15993 +                       return -1;
15994 +               }
15995 +
15996 +               if (ntohl(len) != 8)
15997 +               {
15998 +                       show_error("%s: incorrect authentication packet size (%d)",func, ntohl(len));
15999 +                       return -1;
16000 +               }
16001 +       }
16002 +
16003 +       /* expect to read "Authentication OK" response. kind should be 0... */
16004 +       if (pool_read(backend, &kind, sizeof(kind))) /* kind is used without ntohl; 0 is the same in either byte order */
16005 +       {
16006 +#ifdef PRINT_DEBUG
16007 +               show_debug("%s: failed to read Authentication OK response",func);
16008 +#endif                 
16009 +               return -1;
16010 +       }
16011 +
16012 +       /* if authenticated, save info */
16013 +       if (!reauth && kind == 0) /* reauth is always 0 here: the reauth path returned above */
16014 +       {
16015 +               if (!backend->issecondary_backend && protoMajor == PROTO_MAJOR_V3)
16016 +               {
16017 +                       int msglen;
16018 +
16019 +                       pool_write(frontend, "R", 1); /* 'R', length 8, code 0 is sent to the frontend (master call only) */
16020 +                       msglen = htonl(8);
16021 +                       pool_write(frontend, &msglen, sizeof(msglen));
16022 +                       msglen = htonl(0);
16023 +                       if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
16024 +                       {
16025 +                               return -1;
16026 +                       }
16027 +               }
16028 +
16029 +               backend->auth_kind = 3; /* saved so the reauth path can compare against it later */
16030 +               backend->pwd_size = ntohl(size) - 4;
16031 +               memcpy(backend->password, password, backend->pwd_size);
16032 +       }
16033 +       return kind; /* 0 on success; any other value is the raw code read from the backend */
16034 +}
16035 +
16036 +/*
16037 + * perform crypt authentication
16038 + */
16039 +static int do_crypt(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor)
16040 +{
16041 +       char * func = "do_crypt()";
16042 +       char salt[2];
16043 +       static int size;
16044 +       static char password[MAX_PASSWORD_SIZE];
16045 +       char response;
16046 +       int kind;
16047 +       int len;
16048 +
16049 +       if (!reauth)
16050 +       {
16051 +               /* read salt */
16052 +               if (pool_read(backend, salt, sizeof(salt)))
16053 +               {
16054 +                       show_error("%s: failed to read salt",func);
16055 +                       return -1;
16056 +               }
16057 +       }
16058 +       else
16059 +       {
16060 +               memcpy(salt, backend->salt, sizeof(salt));
16061 +       }
16062 +
16063 +       /* master? */
16064 +       if (!backend->issecondary_backend)
16065 +       {
16066 +               pool_write(frontend, "R", 1);   /* authenticaton */
16067 +               if (protoMajor == PROTO_MAJOR_V3)
16068 +               {
16069 +                       len = htonl(10);
16070 +                       pool_write(frontend, &len, sizeof(len));
16071 +               }
16072 +               kind = htonl(4);                /* crypt authentication */
16073 +               pool_write(frontend, &kind, sizeof(kind));      /* indicating crypt authentication */
16074 +               pool_write_and_flush(frontend, salt, sizeof(salt));             /* salt */
16075 +
16076 +               /* read password packet */
16077 +               if (protoMajor == PROTO_MAJOR_V2)
16078 +               {
16079 +                       if (pool_read(frontend, &size, sizeof(size)))
16080 +                       {
16081 +                               show_error("%s: failed to read password packet size",func);
16082 +                               return -1;
16083 +                       }
16084 +               }
16085 +               else
16086 +               {
16087 +                       char k;
16088 +
16089 +                       if (pool_read(frontend, &k, sizeof(k)))
16090 +                       {
16091 +                               show_error("%s: failed to read password packet",func);
16092 +                               return -1;
16093 +                       }
16094 +                       if (k != 'p')
16095 +                       {
16096 +                               show_error("%s: password packet does not start with \"p\"",func);
16097 +                               return -1;
16098 +                       }
16099 +                       if (pool_read(frontend, &size, sizeof(size)))
16100 +                       {
16101 +                               show_error("%s: failed to read password packet size",func);
16102 +                               return -1;
16103 +                       }
16104 +               }
16105 +
16106 +               if ((ntohl(size) - 4) > sizeof(password))
16107 +               {
16108 +                       show_error("%s: password is too long(size: %d)", func,ntohl(size) - 4);
16109 +                       return -1;
16110 +               }
16111 +
16112 +               if (pool_read(frontend, password, ntohl(size) - 4))
16113 +               {
16114 +                       show_error("%s: failed to read password (size: %d)", func,ntohl(size) - 4);
16115 +                       return -1;
16116 +               }
16117 +       }
16118 +
16119 +       /* connection reusing? */
16120 +       if (reauth)
16121 +       {
16122 +#ifdef PRINT_DEBUG
16123 +               show_debug("%s:size: %d saved_size: %d",func, (ntohl(size) - 4), backend->pwd_size);
16124 +#endif                 
16125 +               if ((ntohl(size) - 4) != backend->pwd_size)
16126 +               {
16127 +#ifdef PRINT_DEBUG
16128 +                       show_debug("%s: password size does not match in re-authetication",func);
16129 +#endif                 
16130 +                       return -1;
16131 +               }
16132 +
16133 +               if (memcmp(password, backend->password, backend->pwd_size) != 0)
16134 +               {
16135 +#ifdef PRINT_DEBUG
16136 +                       show_debug("%s: password does not match in re-authetication",func);
16137 +#endif                 
16138 +                       return -1;
16139 +               }
16140 +
16141 +               return 0;
16142 +       }
16143 +
16144 +       /* send password packet to backend */
16145 +       if (protoMajor == PROTO_MAJOR_V3)
16146 +               pool_write(backend, "p", 1);
16147 +       pool_write(backend, &size, sizeof(size));
16148 +       pool_write_and_flush(backend, password, ntohl(size) -4);
16149 +       if (pool_read(backend, &response, sizeof(response)))
16150 +       {
16151 +               show_error("%s: failed to read authentication response",func);
16152 +               return -1;
16153 +       }
16154 +
16155 +       if (response != 'R')
16156 +       {
16157 +#ifdef PRINT_DEBUG
16158 +               show_debug("%s: backend does not return R while processing crypt authentication(%02x) secondary: %d",func, response, backend->issecondary_backend);
16159 +#endif                 
16160 +               return -1;
16161 +       }
16162 +
16163 +       if (protoMajor == PROTO_MAJOR_V3)
16164 +       {
16165 +               if (pool_read(backend, &len, sizeof(len)))
16166 +               {
16167 +                       show_error("%s: failed to read authentication packet size",func);
16168 +                       return -1;
16169 +               }
16170 +
16171 +               if (ntohl(len) != 8)
16172 +               {
16173 +                       show_error("%s: incorrect authentication packet size (%d)",func, ntohl(len));
16174 +                       return -1;
16175 +               }
16176 +       }
16177 +
16178 +       /* expect to read "Authentication OK" response. kind should be 0... */
16179 +       if (pool_read(backend, &kind, sizeof(kind)))
16180 +       {
16181 +#ifdef PRINT_DEBUG
16182 +               show_debug("%s: failed to read Authentication OK response",func);
16183 +#endif                 
16184 +               return -1;
16185 +       }
16186 +
16187 +       /* if authenticated, save info */
16188 +       if (!reauth && kind == 0)
16189 +       {
16190 +               if (protoMajor == PROTO_MAJOR_V3)
16191 +               {
16192 +                       int msglen;
16193 +
16194 +                       pool_write(frontend, "R", 1);
16195 +                       msglen = htonl(8);
16196 +                       pool_write(frontend, &msglen, sizeof(msglen));
16197 +                       msglen = htonl(0);
16198 +                       if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
16199 +                       {
16200 +                               return -1;
16201 +                       }
16202 +               }
16203 +               backend->auth_kind = 4;
16204 +               backend->pwd_size = ntohl(size) - 4;
16205 +               memcpy(backend->password, password, backend->pwd_size);
16206 +               memcpy(backend->salt, salt, sizeof(salt));
16207 +       }
16208 +       return kind;
16209 +}
16210 +
16211 +/*
16212 + * perform MD5 authentication: relay the salt / password exchange between frontend and backend
16213 + */
16214 +static int do_md5(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor)
16215 +{
16216 +       char * func = "do_md5()";
16217 +       char salt[4];
16218 +       static int size;        /* password packet length exactly as read from the frontend (not byte-swapped); static, so it survives between calls */
16219 +       static char password[MAX_PASSWORD_SIZE];        /* password payload read from the frontend; static, so it survives between calls */
16220 +       char response;
16221 +       int kind;
16222 +       int len;
16223 +
16224 +       if (!reauth)
16225 +       {
16226 +               /* first authentication: the salt is read from the backend */
16227 +               if (pool_read(backend, salt, sizeof(salt)))
16228 +               {
16229 +                       show_error("%s: failed to read salt",func);
16230 +                       return -1;
16231 +               }
16232 +       }
16233 +       else
16234 +       {
16235 +               memcpy(salt, backend->salt, sizeof(salt));      /* re-authentication: reuse the salt saved on the backend */
16236 +       }
16237 +
16238 +       /* not the secondary backend: ask the frontend for the password */
16239 +       if (!backend->issecondary_backend)
16240 +       {
16241 +               pool_write(frontend, "R", 1);   /* authentication request */
16242 +               if (protoMajor == PROTO_MAJOR_V3)
16243 +               {
16244 +                       len = htonl(12);        /* V3 only: message length field */
16245 +                       pool_write(frontend, &len, sizeof(len));
16246 +               }
16247 +               kind = htonl(5);
16248 +               pool_write(frontend, &kind, sizeof(kind));      /* indicating MD5 */
16249 +               pool_write_and_flush(frontend, salt, sizeof(salt));             /* salt */
16250 +
16251 +               /* read password packet */
16252 +               if (protoMajor == PROTO_MAJOR_V2)
16253 +               {
16254 +                       if (pool_read(frontend, &size, sizeof(size)))
16255 +                       {
16256 +                               show_error("%s: failed to read password packet size",func);
16257 +                               return -1;
16258 +                       }
16259 +               }
16260 +               else
16261 +               {
16262 +                       char k; /* V3 only: message type byte, must be 'p' */
16263 +
16264 +                       if (pool_read(frontend, &k, sizeof(k)))
16265 +                       {
16266 +                               show_error("%s: failed to read password packet \"p\"",func);
16267 +                               return -1;
16268 +                       }
16269 +                       if (k != 'p')
16270 +                       {
16271 +                               show_error("%s: password packet does not start with \"p\"",func);
16272 +                               return -1;
16273 +                       }
16274 +                       if (pool_read(frontend, &size, sizeof(size)))
16275 +                       {
16276 +                               show_error("%s: failed to read password packet size",func);
16277 +                               return -1;
16278 +                       }
16279 +               }
16280 +
16281 +               if ((ntohl(size) - 4) > sizeof(password))       /* ntohl() is unsigned, so a size below 4 wraps around and is rejected here too */
16282 +               {
16283 +                       show_error("%s: password is too long(size: %d)",func, ntohl(size) - 4);
16284 +                       return -1;
16285 +               }
16286 +
16287 +               if (pool_read(frontend, password, ntohl(size) - 4))
16288 +               {
16289 +                       show_error("%s: failed to read password (size: %d)",func, ntohl(size) - 4);
16290 +                       return -1;
16291 +               }
16292 +       }
16293 +
16294 +       /* connection reuse: compare with the password saved on the backend; nothing is sent to the backend */
16295 +       if (reauth)
16296 +       {
16297 +               if ((ntohl(size) - 4) != backend->pwd_size)
16298 +               {
16299 +#ifdef PRINT_DEBUG
16300 +                       show_debug("%s; password size does not match in re-authetication",func);
16301 +#endif                 
16302 +                       return -1;
16303 +               }
16304 +
16305 +               if (memcmp(password, backend->password, backend->pwd_size) != 0)
16306 +               {
16307 +#ifdef PRINT_DEBUG
16308 +                       show_debug("%s; password does not match in re-authetication",func);
16309 +#endif                 
16310 +                       return -1;
16311 +               }
16312 +
16313 +               return 0;       /* size and bytes match the saved password */
16314 +       }
16315 +
16316 +       /* send password packet to backend */
16317 +       if (protoMajor == PROTO_MAJOR_V3)
16318 +               pool_write(backend, "p", 1);
16319 +       pool_write(backend, &size, sizeof(size));       /* forwarded as received, no byte swap needed */
16320 +       pool_write_and_flush(backend, password, ntohl(size) -4);
16321 +       if (pool_read(backend, &response, sizeof(response)))
16322 +       {
16323 +               show_error("%s: failed to read authentication response",func);
16324 +               return -1;
16325 +       }
16326 +
16327 +       if (response != 'R')
16328 +       {
16329 +#ifdef PRINT_DEBUG
16330 +               show_debug("%s: backend does not return R while processing MD5 authentication %c", func,response);
16331 +#endif                 
16332 +               return -1;
16333 +       }
16334 +
16335 +       if (protoMajor == PROTO_MAJOR_V3)
16336 +       {
16337 +               if (pool_read(backend, &len, sizeof(len)))
16338 +               {
16339 +                       show_error("%s: failed to read authentication packet size",func);
16340 +                       return -1;
16341 +               }
16342 +
16343 +               if (ntohl(len) != 8)
16344 +               {
16345 +                       show_error("%s: incorrect authentication packet size (%d)",func, ntohl(len));
16346 +                       return -1;
16347 +               }
16348 +       }
16349 +
16350 +       /* expect to read "Authentication OK" response. kind should be 0... */
16351 +       if (pool_read(backend, &kind, sizeof(kind)))
16352 +       {
16353 +#ifdef PRINT_DEBUG
16354 +               show_debug("%s: failed to read Authentication OK response",func);
16355 +#endif                 
16356 +               return -1;
16357 +       }
16358 +
16359 +       /* if authenticated, save info (reauth is always 0 here: the reauth path returned above) */
16360 +       if (!reauth && kind == 0)
16361 +       {
16362 +               if (protoMajor == PROTO_MAJOR_V3)
16363 +               {
16364 +                       int msglen;
16365 +
16366 +                       pool_write(frontend, "R", 1);   /* tell the frontend: 'R', length 8, kind 0 */
16367 +                       msglen = htonl(8);
16368 +                       pool_write(frontend, &msglen, sizeof(msglen));
16369 +                       msglen = htonl(0);
16370 +                       if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
16371 +                       {
16372 +                               return -1;
16373 +                       }
16374 +               }
16375 +               backend->auth_kind = 5; /* same code that was sent to the frontend as "MD5" above */
16376 +               backend->pwd_size = ntohl(size) - 4;
16377 +               memcpy(backend->password, password, backend->pwd_size);
16378 +               memcpy(backend->salt, salt, sizeof(salt));
16379 +       }
16380 +       return kind;    /* 0 when authenticated; otherwise the backend's kind word as read (not byte-swapped) */
16381 +}
16382 +
16383 +/*
16384 + * read message length (V3 only) from the master and, under REPLICATION, from the secondary too; returns the host-order length, or -1 on error or mismatch
16385 + */
16386 +int pool_read_message_length(POOL_CONNECTION_POOL *cp)
16387 +{
16388 +       char * func = "pool_read_message_length()";
16389 +       int status;
16390 +       int length, length1;    /* length: master, length1: secondary */
16391 +
16392 +       status = pool_read(MASTER(cp), &length, sizeof(length));
16393 +       if (status < 0) /* NOTE(review): do_md5() in this file treats any non-zero pool_read() result as failure -- confirm "< 0" is enough */
16394 +       {
16395 +               show_error("%s: error while reading message length",func);
16396 +               return -1;
16397 +       }
16398 +       length = ntohl(length);
16399 +
16400 +       if (REPLICATION)
16401 +       {
16402 +               status = pool_read(SECONDARY(cp), &length1, sizeof(length1));
16403 +               if (status < 0)
16404 +               {
16405 +                       show_error("%s: error while reading message length from secondary backend",func);
16406 +                       return -1;
16407 +               }
16408 +               length1 = ntohl(length1);
16409 +
16410 +               if (length != length1)  /* both backends must announce the same length */
16411 +               {
16412 +                       show_error("%s: length does not match between backends master(%d) secondary(%d)",
16413 +                                          func,length, length1);
16414 +                       return -1;
16415 +               }
16416 +       }
16417 +
16418 +       if (length < 0) /* a negative length cannot be told apart from the -1 error return, so reject it */
16419 +       {
16420 +               show_error("%s:read_message_length: invalid message length (%d)", func, length);
16421 +               return -1;
16422 +       }
16423 +
16424 +       return length;
16425 +}
16426 +/* read one message-kind byte from the master and, under REPLICATION, from the secondary (they must agree); returns the byte, or -1 on error or mismatch */
16427 +signed char pool_read_kind(POOL_CONNECTION_POOL *cp)
16428 +{
16429 +       char * func = "pool_read_kind()";
16430 +       int status;
16431 +       char kind, kind1;       /* kind: master, kind1: secondary */
16432 +
16433 +       status = pool_read(MASTER(cp), &kind, sizeof(kind));
16434 +       if (status < 0)
16435 +       {
16436 +               show_error("%s:read_message_kind: error while reading message kind",func);
16437 +               return -1;
16438 +       }
16439 +
16440 +       if (REPLICATION)
16441 +       {
16442 +               status = pool_read(SECONDARY(cp), &kind1, sizeof(kind1));
16443 +               if (status < 0)
16444 +               {
16445 +                       show_error("%s: error while reading message kind from secondary backend",func);
16446 +                       return -1;
16447 +               }
16448 +
16449 +               if (kind != kind1)
16450 +               {
16451 +                       show_error("%s: kind does not match between backends master(%d) secondary(%d)",
16452 +                          func, kind, kind1);
16453 +                       return -1;
16454 +               }
16455 +       }
16456 +
16457 +       return kind;
16458 +}
16459 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pool_connection_pool.c pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_connection_pool.c
16460 --- postgresql-8.2.4/src/pgcluster/pglb/pool_connection_pool.c  1970-01-01 01:00:00.000000000 +0100
16461 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_connection_pool.c        2007-02-18 22:52:17.000000000 +0100
16462 @@ -0,0 +1,535 @@
16463 +/*--------------------------------------------------------------------
16464 + * FILE:
16465 + *     pool_connection_pool.c
16466 + *
16467 + * NOTE:
16468 + *     connection pool stuff
16469 + *
16470 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
16471 + * Portions Copyright (c) 2003-2006, Tatsuo Ishii
16472 + *--------------------------------------------------------------------
16473 + */
16474 +/*
16475 + * Permission to use, copy, modify, and distribute this software and
16476 + * its documentation for any purpose and without fee is hereby
16477 + * granted, provided that the above copyright notice appear in all
16478 + * copies and that both that copyright notice and this permission
16479 + * notice appear in supporting documentation, and that the name of the
16480 + * author not be used in advertising or publicity pertaining to
16481 + * distribution of the software without specific, written prior
16482 + * permission. The author makes no representations about the
16483 + * suitability of this software for any purpose.  It is provided "as
16484 + * is" without express or implied warranty.
16485 + *
16486 + */
16487 +#include "postgres.h"
16488 +#include <sys/types.h>
16489 +#include <sys/socket.h>
16490 +#include <sys/time.h>
16491 +#include <sys/un.h>
16492 +#include <arpa/inet.h>
16493 +#include <netdb.h>
16494 +#include <stdio.h>
16495 +#include <stdlib.h>
16496 +#include <errno.h>
16497 +#include <signal.h>
16498 +#include <string.h>
16499 +#include <unistd.h>
16500 +#include <time.h>
16501 +
16502 +#ifdef HAVE_NETINET_TCP_H
16503 +#include <netinet/tcp.h>
16504 +#endif
16505 +
16506 +#include "replicate_com.h"
16507 +#include "pglb.h"
16508 +
16509 +POOL_CONNECTION_POOL *pool_connection_pool;    /* connection pool */
16510 +
16511 +int pool_init_cp(void);
16512 +POOL_CONNECTION_POOL *pool_get_cp(char *user, char *database, int protoMajor);
16513 +void pool_discard_cp(char *user, char *database, int protoMajor);
16514 +POOL_CONNECTION_POOL *pool_create_cp(void);
16515 +void pool_connection_pool_timer(POOL_CONNECTION_POOL *backend);
16516 +void pool_backend_timer_handler(int sig);
16517 +int connect_inet_domain_socket(int secondary_backend);
16518 +int connect_unix_domain_socket(int secondary_backend);
16519 +char PGRis_same_host(char * host1, char * host2);
16520 +void pool_finish(void);
16521 +
16522 +
16523 +static POOL_CONNECTION_POOL_SLOT *create_cp(POOL_CONNECTION_POOL_SLOT *cp, int secondary_backend);
16524 +static POOL_CONNECTION_POOL *new_connection(POOL_CONNECTION_POOL *p);
16525 +
16526 +
16527 +
16528 +/*
16529 +* allocate and zero the Max_Pool-entry connection pool array. this should be called once at the startup. returns 0, or -1 if malloc() fails.
16530 +*/
16531 +int pool_init_cp(void)
16532 +{
16533 +       char * func = "pool_init_cp()";
16534 +       pool_connection_pool = (POOL_CONNECTION_POOL *)malloc(sizeof(POOL_CONNECTION_POOL)*Max_Pool);
16535 +       if (pool_connection_pool == NULL)
16536 +       {
16537 +               show_error("%s: malloc() failed[%s]",func,strerror(errno));
16538 +               return -1;
16539 +       }
16540 +       memset(pool_connection_pool, 0, sizeof(POOL_CONNECTION_POOL)*Max_Pool); /* all-zero entries are what the other functions here treat as "empty" */
16541 +
16542 +       return 0;
16543 +}
16544 +
16545 +/*
16546 +* find a pooled connection whose protocol major version, user and database all match; marks it as in use. returns NULL if there is none.
16547 +*/
16548 +POOL_CONNECTION_POOL *pool_get_cp(char *user, char *database, int protoMajor)
16549 +{
16550 +       char * func = "pool_get_cp()";
16551 +       int i;
16552 +
16553 +       POOL_CONNECTION_POOL *p = pool_connection_pool;
16554 +
16555 +       if (p == NULL)
16556 +       {
16557 +               show_error("%s: pool_connection_pool is not initialized",func);
16558 +               return NULL;
16559 +       }
16560 +
16561 +       for (i=0;i<Max_Pool;i++)
16562 +       {
16563 +               if (MASTER_CONNECTION(p) &&
16564 +                       MASTER_CONNECTION(p)->sp->major == protoMajor &&        /* NOTE(review): sp is dereferenced without a NULL check -- confirm it is always set once the slot exists */
16565 +                       MASTER_CONNECTION(p)->sp->user != NULL &&
16566 +                       strcmp(MASTER_CONNECTION(p)->sp->user, user) == 0 &&
16567 +                       strcmp(MASTER_CONNECTION(p)->sp->database, database) == 0)
16568 +               {
16569 +                       /* mark this connection is under use */
16570 +                       MASTER_CONNECTION(p)->closetime = 0;
16571 +                       return p;
16572 +               }
16573 +               p++;
16574 +       }
16575 +       return NULL;
16576 +}
16577 +
16578 +/*
16579 + * disconnect and release the pooled connection matching user/database/protoMajor; logs an error and does nothing if there is none
16580 + */
16581 +void pool_discard_cp(char *user, char *database, int protoMajor)
16582 +{
16583 +       char * func = "pool_discard_cp()";
16584 +       POOL_CONNECTION_POOL *p = pool_get_cp(user, database, protoMajor);
16585 +
16586 +       if (p == NULL)
16587 +       {
16588 +               show_error("%s: cannot get connection pool for user %s datbase %s", func,user, database);
16589 +               return;
16590 +       }
16591 +
16592 +       free(MASTER_CONNECTION(p)->sp->user);
16593 +       free(MASTER_CONNECTION(p)->sp->database);
16594 +       free(MASTER_CONNECTION(p)->sp->startup_packet);
16595 +       pool_close(MASTER_CONNECTION(p)->con);
16596 +
16597 +       memset(p, 0, sizeof(POOL_CONNECTION_POOL));     /* entry becomes empty again. NOTE(review): the slot malloc()ed in new_connection() and its sp are not free()d first -- confirm this is not a leak */
16598 +}
16599 +
16600 +
16601 +/*
16602 +* open a new backend connection in the first empty pool entry; if every entry is taken, discard the one with the smallest closetime and reuse it
16603 +*/
16604 +POOL_CONNECTION_POOL *pool_create_cp(void)
16605 +{
16606 +       char * func = "pool_create_cp()";
16607 +       int i;
16608 +       time_t closetime;
16609 +       POOL_CONNECTION_POOL *oldestp;
16610 +
16611 +       POOL_CONNECTION_POOL *p = pool_connection_pool;
16612 +
16613 +       if (p == NULL)
16614 +       {
16615 +               show_error("%s: pool_connection_pool is not initialized",func);
16616 +               return NULL;
16617 +       }
16618 +
16619 +       for (i=0; i<Max_Pool; i++)
16620 +       {
16621 +               if (MASTER_CONNECTION(p) == NULL)       /* empty entry: use it */
16622 +                       return new_connection(p);
16623 +               p++;
16624 +       }
16625 +
16626 +#ifdef PRINT_DEBUG
16627 +       show_debug("%s:no empty connection slot was found",func);
16628 +#endif                 
16629 +
16630 +       /*
16631 +        * no empty connection slot was found. look for the oldest connection and discard it.
16632 +        */
16633 +       oldestp = p = pool_connection_pool;
16634 +       closetime = MASTER_CONNECTION(p)->closetime;
16635 +       for (i=0; i<Max_Pool; i++)
16636 +       {
16637 +#ifdef PRINT_DEBUG
16638 +               show_debug("%s:user: %s database: %s closetime: %d",
16639 +                               func,
16640 +                                  MASTER_CONNECTION(p)->sp->user,
16641 +                                  MASTER_CONNECTION(p)->sp->database,
16642 +                                  MASTER_CONNECTION(p)->closetime);
16643 +#endif                 
16644 +               if (MASTER_CONNECTION(p)->closetime < closetime)        /* NOTE(review): pool_get_cp() sets closetime to 0 for entries in use, so those compare as oldest -- confirm intended */
16645 +               {
16646 +                       closetime = MASTER_CONNECTION(p)->closetime;
16647 +                       oldestp = p;
16648 +               }
16649 +               p++;
16650 +       }
16651 +
16652 +       p = oldestp;
16653 +       pool_send_frontend_exits(p);
16654 +
16655 +#ifdef PRINT_DEBUG
16656 +       show_debug("%s:discarding old %d th connection. user: %s database: %s", 
16657 +                          func,
16658 +                          oldestp - pool_connection_pool,
16659 +                          MASTER_CONNECTION(p)->sp->user,
16660 +                          MASTER_CONNECTION(p)->sp->database);
16661 +#endif                 
16662 +
16663 +       free(MASTER_CONNECTION(p)->sp->user);   /* same teardown sequence as pool_discard_cp() */
16664 +       free(MASTER_CONNECTION(p)->sp->database);
16665 +       free(MASTER_CONNECTION(p)->sp->startup_packet);
16666 +       pool_close(MASTER_CONNECTION(p)->con);
16667 +
16668 +       memset(p, 0, sizeof(POOL_CONNECTION_POOL));
16669 +
16670 +       return new_connection(p);
16671 +}
16672 +
16673 +/*
16674 + * stamp the given pool entry with the current time and, when no other entry already has a close time, arm the SIGALRM expiry timer
16675 + */
16676 +void pool_connection_pool_timer(POOL_CONNECTION_POOL *backend)
16677 +{
16678 +#ifdef PRINT_DEBUG
16679 +       char * func = "pool_connection_pool_timer()";
16680 +#endif                 
16681 +       POOL_CONNECTION_POOL *p = pool_connection_pool;
16682 +       int i;
16683 +
16684 +#ifdef PRINT_DEBUG
16685 +       show_debug("%s:pool_connection_pool_timer: called",func);
16686 +#endif                 
16687 +
16688 +       MASTER_CONNECTION(backend)->closetime = time(NULL);             /* set connection close time */
16689 +
16690 +       if (Connection_Life_Time == 0)  /* 0: no alarm is ever armed, entries never expire by timer */
16691 +               return;
16692 +
16693 +       /* look for any other timeout */
16694 +       for (i=0;i<Max_Pool;i++, p++)
16695 +       {
16696 +               if (!MASTER_CONNECTION(p))
16697 +                       continue;
16698 +               if (MASTER_CONNECTION(p)->sp->user == NULL)
16699 +                       continue;
16700 +
16701 +               if (p != backend && MASTER_CONNECTION(p)->closetime)    /* another entry already carries a close time: leave without arming (presumably its alarm is still pending) */
16702 +                       return;
16703 +       }
16704 +
16705 +       /* no other timer found. set my timer */
16706 +#ifdef PRINT_DEBUG
16707 +       show_debug("%s: set alarm after %d seconds",func, Connection_Life_Time);
16708 +#endif                 
16709 +       signal(SIGALRM, pool_backend_timer_handler);
16710 +       alarm(Connection_Life_Time);
16711 +}
16712 +
16713 +/*
16714 + * SIGALRM handler: discard every pool entry whose close time is at least Connection_Life_Time seconds old, then re-arm the alarm for the next one due
16715 + */
16716 +void pool_backend_timer_handler(int sig)
16717 +{
16718 +#define TMINTMAX 0x7fffffff    /* sentinel for "nearest": no unexpired entry with a close time was seen */
16719 +
16720 +#ifdef PRINT_DEBUG
16721 +       char * func = "pool_backend_timer_handler()";
16722 +#endif                 
16723 +       POOL_CONNECTION_POOL *p = pool_connection_pool;
16724 +       int i;
16725 +       time_t now;
16726 +       time_t nearest = TMINTMAX;
16727 +
16728 +       now = time(NULL);
16729 +
16730 +#ifdef PRINT_DEBUG
16731 +       show_debug("%s:called at %d", func,now);
16732 +#endif                 
16733 +
16734 +       for (i=0;i<Max_Pool;i++, p++)
16735 +       {
16736 +               if (!MASTER_CONNECTION(p))
16737 +                       continue;
16738 +               if (MASTER_CONNECTION(p)->sp->user == NULL)
16739 +                       continue;
16740 +
16741 +               /* timer expire? */
16742 +               if (MASTER_CONNECTION(p)->closetime)
16743 +               {
16744 +#ifdef PRINT_DEBUG
16745 +                       show_debug("%s: expire time: %d",
16746 +                                          func,
16747 +                                          MASTER_CONNECTION(p)->closetime+Connection_Life_Time);
16748 +#endif                 
16749 +
16750 +                       if (now >= (MASTER_CONNECTION(p)->closetime+Connection_Life_Time))
16751 +                       {
16752 +                               /* discard expired connection */
16753 +#ifdef PRINT_DEBUG
16754 +                               show_debug("%s: expires user %s database %s", func, MASTER_CONNECTION(p)->sp->user, MASTER_CONNECTION(p)->sp->database);
16755 +#endif                 
16756 +
16757 +                               pool_send_frontend_exits(p);
16758 +
16759 +                               free(MASTER_CONNECTION(p)->sp->user);   /* NOTE(review): free() is not async-signal-safe and this runs from SIGALRM -- confirm the signal cannot interrupt malloc()/free() elsewhere */
16760 +                               free(MASTER_CONNECTION(p)->sp->database);
16761 +                               free(MASTER_CONNECTION(p)->sp->startup_packet);
16762 +                               pool_close(MASTER_CONNECTION(p)->con);
16763 +
16764 +                               memset(p, 0, sizeof(POOL_CONNECTION_POOL));
16765 +                       }
16766 +                       else
16767 +                       {
16768 +                               /* look for nearest timer */
16769 +                               if (MASTER_CONNECTION(p)->closetime < nearest)
16770 +                                       nearest = MASTER_CONNECTION(p)->closetime;
16771 +                       }
16772 +               }
16773 +       }
16774 +
16775 +       /* any remaining timer */
16776 +       if (nearest != TMINTMAX)
16777 +       {
16778 +               nearest = Connection_Life_Time - (now - nearest);       /* seconds left until the earliest remaining entry expires */
16779 +               if (nearest <= 0)
16780 +                 nearest = 1;
16781 +               signal(SIGALRM, pool_backend_timer_handler);
16782 +               alarm(nearest);
16783 +       }
16784 +}
16785 +/* open a TCP connection (TCP_NODELAY set) to CurrentCluster->hostName:port; returns the connected fd, or -1 on failure. secondary_backend is not used. */
16786 +int connect_inet_domain_socket(int secondary_backend)
16787 +{
16788 +       char * func = "connect_inet_domain_socket()";
16789 +       int fd;
16790 +       int len;
16791 +       int on = 1;
16792 +       struct sockaddr_in addr;
16793 +       struct hostent *hp;
16794 +
16795 +       fd = socket(AF_INET, SOCK_STREAM, 0);
16796 +       if (fd < 0)
16797 +       {
16798 +               show_error("%s: socket() failed: %s",func, strerror(errno));
16799 +               return -1;
16800 +       }
16801 +
16802 +       /* set nodelay */
16803 +       if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
16804 +                                  (char *) &on,
16805 +                                  sizeof(on)) < 0)
16806 +       {
16807 +               show_error("%s: setsockopt() failed: %s", func, strerror(errno));
16808 +               close(fd);
16809 +               return -1;
16810 +       }
16811 +
16812 +       memset((char *) &addr, 0, sizeof(addr));
16813 +       ((struct sockaddr *)&addr)->sa_family = AF_INET;
16814 +
16815 +       addr.sin_port = htons(CurrentCluster->port);
16816 +       len = sizeof(struct sockaddr_in);
16817 +
16818 +       hp = gethostbyname(CurrentCluster->hostName);
16819 +
16820 +       if ((hp == NULL) || (hp->h_addrtype != AF_INET))        /* only IPv4 results are usable with sockaddr_in */
16821 +       {
16822 +               show_error("%s: gethostbyname() failed: %s host: %s",func, (hp == NULL) ? hstrerror(h_errno) : "not an IPv4 address", CurrentCluster->hostName);  /* gethostbyname() reports through h_errno, not errno */
16823 +               close(fd);
16824 +               return -1;
16825 +       }
16826 +       memmove((char *) &(addr.sin_addr),
16827 +                       (char *) hp->h_addr,
16828 +                       hp->h_length);
16829 +
16830 +       if (connect(fd, (struct sockaddr *)&addr, len) < 0)
16831 +       {
16832 +               show_error("%s: connect() failed: %s",func,strerror(errno));
16833 +               close(fd);
16834 +               return -1;
16835 +       }
16836 +       return fd;
16837 +}
16838 +
16839 +int connect_unix_domain_socket(int secondary_backend)
16840 +{
16841 +       char * func = "connect_unix_domain_socket()";
16842 +       struct sockaddr_un addr;
16843 +       int fd;
16844 +       int len;
16845 +       int port;
16846 +
16847 +       fd = socket(AF_UNIX, SOCK_STREAM, 0);
16848 +       if (fd == -1)
16849 +       {
16850 +               show_error("%s: setsockopt() failed: %s", func,strerror(errno));
16851 +               return -1;
16852 +       }
16853 +
16854 +       port = CurrentCluster->port;
16855 +       memset((char *) &addr, 0, sizeof(addr));
16856 +       ((struct sockaddr *)&addr)->sa_family = AF_UNIX;
16857 +       snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/.s.PGSQL.%d", 
16858 +                        Backend_Socket_Dir,
16859 +                        CurrentCluster->port);
16860 +#ifdef PRINT_DEBUG
16861 +       show_debug("%s:postmaster Unix domain socket: %s",func, addr.sun_path);
16862 +#endif                 
16863 +
16864 +       len = sizeof(struct sockaddr_un);
16865 +
16866 +       if (connect(fd, (struct sockaddr *)&addr, len) < 0)
16867 +       {
16868 +               show_error("%s: connect() failed: %s",func, strerror(errno));
16869 +               close(fd);
16870 +               return -1;
16871 +       }
16872 +#ifdef PRINT_DEBUG
16873 +       show_debug("%s:connected to postmaster Unix domain socket: %s fd: %d", func,addr.sun_path, fd);
16874 +#endif                 
16875 +       return fd;
16876 +}
16877 +/* connect the given slot to CurrentCluster: Unix-domain socket when PGRis_same_host() says it is this host, TCP otherwise. returns cp, or NULL if gethostname() fails; exits the process if the connect fails. */
16878 +static POOL_CONNECTION_POOL_SLOT *create_cp(POOL_CONNECTION_POOL_SLOT *cp, int secondary_backend)
16879 +{
16880 +       char * func = "create_cp()";
16881 +       int fd;
16882 +       char hostName[HOSTNAME_MAX_LENGTH];     /* name of the local host */
16883 +
16884 +       if (gethostname(hostName,sizeof(hostName)) < 0)
16885 +       {
16886 +               show_error("%s:gethostname() failed. (%s)",func,strerror(errno));
16887 +               return NULL;
16888 +       }
16889 +       if (PGRis_same_host(hostName,CurrentCluster->hostName) == 1)
16890 +       {
16891 +#ifdef PRINT_DEBUG
16892 +               show_debug("%s:[%s] [%s] is same",func,hostName,CurrentCluster->hostName);
16893 +#endif                 
16894 +               fd = connect_unix_domain_socket(secondary_backend);
16895 +       }
16896 +       else
16897 +       {
16898 +               fd = connect_inet_domain_socket(secondary_backend);
16899 +       }
16900 +
16901 +       if (fd < 0)
16902 +       {
16903 +               /* fatal error, notice to parent and exit */
16904 +               notice_backend_error();
16905 +               exit(1);
16906 +       }
16907 +
16908 +       cp->con = pool_open(fd);        /* NOTE(review): the result of pool_open() is not checked here -- confirm it cannot fail */
16909 +       cp->closetime = 0;
16910 +       return cp;
16911 +}
16912 +/* allocate the master slot of pool entry p, connect it through create_cp() and initialise its parameter storage; returns p, or NULL on failure */
16913 +static POOL_CONNECTION_POOL *new_connection(POOL_CONNECTION_POOL *p)
16914 +{
16915 +       char * func = "new_connection()";
16916 +       /* create master connection */
16917 +       MASTER_CONNECTION(p) = malloc(sizeof(POOL_CONNECTION_POOL_SLOT));       /* not zeroed: only the fields set by create_cp() are initialised in this file */
16918 +       if (MASTER_CONNECTION(p) == NULL)
16919 +       {
16920 +               show_error("%s: malloc() failed [%s]",func,strerror(errno));
16921 +               return NULL;
16922 +       }
16923 +       create_cp(MASTER_CONNECTION(p), 0);     /* NOTE(review): return value ignored; create_cp() returns NULL without setting ->con when gethostname() fails */
16924 +
16925 +                       /* initialize Parameter Status save structure */
16926 +       if (pool_init_params(&MASTER(p)->params))
16927 +       {
16928 +               return NULL;    /* NOTE(review): the slot and its connection stay allocated on this path -- confirm */
16929 +       }
16930 +       p->num = 1;     /* number of slots */
16931 +
16932 +       return p;
16933 +}
16934 +/* returns 1 when PGRget_ip_by_name() yields the same value for both host names, 0 otherwise or when either name is NULL */
16935 +char PGRis_same_host(char * host1, char * host2)
16936 +{
16937 +       unsigned int ip1, ip2;
16938 +
16939 +       if ((host1 == NULL) || (host2 == NULL))
16940 +       {
16941 +               return 0;
16942 +       }
16943 +       ip1 = PGRget_ip_by_name( host1);
16944 +       ip2 = PGRget_ip_by_name( host2);
16945 +       if (ip1 == ip2) /* NOTE(review): if PGRget_ip_by_name() returns a fixed value on lookup failure, two unresolvable names compare equal -- confirm */
16946 +       {
16947 +               return 1;
16948 +       }
16949 +       return 0;
16950 +}
16951 +/* written to close every pooled connection and free the pool array, but currently a no-op: the unconditional return below skips all of it */
16952 +void pool_finish(void)
16953 +{
16954 +       char * func = "pool_finish()";
16955 +       int i;
16956 +
16957 +       POOL_CONNECTION_POOL *p = pool_connection_pool;
16958 +return;        /* everything after this statement is unreachable */
16959 +       if (p == NULL)
16960 +       {
16961 +               show_error("%s:pool_connection_pool is not initialized",func);
16962 +               return ;
16963 +       }
16964 +
16965 +       for (i=0 ; i<Max_Pool ; i++)
16966 +       {
16967 +               if (p == NULL)
16968 +                       break;
16969 +               /*
16970 +               if (MASTER_CONNECTION(p)->sp->user != NULL)
16971 +               {
16972 +                       free(MASTER_CONNECTION(p)->sp->user);
16973 +                       MASTER_CONNECTION(p)->sp->user = NULL;
16974 +               }
16975 +               if (MASTER_CONNECTION(p)->sp->database != NULL)
16976 +               {
16977 +                       free(MASTER_CONNECTION(p)->sp->database);
16978 +                       MASTER_CONNECTION(p)->sp->database = NULL;
16979 +               }
16980 +               if (MASTER_CONNECTION(p)->sp->startup_packet != NULL)
16981 +               {
16982 +                       free(MASTER_CONNECTION(p)->sp->startup_packet);
16983 +                       MASTER_CONNECTION(p)->sp->startup_packet = NULL;
16984 +               }
16985 +               */
16986 +               if (MASTER_CONNECTION(p)->con != NULL)  /* NOTE(review): dereferences MASTER_CONNECTION(p) without checking it for NULL; empty entries would need a guard before this code is re-enabled */
16987 +               {
16988 +                       pool_close(MASTER_CONNECTION(p)->con);
16989 +                       MASTER_CONNECTION(p)->con = NULL;
16990 +               }
16991 +               memset(p, 0, sizeof(POOL_CONNECTION_POOL));
16992 +               p++;
16993 +       }
16994 +       free((char *)pool_connection_pool);
16995 +       pool_connection_pool = NULL;
16996 +}
16997 +
16998 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pool_params.c pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_params.c
16999 --- postgresql-8.2.4/src/pgcluster/pglb/pool_params.c   1970-01-01 01:00:00.000000000 +0100
17000 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_params.c 2007-02-18 22:52:17.000000000 +0100
17001 @@ -0,0 +1,184 @@
17002 +/*--------------------------------------------------------------------
17003 + * FILE:
17004 + *     pool_params.c
17005 + *
17006 + * NOTE:
17007 + *     connection pool stuff
17008 + *
17009 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
17010 + * Portions Copyright (c) 2003-2006, Tatsuo Ishii
17011 + *--------------------------------------------------------------------
17012 + */
17013 +/*
17014 + * Permission to use, copy, modify, and distribute this software and
17015 + * its documentation for any purpose and without fee is hereby
17016 + * granted, provided that the above copyright notice appear in all
17017 + * copies and that both that copyright notice and this permission
17018 + * notice appear in supporting documentation, and that the name of the
17019 + * author not be used in advertising or publicity pertaining to
17020 + * distribution of the software without specific, written prior
17021 + * permission. The author makes no representations about the
17022 + * suitability of this software for any purpose.  It is provided "as
17023 + * is" without express or implied warranty.
17024 + *
17025 + */
17026 +
17027 +#include <stdio.h>
17028 +#include <sys/time.h>
17029 +#include <time.h>
17030 +#include <stdlib.h>
17031 +#include <string.h>
17032 +
17033 +#ifdef HAVE_NETINET_TCP_H
17034 +#include <netinet/tcp.h>
17035 +#endif
17036 +
17037 +#include "replicate_com.h"
17038 +#include "pglb.h"
17039 +
17040 +#define MAX_PARAM_ITEMS 128
17041 +
17042 +int pool_init_params(ParamStatus *params);
17043 +void pool_discard_params(ParamStatus *params);
17044 +char *pool_find_name(ParamStatus *params, char *name, int *pos);
17045 +int pool_get_param(ParamStatus *params, int index, char **name, char **value);
17046 +int pool_add_param(ParamStatus *params, char *name, char *value);
17047 +void pool_param_debug_print(ParamStatus *params);
17048 +
17049 +/*
17050 + * initialize parameter structure
17051 + * returns 0 on success; on malloc failure returns -1 with both arrays NULL
17052 + */
17053 +int pool_init_params(ParamStatus *params)
17054 +{
17055 +       char * func = "pool_init_params()";
17056 +
17057 +       params->num = 0;
17058 +       params->values = NULL;
17059 +       params->names = malloc(MAX_PARAM_ITEMS*sizeof(char *));
17060 +       if (params->names != NULL)
17061 +               params->values = malloc(MAX_PARAM_ITEMS*sizeof(char *));
17062 +       if (params->values == NULL)
17063 +       {
17064 +               free(params->names);    /* no-op when names itself failed; otherwise avoids leaking it */
17065 +               params->names = NULL;
17066 +               show_error("%s: cannot allocate memory",func);
17067 +               return -1;
17068 +       }
17069 +       return 0;
17070 +}
17071 +
17072 +/* discard parameter structure; resets it so a repeated call cannot double free */
17073 +void pool_discard_params(ParamStatus *params)
17074 +{
17075 +       int i;
17076 +       for (i=0;i<params->num;i++)
17077 +       {
17078 +               free(params->names[i]);
17079 +               free(params->values[i]);
17080 +       }
17081 +       free(params->names);
17082 +       free(params->values);
17083 +       /* leave no dangling pointers or stale count behind */
17084 +       params->names = NULL;
17085 +       params->values = NULL;
17086 +       params->num = 0;
17087 +}
17087 +
17088 +/*
17089 + * Look up a parameter by name (exact strcmp() match).
17090 + * On a hit the index is stored in *pos and the stored value is returned;
17091 + * on a miss NULL is returned and *pos is not written.
17092 + */
17093 +char *pool_find_name(ParamStatus *params, char *name, int *pos)
17094 +{
17095 +       int idx;
17096 +
17097 +       for (idx = 0; idx < params->num; idx++)
17098 +       {
17099 +               /* skip entries whose name differs */
17100 +               if (strcmp(name, params->names[idx]) != 0)
17101 +                       continue;
17102 +               *pos = idx;
17103 +               return params->values[idx];
17104 +       }
17105 +       return NULL;
17106 +}
17107 +
17108 +/*
17109 + * return name and value by index: 0 on success, -1 if index is out of range
17110 + */
17111 +int pool_get_param(ParamStatus *params, int index, char **name, char **value)
17112 +{
17113 +       if (index >= 0 && index < params->num)
17114 +       {
17115 +               *name = params->names[index];
17116 +               *value = params->values[index];
17117 +               return 0;
17118 +       }
17119 +       return -1;
17120 +}
17121 +
17122 +/*
17123 + * add or replace name/value pair; returns 0 on success, -1 on failure
17124 + */
17125 +int pool_add_param(ParamStatus *params, char *name, char *value)
17126 +{
17127 +       char * func = "pool_add_param()";
17128 +       int pos;
17129 +
17130 +       if (pool_find_name(params, name, &pos))
17131 +       {
17132 +               /* name already exists: grow its buffer only if the new value is longer */
17133 +               if (strlen(params->values[pos]) < strlen(value))
17134 +               {
17135 +                       char *grown = realloc(params->values[pos], strlen(value) + 1);  /* old buffer stays valid if this fails */
17136 +                       if (grown == NULL)
17137 +                       {
17138 +                               show_error("%s: cannot allocate memory",func);
17139 +                               return -1;
17140 +                       }
17141 +                       params->values[pos] = grown;
17142 +               }
17143 +               strcpy(params->values[pos], value);
17144 +       }
17145 +       else
17146 +       {
17147 +               int num = params->num;
17148 +               /* add name/value pair */
17149 +               if (num >= MAX_PARAM_ITEMS)
17150 +               {
17151 +                       show_error("%s: no more room for num",func);
17152 +                       return -1;
17153 +               }
17154 +               params->names[num] = strdup(name);
17155 +               if (params->names[num] == NULL)
17156 +               {
17157 +                       show_error("%s: cannot allocate memory",func);
17158 +                       return -1;
17159 +               }
17160 +               params->values[num] = strdup(value);
17161 +               if (params->values[num] == NULL)
17162 +               {
17163 +                       free(params->names[num]);       /* do not leak the name copy made above */
17164 +                       show_error("%s: cannot allocate memory",func);
17165 +                       return -1;
17166 +               }
17167 +               params->num++;
17168 +       }
17169 +       return 0;
17170 +}
17171 +
17172 +void pool_param_debug_print(ParamStatus *params)
17173 +{
17174 +#ifdef PRINT_DEBUG
17175 +       char * func = "pool_param_debug_print()";
17176 +#endif
17177 +       int idx = 0;    /* position of the pair being printed */
17178 +       while (idx < params->num)
17179 +       {
17180 +#ifdef PRINT_DEBUG
17181 +               show_debug("%s: No.%d: name: %s value: %s",func, idx, params->names[idx], params->values[idx]);
17182 +#endif
17183 +               idx++;  /* the loop still runs, doing nothing else, when PRINT_DEBUG is off */
17184 +       }
17185 +}
17186 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pool_process_query.c pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_process_query.c
17187 --- postgresql-8.2.4/src/pgcluster/pglb/pool_process_query.c    1970-01-01 01:00:00.000000000 +0100
17188 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_process_query.c  2007-02-18 22:52:17.000000000 +0100
17189 @@ -0,0 +1,2100 @@
17190 +/*--------------------------------------------------------------------
17191 + * FILE:
17192 + *     pool_process_query.c
17193 + *
17194 + * NOTE:
17195 + *     query processing stuff
17196 + *
17197 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
17198 + * Portions Copyright (c) 2003-2006, Tatsuo Ishii
17199 + *--------------------------------------------------------------------
17200 + */
17201 +/*
17202 + * Permission to use, copy, modify, and distribute this software and
17203 + * its documentation for any purpose and without fee is hereby
17204 + * granted, provided that the above copyright notice appear in all
17205 + * copies and that both that copyright notice and this permission
17206 + * notice appear in supporting documentation, and that the name of the
17207 + * author not be used in advertising or publicity pertaining to
17208 + * distribution of the software without specific, written prior
17209 + * permission. The author makes no representations about the
17210 + * suitability of this software for any purpose.  It is provided "as
17211 + * is" without express or implied warranty.
17212 + *
17213 +*/
17214 +#include <errno.h>
17215 +#include <sys/types.h>
17216 +#include <sys/time.h>
17217 +#include <arpa/inet.h>
17218 +#include <stdlib.h>
17219 +#include <unistd.h>
17220 +#include <string.h>
17221 +#include <netinet/in.h>
17222 +
17223 +#include "postgres_fe.h"
17224 +#include "libpq/pqcomm.h"
17225 +
17226 +#include "replicate_com.h"
17227 +#include "pglb.h"
17228 +
17229 +POOL_STATUS pool_process_query(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, int connection_reuse);
17230 +POOL_STATUS ErrorResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17231 +void pool_enable_timeout(void); 
17232 +void pool_disable_timeout(void);
17233 +int pool_check_fd(POOL_CONNECTION *cp, int notimeout);
17234 +void pool_send_frontend_exits(POOL_CONNECTION_POOL *backend);
17235 +POOL_STATUS SimpleForwardToFrontend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17236 +POOL_STATUS SimpleForwardToBackend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17237 +POOL_STATUS ParameterStatus(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17238 +void pool_send_error_message(POOL_CONNECTION *frontend, int protoMajor, char *code, char *message, char *detail, char *hint, char *file, int line);
17239 +
17240 +
17241 +static POOL_STATUS Query(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, char *query);
17242 +static POOL_STATUS ReadyForQuery(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, int send_ready);
17243 +static POOL_STATUS CompleteCommandResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17244 +static int RowDescription(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17245 +static POOL_STATUS AsciiRow(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, short num_fields);
17246 +static POOL_STATUS BinaryRow(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, short num_fields);
17247 +static POOL_STATUS CursorResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17248 +static POOL_STATUS NoticeResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17249 +static POOL_STATUS CopyInResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17250 +static POOL_STATUS CopyOutResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17251 +static POOL_STATUS CopyDataRows(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, int copyin);
17252 +static POOL_STATUS EmptyQueryResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17253 +static POOL_STATUS NotificationResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17254 +static POOL_STATUS FunctionCall(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17255 +static POOL_STATUS FunctionResultResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17256 +static POOL_STATUS ProcessFrontendResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17257 +static int synchronize(POOL_CONNECTION *cp);
17258 +static void process_reporting(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17259 +static int reset_backend(POOL_CONNECTION_POOL *backend, int qcnt);
17260 +static int load_balance_enabled(POOL_CONNECTION_POOL *backend, char *sql);
17261 +static void start_load_balance(POOL_CONNECTION_POOL *backend);
17262 +static void end_load_balance(POOL_CONNECTION_POOL *backend);
17263 +
17264 +static POOL_CONNECTION_POOL_SLOT *slots[MAX_CONNECTION_SLOTS];
17265 +
17266 +POOL_STATUS pool_process_query(POOL_CONNECTION *frontend, 
17267 +                                                          POOL_CONNECTION_POOL *backend,
17268 +                                                          int connection_reuse)
17269 +{
17270 +       char * func = "pool_process_query()";
17271 +       char kind, kind1;       /* packet kind (backend) */
17272 +       char fkind;     /* packet kind (frontend) */
17273 +       short num_fields = 0;
17274 +       fd_set  readmask;
17275 +       fd_set  writemask;
17276 +       fd_set  exceptmask;
17277 +       int fds;
17278 +       POOL_STATUS status;
17279 +       int state;      /* 0: ok to issue commands 1: waiting for "ready for query" response */
17280 +       int qcnt;
17281 +
17282 +       frontend->no_forward = connection_reuse;
17283 +       qcnt = 0;
17284 +       state = 0;
17285 +
17286 +       for (;;)
17287 +       {
17288 +               kind = kind1 = 0;
17289 +               fkind = 0;
17290 +
17291 +               if (state == 0 && connection_reuse)
17292 +               {
17293 +                       int st = 0;
17294 +
17295 +                       /* send query for resetting connection such as "ROLLBACK" "RESET ALL"... */
17296 +                       st = reset_backend(backend, qcnt);
17297 +
17298 +                       if (st < 0)             /* error? */
17299 +                               return POOL_END;
17300 +
17301 +                       else if (st == 0)       /* no query issued? */
17302 +                       {
17303 +                               qcnt++;
17304 +                               continue;
17305 +                       }
17306 +
17307 +                       else if (st == 1)       /* more query remains */
17308 +                       {
17309 +                               state = 1;
17310 +                               qcnt++;
17311 +                               continue;
17312 +                       }
17313 +
17314 +                       else if (st == 2)       /* no more qury */
17315 +                       {
17316 +                               frontend->no_forward = 0;
17317 +                               return POOL_CONTINUE;
17318 +                       }
17319 +
17320 +               }
17321 +
17322 +               if ((!REPLICATION && MASTER(backend)->len == 0 && frontend->len == 0) ||
17323 +                       (REPLICATION && MASTER(backend)->len == 0 &&
17324 +                       SECONDARY(backend)->len == 0
17325 +                        && frontend->len == 0))
17326 +               {
17327 +
17328 +                       struct timeval timeout;
17329 +
17330 +                       timeout.tv_sec = 1;
17331 +                       timeout.tv_usec = 0;
17332 +
17333 +                       FD_ZERO(&readmask);
17334 +                       FD_ZERO(&writemask);
17335 +                       FD_ZERO(&exceptmask);
17336 +                       if (!connection_reuse)
17337 +                               FD_SET(frontend->fd, &readmask);
17338 +                       FD_SET(MASTER(backend)->fd, &readmask);
17339 +                       if (REPLICATION)
17340 +                               FD_SET(SECONDARY(backend)->fd, &readmask);
17341 +                       if (!connection_reuse)
17342 +                               FD_SET(frontend->fd, &exceptmask);
17343 +                       FD_SET(MASTER(backend)->fd, &exceptmask);
17344 +
17345 +                       if (connection_reuse)
17346 +                       {
17347 +                               if (REPLICATION)
17348 +                                       fds = select(Max(SECONDARY(backend)->fd, MASTER(backend)->fd) + 1,
17349 +                                                                &readmask, &writemask, &exceptmask, NULL);
17350 +                               else
17351 +                                       fds = select(MASTER(backend)->fd+1, &readmask, &writemask, &exceptmask, NULL);
17352 +                       }
17353 +                       else
17354 +                       {
17355 +                               if (REPLICATION)
17356 +                                       fds = select(Max(SECONDARY(backend)->fd,
17357 +                                                                        Max(frontend->fd, MASTER(backend)->fd)+1),
17358 +                                                                &readmask, &writemask, &exceptmask, NULL);
17359 +                               else
17360 +                                       fds = select(Max(frontend->fd, MASTER(backend)->fd)+1,
17361 +                                                                &readmask, &writemask, &exceptmask, NULL);
17362 +                       }
17363 +
17364 +                       if (fds == -1)
17365 +                       {
17366 +                               if (errno == EINTR)
17367 +                                       continue;
17368 +
17369 +                               show_error("%s:select() failed. reason: %s",func, strerror(errno));
17370 +                               return POOL_ERROR;
17371 +                       }
17372 +
17373 +                       if (fds == 0)
17374 +                       {
17375 +                               return POOL_CONTINUE;
17376 +                       }
17377 +
17378 +                       if (FD_ISSET(MASTER(backend)->fd, &readmask))
17379 +                       {
17380 +                               pool_read(MASTER(backend), &kind, 1);
17381 +#ifdef PRINT_DEBUG
17382 +                               show_debug("%s:read kind from backend %c", func,kind);
17383 +#endif                 
17384 +                       }
17385 +
17386 +                       if (REPLICATION && FD_ISSET(SECONDARY(backend)->fd, &readmask))
17387 +                       {
17388 +                               pool_read(SECONDARY(backend), &kind1, 1);
17389 +#ifdef PRINT_DEBUG
17390 +                               show_debug("%s:read kind from secondary backend %c", func,kind1);
17391 +#endif                 
17392 +                       }
17393 +
17394 +                       if (!connection_reuse && FD_ISSET(frontend->fd, &exceptmask))
17395 +                       {
17396 +                               return POOL_END;
17397 +                       }
17398 +                       if (FD_ISSET(MASTER(backend)->fd, &exceptmask))
17399 +                       {
17400 +                               return POOL_ERROR;
17401 +                       }
17402 +
17403 +                       if (!connection_reuse && FD_ISSET(frontend->fd, &readmask))
17404 +                       {
17405 +                               status = ProcessFrontendResponse(frontend, backend);
17406 +                               if (status != POOL_CONTINUE)
17407 +                                       return status;
17408 +
17409 +                               continue;
17410 +                       }
17411 +               }
17412 +               else
17413 +               {
17414 +                       if (MASTER(backend)->len > 0)
17415 +                       {
17416 +                               pool_read(MASTER(backend), &kind, 1);
17417 +                               if (REPLICATION)
17418 +                               {
17419 +                                       pool_read(SECONDARY(backend), &kind1, 1);
17420 +                                       if (kind == '\0' || kind != kind1)
17421 +                                       {
17422 +                                               show_error("%s: kind does not match between backends master(%c) secondary(%c)",
17423 +                                                                  func, kind, kind1);
17424 +                                               pool_send_error_message(frontend, MAJOR(backend), "XX000", 
17425 +                                                                                               "kind mismatch between backends", "",
17426 +                                                                                               "check data consistency between master and secondary", __FILE__, __LINE__);
17427 +
17428 +                                               if (pool_config_replication_stop_on_mismatch)
17429 +                                                       return POOL_FATAL;
17430 +                                               else
17431 +                                                       return POOL_ERROR;
17432 +                                       }
17433 +                               }
17434 +#ifdef PRINT_DEBUG
17435 +                               show_debug("%s:read kind from backend pending data %c len: %d po: %d", func, kind, MASTER(backend)->len, MASTER(backend)->po);
17436 +#endif                 
17437 +                       }
17438 +                       if (frontend->len > 0)
17439 +                       {
17440 +                               status = ProcessFrontendResponse(frontend, backend);
17441 +                               if (status != POOL_CONTINUE)
17442 +                                       return status;
17443 +
17444 +                               continue;
17445 +                       }
17446 +               }
17447 +
17448 +               /* this is the synchronous point */
17449 +               if (REPLICATION)
17450 +               {
17451 +                       if (kind == 0)
17452 +                       {
17453 +                               pool_read(MASTER(backend), &kind, 1);
17454 +                       }
17455 +                       if (kind1 == 0)
17456 +                       {
17457 +                               pool_read(SECONDARY(backend), &kind1, 1);
17458 +                       }
17459 +                       if (kind == '\0' || kind != kind1)
17460 +                       {
17461 +                               show_error("%s: kind does not match between backends master(%c) secondary(%c)",
17462 +                                                  func, kind, kind1);
17463 +                               pool_send_error_message(frontend, MAJOR(backend), "XX000", 
17464 +                                                                               "kind mismatch between backends", "",
17465 +                                                                               "check data consistency between master and secondary", __FILE__, __LINE__);
17466 +
17467 +                               if (pool_config_replication_stop_on_mismatch)
17468 +                                       return POOL_FATAL;
17469 +                               else
17470 +                                       return POOL_ERROR;
17471 +                       }
17472 +               }
17473 +
17474 +               /*
17475 +                * Prrocess backend Response
17476 +                */
17477 +
17478 +               if (MAJOR(backend) == PROTO_MAJOR_V3)
17479 +               {
17480 +                       switch (kind)
17481 +                       {
17482 +                               case 'G':
17483 +                                       /* CopyIn response */
17484 +                                       status = CopyInResponse(frontend, backend);
17485 +                                       break;
17486 +                               case 'S':
17487 +                                       /* Paramter Status */
17488 +                                       status = ParameterStatus(frontend, backend);
17489 +                                       break;
17490 +                               case 'Z':
17491 +                                       /* Ready for query */
17492 +                                       status = ReadyForQuery(frontend, backend, 1);
17493 +                                       break;
17494 +                               default:
17495 +                                       status = SimpleForwardToFrontend(kind, frontend, backend);
17496 +                                       break;
17497 +                       }
17498 +               }
17499 +               else
17500 +               {
17501 +                       switch (kind)
17502 +                       {
17503 +                               case 'A':
17504 +                                       /* Notification  response */
17505 +                                       status = NotificationResponse(frontend, backend);
17506 +                                       break;
17507 +
17508 +                               case 'B':
17509 +                                       /* BinaryRow */
17510 +                                       status = BinaryRow(frontend, backend, num_fields);
17511 +                                       break;
17512 +
17513 +                               case 'C':
17514 +                                       /* Complete command response */
17515 +                                       status = CompleteCommandResponse(frontend, backend);
17516 +                                       break;
17517 +
17518 +                               case 'D':
17519 +                                       /* AsciiRow */
17520 +                                       status = AsciiRow(frontend, backend, num_fields);
17521 +                                       break;
17522 +
17523 +                               case 'E':
17524 +                                       /* Error Response */
17525 +                                       status = ErrorResponse(frontend, backend);
17526 +                                       break;
17527 +
17528 +                               case 'G':
17529 +                                       /* CopyIn Response */
17530 +                                       status = CopyInResponse(frontend, backend);
17531 +                                       break;
17532 +
17533 +                               case 'H':
17534 +                                       /* CopyOut Response */
17535 +                                       status = CopyOutResponse(frontend, backend);
17536 +                                       break;
17537 +
17538 +                               case 'I':
17539 +                                       /* Empty Query Response */
17540 +                                       status = EmptyQueryResponse(frontend, backend);
17541 +                                       break;
17542 +
17543 +                               case 'N':
17544 +                                       /* Notice Response */
17545 +                                       status = NoticeResponse(frontend, backend);
17546 +                                       break;
17547 +
17548 +                               case 'P':
17549 +                                       /* CursorResponse */
17550 +                                       status = CursorResponse(frontend, backend);
17551 +                                       break;
17552 +
17553 +                               case 'T':
17554 +                                       /* RowDescription */
17555 +                                       status = RowDescription(frontend, backend);
17556 +                                       if (status < 0)
17557 +                                               return POOL_ERROR;
17558 +
17559 +                                       num_fields = status;
17560 +                                       status = POOL_CONTINUE;
17561 +                                       break;
17562 +
17563 +                               case 'V':
17564 +                                       /* FunctionResultResponse and FunctionVoidResponse */
17565 +                                       status = FunctionResultResponse(frontend, backend);
17566 +                                       break;
17567 +                               
17568 +                               case 'Z':
17569 +                                       /* Ready for query */
17570 +                                       status = ReadyForQuery(frontend, backend, 1);
17571 +                                       break;
17572 +                               
17573 +                               default:
17574 +                                       show_error("%s:Unknown message type %c(%02x)",func, kind, kind);
17575 +                                       exit(1);
17576 +                       }
17577 +               }
17578 +
17579 +               if (status != POOL_CONTINUE)
17580 +                       return status;
17581 +
17582 +               if (kind == 'Z' && frontend->no_forward && state == 1)
17583 +               {
17584 +                       state = 0;
17585 +               }
17586 +
17587 +       }
17588 +       return POOL_CONTINUE;
17589 +}
17590 +
17591 +static POOL_STATUS Query(POOL_CONNECTION *frontend, 
17592 +                                                POOL_CONNECTION_POOL *backend, char *query)
17593 +{      /* forward one 'Q' message to the backend(s); when query is NULL the text is read from the frontend instead */
17594 +#ifdef PRINT_DEBUG
17595 +       char * func = "Query()";
17596 +#endif                 
17597 +       char *string;
17598 +       int len;
17599 +       static char *sq = "show pool_status";   /* queries starting with this text are answered by process_reporting() */
17600 +
17601 +       if (query == NULL)
17602 +       {
17603 +               /* read actual query */
17604 +               if (MAJOR(backend) == PROTO_MAJOR_V3)
17605 +               {
17606 +                       if (pool_read(frontend, &len, sizeof(len)) < 0)
17607 +                               return POOL_END;
17608 +                       len = ntohl(len) - 4;   /* NOTE(review): len comes from the frontend and is not range-checked before pool_read2() -- confirm bad values are rejected there */
17609 +                       string = pool_read2(frontend, len);
17610 +               }
17611 +               else
17612 +                       string = pool_read_string(frontend, &len, 0);
17613 +
17614 +               if (string == NULL)
17615 +                       return POOL_END;
17616 +       }
17617 +       else
17618 +       {
17619 +               len = strlen(query)+1;  /* length includes the terminating NUL */
17620 +               string = query;
17621 +       }
17622 +
17623 +#ifdef PRINT_DEBUG
17624 +       show_debug("%s: %s", func,string);
17625 +#endif                 
17626 +
17627 +       /* process status reporting? (case-insensitive prefix match) */
17628 +       if (strncasecmp(sq, string, strlen(sq)) == 0)
17629 +       {
17630 +#ifdef PRINT_DEBUG
17631 +               show_debug("%s:process reporting",func);
17632 +#endif                 
17633 +               process_reporting(frontend, backend);
17634 +               return POOL_CONTINUE;   /* nothing is forwarded to the backend in this case */
17635 +       }
17636 +
17637 +       /* load balance trick */
17638 +       if (load_balance_enabled(backend, string))
17639 +               start_load_balance(backend);
17640 +
17641 +       /* forward the query to the backend */
17642 +       pool_write(MASTER(backend), "Q", 1);
17643 +
17644 +       if (MAJOR(backend) == PROTO_MAJOR_V3)
17645 +       {
17646 +               int sendlen = htonl(len + 4);   /* adds back the 4 subtracted when the length was read */
17647 +               pool_write(MASTER(backend), &sendlen, sizeof(sendlen));
17648 +       }
17649 +
17650 +       if (pool_write_and_flush(MASTER(backend), string, len) < 0)
17651 +       {
17652 +               return POOL_END;
17653 +       }
17654 +
17655 +       if (REPLICATION)
17656 +       {
17657 +               /* in "strict mode" we need to wait for master completing the query */
17658 +               if (pool_config_replication_strict || STRICT_MODE(string))
17659 +                       if (synchronize(MASTER(backend)))
17660 +                               return POOL_END;
17661 +
17662 +               pool_write(SECONDARY(backend), "Q", 1); /* same message, sent to the secondary after the master */
17663 +               if (MAJOR(backend) == PROTO_MAJOR_V3)
17664 +               {
17665 +                       int sendlen = htonl(len + 4);
17666 +                       pool_write(SECONDARY(backend), &sendlen, sizeof(sendlen));
17667 +               }
17668 +
17669 +               if (pool_write_and_flush(SECONDARY(backend), string, len) < 0)
17670 +               {
17671 +                       return POOL_END;
17672 +               }
17673 +       }
17674 +       return POOL_CONTINUE;
17675 +}
17676 +/* ReadyForQuery: if send_ready, send 'Z' (plus, for V3, the length and transaction state read from the backend) to the frontend; then return ProcessFrontendResponse(). */
17677 +static POOL_STATUS ReadyForQuery(POOL_CONNECTION *frontend, 
17678 +                                                                POOL_CONNECTION_POOL *backend, int send_ready)
17679 +{
17680 +#ifdef PRINT_DEBUG
17681 +       char * func = "ReadyForQuery()";
17682 +#endif                 
17683 +
17684 +       pool_flush(frontend);
17685 +
17686 +       if (send_ready)
17687 +       {
17688 +               pool_write(frontend, "Z", 1);
17689 +
17690 +               if (MAJOR(backend) == PROTO_MAJOR_V3)
17691 +               {
17692 +                       int len;
17693 +                       signed char state;
17694 +
17695 +                       if ((len = pool_read_message_length(backend)) < 0)
17696 +                               return POOL_END;
17697 +
17698 +#ifdef PRINT_DEBUG
17699 +                       show_debug("%s: message length: %d", func, len);
17700 +#endif                 
17701 +                       /* forward the length in network byte order */
17702 +                       len = htonl(len);
17703 +                       pool_write(frontend, &len, sizeof(len));
17704 +
17705 +                       state = pool_read_kind(backend);
17706 +                       if (state < 0)
17707 +                               return POOL_END;
17708 +
17709 +                       /* remember the transaction state on the master (and on the secondary when REPLICATION) */
17710 +#ifdef PRINT_DEBUG
17711 +                       show_debug("%s: transaction state: %c", func, state);
17712 +#endif                 
17713 +                       MASTER(backend)->tstate = state;
17714 +                       if (REPLICATION)
17715 +                               SECONDARY(backend)->tstate = state;
17716 +
17717 +                       pool_write(frontend, &state, 1);
17718 +               }
17719 +
17720 +               if (pool_flush(frontend))
17721 +                       return POOL_END;
17722 +       }
17723 +
17724 +       /* end load balance mode */
17725 +       if (IN_LOAD_BALANCE)
17726 +               end_load_balance(backend);
17727 +
17728 +       return ProcessFrontendResponse(frontend, backend);
17729 +}
17730 +/* CompleteCommandResponse: read the command tag from the master (and from the secondary when REPLICATION) and send 'C' + tag to the frontend. Returns POOL_END on read/write failure. */
17731 +static POOL_STATUS CompleteCommandResponse(POOL_CONNECTION *frontend, 
17732 +                                                                                  POOL_CONNECTION_POOL *backend)
17733 +{
17734 +       char * func = "CompleteCommandResponse()";
17735 +       char *string, *string1;
17736 +       int len, len1;
17737 +
17738 +       /* read command tag */
17739 +       string = pool_read_string(MASTER(backend), &len, 0);
17740 +       if (string == NULL)
17741 +               return POOL_END;
17742 +
17743 +       if (REPLICATION)
17744 +       {
17745 +               string1 = pool_read_string(SECONDARY(backend), &len1, 0);
17746 +               if (string1 == NULL)
17747 +                       return POOL_END;
17748 +               /* a length mismatch is only logged, not treated as an error */
17749 +               if (len != len1)
17750 +               {
17751 +                       show_error("%s: message length does not match between master(%d \"%s\",) and secondary(%d \"%s\",)",
17752 +                                        func, len, string, len1, string1);
17753 +               }
17754 +       }
17755 +
17756 +       /* forward to the frontend */
17757 +       pool_write(frontend, "C", 1);
17758 +#ifdef PRINT_DEBUG
17759 +       show_debug("%s: string: \"%s\"",func, string);
17760 +#endif                 
17761 +       if (pool_write(frontend, string, len) < 0)
17762 +       {
17763 +               return POOL_END;
17764 +       }
17765 +       return POOL_CONTINUE;
17766 +}
17767 +
17768 +static int RowDescription(POOL_CONNECTION *frontend, 
17769 +                                                 POOL_CONNECTION_POOL *backend)
17770 +{
17771 +       char * func = "RowDescription()";
17772 +       short num_fields, num_fields1;
17773 +       int oid, mod;
17774 +       int oid1, mod1;
17775 +       short size, size1;
17776 +       char *string, *string1;
17777 +       int len, len1;
17778 +       int i;
17779 +
17780 +       /* # of fields (could be 0) */
17781 +       pool_read(MASTER(backend), &num_fields, sizeof(short));
17782 +       if (REPLICATION)
17783 +       {
17784 +               pool_read(SECONDARY(backend), &num_fields1, sizeof(short));
17785 +               if (num_fields != num_fields1)
17786 +               {
17787 +                       show_error("%s: num_fields deos not match between backends master(%d) and secondary(%d)",
17788 +                                          func, num_fields, num_fields1);
17789 +                       return POOL_FATAL;
17790 +               }
17791 +       }
17792 +
17793 +       /* forward it to the frontend */
17794 +       pool_write(frontend, "T", 1);
17795 +       pool_write(frontend, &num_fields, sizeof(short));
17796 +
17797 +       num_fields = ntohs(num_fields);
17798 +       for (i = 0;i<num_fields;i++)
17799 +       {
17800 +               /* field name */
17801 +               string = pool_read_string(MASTER(backend), &len, 0);
17802 +               if (string == NULL)
17803 +                       return POOL_END;
17804 +
17805 +               if (REPLICATION)
17806 +               {
17807 +                       string1 = pool_read_string(SECONDARY(backend), &len1, 0);
17808 +                       if (string == NULL)
17809 +                               return POOL_END;
17810 +                       if (len != len1)
17811 +                       {
17812 +                               show_error("%s: field length deos not match between backends master(%d) and secondary(%d)",
17813 +                                                  func, ntohl(len), ntohl(len1));
17814 +                               return POOL_FATAL;
17815 +                       }
17816 +               }
17817 +
17818 +               pool_write(frontend, string, len);
17819 +
17820 +               /* oid */
17821 +               pool_read(MASTER(backend), &oid, sizeof(int));
17822 +               if (REPLICATION)
17823 +               {
17824 +                       pool_read(SECONDARY(backend), &oid1, sizeof(int));
17825 +
17826 +                       /* we do not regard oid mismatch as fatal */
17827 +                       if (oid != oid1)
17828 +                       {
17829 +                               show_error("%s: field oid deos not match between backends master(%d) and secondary(%d)",
17830 +                                                func, ntohl(oid), ntohl(oid1));
17831 +                       }
17832 +               }
17833 +               pool_write(frontend, &oid, sizeof(int));
17834 +
17835 +               /* size */
17836 +               pool_read(MASTER(backend), &size, sizeof(short));
17837 +               if (REPLICATION)
17838 +               {
17839 +                       pool_read(SECONDARY(backend), &size1, sizeof(short));
17840 +                       if (size1 != size1)
17841 +                       {
17842 +                               show_error("%s: field size deos not match between backends master(%d) and secondary(%d)",
17843 +                                                func, ntohs(size), ntohs(size1));
17844 +                               return POOL_FATAL;
17845 +                       }
17846 +               }
17847 +#ifdef PRINT_DEBUG
17848 +               show_debug("%s: field size:%d", func, ntohs(size));
17849 +#endif                 
17850 +               pool_write(frontend, &size, sizeof(short));
17851 +
17852 +               /* modifier */
17853 +               pool_read(MASTER(backend), &mod, sizeof(int));
17854 +               if (REPLICATION)
17855 +               {
17856 +                       pool_read(SECONDARY(backend), &mod1, sizeof(int));
17857 +                       if (mod != mod1)
17858 +                       {
17859 +                               show_error("%s: modifier deos not match between backends master(%d) and secondary(%d)",
17860 +                                                func, ntohl(mod), ntohl(mod1));
17861 +                       }
17862 +               }
17863 +               pool_write(frontend, &mod, sizeof(int));
17864 +       }
17865 +
17866 +       return num_fields;
17867 +}
17868 +
17869 +static POOL_STATUS AsciiRow(POOL_CONNECTION *frontend, 
17870 +                                                       POOL_CONNECTION_POOL *backend,
17871 +                                                       short num_fields)
17872 +{
17873 +       char * func = "AsciiRow()";
17874 +       static char nullmap[8192], nullmap1[8192];
17875 +       int nbytes;
17876 +       int i;
17877 +       unsigned char mask;
17878 +       int size, size1;
17879 +       char *buf;
17880 +       char msgbuf[1024];
17881 +
17882 +       pool_write(frontend, "D", 1);
17883 +
17884 +       nbytes = (num_fields + 7)/8;
17885 +
17886 +       if (nbytes <= 0)
17887 +               return POOL_CONTINUE;
17888 +
17889 +       /* NULL map */
17890 +       pool_read(MASTER(backend), nullmap, nbytes);
17891 +       if (pool_write(frontend, nullmap, nbytes) < 0)
17892 +               return POOL_END;
17893 +
17894 +       if (REPLICATION)
17895 +       {
17896 +               if (pool_read(SECONDARY(backend), nullmap1, nbytes) < 0)
17897 +                       return POOL_END;
17898 +
17899 +               if (memcmp(nullmap, nullmap1, nbytes))
17900 +               {
17901 +                       /* XXX: NULLMAP maybe different among
17902 +                          backends. If we were a paranoid, we have to treat
17903 +                          this as a fatal error. However in the real world
17904 +                          we'd better to adapt this situation. Just throw a
17905 +                          log... */
17906 +                       show_error("%s: NULLMAP differ between master and secondary",func);
17907 +               }
17908 +       }
17909 +
17910 +       mask = 0;
17911 +
17912 +       for (i = 0;i<num_fields;i++)
17913 +       {
17914 +               if (mask == 0)
17915 +                       mask = 0x80;
17916 +
17917 +               /* NOT NULL? */
17918 +               if (mask & nullmap[i/8])
17919 +               {
17920 +                       /* field size */
17921 +                       if (pool_read(MASTER(backend), &size, sizeof(int)) < 0)
17922 +                               return POOL_END;
17923 +               }
17924 +
17925 +               if (REPLICATION && (mask & nullmap1[i/8]))
17926 +               {
17927 +                       /* XXX: field size maybe different among
17928 +                          backends. If we were a paranoid, we have to treat
17929 +                          this as a fatal error. However in the real world
17930 +                          we'd better to adapt this situation. Just throw a
17931 +                          log... */
17932 +
17933 +                       if (pool_read(SECONDARY(backend), &size1, sizeof(int)) < 0)
17934 +                               return POOL_END;
17935 +
17936 +                       if (size != size1)
17937 +                               show_error("%s: %d th field size does not match between master(%d) and secondary(%d)",
17938 +                                                func, i, ntohl(size), ntohl(size1));
17939 +                       size1 = ntohl(size1) - 4;
17940 +               }
17941 +
17942 +               buf = NULL;
17943 +
17944 +               if (mask & nullmap[i/8])
17945 +               {
17946 +                       /* forward to frontend */
17947 +                       pool_write(frontend, &size, sizeof(int));
17948 +                       size = ntohl(size) - 4;
17949 +
17950 +                       /* read and send actual data only when size > 0 */
17951 +                       if (size > 0)
17952 +                       {
17953 +                               buf = pool_read2(MASTER(backend), size);
17954 +                               if (buf == NULL)
17955 +                                       return POOL_END;
17956 +                       }
17957 +               }
17958 +
17959 +               if (REPLICATION && size1 > 0 && (mask & nullmap1[i/8]))
17960 +               {
17961 +                       /* read and discard secondary data */
17962 +                       if (pool_read2(SECONDARY(backend), size1) == NULL)
17963 +                               return POOL_END;
17964 +               }
17965 +
17966 +               if (buf)
17967 +               {
17968 +                       pool_write(frontend, buf, size);
17969 +                       snprintf(msgbuf, Min(sizeof(msgbuf), size+1), "%s", buf);
17970 +#ifdef PRINT_DEBUG
17971 +                       show_debug("%s: len: %d data: %s", func, size, msgbuf);
17972 +#endif                 
17973 +               }
17974 +
17975 +               mask >>= 1;
17976 +       }
17977 +
17978 +       return POOL_CONTINUE;
17979 +}
17980 +/* BinaryRow: relay one 'B' row (NULL bitmap, then size + data per non-NULL field) from the master to the frontend; when REPLICATION the secondary's copy is read, compared and discarded. */
17981 +static POOL_STATUS BinaryRow(POOL_CONNECTION *frontend, 
17982 +                                                        POOL_CONNECTION_POOL *backend,
17983 +                                                        short num_fields)
17984 +{
17985 +       char * func = "BinaryRow()";
17986 +       static char nullmap[8192], nullmap1[8192];
17987 +       int nbytes;
17988 +       int i;
17989 +       unsigned char mask;
17990 +       int size, size1;
17991 +       char *buf;
17992 +
17993 +       pool_write(frontend, "B", 1);
17994 +
17995 +       nbytes = (num_fields + 7)/8;
17996 +
17997 +       if (nbytes <= 0)
17998 +               return POOL_CONTINUE;
17999 +
18000 +       /* NULL map: one bit per field, a set bit means the field is present */
18001 +       if (pool_read(MASTER(backend), nullmap, nbytes) < 0 ||
18002 +               pool_write(frontend, nullmap, nbytes) < 0)
18003 +               return POOL_END;
18004 +
18005 +       if (REPLICATION)
18006 +       {
18007 +               if (pool_read(SECONDARY(backend), nullmap1, nbytes) < 0)
18008 +                       return POOL_END;
18009 +
18010 +               if (memcmp(nullmap, nullmap1, nbytes))
18011 +               {
18012 +                       /* XXX: NULLMAP maybe different among
18013 +                          backends. If we were a paranoid, we have to treat
18014 +                          this as a fatal error. However in the real world
18015 +                          we'd better to adapt this situation. Just throw a
18016 +                          log... */
18017 +                       show_error("%s: NULLMAP differ between master and secondary",func);
18018 +               }
18019 +       }
18020 +
18021 +       mask = 0;
18022 +
18023 +       for (i = 0;i<num_fields;i++)
18024 +       {
18025 +               if (mask == 0)
18026 +                       mask = 0x80;
18027 +
18028 +               /* NOT NULL? */
18029 +               if (mask & nullmap[i/8])
18030 +               {
18031 +                       /* field size */
18032 +                       if (pool_read(MASTER(backend), &size, sizeof(int)) < 0)
18033 +                               return POOL_END;
18034 +               }
18035 +
18036 +               if (REPLICATION && (mask & nullmap1[i/8]))
18037 +               {
18038 +                       /* XXX: field size maybe different among
18039 +                          backends. If we were a paranoid, we have to treat
18040 +                          this as a fatal error. However in the real world
18041 +                          we'd better to adapt this situation. Just throw a
18042 +                          log... */
18043 +
18044 +                       if (pool_read(SECONDARY(backend), &size1, sizeof(int)) < 0)
18045 +                               return POOL_END;
18046 +                       /* only compare when the master also sent a size for this field */
18047 +                       if ((mask & nullmap[i/8]) && size != size1)
18048 +                               show_error("%s: %d th field size does not match between master(%d) and secondary(%d)",
18049 +                                                func, i, ntohl(size), ntohl(size1));
18050 +                       size1 = ntohl(size1) - 4;
18051 +               }
18052 +
18053 +               buf = NULL;
18054 +
18055 +               if (mask & nullmap[i/8])
18056 +               {
18057 +                       /* forward to frontend */
18058 +                       pool_write(frontend, &size, sizeof(int));
18059 +                       size = ntohl(size) - 4;
18060 +
18061 +                       /* read and send actual data only when size > 0 */
18062 +                       if (size > 0)
18063 +                       {
18064 +                               buf = pool_read2(MASTER(backend), size);
18065 +                               if (buf == NULL)
18066 +                                       return POOL_END;
18067 +                       }
18068 +               }
18069 +               /* test the bitmap bit first: size1 is only valid when the secondary sent this field */
18070 +               if (REPLICATION && (mask & nullmap1[i/8]) && size1 > 0)
18071 +               {
18072 +                       /* read and discard secondary data */
18073 +                       if (pool_read2(SECONDARY(backend), size1) == NULL)
18074 +                               return POOL_END;
18075 +               }
18076 +
18077 +               if (buf)
18078 +                       pool_write(frontend, buf, size);
18079 +
18080 +               mask >>= 1;
18081 +       }
18082 +       return POOL_CONTINUE;
18083 +}
18084 +/* CursorResponse: read the cursor name from the master (and from the secondary when REPLICATION) and send 'P' + name to the frontend. Returns POOL_END on read/write failure or length mismatch. */
18085 +static POOL_STATUS CursorResponse(POOL_CONNECTION *frontend, 
18086 +                                                                 POOL_CONNECTION_POOL *backend)
18087 +{
18088 +       char * func = "CursorResponse()";
18089 +       char *string, *string1;
18090 +       int len, len1;
18091 +
18092 +       /* read cursor name */
18093 +       string = pool_read_string(MASTER(backend), &len, 0);
18094 +       if (string == NULL)
18095 +               return POOL_END;
18096 +       if (REPLICATION)
18097 +       {
18098 +               string1 = pool_read_string(SECONDARY(backend), &len1, 0);
18099 +               if (string1 == NULL)
18100 +                       return POOL_END;
18101 +               if (len != len1)
18102 +               {
18103 +                       show_error("%s: length does not match between master(%d) and secondary(%d)",
18104 +                                          func, len, len1);
18105 +                       show_error("%s: master(%s) secondary(%s)", func, string, string1);
18106 +                       return POOL_END;
18107 +               }
18108 +       }
18109 +
18110 +       /* forward to the frontend */
18111 +       pool_write(frontend, "P", 1);
18112 +       if (pool_write(frontend, string, len) < 0)
18113 +       {
18114 +               return POOL_END;
18115 +       }
18116 +       return POOL_CONTINUE;
18117 +}
18118 +/* ErrorResponse: read the error message from the master and, when REPLICATION, from the secondary (whose result then replaces it), and send 'E' + message to the frontend with a flush. */
18119 +POOL_STATUS ErrorResponse(POOL_CONNECTION *frontend, 
18120 +                                                 POOL_CONNECTION_POOL *backend)
18121 +{
18122 +       char *string;
18123 +       int len;
18124 +
18125 +       /* read error message */
18126 +       string = pool_read_string(MASTER(backend), &len, 0);
18127 +       if (string == NULL)
18128 +               return POOL_END;
18129 +       if (REPLICATION)
18130 +       {
18131 +               string = pool_read_string(SECONDARY(backend), &len, 0);
18132 +               if (string == NULL)
18133 +                       return POOL_END;
18134 +       }
18135 +
18136 +       /* forward to the frontend */
18137 +       pool_write(frontend, "E", 1);
18138 +       if (pool_write_and_flush(frontend, string, len) < 0)
18139 +               return POOL_END;
18140 +                       
18141 +       return POOL_CONTINUE;
18142 +}
18143 +/* NoticeResponse: read the notice message from the master (and from the secondary when REPLICATION; that copy is not used further) and send 'N' + message to the frontend with a flush. */
18144 +static POOL_STATUS NoticeResponse(POOL_CONNECTION *frontend, 
18145 +                                                                 POOL_CONNECTION_POOL *backend)
18146 +{
18147 +       char *string, *string1;
18148 +       int len, len1;
18149 +
18150 +       /* read notice message */
18151 +       string = pool_read_string(MASTER(backend), &len, 0);
18152 +       if (string == NULL)
18153 +               return POOL_END;
18154 +       if (REPLICATION)
18155 +       {
18156 +               string1 = pool_read_string(SECONDARY(backend), &len1, 0);
18157 +               if (string1 == NULL)
18158 +                       return POOL_END;
18159 +       }
18160 +
18161 +       /* forward to the frontend */
18162 +       pool_write(frontend, "N", 1);
18163 +       if (pool_write_and_flush(frontend, string, len) < 0)
18164 +       {
18165 +               return POOL_END;
18166 +       }
18167 +       return POOL_CONTINUE;
18168 +}
18169 +/* CopyInResponse: send 'G' to the frontend (through SimpleForwardToFrontend() for V3, as a bare byte otherwise), then run CopyDataRows() with copyin = 1 and return its status. */
18170 +static POOL_STATUS CopyInResponse(POOL_CONNECTION *frontend, 
18171 +                                                                 POOL_CONNECTION_POOL *backend)
18172 +{
18173 +       POOL_STATUS status;
18174 +
18175 +       /* forward to the frontend */
18176 +       if (MAJOR(backend) == PROTO_MAJOR_V3)
18177 +       {
18178 +               if (SimpleForwardToFrontend('G', frontend, backend) != POOL_CONTINUE)
18179 +                       return POOL_END;
18180 +               if (pool_flush(frontend) != POOL_CONTINUE)
18181 +                       return POOL_END;
18182 +       }
18183 +       else
18184 +               if (pool_write_and_flush(frontend, "G", 1) < 0)
18185 +                       return POOL_END;
18186 +
18187 +       status = CopyDataRows(frontend, backend, 1);
18188 +       return status;
18189 +}
18190 +/* CopyOutResponse: send 'H' to the frontend (through SimpleForwardToFrontend() for V3, as a bare byte otherwise), then run CopyDataRows() with copyin = 0 and return its status. */
18191 +static POOL_STATUS CopyOutResponse(POOL_CONNECTION *frontend, 
18192 +                                                                  POOL_CONNECTION_POOL *backend)
18193 +{
18194 +       POOL_STATUS status;
18195 +
18196 +       /* forward to the frontend */
18197 +       if (MAJOR(backend) == PROTO_MAJOR_V3)
18198 +       {
18199 +               if (SimpleForwardToFrontend('H', frontend, backend) != POOL_CONTINUE)
18200 +                       return POOL_END;
18201 +               if (pool_flush(frontend) != POOL_CONTINUE)
18202 +                       return POOL_END;
18203 +       }
18204 +       else
18205 +               if (pool_write_and_flush(frontend, "H", 1) < 0)
18206 +                       return POOL_END;
18207 +
18208 +       status = CopyDataRows(frontend, backend, 0);
18209 +       return status;
18210 +}
18211 +
18212 +static POOL_STATUS CopyDataRows(POOL_CONNECTION *frontend,
18213 +                                                               POOL_CONNECTION_POOL *backend, int copyin)
18214 +{
18215 +#ifdef PRINT_DEBUG
18216 +       char * func = "CopyDataRows()";
18217 +#endif                 
18218 +       char *string;
18219 +       int len;
18220 +
18221 +#ifdef PRINT_DEBUG
18222 +       int i = 0;
18223 +       char *buf;
18224 +#endif
18225 +
18226 +       for (;;)
18227 +       {
18228 +               if (copyin)
18229 +               {
18230 +                       if (MAJOR(backend) == PROTO_MAJOR_V3)
18231 +                       {
18232 +                               char kind;
18233 +                               POOL_STATUS status;
18234 +
18235 +                               if (pool_read(frontend, &kind, 1) < 0)
18236 +                                       return POOL_END;
18237 +                               
18238 +                               status = SimpleForwardToBackend(kind, frontend, backend);
18239 +                               if (status == POOL_END)
18240 +                                       return status;
18241 +
18242 +                               /* CopyData? */
18243 +                               if (kind == 'd')
18244 +                                       continue;
18245 +                               else
18246 +                                       break;
18247 +                       }
18248 +                       else
18249 +                       {
18250 +                               string = pool_read_string(frontend, &len, 1);
18251 +                               if (string == NULL)
18252 +                                       return POOL_END;
18253 +                       }
18254 +               }
18255 +               else
18256 +               {
18257 +                       /* CopyOut */
18258 +                       if (MAJOR(backend) == PROTO_MAJOR_V3)
18259 +                       {
18260 +                               signed char kind;
18261 +                               POOL_STATUS status;
18262 +
18263 +                               if ((kind = pool_read_kind(backend)) < 0)
18264 +                                       return POOL_END;
18265 +                               
18266 +                               status = SimpleForwardToFrontend(kind, frontend, backend);
18267 +                               if (status == POOL_END)
18268 +                                       return status;
18269 +
18270 +                               /* CopyData? */
18271 +                               if (kind == 'd')
18272 +                                       continue;
18273 +                               else
18274 +                                       break;
18275 +                       }
18276 +                       else
18277 +                       {
18278 +                               string = pool_read_string(MASTER(backend), &len, 1);
18279 +                               if (REPLICATION)
18280 +                                       string = pool_read_string(SECONDARY(backend), &len, 1);
18281 +                       }
18282 +               }
18283 +
18284 +               if (string == NULL)
18285 +                       return POOL_END;
18286 +
18287 +#ifdef PRINT_DEBUG
18288 +               buf = malloc(len + 1);
18289 +               if (buf == NULL)
18290 +               {
18291 +                       show_error("CopyDataRows: malloc failed: %s", strerror(errno));
18292 +                       return POOL_END;
18293 +               }
18294 +               strncpy(buf, string, len);
18295 +               buf[len] = '\0';
18296 +               show_debug("%s: copy line %d %d bytes :%s:",func, i++, len, buf);
18297 +               free(buf);
18298 +#endif
18299 +
18300 +               if (copyin)
18301 +               {
18302 +                       pool_write(MASTER(backend), string, len);
18303 +                       if (REPLICATION)
18304 +                               pool_write(SECONDARY(backend), string, len);
18305 +               }
18306 +               else
18307 +                       pool_write(frontend, string, len);                      
18308 +
18309 +               if (len == PROTO_MAJOR_V3)
18310 +               {
18311 +                       /* end of copy? */
18312 +                       if (string[0] == '\\' &&
18313 +                               string[1] == '.' &&
18314 +                               string[2] == '\n')
18315 +                       {
18316 +                               break;
18317 +                       }
18318 +               }
18319 +       }
18320 +
18321 +       if (copyin)
18322 +       {
18323 +               if (pool_flush(MASTER(backend)) <0)
18324 +                       return POOL_END;
18325 +               if (REPLICATION)
18326 +               {
18327 +                       if (pool_flush(SECONDARY(backend)) <0)
18328 +                               return POOL_END;
18329 +               }
18330 +       }
18331 +       else
18332 +               if (pool_flush(frontend) <0)
18333 +                       return POOL_END;
18334 +
18335 +       return POOL_CONTINUE;
18336 +}
18337 +/* EmptyQueryResponse: consume one byte from the master (and from the secondary when REPLICATION), then send 'I' followed by a single NUL byte to the frontend and flush. */
18338 +static POOL_STATUS EmptyQueryResponse(POOL_CONNECTION *frontend,
18339 +                                                                         POOL_CONNECTION_POOL *backend)
18340 +{
18341 +       char c;
18342 +
18343 +       if (pool_read(MASTER(backend), &c, sizeof(c)) < 0)
18344 +               return POOL_END;
18345 +
18346 +       if (REPLICATION)
18347 +       {
18348 +               if (pool_read(SECONDARY(backend), &c, sizeof(c)) < 0)
18349 +                       return POOL_END;
18350 +       }
18351 +
18352 +       pool_write(frontend, "I", 1);
18353 +       return pool_write_and_flush(frontend, "", 1);
18354 +}
18355 +/* NotificationResponse: send 'A' + pid + condition string, as read from the master, to the frontend; when REPLICATION the secondary's pid and condition are read but not used further. */
18356 +static POOL_STATUS NotificationResponse(POOL_CONNECTION *frontend, 
18357 +                                                                               POOL_CONNECTION_POOL *backend)
18358 +{
18359 +       int pid, pid1;
18360 +       char *condition, *condition1;
18361 +       int len, len1;
18362 +
18363 +       pool_write(frontend, "A", 1);
18364 +
18365 +       if (pool_read(MASTER(backend), &pid, sizeof(pid)) < 0)
18366 +               return POOL_ERROR;
18367 +
18368 +       if (REPLICATION)
18369 +       {
18370 +               if (pool_read(SECONDARY(backend), &pid1, sizeof(pid1)) < 0)
18371 +                       return POOL_ERROR;
18372 +       }
18373 +
18374 +       condition = pool_read_string(MASTER(backend), &len, 0);
18375 +       if (condition == NULL)
18376 +               return POOL_END;
18377 +       if (REPLICATION)
18378 +       {
18379 +               condition1 = pool_read_string(SECONDARY(backend), &len1, 0);
18380 +               if (condition1 == NULL)
18381 +                       return POOL_END;
18382 +       }
18383 +       /* pid is forwarded exactly as read from the master (no byte-order conversion) */
18384 +       pool_write(frontend, &pid, sizeof(pid));
18385 +
18386 +       return pool_write_and_flush(frontend, condition, len);
18387 +}
18388 +/* FunctionCall: copy an 'F' request (2 dummy bytes, function oid, argument count, then length + value per argument) from the frontend to the master and, when REPLICATION, the secondary; then flush. */
18389 +static POOL_STATUS FunctionCall(POOL_CONNECTION *frontend, 
18390 +                                                               POOL_CONNECTION_POOL *backend)
18391 +{
18392 +       char dummy[2];
18393 +       int oid;
18394 +       int argn;
18395 +       int i;
18396 +
18397 +       pool_write(MASTER(backend), "F", 1);
18398 +       if (REPLICATION)
18399 +               pool_write(SECONDARY(backend), "F", 1);
18400 +
18401 +       /* dummy */
18402 +       if (pool_read(frontend, dummy, sizeof(dummy)) < 0)
18403 +               return POOL_ERROR;
18404 +       pool_write(MASTER(backend), dummy, sizeof(dummy));
18405 +       if (REPLICATION)
18406 +               pool_write(SECONDARY(backend), dummy, sizeof(dummy));
18407 +
18408 +       /* function object id */
18409 +       if (pool_read(frontend, &oid, sizeof(oid)) < 0)
18410 +               return POOL_ERROR;
18411 +
18412 +       pool_write(MASTER(backend), &oid, sizeof(oid));
18413 +       if (REPLICATION)
18414 +               pool_write(SECONDARY(backend), &oid, sizeof(oid));
18415 +
18416 +       /* number of arguments */
18417 +       if (pool_read(frontend, &argn, sizeof(argn)) < 0)
18418 +               return POOL_ERROR;
18419 +       pool_write(MASTER(backend), &argn, sizeof(argn));
18420 +       if (REPLICATION)
18421 +               pool_write(SECONDARY(backend), &argn, sizeof(argn));
18422 +       /* forwarded as read; converted to host order only for the loop bound */
18423 +       argn = ntohl(argn);
18424 +
18425 +       for (i=0;i<argn;i++)
18426 +       {
18427 +               int len;
18428 +               char *arg;
18429 +
18430 +               /* length of each argument in bytes */
18431 +               if (pool_read(frontend, &len, sizeof(len)) < 0)
18432 +                       return POOL_ERROR;
18433 +
18434 +               pool_write(MASTER(backend), &len, sizeof(len));
18435 +               if (REPLICATION)
18436 +                       pool_write(SECONDARY(backend), &len, sizeof(len));
18437 +               /* NOTE(review): len comes from the frontend and is not range-checked before pool_read2() -- confirm pool_read2 copes with len <= 0 */
18438 +               len = ntohl(len);
18439 +
18440 +               /* argument value itself */
18441 +               if ((arg = pool_read2(frontend, len)) == NULL)
18442 +                       return POOL_ERROR;
18443 +               pool_write(MASTER(backend), arg, len);
18444 +               if (REPLICATION)
18445 +                       pool_write(SECONDARY(backend), arg, len);
18446 +       }
18447 +
18448 +       if (pool_flush(MASTER(backend)))
18449 +               return POOL_ERROR;
18450 +       if (REPLICATION)
18451 +               if (pool_flush(SECONDARY(backend)))
18452 +                       return POOL_ERROR;
18453 +       return POOL_CONTINUE;
18454 +}
18455 +/* relay a function result ('V') from the backend(s) to frontend; POOL_ERROR on a backend read failure, otherwise the result of pool_flush(frontend) */
18456 +static POOL_STATUS FunctionResultResponse(POOL_CONNECTION *frontend, 
18457 +                                                                                 POOL_CONNECTION_POOL *backend)
18458 +{
18459 +       char dummy;
18460 +       int len;
18461 +       char *result;
18462 +
18463 +       pool_write(frontend, "V", 1);
18464 +
18465 +       if (pool_read(MASTER(backend), &dummy, 1) < 0)
18466 +               return POOL_ERROR;
18467 +       if (REPLICATION)
18468 +               if (pool_read(SECONDARY(backend), &dummy, 1) < 0)       /* same variable: secondary's byte replaces master's */
18469 +                       return POOL_ERROR;
18470 +
18471 +       pool_write(frontend, &dummy, 1);
18472 +
18473 +       /* non empty result? ('G' is followed by a length and a value) */
18474 +       if (dummy == 'G')
18475 +       {
18476 +               /* length of result in bytes */
18477 +               if (pool_read(MASTER(backend), &len, sizeof(len)) < 0)
18478 +                       return POOL_ERROR;
18479 +               if (REPLICATION)
18480 +                       if (pool_read(SECONDARY(backend), &len, sizeof(len)) < 0)
18481 +                               return POOL_ERROR;
18482 +
18483 +               pool_write(frontend, &len, sizeof(len));
18484 +
18485 +               len = ntohl(len);       /* converted only after the raw value was forwarded */
18486 +
18487 +               /* result value itself */
18488 +               if ((result = pool_read2(MASTER(backend), len)) == NULL)
18489 +                       return POOL_ERROR;
18490 +               if (REPLICATION)
18491 +                       if (pool_read(SECONDARY(backend), result, len) < 0)     /* NOTE(review): reads into the buffer holding the master's result */
18492 +                               return POOL_ERROR;
18493 +
18494 +               pool_write(frontend, result, len);
18495 +       }
18496 +
18497 +       /* unused ('0'): consumed from the backend(s); a literal "0" is sent to the frontend */
18498 +       if (pool_read(MASTER(backend), &dummy, 1) < 0)
18499 +               return POOL_ERROR;
18500 +       if (REPLICATION)
18501 +               if (pool_read(SECONDARY(backend), &dummy, 1) < 0)
18502 +                       return POOL_ERROR;
18503 +
18504 +       pool_write(frontend, "0", 1);
18505 +
18506 +       return pool_flush(frontend);
18507 +}
18508 +/* read one message kind byte from frontend and dispatch it: 'X' ends the session, 'Q' goes to Query(), the rest is forwarded (V3) or handled as a V2 function call */
18509 +static POOL_STATUS ProcessFrontendResponse(POOL_CONNECTION *frontend, 
18510 +                                                                                  POOL_CONNECTION_POOL *backend)
18511 +{
18512 +       char * func = "ProcessFrontendResponse()";
18513 +       char fkind;
18514 +       POOL_STATUS status;
18515 +
18516 +       if (frontend->len <= 0 && frontend->no_forward != 0)    /* nothing to read in this state: keep going */
18517 +               return POOL_CONTINUE;
18518 +
18519 +       if (pool_read(frontend, &fkind, 1) < 0)
18520 +       {
18521 +               show_error("%s: failed to read kind",func);
18522 +               return POOL_END;
18523 +       }
18524 +
18525 +#ifdef PRINT_DEBUG
18526 +       show_debug("%s:read kind from frontend %c(%02x)", func, fkind, fkind);
18527 +#endif                 
18528 +
18529 +       switch (fkind)
18530 +       {
18531 +               case 'X':
18532 +                       if (MAJOR(backend) == PROTO_MAJOR_V3)
18533 +                       {
18534 +                               int len;
18535 +                               pool_read(frontend, &len, sizeof(len));  /* consume the V3 length word; result ignored, session ends anyway */
18536 +                       }
18537 +                       status = POOL_END;
18538 +                       break;
18539 +
18540 +               case 'Q':
18541 +                       status = Query(frontend, backend, NULL);
18542 +                       break;
18543 +
18544 +               default:
18545 +                       if (MAJOR(backend) == PROTO_MAJOR_V3)
18546 +                       {
18547 +                               status = SimpleForwardToBackend(fkind, frontend, backend);
18548 +                               if (pool_flush(MASTER(backend)))        /* a flush failure overrides the forward status */
18549 +                                       status = POOL_ERROR;
18550 +                               if (REPLICATION)
18551 +                                       if (pool_flush(SECONDARY(backend)))
18552 +                                               status = POOL_ERROR;
18553 +                       }
18554 +                       else if (MAJOR(backend) == PROTO_MAJOR_V2 && fkind == 'F')
18555 +                               status = FunctionCall(frontend, backend);
18556 +                       else
18557 +                       {
18558 +                               show_error("%s: unknown message type %c(%02x)", func, fkind, fkind);
18559 +                               status = POOL_ERROR;
18560 +                       }
18561 +                       break;
18562 +       }
18563 +
18564 +       return status;
18565 +}
18566 +
18567 +static int timeoutmsec;   /* 0: pool_check_fd() passes no timeout to select(); non 0: timeout enabled */
18568 +/*
18569 + * enable read timeout (copies pool_config_replication_timeout into timeoutmsec)
18570 + */
18571 +void pool_enable_timeout(void)
18572 +{
18573 +       timeoutmsec = pool_config_replication_timeout;
18574 +}
18575 +
18576 +/*
18577 + * disable read timeout (pool_check_fd() then passes no timeout to select())
18578 + */
18579 +void pool_disable_timeout(void)
18580 +{
18581 +       timeoutmsec = 0;
18582 +}
18583 +
18584 +/*
18585 + * wait until read data is ready, ignoring the read timeout; returns pool_check_fd()'s result
18586 + */
18587 +static int synchronize(POOL_CONNECTION *cp)
18588 +{
18589 +       return pool_check_fd(cp, 1);    /* notimeout = 1 */
18590 +}
18591 +
18592 +/*
18593 + * wait until read data is ready. returns 0 when readable, -1 on select() error, exception or timeout.
18594 + * if notimeout is non 0 (or the read timeout is disabled), wait forever.
18595 + */
18596 +int pool_check_fd(POOL_CONNECTION *cp, int notimeout)
18597 +{
18598 +       char * func = "pool_check_fd()";
18599 +       fd_set readmask;
18600 +       fd_set exceptmask;
18601 +       int fd;
18602 +       int fds;
18603 +       struct timeval timeout;
18604 +       struct timeval *tp;
18605 +
18606 +       fd = cp->fd;
18607 +
18608 +       for (;;)
18609 +       {
18610 +               FD_ZERO(&readmask);
18611 +               FD_ZERO(&exceptmask);
18612 +               FD_SET(fd, &readmask);
18613 +               FD_SET(fd, &exceptmask);
18614 +
18615 +               if (notimeout || timeoutmsec == 0)
18616 +                       tp = NULL;
18617 +               else
18618 +               {
18619 +                       timeout.tv_sec = pool_config_replication_timeout / 1000;        /* milliseconds -> whole seconds */
18620 +                       timeout.tv_usec = (pool_config_replication_timeout % 1000) * 1000;      /* remainder; tv_usec must stay below 1000000 */
18621 +                       tp = &timeout;
18622 +               }
18623 +
18624 +               fds = select(fd+1, &readmask, NULL, &exceptmask, tp);
18625 +
18626 +               if (fds == -1)
18627 +               {
18628 +                       if (errno == EAGAIN || errno == EINTR)  /* interrupted: rebuild the sets and retry */
18629 +                               continue;
18630 +
18631 +                       show_error("%s: select() failed. reason %s",func, strerror(errno));
18632 +                       break;
18633 +               }
18634 +
18635 +               if (FD_ISSET(fd, &exceptmask))
18636 +               {
18637 +                       show_error("%s: exception occurred",func);
18638 +                       break;
18639 +               }
18640 +
18641 +               if (fds == 0)   /* timed out; only reachable when tp is not NULL */
18642 +               {
18643 +                       show_error("%s: data is not ready tp->tv_sec %d tp->tv_usec %d", func, (int)tp->tv_sec, (int)tp->tv_usec);
18644 +                       break;
18645 +               }
18646 +               return 0;
18647 +       }
18648 +       return -1;
18649 +}
18650 +/* send a table of the pool_config_* settings (item, value, description) to frontend as a query result, framed for V2 or V3 according to MAJOR(backend) */
18651 +static void process_reporting(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend)
18652 +{
18653 +       static char *cursorname = "blank";
18654 +       static short num_fields = 3;
18655 +       static char *field_names[] = {"item", "value", "description"};
18656 +       static int oid = 0;
18657 +       static short fsize = -1;
18658 +       static int mod = 0;
18659 +       short n;
18660 +       int i;
18661 +       short s;
18662 +       int len;
18663 +       short colnum;
18664 +
18665 +       static char nullmap[2] = {0xff, 0xff};
18666 +       int nbytes = (num_fields + 7)/8;        /* bytes of nullmap sent per V2 row: one bit per field, rounded up */
18667 +
18668 +#define MAXVALLEN 512
18669 +
18670 +       typedef struct {
18671 +               char *name;
18672 +               char value[MAXVALLEN+1];
18673 +               char *desc;
18674 +       } POOL_REPORT_STATUS;
18675 +
18676 +#define MAXITEMS 128
18677 +
18678 +       POOL_REPORT_STATUS status[MAXITEMS];
18679 +
18680 +       short nrows;
18681 +       int size;
18682 +       int hsize;
18683 +
18684 +       i = 0;  /* index of the next status[] entry to fill */
18685 +
18686 +       status[i].name = "inetdomain";
18687 +       snprintf(status[i].value, MAXVALLEN, "%d", pool_config_inetdomain);
18688 +       status[i].desc = "1 if accepting TCP/IP connection";
18689 +       i++;
18690 +
18691 +       status[i].name = "port";
18692 +       snprintf(status[i].value, MAXVALLEN, "%d", pool_config_port);
18693 +       status[i].desc = "pgpool accepting port number";
18694 +       i++;
18695 +
18696 +       status[i].name = "socket_dir";
18697 +       snprintf(status[i].value, MAXVALLEN, "%s", pool_config_socket_dir);
18698 +       status[i].desc = "pgpool socket directory";
18699 +       i++;
18700 +
18701 +       status[i].name = "backend_host_name";
18702 +       snprintf(status[i].value, MAXVALLEN, "%s", pool_config_backend_host_name);
18703 +       status[i].desc = "master backend host name";
18704 +       i++;
18705 +
18706 +       status[i].name = "backend_port";
18707 +       snprintf(status[i].value, MAXVALLEN, "%d", pool_config_backend_port);
18708 +       status[i].desc = "master backend port number";
18709 +       i++;
18710 +
18711 +       status[i].name = "secondary_backend_host_name";
18712 +       snprintf(status[i].value, MAXVALLEN, "%s", pool_config_secondary_backend_host_name);
18713 +       status[i].desc = "secondary backend host name";
18714 +       i++;
18715 +
18716 +       status[i].name = "secondary_backend_port";
18717 +       snprintf(status[i].value, MAXVALLEN, "%d", pool_config_secondary_backend_port);
18718 +       status[i].desc = "secondary backend port number";
18719 +       i++;
18720 +
18721 +       status[i].name = "num_init_children";
18722 +       snprintf(status[i].value, MAXVALLEN, "%d", pool_config_num_init_children);
18723 +       status[i].desc = "# of children initially pre-forked";
18724 +       i++;
18725 +
18726 +       status[i].name = "child_life_time";
18727 +       snprintf(status[i].value, MAXVALLEN, "%d", pool_config_child_life_time);
18728 +       status[i].desc = "if idle for this seconds, child exits (not implemented yet)";
18729 +       i++;
18730 +
18731 +       status[i].name = "connection_life_time";
18732 +       snprintf(status[i].value, MAXVALLEN, "%d", pool_config_connection_life_time);
18733 +       status[i].desc = "if idle for this seconds, connection closes";
18734 +       i++;
18735 +
18736 +       status[i].name = "max_pool";
18737 +       snprintf(status[i].value, MAXVALLEN, "%d", pool_config_max_pool);
18738 +       status[i].desc = "max # of connection pool per child";
18739 +       i++;
18740 +
18741 +       status[i].name = "logdir";
18742 +       snprintf(status[i].value, MAXVALLEN, "%s", pool_config_logdir);
18743 +       status[i].desc = "logging directory";
18744 +       i++;
18745 +
18746 +       status[i].name = "backend_socket_dir";
18747 +       snprintf(status[i].value, MAXVALLEN, "%s", pool_config_backend_socket_dir);
18748 +       status[i].desc = "Unix domain socket directory for the PostgreSQL server";
18749 +       i++;
18750 +
18751 +       status[i].name = "replication_mode";
18752 +       snprintf(status[i].value, MAXVALLEN, "%d", pool_config_replication_mode);
18753 +       status[i].desc = "non 0 if operating in replication mode";
18754 +       i++;
18755 +
18756 +       status[i].name = "replication_strict";
18757 +       snprintf(status[i].value, MAXVALLEN, "%d", pool_config_replication_strict);
18758 +       status[i].desc = "non 0 if operating in strict mode";
18759 +       i++;
18760 +
18761 +       status[i].name = "replication_timeout";
18762 +       snprintf(status[i].value, MAXVALLEN, "%d", pool_config_replication_timeout);
18763 +       status[i].desc = "if secondary does not respond in this milli seconds, abort the session";
18764 +       i++;
18765 +
18766 +       status[i].name = "current_backend_host_name";
18767 +       snprintf(status[i].value, MAXVALLEN, "%s", pool_config_current_backend_host_name);
18768 +       status[i].desc = "current master host name";
18769 +       i++;
18770 +
18771 +       status[i].name = "current_backend_port";
18772 +       snprintf(status[i].value, MAXVALLEN, "%d", pool_config_current_backend_port);
18773 +       status[i].desc = "current master port #";
18774 +       i++;
18775 +
18776 +       status[i].name = "replication_enabled";
18777 +       snprintf(status[i].value, MAXVALLEN, "%d", pool_config_replication_enabled);
18778 +       status[i].desc = "non 0 if actually operating in replication mode";
18779 +       i++;
18780 +
18781 +       status[i].name = "load_balance_mode";
18782 +       snprintf(status[i].value, MAXVALLEN, "%d", pool_config_load_balance_mode);
18783 +       status[i].desc = "non 0 if operating in load balancing mode";
18784 +       i++;
18785 +
18786 +       status[i].name = "replication_stop_on_mismatch";
18787 +       snprintf(status[i].value, MAXVALLEN, "%d", pool_config_replication_stop_on_mismatch);
18788 +       status[i].desc = "stop replication mode on fatal error";
18789 +       i++;
18790 +
18791 +       nrows = i;      /* number of status[] entries filled in above */
18792 +
18793 +       if (MAJOR(backend) == PROTO_MAJOR_V2)
18794 +       {
18795 +               /* cursor response (V2 only) */
18796 +               pool_write(frontend, "P", 1);
18797 +               pool_write(frontend, cursorname, strlen(cursorname)+1);
18798 +       }
18799 +
18800 +       /* row description */
18801 +       pool_write(frontend, "T", 1);
18802 +
18803 +       if (MAJOR(backend) == PROTO_MAJOR_V3)
18804 +       {
18805 +               len = sizeof(num_fields) + sizeof(len); /* V3 length: field count + the length word itself, plus each field below */
18806 +
18807 +               for (i=0;i<num_fields;i++)
18808 +               {
18809 +                       char *f = field_names[i];
18810 +                       len += strlen(f)+1;
18811 +                       len += sizeof(oid);
18812 +                       len += sizeof(colnum);
18813 +                       len += sizeof(oid);
18814 +                       len += sizeof(s);
18815 +                       len += sizeof(mod);
18816 +                       len += sizeof(s);
18817 +               }
18818 +
18819 +               len = htonl(len);
18820 +               pool_write(frontend, &len, sizeof(len));
18821 +       }
18822 +
18823 +       n = htons(num_fields);
18824 +       pool_write(frontend, &n, sizeof(short));
18825 +
18826 +       for (i=0;i<num_fields;i++)
18827 +       {
18828 +               char *f = field_names[i];
18829 +
18830 +               pool_write(frontend, f, strlen(f)+1);           /* field name */
18831 +
18832 +               if (MAJOR(backend) == PROTO_MAJOR_V3)
18833 +               {
18834 +                       pool_write(frontend, &oid, sizeof(oid));        /* table oid */
18835 +                       colnum = htons(i);
18836 +                       pool_write(frontend, &colnum, sizeof(colnum));  /* column number */
18837 +               }
18838 +
18839 +               pool_write(frontend, &oid, sizeof(oid));                /* data type oid */
18840 +               s = htons(fsize);
18841 +               pool_write(frontend, &s, sizeof(fsize));                /* field size */
18842 +               pool_write(frontend, &mod, sizeof(mod));                /* modifier */
18843 +
18844 +               if (MAJOR(backend) == PROTO_MAJOR_V3)
18845 +               {
18846 +                       s = htons(0);
18847 +                       pool_write(frontend, &s, sizeof(fsize));        /* field format (text) */
18848 +               }
18849 +       }
18850 +       pool_flush(frontend);
18851 +
18852 +       if (MAJOR(backend) == PROTO_MAJOR_V2)
18853 +       {
18854 +               /* ascii row: 'D', null bitmap, then each value prefixed by (length + 4) */
18855 +               for (i=0;i<nrows;i++)
18856 +               {
18857 +                       pool_write(frontend, "D", 1);
18858 +                       pool_write_and_flush(frontend, nullmap, nbytes);
18859 +
18860 +                       size = strlen(status[i].name);
18861 +                       hsize = htonl(size+4);  /* the length sent includes its own 4 bytes */
18862 +                       pool_write(frontend, &hsize, sizeof(hsize));
18863 +                       pool_write(frontend, status[i].name, size);
18864 +
18865 +                       size = strlen(status[i].value);
18866 +                       hsize = htonl(size+4);
18867 +                       pool_write(frontend, &hsize, sizeof(hsize));
18868 +                       pool_write(frontend, status[i].value, size);
18869 +
18870 +                       size = strlen(status[i].desc);
18871 +                       hsize = htonl(size+4);
18872 +                       pool_write(frontend, &hsize, sizeof(hsize));
18873 +                       pool_write(frontend, status[i].desc, size);
18874 +               }
18875 +       }
18876 +       else
18877 +       {
18878 +               /* data row: 'D', message length, column count, then each value prefixed by its length */
18879 +               for (i=0;i<nrows;i++)
18880 +               {
18881 +                       pool_write(frontend, "D", 1);
18882 +                       len = sizeof(len) + sizeof(nrows);      /* length word itself + the short column count written below */
18883 +                       len += sizeof(int) + strlen(status[i].name);
18884 +                       len += sizeof(int) + strlen(status[i].value);
18885 +                       len += sizeof(int) + strlen(status[i].desc);
18886 +                       len = htonl(len);
18887 +                       pool_write(frontend, &len, sizeof(len));
18888 +                       s = htons(3);
18889 +                       pool_write(frontend, &s, sizeof(s));
18890 +
18891 +                       len = htonl(strlen(status[i].name));
18892 +                       pool_write(frontend, &len, sizeof(len));
18893 +                       pool_write(frontend, status[i].name, strlen(status[i].name));
18894 +
18895 +                       len = htonl(strlen(status[i].value));
18896 +                       pool_write(frontend, &len, sizeof(len));
18897 +                       pool_write(frontend, status[i].value, strlen(status[i].value));
18898 +                       
18899 +                       len = htonl(strlen(status[i].desc));
18900 +                       pool_write(frontend, &len, sizeof(len));
18901 +                       pool_write(frontend, status[i].desc, strlen(status[i].desc));
18902 +               }
18903 +       }
18904 +
18905 +       /* complete command response */
18906 +       pool_write(frontend, "C", 1);
18907 +       if (MAJOR(backend) == PROTO_MAJOR_V3)
18908 +       {
18909 +               len = htonl(sizeof(len) + strlen("SELECT")+1);
18910 +               pool_write(frontend, &len, sizeof(len));
18911 +       }
18912 +       pool_write(frontend, "SELECT", strlen("SELECT")+1);
18913 +
18914 +       /* ready for query */
18915 +       pool_write(frontend, "Z", 1);
18916 +       if (MAJOR(backend) == PROTO_MAJOR_V3)
18917 +       {
18918 +               len = htonl(sizeof(len) + 1);
18919 +               pool_write(frontend, &len, sizeof(len));
18920 +               pool_write(frontend, "I", 1);
18921 +       }
18922 +
18923 +       pool_flush(frontend);
18924 +}
18925 +/* tell the backend(s) that the frontend has exited: send 'X' (plus a length of 4 in V3) to MASTER, and to SECONDARY if REPLICATION */
18926 +void pool_send_frontend_exits(POOL_CONNECTION_POOL *backend)
18927 +{
18928 +       int len;
18929 +
18930 +       pool_write(MASTER(backend), "X", 1);
18931 +
18932 +       if (MAJOR(backend) == PROTO_MAJOR_V3)
18933 +       {
18934 +               len = htonl(4);
18935 +               pool_write(MASTER(backend), &len, sizeof(len));
18936 +       }
18937 +
18938 +       /*
18939 +        * XXX we cannot call pool_flush() here since backend may already
18940 +        * close the socket and pool_flush() automatically invokes fail
18941 +        * over handler. This could happen in copy command (remember the
18942 +        * famous "lost synchronization with server, resetting
18943 +        * connection" message)
18944 +        */
18945 +       fflush(MASTER(backend)->write_fd);
18946 +
18947 +       if (REPLICATION)
18948 +       {
18949 +               pool_write(SECONDARY(backend), "X", 1);
18950 +               if (MAJOR(backend) == PROTO_MAJOR_V3)
18951 +               {
18952 +                       len = htonl(4);
18953 +                       pool_write(SECONDARY(backend), &len, sizeof(len));      /* length belongs to the secondary's 'X', not the master */
18954 +               }
18955 +               fflush(SECONDARY(backend)->write_fd);
18956 +       }
18957 +}
18958 +
18959 +/*
18960 + * -------------------------------------------------------
18961 + * V3 functions
18962 + * -------------------------------------------------------
18963 + */
18964 +POOL_STATUS SimpleForwardToFrontend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend)
18965 +{
18966 +       char * func = "SimpleForwardToFrontend()";
18967 +       int len, len1;
18968 +       char *p;
18969 +       int status;
18970 +       /* relay one message (kind, length word, payload) from MASTER to frontend; SECONDARY's copy is read and dropped. POOL_END on read failure, else pool_write()'s result. no flush here */
18971 +       pool_write(frontend, &kind, 1);
18972 +
18973 +       status = pool_read(MASTER(backend), &len, sizeof(len));
18974 +       if (status < 0)
18975 +       {
18976 +               show_error("%s: error while reading message length",func);
18977 +               return POOL_END;
18978 +       }
18979 +
18980 +       if (REPLICATION)
18981 +       {
18982 +               status = pool_read(SECONDARY(backend), &len1, sizeof(len1));
18983 +               if (status < 0)
18984 +               {
18985 +                       show_error("%s: error while reading message length from secondary backend",func);
18986 +                       return POOL_END;
18987 +               }
18988 +
18989 +               if (len != len1)        /* mismatch is only logged; processing continues with each backend's own length */
18990 +               {
18991 +                       show_error("%s: length does not match between backends master(%d) secondary(%d) kind:(%c)",
18992 +                                        func, ntohl(len), ntohl(len1), kind);
18993 +               }
18994 +       }
18995 +
18996 +       pool_write(frontend, &len, sizeof(len));        /* master's length word, forwarded as read */
18997 +
18998 +       len = ntohl(len);
18999 +       len -= 4;       /* the length word counts itself; what remains is the payload size */
19000 +
19001 +       p = pool_read2(MASTER(backend), len);
19002 +       if (p == NULL)
19003 +               return POOL_END;
19004 +
19005 +       if (REPLICATION)
19006 +       {
19007 +               len1 = ntohl(len1);
19008 +               len1 -= 4;
19009 +               if (pool_read2(SECONDARY(backend), len1) == NULL)       /* consumed only; the data is not used */
19010 +                       return POOL_END;
19011 +       }
19012 +
19013 +       return pool_write(frontend, p, len);
19014 +}
19015 +/* relay one message (kind, length word, payload) from frontend to MASTER, and to SECONDARY if REPLICATION; POOL_END on any read/write failure. does not flush (see ProcessFrontendResponse()) */
19016 +POOL_STATUS SimpleForwardToBackend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend)
19017 +{
19018 +       int len;
19019 +       int sendlen;    /* length word exactly as read from the frontend; forwarded unchanged */
19020 +       char *p;
19021 +
19022 +       if (pool_write(MASTER(backend), &kind, 1))
19023 +               return POOL_END;
19024 +       if (REPLICATION)
19025 +               if (pool_write(SECONDARY(backend), &kind, 1))
19026 +                       return POOL_END;
19027 +
19028 +       if (pool_read(frontend, &sendlen, sizeof(sendlen)))
19029 +       {
19030 +               return POOL_END;
19031 +       }
19032 +
19033 +       len = ntohl(sendlen) - 4;       /* payload size: the length word counts itself */
19034 +
19035 +       p = pool_read2(frontend, len);
19036 +       if (p == NULL)
19037 +               return POOL_END;
19038 +
19039 +       if (pool_write(MASTER(backend), &sendlen, sizeof(sendlen)))
19040 +               return POOL_END;
19041 +       if (pool_write(MASTER(backend), p, len))
19042 +               return POOL_END;
19043 +
19044 +       if (REPLICATION)
19045 +       {
19046 +               if (pool_write(SECONDARY(backend), &sendlen, sizeof(sendlen)))
19047 +                       return POOL_END;
19048 +               if (pool_write(SECONDARY(backend), p, len))
19049 +                       return POOL_END;
19050 +       }
19051 +
19052 +       return POOL_CONTINUE;
19053 +}
19054 +/* relay a parameter status ('S') message from MASTER to frontend and record the name/value pair in MASTER's params; POOL_END on read failure, else pool_write()'s result */
19055 +POOL_STATUS ParameterStatus(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend)
19056 +{
19057 +#ifdef PRINT_DEBUG
19058 +       char * func = "ParameterStatus()";
19059 +#endif                 
19060 +       int len;
19061 +       int sendlen;
19062 +       char *p;
19063 +       char *name;
19064 +       char *value;
19065 +
19066 +       pool_write(frontend, "S", 1);
19067 +
19068 +       len = pool_read_message_length(backend);        /* negative means failure; presumably host byte order, since htonl() is applied below */
19069 +       if (len < 0)
19070 +       {
19071 +               return POOL_END;
19072 +       }
19073 +
19074 +       sendlen = htonl(len);
19075 +       pool_write(frontend, &sendlen, sizeof(sendlen));
19076 +
19077 +       len -= 4;       /* payload size without the 4-byte length word */
19078 +
19079 +       p = pool_read2(MASTER(backend), len);
19080 +       if (p == NULL)
19081 +               return POOL_END;
19082 +
19083 +       name = p;
19084 +       value = p + strlen(name) + 1;   /* value follows name's terminating null; not bounds-checked against len */
19085 +
19086 +#ifdef PRINT_DEBUG
19087 +       show_debug("%s:name: %s value: %s",func, name, value);
19088 +#endif                 
19089 +
19090 +       pool_add_param(&MASTER(backend)->params, name, value);
19091 +
19092 +#ifdef PRINT_DEBUG
19093 +       pool_param_debug_print(&MASTER(backend)->params);
19094 +#endif
19095 +
19096 +       if (REPLICATION)
19097 +               if (pool_read2(SECONDARY(backend), len) == NULL)        /* consume the secondary's payload; data unused */
19098 +                       return POOL_END;
19099 +
19100 +       return pool_write(frontend, p, len);
19101 +
19102 +}
19103 +
19104 +/*
19105 + * reset backend status by issuing queries[qcnt] through Query(). return values are:
19106 + * 0: no query was issued 1: a query was issued 2: no more queries remain -1: error
19107 + */
19108 +static int reset_backend(POOL_CONNECTION_POOL *backend, int qcnt)
19109 +{
19110 +#ifdef NO_RESET_ALL
19111 +       static char *queries[] = {"ABORT"};
19112 +#else
19113 +       static char *queries[] = {"ABORT", "RESET ALL"};
19114 +#endif
19115 +
19116 +       char *query;
19117 +       int qn = sizeof(queries)/sizeof(char *);        /* number of reset queries */
19118 +
19119 +       /* for PGCluster: nothing to reset when the connection pool is not used */
19120 +       if (!Use_Connection_Pool)
19121 +               return 2;
19122 +
19123 +       if (qcnt >= qn)
19124 +               return 2;
19125 +
19126 +       query = queries[qcnt];
19127 +
19128 +       /* if transaction state is idle, we don't need to issue ABORT */
19129 +       if (TSTATE(backend) == 'I' && !strcmp("ABORT", query))
19130 +               return 0;
19131 +
19132 +       if (Query(NULL, backend, query) != POOL_CONTINUE)       /* no frontend: NULL is passed in its place */
19133 +               return -1;
19134 +
19135 +       return 1;
19136 +}
19137 +
19138 +/*
19139 + * return non 0 if load balance is possible: load balance mode on, REPLICATION, V3, idle transaction, sql begins with SELECT
19140 + */
19141 +static int load_balance_enabled(POOL_CONNECTION_POOL *backend, char *sql)
19142 +{
19143 +       if (pool_config_load_balance_mode &&
19144 +               REPLICATION &&
19145 +               MAJOR(backend) == PROTO_MAJOR_V3 &&
19146 +               TSTATE(backend) == 'I' &&
19147 +               !strncasecmp(sql, "SELECT", 6)) /* case-insensitive; compared at the very start of sql, leading blanks are not skipped */
19148 +               return 1;
19149 +       return 0;
19150 +}
19151 +
19152 +/*
19153 + * start load balance mode: save backend->slots[] and put a randomly chosen slot in slots[0]
19154 + */
19155 +static void start_load_balance(POOL_CONNECTION_POOL *backend)
19156 +{
19157 +#ifdef PRINT_DEBUG
19158 +       char * func = "start_load_balance()";
19159 +#endif                 
19160 +       int i;
19161 +       int master;
19162 +
19163 +       /* save backend connection slots (restored by end_load_balance()) */
19164 +       for (i=0;i<backend->num;i++)
19165 +       {
19166 +               slots[i] = backend->slots[i];
19167 +       }
19168 +
19169 +       /* temporary turn off replication mode */
19170 +       /*REPLICATION = 0; */
19171 +
19172 +       /* choose a master in random manner */
19173 +       master = random() % backend->num;
19174 +       backend->slots[0] = slots[master];      /* only slot 0 is replaced; the other slots are left as they are */
19175 +#ifdef PRINT_DEBUG
19176 +       show_debug("%s: selected master is %d", func,master);
19177 +#endif                 
19178 +
19179 +       /* start load balancing */
19180 +       /*in_load_balance = 1;*/
19181 +}
19182 +
19183 +/*
19184 + * finish load balance mode: copy the saved slots[] back into backend->slots[]
19185 + */
19186 +static void end_load_balance(POOL_CONNECTION_POOL *backend)
19187 +{
19188 +       int i;
19189 +
19190 +       /* restore backend connection slots (saved by start_load_balance()) */
19191 +       for (i=0;i<backend->num;i++)
19192 +       {
19193 +               backend->slots[i] = slots[i];
19194 +       }
19195 +
19196 +       /* turn on replication mode */
19197 +       /* REPLICATION = 1; */
19198 +
19199 +       /*in_load_balance = 0;*/
19200 +#ifdef PRINT_DEBUG
19201 +       show_debug("end_load_balance: end load balance mode");
19202 +#endif                 
19203 +}
19204 +
19205 +/*
19206 + * send error message to frontend
19207 + */
19208 +void pool_send_error_message(POOL_CONNECTION *frontend, int protoMajor,
19209 +                                                        char *code,
19210 +                                                        char *message,
19211 +                                                        char *detail,
19212 +                                                        char *hint,
19213 +                                                        char *file,
19214 +                                                        int line)
19215 +{
19216 +#define MAXDATA        1024
19217 +#define MAXMSGBUF 128
19218 +       char * func = "pool_send_error_message()";
19219 +
19220 +       if (protoMajor == PROTO_MAJOR_V2)
19221 +       {
19222 +               pool_write(frontend, "E", 1);
19223 +               pool_write_and_flush(frontend, message, strlen(message)+1);
19224 +       }
19225 +       else if (protoMajor == PROTO_MAJOR_V3)
19226 +       {
19227 +               char data[MAXDATA];
19228 +               char msgbuf[MAXMSGBUF];
19229 +               int len;
19230 +               int thislen;
19231 +               int sendlen;
19232 +
19233 +               len = 0;
19234 +
19235 +               pool_write(frontend, "E", 1);
19236 +
19237 +               /* error level */
19238 +               thislen = snprintf(msgbuf, MAXMSGBUF, "SERROR");
19239 +               memcpy(data +len, msgbuf, thislen+1);
19240 +               len += thislen + 1;
19241 +
19242 +               /* code */
19243 +               thislen = snprintf(msgbuf, MAXMSGBUF, "C%s", code);
19244 +               memcpy(data +len, msgbuf, thislen+1);
19245 +               len += thislen + 1;
19246 +
19247 +               /* message */
19248 +               thislen = snprintf(msgbuf, MAXMSGBUF, "M%s", message);
19249 +               memcpy(data +len, msgbuf, thislen+1);
19250 +               len += thislen + 1;
19251 +
19252 +               /* detail */
19253 +               if (*detail != '\0')
19254 +               {
19255 +                       thislen = snprintf(msgbuf, MAXMSGBUF, "D%s", detail);
19256 +                       memcpy(data +len, msgbuf, thislen+1);
19257 +                       len += thislen + 1;
19258 +               }
19259 +
19260 +               /* hint */
19261 +               if (*hint != '\0')
19262 +               {
19263 +                       thislen = snprintf(msgbuf, MAXMSGBUF, "H%s", hint);
19264 +                       memcpy(data +len, msgbuf, thislen+1);
19265 +                       len += thislen + 1;
19266 +               }
19267 +
19268 +               /* file */
19269 +               thislen = snprintf(msgbuf, MAXMSGBUF, "F%s", file);
19270 +               memcpy(data +len, msgbuf, thislen+1);
19271 +               len += thislen + 1;
19272 +
19273 +               /* line */
19274 +               thislen = snprintf(msgbuf, MAXMSGBUF, "L%d", line);
19275 +               memcpy(data +len, msgbuf, thislen+1);
19276 +               len += thislen + 1;
19277 +
19278 +               /* stop null */
19279 +               len++;
19280 +               *(data + len) = '\0';
19281 +
19282 +               sendlen = len;
19283 +               len = htonl(len + 4);
19284 +               pool_write(frontend, &len, sizeof(len));
19285 +               pool_write_and_flush(frontend, data, sendlen);
19286 +       }
19287 +       else
19288 +               show_error("%s: unknown protocol major %d",func, protoMajor);
19289 +}
19290 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pool_stream.c pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_stream.c
19291 --- postgresql-8.2.4/src/pgcluster/pglb/pool_stream.c   1970-01-01 01:00:00.000000000 +0100
19292 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_stream.c 2007-02-18 22:52:17.000000000 +0100
19293 @@ -0,0 +1,584 @@
19294 +/*--------------------------------------------------------------------
19295 + * FILE:
19296 + *     pool_stream.c
19297 + *
19298 + * NOTE:
19299 + *     stream I/O modules
19300 + *
19301 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
19302 + * Portions Copyright (c) 2003-2006, Tatsuo Ishii
19303 + *--------------------------------------------------------------------
19304 + */
19305 +/*
19306 +* Permission to use, copy, modify, and distribute this software and
19307 +* its documentation for any purpose and without fee is hereby
19308 +* granted, provided that the above copyright notice appear in all
19309 +* copies and that both that copyright notice and this permission
19310 +* notice appear in supporting documentation, and that the name of the
19311 +* author not be used in advertising or publicity pertaining to
19312 +* distribution of the software without specific, written prior
19313 +* permission. The author makes no representations about the
19314 +* suitability of this software for any purpose.  It is provided "as
19315 +* is" without express or implied warranty.
19316 +*/
19317 +
19318 +#include <stdio.h>
19319 +#include <stdlib.h>
19320 +#include <string.h>
19321 +#include <errno.h>
19322 +#include <sys/types.h>
19323 +#include <unistd.h>
19324 +#include <sys/time.h>
19325 +
19326 +#include "postgres_fe.h"
19327 +#include "libpq/pqcomm.h"
19328 +#include "replicate_com.h"
19329 +#include "pglb.h"
19330 +
19331 +#define READBUFSZ 1024
19332 +
19333 +POOL_CONNECTION *pool_open(int fd);
19334 +void pool_close(POOL_CONNECTION *cp);
19335 +int pool_read(POOL_CONNECTION *cp, void *buf, int len);
19336 +char *pool_read2(POOL_CONNECTION *cp, int len);
19337 +int pool_write(POOL_CONNECTION *cp, void *buf, int len);
19338 +int pool_flush(POOL_CONNECTION *cp);
19339 +int pool_write_and_flush(POOL_CONNECTION *cp, void *buf, int len);
19340 +char *pool_read_string(POOL_CONNECTION *cp, int *len, int line);
19341 +
19342 +static int mystrlen(char *str, int upper, int *flag);
19343 +static int mystrlinelen(char *str, int upper, int *flag);
19344 +static int save_pending_data(POOL_CONNECTION *cp, void *data, int len);
19345 +static int consume_pending_data(POOL_CONNECTION *cp, void *data, int len);
19346 +
19347 +
19348 +/*
19349 +* wrap fd in a newly allocated POOL_CONNECTION (stdio write stream + pending data buffer).
19350 +* returns POOL_CONNECTION on success otherwise NULL; on failure fd itself is left open.
19351 +*/
19352 +POOL_CONNECTION *pool_open(int fd)
19353 +{
19354 +       POOL_CONNECTION *cp;
19355 +
19356 +       cp = (POOL_CONNECTION *)malloc(sizeof(POOL_CONNECTION));
19357 +       if (cp == NULL)
19358 +       {
19359 +               show_error("pool_open: malloc failed: %s", strerror(errno));
19360 +               return NULL;
19361 +       }
19362 +
19363 +       memset(cp, 0, sizeof(*cp));
19364 +       /* get the pending data buffer before fdopen() so that every failure can be unwound */
19365 +       cp->hp = malloc(READBUFSZ);
19366 +       if (cp->hp == NULL)
19367 +       {
19368 +               show_error("pool_open: malloc failed");
19369 +               free(cp);
19370 +               return NULL;
19371 +       }
19372 +
19373 +       cp->write_fd = fdopen(fd, "w");
19374 +       if (cp->write_fd == NULL)
19375 +       {
19376 +               show_error("pool_open: fdopen failed: %s",strerror(errno));
19377 +               free(cp->hp);
19378 +               free(cp);
19379 +               return NULL;
19380 +       }
19381 +       cp->bufsz = READBUFSZ;
19382 +       cp->po = 0;
19383 +       cp->len = 0;
19384 +       cp->sbuf = NULL;
19385 +       cp->sbufsz = 0;
19386 +       cp->buf2 = NULL;
19387 +       cp->bufsz2 = 0;
19388 +       cp->fd = fd;
19389 +       return cp;
19390 +}
19391 +
19392 +/*
19393 +* flush and close the write stream of cp, then release every buffer cp owns.
19394 +*/
19395 +void pool_close(POOL_CONNECTION *cp)
19396 +{
19397 +       /* pool_open() fdopen()ed write_fd on cp->fd: fclose() flushes, then closes that fd */
19398 +       fclose(cp->write_fd);
19399 +       free(cp->hp);
19400 +       if (cp->sbuf)
19401 +               free(cp->sbuf);
19402 +       if (cp->buf2)
19403 +               free(cp->buf2);
19404 +       pool_discard_params(&cp->params);
19405 +       free(cp);
19406 +}
19407 +
19408 +/*
19409 +* read len bytes from cp into buf, taking pending data first and then the descriptor.
19410 +* returns 0 on success otherwise -1 (frontend only: a backend failure exits the process).
19411 +*/
19412 +int pool_read(POOL_CONNECTION *cp, void *buf, int len)
19413 +{
19414 +       static char readbuf[READBUFSZ];
19415 +
19416 +       int consume_size;
19417 +       int readlen;
19418 +
19419 +       consume_size = consume_pending_data(cp, buf, len);
19420 +       len -= consume_size;
19421 +       buf = (char *)buf + consume_size;       /* ISO C has no arithmetic on void * */
19422 +
19423 +       while (len > 0)
19424 +       {
19425 +               if (cp->issecondary_backend)
19426 +               {
19427 +                       if (pool_check_fd(cp, 0))
19428 +                       {
19429 +                               show_error("pool_read: secondary data is not ready. abort this session");
19430 +                               exit(1);
19431 +                       }
19432 +               }
19433 +
19434 +               readlen = read(cp->fd, readbuf, READBUFSZ);
19435 +               if (readlen == -1)
19436 +               {
19437 +                       show_error("pool_read: read failed (%s)", strerror(errno));
19438 +
19439 +                       if (cp->isbackend)
19440 +                       {
19441 +                           /* fatal error, notice to parent and exit */
19442 +                           notice_backend_error();
19443 +                           exit(1);
19444 +                       }
19445 +                       else
19446 +                       {
19447 +                           return -1;
19448 +                       }
19449 +               }
19450 +               else if (readlen == 0)
19451 +               {
19452 +                       show_error("pool_read: EOF encountered");
19453 +
19454 +                       if (cp->isbackend)
19455 +                       {
19456 +                           /* fatal error, notice to parent and exit */
19457 +                           notice_backend_error();
19458 +                               exit(1);
19459 +                       }
19460 +                       else
19461 +                       {
19462 +                               /*
19463 +                                * if backend offers authentication method, frontend could close connection
19464 +                                */
19465 +                               return -1;
19466 +                       }
19467 +               }
19468 +
19469 +               if (len < readlen)
19470 +               {
19471 +                       /* overrun. the bytes beyond len are kept in the pending buffer for later reads */
19472 +                       if (save_pending_data(cp, readbuf+len, readlen-len))
19473 +                               return -1;
19474 +                       memmove(buf, readbuf, len);
19475 +                       break;
19476 +               }
19477 +
19478 +               memmove(buf, readbuf, readlen);
19479 +               buf = (char *)buf + readlen;
19480 +               len -= readlen;
19481 +       }
19482 +
19483 +       return 0;
19484 +}
19485 +
19486 +/*
19487 +* read exactly len bytes from cp into cp->buf2, which is grown as needed and reused by later calls
19488 +* returns buffer address on success otherwise NULL (frontend only: a backend failure exits).
19489 +*/
19490 +char *pool_read2(POOL_CONNECTION *cp, int len)
19491 +{
19492 +       char *buf;
19493 +       int req_size;
19494 +       int alloc_size;
19495 +       int consume_size;
19496 +       int readlen;
19497 +
19498 +       req_size = cp->len + len;       /* NOTE(review): sized for pending bytes plus len, although at most len bytes are stored */
19499 +
19500 +       if (req_size > cp->bufsz2)
19501 +       {
19502 +               alloc_size = ((req_size+1)/READBUFSZ+1)*READBUFSZ;      /* grow in READBUFSZ units */
19503 +               cp->buf2 = realloc(cp->buf2, alloc_size);
19504 +               if (cp->buf2 == NULL)
19505 +               {
19506 +                       show_error("pool_read2: failed to realloc");
19507 +                       exit(1);
19508 +               }
19509 +               cp->bufsz2 = alloc_size;
19510 +       }
19511 +
19512 +       buf = cp->buf2;
19513 +
19514 +       consume_size = consume_pending_data(cp, buf, len);      /* pending data is used before the descriptor */
19515 +       len -= consume_size;
19516 +       buf += consume_size;
19517 +
19518 +       while (len > 0)
19519 +       {
19520 +               if (cp->issecondary_backend)
19521 +               {
19522 +                       if (pool_check_fd(cp, 0))
19523 +                       {
19524 +                               show_error("pool_read2: secondary data is not ready. abort this session");
19525 +                               exit(1);
19526 +                       }
19527 +               }
19528 +
19529 +               readlen = read(cp->fd, buf, len);       /* never asks for more than is still missing */
19530 +               if (readlen == -1)
19531 +               {
19532 +                       show_error("pool_read2: read failed (%s)", strerror(errno));
19533 +
19534 +                       if (cp->isbackend)
19535 +                       {
19536 +                           /* fatal error, notice to parent and exit */
19537 +                           notice_backend_error();
19538 +                           exit(1);
19539 +                       }
19540 +                       else
19541 +                       {
19542 +                           return NULL;
19543 +                       }
19544 +               }
19545 +               else if (readlen == 0)
19546 +               {
19547 +                       show_error("pool_read2: EOF encountered");
19548 +
19549 +                       if (cp->isbackend)
19550 +                       {
19551 +                           /* fatal error, notice to parent and exit */
19552 +                           notice_backend_error();
19553 +                               exit(1);
19554 +                       }
19555 +                       else
19556 +                       {
19557 +                               /*
19558 +                                * if backend offers authentication method, frontend could close connection
19559 +                                */
19560 +                               return NULL;
19561 +                       }
19562 +               }
19563 +
19564 +               buf += readlen;
19565 +               len -= readlen;
19566 +       }
19567 +
19568 +       return cp->buf2;
19569 +}
19570 +
19571 +/*
19572 +* write len bytes from buf to the write stream of cp (skipped when cp->no_forward is set)
19573 +* returns 0 on success otherwise -1.
19574 +*/
19575 +int pool_write(POOL_CONNECTION *cp, void *buf, int len)
19576 +{
19577 +       if (!cp->no_forward && len > 0 && fwrite(buf, len, 1, cp->write_fd) != 1)
19578 +               return -1;      /* the stream did not take the data: tell the caller */
19579 +
19580 +       return 0;
19581 +}
19582 +
19583 +/*
19584 +* flush the stdio write buffer of cp
19585 +*/
19586 +int pool_flush(POOL_CONNECTION *cp)
19587 +{
19588 +       /* common case first: nothing went wrong */
19589 +       if (fflush(cp->write_fd) == 0)
19590 +               return 0;
19591 +
19592 +       show_error("pool_flush: fflush failed (%s)", strerror(errno));
19593 +
19594 +       /* a failure on a non-backend connection is left to the caller */
19595 +       if (!cp->isbackend)
19596 +               return -1;
19597 +
19598 +       /*
19599 +        * a failure on a backend connection ends this process
19600 +        */
19601 +       notice_backend_error();
19602 +       exit(1);
19603 +}
19604 +
19605 +/*
19606 +* pool_write followed by pool_flush; the flush is skipped when the write reports failure
19607 +*/
19608 +int pool_write_and_flush(POOL_CONNECTION *cp, void *buf, int len)
19609 +{
19610 +       int rc = pool_write(cp, buf, len);
19611 +
19612 +       return (rc != 0) ? -1 : pool_flush(cp);
19613 +}
19614 +
19615 +/* 
19616 + * read a string into cp->sbuf until a NUL byte (or, if line is not 0, a new line) is encountered.
19617 + * *len receives the number of bytes stored. returns cp->sbuf, or NULL on error or EOF.
19618 +*/
19619 +char *pool_read_string(POOL_CONNECTION *cp, int *len, int line)
19620 +{
19621 +       int readp;
19622 +       int readsize;
19623 +       int readlen;
19624 +       int strlength;
19625 +       int flag;
19626 +       int consume_size;
19627 +
19628 +#ifdef DEBUG
19629 +       static char pbuf[READBUFSZ];
19630 +#endif
19631 +
19632 +       *len = 0;
19633 +       readp = 0;
19634 +
19635 +       /* initialize read buffer */
19636 +       if (cp->sbufsz == 0)
19637 +       {
19638 +               cp->sbuf = malloc(READBUFSZ);
19639 +               if (cp->sbuf == NULL)
19640 +               {
19641 +                       show_error("pool_read_string: malloc failed");
19642 +                       return NULL;
19643 +               }
19644 +               cp->sbufsz = READBUFSZ;
19645 +               *cp->sbuf = '\0';
19646 +       }
19647 +
19648 +       /* any pending data? */
19649 +       if (cp->len)
19650 +       {
19651 +               if (line)
19652 +                       strlength = mystrlinelen(cp->hp+cp->po, cp->len, &flag);
19653 +               else
19654 +                       strlength = mystrlen(cp->hp+cp->po, cp->len, &flag);
19655 +
19656 +               /* buffer is too small? */
19657 +               if ((strlength + 1) > cp->sbufsz)
19658 +               {
19659 +                       cp->sbufsz = ((strlength+1)/READBUFSZ+1)*READBUFSZ;
19660 +                       cp->sbuf = realloc(cp->sbuf, cp->sbufsz);
19661 +                       if (cp->sbuf == NULL)
19662 +                       {
19663 +                               show_error("pool_read_string: realloc failed");
19664 +                               return NULL;
19665 +                       }
19666 +               }
19667 +
19668 +               /* consume pending and save to read string buffer */
19669 +               consume_size = consume_pending_data(cp, cp->sbuf, strlength);
19670 +
19671 +               *len = strlength;
19672 +
19673 +               /* is the string null terminated? */
19674 +               if (consume_size == strlength && !flag)
19675 +               {
19676 +                       /* not null or line terminated.
19677 +                        * we need to read more since we have not encountered NULL or new line yet
19678 +                        */
19679 +                       readsize = cp->sbufsz - strlength;      /* room left behind the copied bytes */
19680 +                       readp = strlength;
19681 +               }
19682 +               else
19683 +               {
19684 +#ifdef PRINT_DEBUG
19685 +                       show_debug("pool_read_string: read all from pending data. po:%d len:%d",
19686 +                                          cp->po, cp->len);
19687 +#endif                 
19688 +                       return cp->sbuf;
19689 +               }
19690 +       } else
19691 +       {
19692 +               readsize = cp->sbufsz;
19693 +       }
19694 +
19695 +
19696 +       for (;;)
19697 +       {
19698 +               readlen = read(cp->fd, cp->sbuf+readp, readsize);
19699 +               if (readlen == -1)
19700 +               {
19701 +                       show_error("pool_read_string: read() failed. reason:%s", strerror(errno));
19702 +
19703 +                       if (cp->isbackend)
19704 +                       {
19705 +                           notice_backend_error();
19706 +                           exit(1);
19707 +                       }
19708 +                       else
19709 +                       {
19710 +                           return NULL;
19711 +                       }
19712 +               }
19713 +
19714 +               if (readlen == 0)       /* EOF before a terminator was seen */
19715 +                       return NULL;
19716 +
19717 +               /* check overrun */
19718 +               if (line)
19719 +                       strlength = mystrlinelen(cp->sbuf+readp, readlen, &flag);
19720 +               else
19721 +                       strlength = mystrlen(cp->sbuf+readp, readlen, &flag);
19722 +
19723 +               if (strlength < readlen)
19724 +               {
19725 +                       save_pending_data(cp, cp->sbuf+readp+strlength, readlen-strlength);     /* NOTE(review): return value is not checked */
19726 +                       *len += strlength;
19727 +#ifdef PRINT_DEBUG
19728 +                       show_debug("pool_read_string: total result %d with pending data po:%d len:%d", *len, cp->po, cp->len);
19729 +#endif                 
19730 +                       return cp->sbuf;
19731 +               }
19732 +
19733 +               *len += readlen;
19734 +
19735 +               /* encountered null or newline? */
19736 +               if (flag)
19737 +               {
19738 +                       /* ok we have read all data */
19739 +#ifdef PRINT_DEBUG
19740 +                       show_debug("pool_read_string: total result %d ", *len);
19741 +#endif                 
19742 +                       break;
19743 +               }
19744 +
19745 +               readp += readlen;
19746 +               readsize = READBUFSZ;
19747 +
19748 +               if ((*len+readsize) > cp->sbufsz)       /* make room for the next READBUFSZ read */
19749 +               {
19750 +                       cp->sbufsz += READBUFSZ;
19751 +
19752 +                       cp->sbuf = realloc(cp->sbuf, cp->sbufsz);
19753 +                       if (cp->sbuf == NULL)
19754 +                       {
19755 +                               show_error("pool_read_string: realloc failed");
19756 +                               return NULL;
19757 +                       }
19758 +               }
19759 +       }
19760 +       return cp->sbuf;
19761 +}
19762 +
19763 +/*
19764 + * counts the bytes of str up to and including its \0, looking at no more than upper bytes.
19765 + * flag is set to non 0 only when the \0 was found within those bytes.
19766 + * example:
19767 + *     mystrlen("abc", 2, &flag) returns 2, flag is 0
19768 + *     mystrlen("abc", 3, &flag) returns 3, flag is 0
19769 + *     mystrlen("abc", 4, &flag) returns 4, flag is 1
19770 + *     mystrlen("abc", 5, &flag) returns 4, flag is 1
19771 + */
19772 +static int mystrlen(char *str, int upper, int *flag)
19773 +{
19774 +       int i;
19775 +
19776 +       *flag = 0;
19777 +
19778 +       /* scan at most upper bytes looking for the terminator */
19779 +       for (i = 0; i < upper; i++)
19780 +       {
19781 +               if (str[i] == '\0')
19782 +               {
19783 +                       *flag = 1;
19784 +                       return i + 1;   /* the \0 itself is counted */
19785 +               }
19786 +       }
19787 +       return i;
19788 +}
19789 +
19790 +/*
19791 + * counts the bytes of str up to and including the first \n or \0, looking at no more than upper bytes.
19792 + * flag is set to non 0 only when a \0 or \n was found within those bytes.
19793 + * example:
19794 + *     mystrlinelen("abc", 2, &flag) returns 2, flag is 0
19795 + *     mystrlinelen("abc", 3, &flag) returns 3, flag is 0
19796 + *     mystrlinelen("abc", 4, &flag) returns 4, flag is 1
19797 + *     mystrlinelen("abc", 5, &flag) returns 4, flag is 1
19798 + *     mystrlinelen("abcd\nefg", 4, &flag) returns 4, flag is 0
19799 + *     mystrlinelen("abcd\nefg", 5, &flag) returns 5, flag is 1
19800 + *     mystrlinelen("abcd\nefg", 6, &flag) returns 5, flag is 1
19801 + */
19802 +static int mystrlinelen(char *str, int upper, int *flag)
19803 +{
19804 +       int i;
19805 +
19806 +       *flag = 0;
19807 +
19808 +       /* scan at most upper bytes looking for either terminator */
19809 +       for (i = 0; i < upper; i++)
19810 +       {
19811 +               if (str[i] == '\0' || str[i] == '\n')
19812 +               {
19813 +                       *flag = 1;
19814 +                       return i + 1;   /* the terminator itself is counted */
19815 +               }
19816 +       }
19817 +       return i;
19818 +}
19819 +
19820 +/*
19821 + * append len bytes of data to the pending buffer of cp. returns 0, or -1 if it cannot grow.
19822 + */
19823 +static int save_pending_data(POOL_CONNECTION *cp, void *data, int len)
19824 +{
19825 +       int needed;
19826 +       size_t new_size;
19827 +       char *grown;
19828 +
19829 +       /* nothing pending: start again from the head of the buffer */
19830 +       if (cp->len == 0)
19831 +               cp->po = 0;
19832 +
19833 +       needed = cp->po + cp->len + len;
19834 +
19835 +       /* grow the pending buffer when the new tail would not fit */
19836 +       if (needed > cp->bufsz)
19837 +       {
19838 +               /* sized in whole READBUFSZ units, always above needed */
19839 +               new_size = (needed/READBUFSZ+1)*READBUFSZ;
19840 +               grown = realloc(cp->hp, new_size);
19841 +               if (grown == NULL)
19842 +               {
19843 +                       show_error("save_pending_data: realloc failed");
19844 +                       return -1;
19845 +               }
19846 +
19847 +               cp->hp = grown;
19848 +               cp->bufsz = new_size;
19849 +       }
19850 +
19851 +       memmove(cp->hp + cp->po + cp->len, data, len);  /* goes behind what is still pending */
19852 +       cp->len += len;
19853 +
19854 +       return 0;
19855 +}
19856 +
19857 +/*
19858 + * copy up to len bytes of pending data into data. returns the number of bytes copied.
19859 + */
19860 +static int consume_pending_data(POOL_CONNECTION *cp, void *data, int len)
19861 +{
19862 +       int taken = 0;
19863 +
19864 +       /* nothing buffered means nothing to copy */
19865 +       if (cp->len > 0)
19866 +       {
19867 +               taken = Min(len, cp->len);
19868 +               memmove(data, cp->hp + cp->po, taken);
19869 +               cp->len -= taken;
19870 +
19871 +               /* rewind the read offset once the buffer is drained,
19872 +                * otherwise step past the bytes just handed out */
19873 +               cp->po = (cp->len <= 0) ? 0 : cp->po + taken;
19874 +       }
19875 +
19876 +       return taken;
19877 +}
19878 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/recovery.c pgcluster-1.7.0rc7/src/pgcluster/pglb/recovery.c
19879 --- postgresql-8.2.4/src/pgcluster/pglb/recovery.c      1970-01-01 01:00:00.000000000 +0100
19880 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/recovery.c    2007-02-18 22:52:17.000000000 +0100
19881 @@ -0,0 +1,262 @@
19882 +/*--------------------------------------------------------------------
19883 + * FILE:
19884 + *     recovery.c
19885 + *
19886 + * NOTE:
19887 + *     This file contains the functions pglb uses to receive and apply
19888 + *     recovery requests.
19889 + *
19890 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
19891 + *--------------------------------------------------------------------
19892 + */
19893 +/*
19894 + * Permission to use, copy, modify, and distribute this software and
19895 + * its documentation for any purpose and without fee is hereby
19896 + * granted, provided that the above copyright notice appear in all
19897 + * copies and that both that copyright notice and this permission
19898 + * notice appear in supporting documentation, and that the name of the
19899 + * author not be used in advertising or publicity pertaining to
19900 + * distribution of the software without specific, written prior
19901 + * permission. The author makes no representations about the
19902 + * suitability of this software for any purpose.  It is provided "as
19903 + * is" without express or implied warranty.
19904 + *
19905 +*/
19906 +#include <stdio.h>
19907 +#include <string.h>
19908 +#include <stdlib.h>
19909 +#include <unistd.h>
19910 +#include <signal.h>
19911 +#include <sys/wait.h>
19912 +#include <ctype.h>
19913 +#include <sys/types.h>
19914 +#include <sys/stat.h>
19915 +#include <sys/socket.h>
19916 +#include <sys/ipc.h>
19917 +#include <sys/msg.h>
19918 +#include <netdb.h>
19919 +#include <netinet/in.h>
19920 +#include <errno.h>
19921 +#include <fcntl.h>
19922 +#include <time.h>
19923 +#include <sys/param.h>
19924 +#include <arpa/inet.h>
19925 +#include <sys/file.h>
19926 +
19927 +#ifdef HAVE_NETINET_TCP_H
19928 +#include <netinet/tcp.h>
19929 +#endif
19930 +
19931 +#ifdef HAVE_SYS_SELECT_H
19932 +#include <sys/select.h>
19933 +#endif
19934 +#include "replicate_com.h"
19935 +#include "pglb.h"
19936 +
19937 +
19938 +/*--------------------------------------
19939 + * PROTOTYPE DECLARATION
19940 + *--------------------------------------
19941 + */
19942 +void PGRrecovery_main(int fork_wait_time);
19943 +
19944 +static int set_recovery(RecoveryPacket *packet);
19945 +static int receive_recovery(int fd);
19946 +
19947 +
19948 +/*--------------------------------------------------------------------
19949 + * SYMBOL
19950 + *    PGRrecovery_main()
19951 + * NOTES
19952 + *    main module of recovery function
19953 + * ARGS
19954 + *    void
19955 + * RETURN
19956 + *    none
19957 + *--------------------------------------------------------------------
19958 + */
19959 +void
19960 +PGRrecovery_main(int fork_wait_time)
19961 +{
19962 +       char * func = "PGRrecovery_main()";
19963 +       int fd = -1;
19964 +       int rtn;
19965 +       pid_t pgid = 0;
19966 +       pid_t pid = 0;
19967 +
19968 +       pgid = getpgid(0);
19969 +       pid = fork();
19970 +       if (pid != 0)
19971 +       {
19972 +               return;
19973 +       }
19974 +
19975 +       PGRsignal(SIGCHLD, SIG_DFL);
19976 +       PGRsignal(SIGHUP, PGRexit_subprocess);  
19977 +       PGRsignal(SIGINT, PGRexit_subprocess);  
19978 +       PGRsignal(SIGQUIT, PGRexit_subprocess); 
19979 +       PGRsignal(SIGTERM, PGRexit_subprocess); 
19980 +       PGRsignal(SIGPIPE, SIG_IGN);    
19981 +       /*
19982 +        * in child process,
19983 +        * call recovery module
19984 +        */
19985 +       setpgid(0,pgid);
19986 +
19987 +       if (fork_wait_time > 0) {
19988 +#ifdef PRINT_DEBUG
19989 +               show_debug("recovery process: wait fork(): pid = %d", getpid());
19990 +#endif         
19991 +               sleep(fork_wait_time);
19992 +       }
19993 +
19994 +       fd = PGRcreate_recv_socket(ResolvedName, Recovery_Port_Number);
19995 +       if (fd < 0)
19996 +       {
19997 +               show_error("%s:PGRcreate_recv_socket failed",func);
19998 +               exit(1);
19999 +       }
20000 +       
20001 +       for (;;)
20002 +       {
20003 +               fd_set    rmask;
20004 +               struct timeval timeout;
20005 +
20006 +               timeout.tv_sec = 60;
20007 +               timeout.tv_usec = 0;
20008 +
20009 +               /*
20010 +                * Wait for something to happen.
20011 +                */
20012 +               FD_ZERO(&rmask);
20013 +               FD_SET(fd,&rmask);
20014 +               rtn = select(fd+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
20015 +               if (rtn && FD_ISSET(fd, &rmask))
20016 +               {
20017 +                       receive_recovery(fd);
20018 +               }
20019 +       }
20020 +}
20021 +
20022 +/*--------------------------------------------------------------------
20023 + * SYMBOL
20024 + *    set_recovery()
20025 + * NOTES
20026 + *    apply a recovery request from replication server to the cluster table
20027 + * ARGS
20028 + *    RecoveryPacket *packet: received request; packet_no selects the action (I)
20029 + * RETURN
20030 + *    STATUS_OK, or the result of PGRpre_fork_child() for RECOVERY_PREPARE_REQ with a connection pool
20031 + *--------------------------------------------------------------------
20032 + */
20033 +static int
20034 +set_recovery(RecoveryPacket *packet)
20035 +{
20036 +#ifdef PRINT_DEBUG
20037 +       char * func = "set_recovery()";
20038 +#endif                 
20039 +       int status = STATUS_OK;
20040 +       ClusterTbl key;
20041 +       ClusterTbl * ptr;
20042 +
20043 +       PGRset_key_of_cluster(&key,packet);
20044 +#ifdef PRINT_DEBUG
20045 +       show_debug("%s:received no:%d",func, ntohs(packet->packet_no));
20046 +#endif                 
20047 +       switch (ntohs(packet->packet_no))
20048 +       {
20049 +       case RECOVERY_PREPARE_REQ:
20050 +               /* add cluster db */
20051 +#ifdef PRINT_DEBUG
20052 +               show_debug("%s:add_db host:%s port:%d max:%d",
20053 +                       func, packet->hostName,ntohs(packet->port),ntohs(packet->max_connect));
20054 +#endif                 
20055 +               ptr = PGRsearch_cluster_tbl(&key);
20056 +               if (ptr == NULL)
20057 +               {
20058 +                       ptr = PGRadd_cluster_tbl(&key);
20059 +               }
20060 +               if (ptr != NULL)
20061 +               {
20062 +                       PGRset_status_on_cluster_tbl(TBL_STOP,ptr);
20063 +                       if (Use_Connection_Pool)
20064 +                       {
20065 +                               signal(SIGCHLD,PGRrecreate_child);
20066 +                               status = PGRpre_fork_child(ptr);
20067 +                       }
20068 +               }
20069 +               break;
20070 +       case RECOVERY_FINISH:
20071 +               /* start cluster db */
20072 +               ptr = PGRsearch_cluster_tbl(&key);
20073 +               if (ptr != NULL)
20074 +               {
20075 +#ifdef PRINT_DEBUG
20076 +                       show_debug("%s:start_db host:%s port:%d max:%d",
20077 +                               func,packet->hostName,ntohs(packet->port),ntohs(packet->max_connect));
20078 +#endif                 
20079 +                       PGRset_status_on_cluster_tbl(TBL_INIT,ptr);
20080 +               }
20081 +               break;
20082 +       case RECOVERY_PGDATA_ANS:
20083 +               /* stop cluster db */
20084 +               ptr = PGRsearch_cluster_tbl(&key);
20085 +               if (ptr != NULL)
20086 +               {
20087 +#ifdef PRINT_DEBUG
20088 +                       show_debug("%s:stop_db host:%s port:%d max:%d",
20089 +                               func, packet->hostName,ntohs(packet->port),ntohs(packet->max_connect));
20090 +#endif                 
20091 +                       PGRset_status_on_cluster_tbl(TBL_STOP,ptr);
20092 +               }
20093 +               break;
20094 +       case RECOVERY_ERROR:
20095 +               /* delete cluster db */
20096 +               ptr = PGRsearch_cluster_tbl(&key);
20097 +               if (ptr != NULL)
20098 +               {
20099 +                       PGRset_status_on_cluster_tbl(TBL_FREE,ptr);
20100 +                       if (Use_Connection_Pool)
20101 +                       {
20102 +                               PGRquit_children_on_cluster(ptr->rec_no);
20103 +                       }
20104 +               }
20105 +               break;
20106 +       /* cluster db has error */
20107 +       case RECOVERY_ERROR_CONNECTION:
20108 +               /* set error cluster db */
20109 +               ptr = PGRsearch_cluster_tbl(&key);
20110 +               if (ptr != NULL)
20111 +               {
20112 +                       PGRset_status_on_cluster_tbl(TBL_ERROR,ptr);
20113 +                       if (Use_Connection_Pool)
20114 +                       {
20115 +                               PGRquit_children_on_cluster(ptr->rec_no);
20116 +                       }
20117 +               }
20118 +               break;
20119 +       }
20120 +       return status;  /* no longer discards the PGRpre_fork_child() result */
20121 +}
20122 +
20123 +static int
20124 +receive_recovery(int fd)
20125 +{
20126 +       RecoveryPacket pkt;
20127 +       int result = STATUS_ERROR;
20128 +       int nread = -1;
20129 +       int conn = PGRcreate_acception(fd,ResolvedName,Recovery_Port_Number);
20130 +
20131 +       /* read one RecoveryPacket from the new socket and hand it to set_recovery() */
20132 +       if (conn >= 0)
20133 +       {
20134 +               memset(&pkt, 0, sizeof(RecoveryPacket));
20135 +               nread = PGRread_byte(conn,(char *)&pkt,sizeof(RecoveryPacket),MSG_WAITALL);
20136 +               /* anything but a complete packet leaves result at STATUS_ERROR */
20137 +               if (nread == sizeof(RecoveryPacket))
20138 +                       result = set_recovery(&pkt);
20139 +       }
20140 +       /* reached on every path, also when no socket was obtained */
20141 +       PGRclose_sock(&conn);
20142 +       return result;
20143 +}
20144 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/socket.c pgcluster-1.7.0rc7/src/pgcluster/pglb/socket.c
20145 --- postgresql-8.2.4/src/pgcluster/pglb/socket.c        1970-01-01 01:00:00.000000000 +0100
20146 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/socket.c      2007-02-18 22:52:17.000000000 +0100
20147 @@ -0,0 +1,395 @@
20148 +/*--------------------------------------------------------------------
20149 + * FILE:
20150 + *     socket.c
20151 + *
20152 + * NOTE:
20153 + *     This file is composed of the communication modules
20154 + *
20155 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
20156 + *--------------------------------------------------------------------
20157 + */
20158 +/*
20159 + * Permission to use, copy, modify, and distribute this software and
20160 + * its documentation for any purpose and without fee is hereby
20161 + * granted, provided that the above copyright notice appear in all
20162 + * copies and that both that copyright notice and this permission
20163 + * notice appear in supporting documentation, and that the name of the
20164 + * author not be used in advertising or publicity pertaining to
20165 + * distribution of the software without specific, written prior
20166 + * permission. The author makes no representations about the
20167 + * suitability of this software for any purpose.  It is provided "as
20168 + * is" without express or implied warranty.
20169 + *
20170 +*/
20171 +#include "postgres.h"
20172 +#include <stdio.h>
20173 +#include <string.h>
20174 +#include <stdlib.h>
20175 +#include <unistd.h>
20176 +#include <sys/wait.h>
20177 +#include <ctype.h>
20178 +#include <sys/types.h>
20179 +#include <sys/stat.h>
20180 +#include <sys/socket.h>
20181 +#include <sys/un.h>
20182 +#include <sys/ipc.h>
20183 +#include <netdb.h>
20184 +#include <errno.h>
20185 +#include <fcntl.h>
20186 +#include <time.h>
20187 +#include <sys/param.h>
20188 +#include <sys/file.h>
20189 +#include <netinet/in.h>
20190 +#include <arpa/inet.h>
20191 +
20192 +#ifdef HAVE_SYS_SELECT_H
20193 +#include <sys/select.h>
20194 +#endif
20195 +
20196 +#ifdef HAVE_NETINET_TCP_H
20197 +#include <netinet/tcp.h>
20198 +#endif
20199 +
20200 +#include "replicate_com.h"
20201 +#include "pglb.h"
20202 +
20203 +
20204 +/*--------------------------------------
20205 + * PROTOTYPE DECLARATION
20206 + *--------------------------------------
20207 + */
20208 +int PGRcreate_unix_domain_socket(char * sock_dir, unsigned short port);
20209 +int PGRcreate_recv_socket(char * hostName , unsigned short portNumber);
20210 +int PGRcreate_acception(int fd, char * hostName , unsigned short portNumber);
20211 +void PGRclose_sock(int * sock);
20212 +int PGRread_byte(int sock,char * buf,int len, int flag);
20213 +int PGRcreate_cluster_socket( int * sock, ClusterTbl * ptr );
20214 +
20215 +static int create_send_socket(int * fdP, char * hostName , unsigned short portNumber);
20216 +
20217 +
20218 +/*
20219 + * Create a UNIX domain socket bound to "<sock_dir>/.s.PGSQL.<port>" and listen on it.
20220 + * Returns the listening descriptor, or -1 on failure (the descriptor is closed again). */
20221 +int 
20222 +PGRcreate_unix_domain_socket(char * sock_dir, unsigned short port)
20223 +{
20224 +       char * func = "PGRcreate_unix_domain_socket()";
20225 +       struct sockaddr_un addr;
20226 +       int fd;
20227 +       int status;
20228 +
20229 +       fd = socket(AF_UNIX, SOCK_STREAM, 0);
20230 +       if (fd == -1)
20231 +       {
20232 +               show_error("%s:Failed to create UNIX domain socket. reason: %s",func,  strerror(errno));
20233 +               return -1;
20234 +       }
20235 +       /* set unix domain socket path */
20236 +       memset((char *) &addr, 0, sizeof(addr));
20237 +       ((struct sockaddr *)&addr)->sa_family = AF_UNIX;
20238 +       snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/.s.PGSQL.%d",sock_dir,port);
20239 +       status = bind(fd, (struct sockaddr *)&addr, sizeof(struct sockaddr_un));
20240 +       if (status == -1)
20241 +       {
20242 +               show_error("%s: bind() failed. reason: %s", func, strerror(errno));
20243 +               PGRclose_sock(&fd);     /* report first, then release: do not leak the descriptor */
20244 +               return -1;
20245 +       }
20246 +       /* open the socket file to every local user (mode 0777) */
20247 +       if (chmod(addr.sun_path, 0777) == -1)
20248 +       {
20249 +               show_error("%s: chmod() failed. reason: %s", func, strerror(errno));
20250 +               PGRclose_sock(&fd);
20251 +               return -1;
20252 +       }
20253 +       status = listen(fd, PGLB_MAX_SOCKET_QUEUE);
20254 +       if (status < 0)
20255 +       {
20256 +               show_error("%s: listen() failed. reason: %s", func, strerror(errno));
20257 +               PGRclose_sock(&fd);
20258 +               return -1;
20259 +       }
20260 +       return fd;
20261 +}
20262 +
20263 +int
20264 +PGRcreate_recv_socket(char * hostName , unsigned short portNumber)
20265 +{
20266 +       /* Listen on TCP hostName:portNumber (INADDR_ANY when hostName is NULL or empty); returns the fd, or -1 on error. */
20267 +       char * func = "PGRcreate_recv_socket()";
20268 +       int fd,err;
20269 +       struct sockaddr_in addr;
20270 +       int one = 1;
20271 +
20272 +       if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
20273 +       {
20274 +               show_error("%s: socket() failed. (%s)", func, strerror(errno));
20275 +               return -1;
20276 +       }
20277 +       if ((setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one))) == -1)
20278 +       {
20279 +               show_error("%s: setsockopt() failed. (%s)",func, strerror(errno));     /* report before close() can overwrite errno */
20280 +               PGRclose_sock(&fd);
20281 +               return -1;
20282 +       }
20283 +       memset((char *)&addr,0,sizeof(addr));   /* no uninitialised bytes are handed to bind() */
20284 +       addr.sin_family = AF_INET;
20285 +       if ((hostName == NULL) || (hostName[0] == '\0'))
20286 +               addr.sin_addr.s_addr = htonl(INADDR_ANY);
20287 +       else
20288 +       {
20289 +               struct hostent *hp;
20290 +
20291 +               hp = gethostbyname(hostName);
20292 +               if ((hp == NULL) || (hp->h_addrtype != AF_INET))
20293 +               {
20294 +                       PGRclose_sock(&fd);
20295 +                       return -1;
20296 +               }
20297 +               memmove((char *) &(addr.sin_addr), (char *) hp->h_addr, hp->h_length);
20298 +       }
20299 +       addr.sin_port = htons(portNumber);
20300 +
20301 +       /* bind the address and start listening */
20302 +       err = bind(fd, (struct sockaddr *) & addr, sizeof(addr));
20303 +       if (err < 0)
20304 +       {
20305 +               show_error("%s: bind() failed. (%s)",func, strerror(errno));
20306 +               PGRclose_sock(&fd);
20307 +               return -1;
20308 +       }
20309 +       err = listen(fd, PGLB_MAX_SOCKET_QUEUE);
20310 +       if (err < 0)
20311 +       {
20312 +               show_error("%s: listen() failed. (%s)", func, strerror(errno));
20313 +               PGRclose_sock(&fd);
20314 +               return -1;
20315 +       }
20316 +       return  fd;
20317 +}
20318 +
20319 +int
20320 +PGRcreate_acception(int fd, char * hostName , unsigned short portNumber)
20321 +{
20322 +       char * func = "PGRcreate_acception()";
20323 +       int sock, count = 0;
20324 +       struct sockaddr  addr;
20325 +       socklen_t  len;         /* accept() takes a socklen_t *, not a size_t * */
20326 +       int one = 1;
20327 +       /* Accept one connection on listening socket fd and enable TCP_NODELAY and SO_KEEPALIVE on it; returns the new fd, or -1.
20328 +        * NOTE(review): a listener re-created after an accept() failure only replaces the local fd; the caller's copy is not updated. */
20329 +       len = sizeof(struct sockaddr);
20330 +       while ((sock = accept(fd,&addr,&len)) < 0)
20331 +       {
20332 +               show_error("%s:accept error",func);
20333 +               PGRclose_sock(&fd);
20334 +               if ( count > PGLB_CONNECT_RETRY_TIME)
20335 +               {
20336 +                       return -1;
20337 +               }
20338 +               fd = PGRcreate_recv_socket(hostName , portNumber);
20339 +               count ++;
20340 +               len = sizeof(struct sockaddr);  /* value-result argument: reset before the next accept() */
20341 +       }
20342 +       count = 0;
20343 +       while (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) < 0)
20344 +       {
20345 +               show_error("%s: setsockopt TCP_NODELAY error (%s)",func, strerror(errno));
20346 +               if ( count > PGLB_CONNECT_RETRY_TIME)
20347 +               {
20348 +                       PGRclose_sock(&sock);   /* give up: do not leak the accepted connection */
20349 +                       return -1;
20350 +               }
20351 +               count ++;
20352 +       }
20353 +       count = 0;
20354 +       while (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one)) < 0)
20355 +       {
20356 +               show_error("%s:setsockopt SO_KEEPALIVE error (%s)",func,strerror(errno));
20357 +               if ( count > PGLB_CONNECT_RETRY_TIME)
20358 +               {
20359 +                       PGRclose_sock(&sock);   /* give up: do not leak the accepted connection */
20360 +                       return -1;
20361 +               }
20362 +               count ++;
20363 +       }
20364 +       return  sock;
20365 +}
20366 +
20367 +void
20368 +PGRclose_sock(int * sock)       /* sock: in/out pointer to the descriptor to close */
20369 +{
20370 +       close(*sock);           /* the result of close() is not checked */
20371 +       *sock = -1;             /* store -1 through the pointer so the descriptor reads as closed */
20372 +}
20373 +
20374 +int
20375 +PGRread_byte(int sock,char * buf,int len, int flag)
20376 +{
20377 +       char * func = "PGRread_byte()";
20378 +       int r;
20379 +       char * read_ptr;
20380 +       int read_size = 0;
20381 +       int max_buf_size ;
20382 +       int pid;
20383 +
20384 +       pid = getpid();
20385 +       max_buf_size = len;
20386 +       read_ptr = (char*)buf;
20387 +       for (;;)
20388 +       {
20389 +               r = recv(sock,read_ptr + read_size ,max_buf_size - read_size, flag);
20390 +               if (r < 0)
20391 +               {
20392 +                       if (errno == EINTR)
20393 +                       {
20394 +                               continue;
20395 +                       }
20396 +#ifdef EAGAIN
20397 +                       if (errno == EAGAIN)
20398 +                       {
20399 +                               return read_size;
20400 +                       }
20401 +#endif
20402 +#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
20403 +                       if (errno == EWOULDBLOCK)
20404 +                       {
20405 +                               show_error("%s:no data (%s)",func,strerror(errno));
20406 +                               return read_size;
20407 +                       }
20408 +#endif
20409 +#ifdef ECONNRESET
20410 +                       if (errno == ECONNRESET)
20411 +                       {
20412 +                               PGRclose_sock(&sock);
20413 +                               show_error("%s:connection reset (%s)",func, strerror(errno));
20414 +                               return -1;
20415 +                       }
20416 +#endif
20417 +                       show_error("%s:recv() failed. (%s)",func,strerror(errno));
20418 +                       read_size = -1;
20419 +                       break;
20420 +               }
20421 +               if (r > 0)
20422 +               {
20423 +                       read_size += r;
20424 +                       if (max_buf_size == read_size)
20425 +                       {
20426 +                               break;
20427 +                       }
20428 +                       break;
20429 +               }
20430 +               if (read_size)
20431 +               {
20432 +                       return read_size;
20433 +               }
20434 +               else
20435 +               {
20436 +                       return -1;
20437 +               }
20438 +       }
20439 +
20440 +       return read_size;
20441 +}
20442 +
20443 +int
20444 +PGRcreate_cluster_socket( int * sock, ClusterTbl * ptr )
20445 +{
20446 +       char * caller_name = "PGRcreate_cluster_socket()";
20447 +
20448 +       /*
20449 +        * Connect to the cluster DB described by ptr; the connected descriptor
20450 +        * is handed back through sock by create_send_socket().  Returns that
20451 +        * helper's status, or STATUS_ERROR when ptr is NULL.
20452 +        * A PGRis_connection_full(ptr) pre-check (returning STATUS_ERROR when
20453 +        * it yielded 1) used to sit here and is disabled.
20454 +        */
20455 +       if (ptr == (ClusterTbl *) NULL)
20456 +       {
20457 +               /* no table entry to connect to */
20458 +               show_error("%s:ClusterTbl is not initialize",caller_name);
20459 +               return STATUS_ERROR;
20460 +       }
20461 +
20462 +       /* hand off to the TCP connect helper */
20463 +       return create_send_socket(sock, ptr->hostName, ptr->port);
20464 +}
20465 +
20466 +static int
20467 +create_send_socket(int * fdP, char * hostName , unsigned short portNumber)
20468 +{
20469 +       char * func = "create_send_socket()";
20470 +       int sock;
20471 +       size_t  len = 0;
20472 +       struct sockaddr_in addr;
20473 +       int fd;
20474 +       int one = 1;
20475 +
20476 +#ifdef PRINT_DEBUG
20477 +       show_debug("%s: host:%s port:%d",func, hostName,portNumber);
20478 +#endif                 
20479 +
20480 +       memset((char *)&addr,0,sizeof(addr));
20481 +
20482 +       if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
20483 +       {
20484 +               * fdP = -1;
20485 +               
20486 +               show_error("%s:socket() failed. (%s)",func, strerror(errno));
20487 +               return STATUS_ERROR;
20488 +       }
20489 +       if ((setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one))) == -1)
20490 +       {
20491 +               PGRclose_sock(&fd);
20492 +               * fdP = -1;
20493 +               show_error("%s:setsockopt() failed. (%s)", func, strerror(errno));
20494 +               return STATUS_ERROR;
20495 +               return STATUS_ERROR;
20496 +       }
20497 +       if ((setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one))) == -1)
20498 +       {
20499 +               PGRclose_sock(&fd);
20500 +               * fdP = -1;
20501 +               show_error("%s:setsockopt() failed. (%s)", func, strerror(errno));
20502 +               return STATUS_ERROR;
20503 +       }
20504 +       if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) < 0)
20505 +       {
20506 +               PGRclose_sock(&fd);
20507 +               * fdP = -1;
20508 +               show_error("%s:setsockopt() failed. (%s)",func, strerror(errno));
20509 +               return STATUS_ERROR;
20510 +       }
20511 +       
20512 +       addr.sin_family = AF_INET;
20513 +       if ((hostName == NULL) || (hostName[0] == '\0'))
20514 +               addr.sin_addr.s_addr = htonl(INADDR_ANY);
20515 +       else
20516 +       {
20517 +               struct hostent *hp;
20518 +
20519 +               hp = gethostbyname(hostName);
20520 +               if ((hp == NULL) || (hp->h_addrtype != AF_INET))
20521 +               {
20522 +                       PGRclose_sock(&fd);
20523 +                       * fdP = -1;
20524 +                       return STATUS_ERROR;
20525 +               }
20526 +               memmove((char *) &(addr.sin_addr), (char *) hp->h_addr, hp->h_length);
20527 +       }
20528 +
20529 +       addr.sin_port = htons(portNumber);
20530 +       len = sizeof(struct sockaddr_in);
20531 +       
20532 +       if ((sock = connect(fd,(struct sockaddr*)&addr,len)) < 0)
20533 +       {
20534 +               PGRclose_sock(&fd);
20535 +               * fdP = -1;
20536 +               return STATUS_ERROR;
20537 +       }
20538 +       
20539 +       * fdP = fd;
20540 +       return  STATUS_OK;
20541 +}
20542 +
20543 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/AUTHORS pgcluster-1.7.0rc7/src/pgcluster/pgrp/AUTHORS
20544 --- postgresql-8.2.4/src/pgcluster/pgrp/AUTHORS 1970-01-01 01:00:00.000000000 +0100
20545 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/AUTHORS       2007-02-18 22:52:17.000000000 +0100
20546 @@ -0,0 +1,3 @@
20547 +Authors of pgrp
20548 +
20549 +pgrp was written by Atsushi Mitani
20550 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/COPYING pgcluster-1.7.0rc7/src/pgcluster/pgrp/COPYING
20551 --- postgresql-8.2.4/src/pgcluster/pgrp/COPYING 1970-01-01 01:00:00.000000000 +0100
20552 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/COPYING       2007-02-18 22:52:17.000000000 +0100
20553 @@ -0,0 +1,12 @@
20554 +Copyright (c) 2003-2006        Atsushi Mitani
20555 +
20556 +Permission to use, copy, modify, and distribute this software and
20557 +its documentation for any purpose and without fee is hereby
20558 +granted, provided that the above copyright notice appear in all
20559 +copies and that both that copyright notice and this permission
20560 +notice appear in supporting documentation, and that the name of the
20561 +author not be used in advertising or publicity pertaining to
20562 +distribution of the software without specific, written prior
20563 +permission. The author makes no representations about the
20564 +suitability of this software for any purpose.  It is provided "as
20565 +is" without express or implied warranty.
20566 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/Makefile pgcluster-1.7.0rc7/src/pgcluster/pgrp/Makefile
20567 --- postgresql-8.2.4/src/pgcluster/pgrp/Makefile        1970-01-01 01:00:00.000000000 +0100
20568 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/Makefile      2007-02-18 22:52:17.000000000 +0100
20569 @@ -0,0 +1,41 @@
20570 +#-------------------------------------------------------------------------
20571 +#
20572 +# Makefile for src/pgcluster/pgrp
20573 +#
20574 +#-------------------------------------------------------------------------
20575 +
20576 +subdir = src/pgcluster/pgrp
20577 +top_builddir = ../../..
20578 +include $(top_builddir)/src/Makefile.global
20579 +
20580 +# this setup is for V2 protocol
20581 +#OBJS= cascade.o conf.o main.o recovery.o replicate.o rlog.o 
20582 +# this setup is for V3 protocol
20583 +OBJS= pqformat.o cascade.o conf.o main.o recovery.o replicate.o rlog.o lifecheck.o
20584 +# objects from other directories that are linked into pgreplicate
20585 +EXTRA_OBJS = $(top_builddir)/src/backend/libpq/replicate_com.o ../libpgc/SUBSYS.o
20586 +
20587 +CFLAGS += -DPRINT_DEBUG
20588 +override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) -DBINDIR=\"$(bindir)\"
20589 +all: pgreplicate
20590 +# EXTRA_OBJS are linked below, so list them as prerequisites: pgreplicate is relinked when they change
20591 +pgreplicate: $(OBJS) $(EXTRA_OBJS) $(libpq_builddir)/libpq.a
20592 +       $(CC) $(CFLAGS) $(OBJS) $(EXTRA_OBJS) $(libpq) $(libpq_builddir)/libpq.a $(LDFLAGS) $(LIBS) -o $@
20593 +
20594 +install: all installdirs
20595 +       $(INSTALL_PROGRAM) pgreplicate$(X) $(DESTDIR)$(bindir)/pgreplicate$(X)
20596 +       $(INSTALL_DATA) pgreplicate.conf.sample  $(DESTDIR)$(datadir)/pgreplicate.conf.sample
20597 +
20598 +installdirs:
20599 +       $(mkinstalldirs) $(DESTDIR)$(bindir)
20600 +       $(mkinstalldirs) $(DESTDIR)$(datadir)
20601 +
20602 +uninstall:
20603 +       rm -f $(addprefix $(DESTDIR)$(bindir)/, pgreplicate$(X))
20604 +       rm -f $(DESTDIR)$(datadir)/pgreplicate.conf.sample
20605 +
20606 +clean distclean maintainer-clean:
20607 +       rm -f pgreplicate$(X) $(OBJS) 
20608 +.PHONY: clean_obj
20609 +clean_obj:
20610 +       rm -f $(OBJS)
20611 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/cascade.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/cascade.c
20612 --- postgresql-8.2.4/src/pgcluster/pgrp/cascade.c       1970-01-01 01:00:00.000000000 +0100
20613 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/cascade.c     2007-02-18 22:52:17.000000000 +0100
20614 @@ -0,0 +1,928 @@
20615 +/*--------------------------------------------------------------------
20616 + * FILE:
20617 + *     cascade.c
20618 + *
20619 + * NOTE:
20620 + *     This file is composed of the functions to call with the source
20621 + *     at pgreplicate for backup and cascade .
20622 + *
20623 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
20624 + *--------------------------------------------------------------------
20625 + */
20626 +#ifdef USE_REPLICATION
20627 +
20628 +#include "postgres.h"
20629 +#include "postgres_fe.h"
20630 +
20631 +#include <stdio.h>
20632 +#include <unistd.h>
20633 +#ifdef HAVE_SYS_TYPES_H
20634 +#include <sys/types.h>
20635 +#endif
20636 +#ifdef HAVE_FCNTL_H
20637 +#include <fcntl.h>
20638 +#endif
20639 +#include <errno.h>
20640 +#include <ctype.h>
20641 +#include <time.h>
20642 +#include <sys/ipc.h>
20643 +#include <sys/shm.h>
20644 +#include <sys/sem.h>
20645 +#include <signal.h>
20646 +#include <sys/socket.h>
20647 +#ifdef HAVE_UNISTD_H
20648 +#include <unistd.h>
20649 +#endif
20650 +#include <netdb.h>
20651 +#ifdef HAVE_NETINET_TCP_H
20652 +#include <netinet/tcp.h>
20653 +#endif
20654 +#include <dirent.h>
20655 +#include <arpa/inet.h>
20656 +
20657 +#ifdef HAVE_CRYPT_H
20658 +#include <crypt.h>
20659 +#endif
20660 +
20661 +#ifdef MULTIBYTE
20662 +#include "mb/pg_wchar.h"
20663 +#endif
20664 +
20665 +#include "libpq-fe.h"
20666 +#include "libpq-int.h"
20667 +#include "fe-auth.h"
20668 +
20669 +#include "access/xact.h"
20670 +#include "replicate_com.h"
20671 +#include "pgreplicate.h"
20672 +
20673 +#if 0
20674 +static int count_cascade(int flag);
20675 +static void PGRinit_cascade_child(void); 
20676 +#endif
20677 +
20678 +static int fixup_socket_for_cascades(int *sock ,ReplicateServerInfo * target);
20679 +static ReplicateServerInfo * get_cascade_data(int * cnt, int flag);
20680 +static int add_cascade_data(ReplicateHeader * header, ReplicateServerInfo * add_data);
20681 +static int update_cascade_data(ReplicateHeader * header, ReplicateServerInfo * update_data);
20682 +static void write_cascade_status_file(ReplicateServerInfo * cascade);
20683 +static int notice_cascade_data(int sock);
20684 +static int notice_cascade_data_to_cluster_db(void);
20685 +
20686 +/**
20687 + * Socket variables, moved here from Cascade_Inf->(lower|upper)->sock.
20688 + * Cascade_Inf lives in shared memory, so a descriptor stored there may never have been opened in the current process, and cascade calls sometimes failed with EBADF.
20689 + * 05/10/05 tanida@sraoss.co.jp
20690 + */
20691 +
20692 +static int lsock=-1; /* socket for lower-cascade. */
20693 +static int usock=-1; /* socket for upper-cascade. */
20694 +
20695 +/*--------------------------------------
20696 + * PROTOTYPE DECLARATION
20697 + *--------------------------------------
20698 + */
20699 +
20700 +#if 0  /* compiled out: count_cascade() and PGRinit_cascade_child() are not built */
20701 +static int
20702 +count_cascade(int flag)
20703 +{
20704 +       int cnt = 0;            /* table slots visited */
20705 +       int cascade_cnt = 0;    /* slots whose useFlag is DB_TBL_USE */
20706 +       ReplicateServerInfo * cascade = NULL;
20707 +       /* returns the number of in-use cascade entries selected by flag, 0 when the tables are missing */
20708 +       if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
20709 +       {
20710 +               return 0;
20711 +       }
20712 +
20713 +       /* count cascading replication servers */
20714 +       switch (flag)
20715 +       {
20716 +               case UPPER_CASCADE:
20717 +               case ALL_CASCADE:
20718 +                       cascade = Cascade_Tbl;  /* start at the first table slot */
20719 +                       break;
20720 +               case LOWER_CASCADE:
20721 +                       cascade = Cascade_Inf->myself;  /* start at our own entry */
20722 +                       break;
20723 +       }
20724 +       /* any other flag leaves cascade NULL and yields 0 */
20725 +       if (cascade == NULL)
20726 +       {
20727 +               return 0;
20728 +       }
20729 +       while (cascade->useFlag != DB_TBL_END)
20730 +       {
20731 +               if (cascade->useFlag == DB_TBL_USE)
20732 +               {
20733 +                       cascade_cnt ++;
20734 +               }
20735 +               if ((flag == UPPER_CASCADE) &&
20736 +                       (cascade == Cascade_Inf->myself))
20737 +               {
20738 +                       break;  /* an upper count stops at (and includes) our own entry */
20739 +               }
20740 +               cnt ++;
20741 +               if (cnt >= MAX_DB_SERVER -1 )
20742 +               {
20743 +                       break;  /* slot limit reached */
20744 +               }
20745 +               cascade ++;
20746 +       }
20747 +       return cascade_cnt;
20748 +}
20749 +/* calls fixup_socket_for_cascades() with a NULL target for both cascade sockets */
20750 +static void 
20751 +PGRinit_cascade_child(void) {
20752 +      fixup_socket_for_cascades(&usock,NULL);
20753 +      fixup_socket_for_cascades(&lsock,NULL);
20754 +}
20755 +#endif /* if 0 */
20756 +
20757 +static ReplicateServerInfo * 
20758 +get_cascade_data(int * cnt, int flag)
20759 +{
20760 +       /*
20761 +        * Build a malloc'ed, zero-filled array of MAX_DB_SERVER slots holding
20762 +        * every in-use cascade entry, with integers in network byte order.
20763 +        *   UPPER_CASCADE : walk from Cascade_Tbl up to and including myself
20764 +        *   ALL_CASCADE   : walk from Cascade_Tbl
20765 +        *   LOWER_CASCADE : walk from Cascade_Inf->myself
20766 +        * *cnt receives the number of entries copied.  Returns NULL (with
20767 +        * *cnt = 0) on allocation failure, unknown flag or missing start
20768 +        * entry; otherwise the malloc'ed buffer is returned.
20769 +        */
20770 +       char * func = "get_cascade_data()";
20771 +       int visited = 0;
20772 +       int size = sizeof(ReplicateServerInfo) * MAX_DB_SERVER;
20773 +       ReplicateServerInfo * buf = (ReplicateServerInfo *)malloc(size);
20774 +       ReplicateServerInfo * out = buf;
20775 +       ReplicateServerInfo * cascade = NULL;
20776 +
20777 +       if (buf == (ReplicateServerInfo *)NULL)
20778 +       {
20779 +               show_error("%s:malloc failed: (%s)",func,strerror(errno));
20780 +               *cnt = 0;
20781 +               return NULL;
20782 +       }
20783 +       memset(buf,0,size);
20784 +
20785 +       /* choose where the walk starts; an unknown flag leaves cascade NULL */
20786 +       if ((flag == UPPER_CASCADE) || (flag == ALL_CASCADE))
20787 +       {
20788 +               cascade = Cascade_Tbl;
20789 +       }
20790 +       else if (flag == LOWER_CASCADE)
20791 +       {
20792 +               cascade = Cascade_Inf->myself;
20793 +       }
20794 +
20795 +       if (cascade == NULL)
20796 +       {
20797 +               /* nothing to walk: release the buffer again */
20798 +               free(buf);
20799 +               *cnt = 0;
20800 +               return NULL;
20801 +       }
20802 +
20803 +       /* the table walk is done while holding the cascade semaphore */
20804 +       PGRsem_lock(CascadeSemID,1);
20805 +       for (; cascade->useFlag != DB_TBL_END; cascade ++)
20806 +       {
20807 +               if (cascade->useFlag == DB_TBL_USE) 
20808 +               {
20809 +                       /* copy the entry, converting integers to network order */
20810 +                       out->useFlag = htonl(cascade->useFlag);
20811 +                       strncpy(out->hostName,cascade->hostName,sizeof(cascade->hostName));
20812 +                       out->portNumber = htons(cascade->portNumber);
20813 +                       out->recoveryPortNumber = htons(cascade->recoveryPortNumber);
20814 +                       out->lifecheckPortNumber = htons(cascade->lifecheckPortNumber);
20815 +                       out ++;
20816 +               }
20817 +               /* an upper-cascade walk ends with our own entry */
20818 +               if ((flag == UPPER_CASCADE) && (cascade == Cascade_Inf->myself))
20819 +               {
20820 +                       break;
20821 +               }
20822 +               /* stop at the slot limit or at the last registered entry */
20823 +               visited ++;
20824 +               if ((visited >= MAX_DB_SERVER -1) || (Cascade_Inf->end == cascade))
20825 +               {
20826 +                       break;
20827 +               }
20828 +       }
20829 +       *cnt = (int)(out - buf);
20830 +       PGRsem_unlock(CascadeSemID,1);
20831 +
20832 +       return buf;
20833 +}
20834 +
20835 +static int
20836 +update_cascade_data(ReplicateHeader * header, ReplicateServerInfo * update_data)
20837 +{
20838 +       char * func = "update_cascade_data()";
20839 +       int size = 0;
20840 +       int cnt = 0;
20841 +       ReplicateServerInfo * ptr = NULL;
20842 +       ReplicateServerInfo * cascade = NULL;
20843 +       char hostName[HOSTNAME_MAX_LENGTH];
20844 +       /* Rebuild Cascade_Tbl from the entries in update_data (header->query_size bytes, network byte order).
20845 +        * Cascade_Inf is flagged DB_TBL_INIT during the rebuild and DB_TBL_USE afterwards.  Returns STATUS_OK or STATUS_ERROR. */
20846 +       show_debug("executing %s",func);
20847 +       if ((header == NULL ) || ( update_data == NULL))
20848 +       {
20849 +               show_error("%s:receive data is wrong",func);
20850 +               return STATUS_ERROR;
20851 +       }
20852 +       if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
20853 +       {
20854 +               show_error("%s:config data read error",func);
20855 +               return STATUS_ERROR;
20856 +       }
20857 +
20858 +       /* one slot must stay free for the DB_TBL_END marker, hence >= */
20859 +       size = ntohl(header->query_size);
20860 +       cnt = size / sizeof(ReplicateServerInfo);
20861 +       if (cnt >= MAX_DB_SERVER)
20862 +       {
20863 +               show_error("%s:update cascade data is too large. it's more than %d", func,MAX_DB_SERVER);
20864 +               return STATUS_ERROR;
20865 +       }
20866 +
20867 +       Cascade_Inf->useFlag = DB_TBL_INIT;
20868 +       fixup_socket_for_cascades(&usock,NULL);
20869 +       fixup_socket_for_cascades(&lsock,NULL);
20870 +
20871 +       Cascade_Inf->upper = NULL;
20872 +       Cascade_Inf->lower = NULL;
20873 +       memset(hostName,0,sizeof(hostName));    /* stays NUL-terminated even if gethostname() fails or truncates */
20874 +       gethostname(hostName,sizeof(hostName) - 1);
20875 +       ptr = update_data;
20876 +       cascade = Cascade_Tbl;
20877 +       memset(cascade,0,(sizeof(ReplicateServerInfo)*MAX_DB_SERVER));
20878 +       Cascade_Inf->top = cascade;
20879 +       while (cnt > 0)
20880 +       {
20881 +               /* convert one received entry to host byte order */
20882 +               cascade->useFlag = ntohl(ptr->useFlag);
20883 +               strncpy(cascade->hostName,ptr->hostName,sizeof(cascade->hostName));
20884 +               cascade->hostName[sizeof(cascade->hostName) - 1] = '\0';       /* the name comes off the wire: force termination */
20885 +               cascade->portNumber = ntohs(ptr->portNumber);
20886 +               cascade->recoveryPortNumber = ntohs(ptr->recoveryPortNumber);
20887 +               cascade->lifecheckPortNumber = ntohs(ptr->lifecheckPortNumber);
20888 +               /* remember the slot that describes this very server */
20889 +               if ((!strncmp(cascade->hostName,hostName,sizeof(cascade->hostName)))  &&
20890 +                       (cascade->portNumber == Port_Number) &&
20891 +                       (cascade->recoveryPortNumber == Recovery_Port_Number))
20892 +               {
20893 +                       Cascade_Inf->myself = cascade;
20894 +               }
20895 +               Cascade_Inf->end = cascade;
20896 +               cascade ++;
20897 +               ptr ++;
20898 +               cnt --;
20899 +               cascade->useFlag = DB_TBL_END;
20900 +       }
20901 +       Cascade_Inf->useFlag = DB_TBL_USE;
20902 +
20903 +       return STATUS_OK;
20904 +}
20905 +
20906 +static int
20907 +add_cascade_data(ReplicateHeader * header, ReplicateServerInfo * add_data)
20908 +{
20909 +       char *func = "add_cascade_data()";
20910 +       int size = 0;
20911 +       int cnt = 0;
20912 +       ReplicateServerInfo * ptr = NULL;
20913 +       ReplicateServerInfo * cascade = NULL;
20914 +       char hostName[HOSTNAME_MAX_LENGTH];
20915 +       /* Append the entries of add_data (header->query_size bytes, network byte order) behind our own slot; returns STATUS_OK or STATUS_ERROR. */
20916 +       if ((header == NULL ) || ( add_data == NULL))
20917 +       {
20918 +               show_error("%s:receive data is wrong",func);
20919 +               return STATUS_ERROR;
20920 +       }
20921 +       if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL) || (Cascade_Inf->myself == NULL))
20922 +       {
20923 +               show_error("%s:config data read error",func);
20924 +               return STATUS_ERROR;
20925 +       }
20926 +       size = ntohl(header->query_size);
20927 +       cnt = size / sizeof(ReplicateServerInfo);
20928 +       if ((cnt >= MAX_DB_SERVER) || ((Cascade_Inf->myself - Cascade_Tbl) + cnt + 1 >= MAX_DB_SERVER))
20929 +       {       /* new entries plus the DB_TBL_END marker must fit; assumes myself points into Cascade_Tbl, as update_cascade_data() sets it */
20930 +               show_error("%s:addtional cascade data is too large. it's more than %d", func,MAX_DB_SERVER);
20931 +               return STATUS_ERROR;
20932 +       }
20933 +
20934 +       Cascade_Inf->useFlag = DB_TBL_INIT;
20935 +       fixup_socket_for_cascades(&lsock,NULL);
20936 +       Cascade_Inf->lower = NULL;
20937 +       memset(hostName,0,sizeof(hostName));    /* stays NUL-terminated even if gethostname() fails or truncates */
20938 +       gethostname(hostName,sizeof(hostName) - 1);
20939 +       ptr = add_data;
20940 +       cascade = Cascade_Inf->myself;
20941 +       cascade ++;
20942 +       while (cnt > 0)
20943 +       {
20944 +               cascade->useFlag = ntohl(ptr->useFlag);
20945 +               strncpy(cascade->hostName,ptr->hostName,sizeof(cascade->hostName));
20946 +               cascade->hostName[sizeof(cascade->hostName) - 1] = '\0';       /* the name comes off the wire: force termination */
20947 +               cascade->portNumber = ntohs(ptr->portNumber);
20948 +               cascade->recoveryPortNumber = ntohs(ptr->recoveryPortNumber);
20949 +               cascade->lifecheckPortNumber = ntohs(ptr->lifecheckPortNumber);
20950 +                cascade->replicate_id=-1;
20951 +               cascade->response_mode=-1;
20952 +               Cascade_Inf->end = cascade;
20953 +               /* an entry describing this very server does not advance the slot, so the next entry overwrites it */
20954 +               if ((!strncmp(cascade->hostName,hostName,sizeof(cascade->hostName)))  &&
20955 +                       (cascade->portNumber == Port_Number) &&
20956 +                       (cascade->recoveryPortNumber == Recovery_Port_Number))
20957 +               {
20958 +                       ptr ++;
20959 +                       cnt --;
20960 +                       continue;
20961 +               }
20962 +               cascade ++;
20963 +               cascade->useFlag = DB_TBL_END;
20964 +               ptr ++;
20965 +               cnt --;
20966 +       }
20967 +       Cascade_Inf->useFlag = DB_TBL_USE;
20968 +       return STATUS_OK;
20969 +}
20970 +
20971 +int
20972 +PGRstartup_cascade(void)
20973 +{
20974 +       /* Send our own entry and the entries after it to the upper server through PGRsend_upper_cascade()
20975 +        * (CMD_TYPE_ADD) and, when the answer is a CMD_TYPE_UPDATE_ALL packet, install it with update_cascade_data().
20976 +        * Returns STATUS_OK or STATUS_ERROR. */
20977 +       char * func = "PGRstartup_cascade()";
20978 +       int cnt = 0;
20979 +       int status = STATUS_OK;
20980 +       ReplicateHeader header;
20981 +       ReplicateServerInfo * cascade = NULL;
20982 +       ReplicateServerInfo * buf = NULL;
20983 +
20984 +       if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
20985 +       {
20986 +               show_error("%s:config data read error",func);
20987 +               return STATUS_ERROR;
20988 +       }
20989 +       /* count lower server */
20990 +       cascade = Cascade_Inf->myself;
20991 +       if (cascade == NULL)
20992 +       {
20993 +               show_error("%s:cascade data initialize error",func);
20994 +               return STATUS_ERROR;
20995 +       }
20996 +       buf = get_cascade_data(&cnt,LOWER_CASCADE);
20997 +       if (cnt <= 0)
20998 +       {
20999 +               show_error("%s:cascade data get error",func);
21000 +               free(buf);      /* a buffer can come back even when nothing was copied; free(NULL) is a no-op */
21001 +               return STATUS_ERROR;
21002 +       }
21003 +
21004 +       memset(&header,0,sizeof(ReplicateHeader));
21005 +       header.cmdSys = CMD_SYS_CASCADE;
21006 +       header.cmdSts = CMD_STS_TO_UPPER;
21007 +       header.cmdType = CMD_TYPE_ADD;
21008 +       header.query_size = htonl(sizeof(ReplicateServerInfo) * cnt);
21009 +
21010 +       status = PGRsend_upper_cascade(&header, (char *)buf);
21011 +       free(buf);
21012 +       if (status == STATUS_OK)
21013 +       {
21014 +               memset(&header,0,sizeof(ReplicateHeader));
21015 +               buf = PGRrecv_cascade_answer( Cascade_Inf->upper, &header);
21016 +               if (buf == NULL)
21017 +               {
21018 +                       status=STATUS_ERROR;
21019 +               }
21020 +               else
21021 +               {
21022 +                       if ((header.cmdSys == CMD_SYS_CASCADE) &&
21023 +                           (header.cmdSts == CMD_STS_TO_LOWER) &&
21024 +                           (header.cmdType == CMD_TYPE_UPDATE_ALL))
21025 +                               status = update_cascade_data(&header,buf);
21026 +                       free(buf);      /* release the answer whether or not it was the expected packet */
21027 +               }
21028 +       }
21029 +       show_debug("%s:startup packet result is %d",func,status);
21030 +       return status;
21031 +}
21032 +
21033 +int
21034 +PGRsend_lower_cascade(ReplicateHeader * header, char * query)
21035 +{
21036 +               /* Send header + query to a lower cascade server over lsock. */
21037 +               /* Returns STATUS_OK after a successful send, STATUS_ERROR when no usable lower server is left. */
21038 +               char * func = "PGRsend_lower_cascade()";
21039 +               ReplicateServerInfo *lower = PGRget_lower_cascade();
21040 +
21041 +
21042 +               while(lower!=NULL) 
21043 +               {
21044 +                               /**
21045 +                                * check lower_cascade validity:
21046 +                                * lsock must be open and the send must succeed.
21047 +                                */                     
21048 +                               if(lsock!=-1 &&                    
21049 +                                  PGRsend_cascade(lsock,header,query)==STATUS_OK) 
21050 +                               {
21051 +                                               return STATUS_OK;
21052 +                               }
21053 +                               else
21054 +                               {
21055 +                                               /**
21056 +                                                * current lower cascade is missing.
21057 +                                                * fix socket , or go to next one. 
21058 +                                                * a server that cannot be reconnected is marked DB_TBL_ERROR.
21059 +                                                */
21060 +                                               while(  lower!=NULL &&
21061 +                                                               fixup_socket_for_cascades(&lsock,lower)!=STATUS_OK)
21062 +                                               {
21063 +                                                               show_error("%s:lower cascade maybe down,challenge new one.",func);
21064 +                                                               PGRset_cascade_server_status(lower,DB_TBL_ERROR);
21065 +                                                               lower =PGRget_lower_cascade();
21066 +                                               }
21067 +                               }
21068 +                               Cascade_Inf->lower=lower;       /* remember the server now in use (NULL when none is left); the send is retried by the outer loop */
21069 +        }
21070 +
21071 +
21072 +               return STATUS_ERROR;
21073 +}
21074 +
21075 +
21076 +int
21077 +PGRsend_upper_cascade(ReplicateHeader * header, char * query)
21078 +{
21079 +       char * func = "PGRsend_upper_cascade()";
21080 +       ReplicateServerInfo *upper = PGRget_upper_cascade();
21081 +       /* Send header + query to an upper cascade server over usock. */
21082 +       /* Returns STATUS_OK after a successful send, STATUS_ERROR when no usable upper server is left. */
21083 +       while(upper!=NULL)
21084 +       {                               
21085 +                       /**
21086 +                        * check upper_cascade validity:
21087 +                        * usock must be open and the send must succeed.
21088 +                        */                     
21089 +                       if(usock!=-1 && 
21090 +                          PGRsend_cascade(usock,header,query)==STATUS_OK) 
21091 +                       {
21092 +                                       return STATUS_OK;
21093 +                       }
21094 +                       else
21095 +                       {
21096 +                                       /**
21097 +                                        * current upper cascade is missing.
21098 +                                        * fix socket , or go to next one. 
21099 +                                        * a server that cannot be reconnected is marked DB_TBL_ERROR.
21100 +                                        */
21101 +                                       while(  upper!=NULL &&
21102 +                                                       fixup_socket_for_cascades(&usock,upper)!=STATUS_OK) 
21103 +                                       {
21104 +                                                       show_error("%s:upper cascade maybe down,challenge new one.",func);
21105 +                                                       PGRset_cascade_server_status(upper,DB_TBL_ERROR);
21106 +                                                       upper =PGRget_upper_cascade();
21107 +                                       }
21108 +                       }
21109 +                       Cascade_Inf->upper=upper;       /* remember the server now in use (NULL when none is left); the send is retried by the outer loop */
21110 +       }
21111 +
21112 +       return STATUS_ERROR;
21113 +}
21114 +
21115 +ReplicateServerInfo *
21116 +PGRget_lower_cascade(void)      /* first entry after "myself" whose useFlag is DB_TBL_USE, or NULL */
21117 +{
21118 +       char * func = "PGRget_lower_cascade()";
21119 +       ReplicateServerInfo * cascade = NULL;   /* cursor over the cascade table */
21120 +
21121 +       if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
21122 +       {
21123 +               show_error("%s:config data read error",func);
21124 +               return NULL;
21125 +       }
21126 +
21127 +       /* search the entries that follow this server's own entry */
21128 +
21129 +       cascade = Cascade_Inf->myself;
21130 +       if (cascade == NULL)
21131 +       {
21132 +               show_error("%s:cascade data initialize error",func);
21133 +               return NULL;
21134 +       }
21135 +       if (cascade->useFlag != DB_TBL_END)     /* step past our own entry unless it is the end marker */
21136 +       {
21137 +               cascade ++;
21138 +       }
21139 +       while (cascade->useFlag != DB_TBL_END)  /* the table is terminated by a DB_TBL_END entry */
21140 +       {
21141 +#ifdef PRINT_DEBUG
21142 +               show_debug("%s:lower cascade search[%d]@[%s] use[%d]",
21143 +                       func,
21144 +                       cascade->portNumber,
21145 +                       cascade->hostName,
21146 +                       cascade->useFlag);
21147 +#endif                 
21148 +               if (cascade->useFlag == DB_TBL_USE)
21149 +               {
21150 +#ifdef PRINT_DEBUG
21151 +                       show_debug("%s:find lower cascade",func);
21152 +#endif                 
21153 +                       return cascade;
21154 +               }
21155 +               cascade ++;
21156 +       }
21157 +       return NULL;    /* no entry in use below this server */
21158 +}
21159 +
21160 +ReplicateServerInfo *
21161 +PGRget_upper_cascade(void)      /* nearest entry before "myself" whose useFlag is DB_TBL_USE, or NULL */
21162 +{
21163 +       char * func = "PGRget_upper_cascade()";
21164 +       ReplicateServerInfo * cascade = NULL;   /* cursor over the cascade table */
21165 +
21166 +       if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
21167 +       {
21168 +               show_error("%s:config data read error",func);
21169 +               return NULL;
21170 +       }
21171 +
21172 +
21173 +       /* walk backwards from the entry just before this server, stopping at Cascade_Inf->top */
21174 +       cascade = Cascade_Inf->myself;
21175 +       if ((cascade == NULL) || (Cascade_Inf->top == cascade)) /* nothing above us when we are the top entry */
21176 +       {
21177 +               return NULL;
21178 +       }
21179 +       cascade --;
21180 +       while (cascade != NULL) /* the loop is actually left through the return or the break below */
21181 +       {
21182 +               if (cascade->useFlag == DB_TBL_USE)
21183 +               {
21184 +                       return cascade;
21185 +               }
21186 +               if (Cascade_Inf->top == cascade)        /* reached the first entry without finding one in use */
21187 +               {
21188 +                       break;
21189 +               }
21190 +               cascade --;
21191 +       }
21192 +       return NULL;
21193 +}
21194 +
21195 +static void
21196 +write_cascade_status_file(ReplicateServerInfo * cascade)        /* log the entry's current useFlag to StatusFp */
21197 +{
21198 +       switch( cascade->useFlag)       /* values without a case below are not logged */
21199 +       {
21200 +               case DB_TBL_FREE:
21201 +                       PGRwrite_log_file(StatusFp,"cascade(%s) port(%d) free",
21202 +                                       cascade->hostName,
21203 +                                       cascade->portNumber);
21204 +                       break;
21205 +               case DB_TBL_INIT:
21206 +                       PGRwrite_log_file(StatusFp,"cascade(%s) port(%d) initialize",
21207 +                                       cascade->hostName,
21208 +                                       cascade->portNumber);
21209 +                       break;
21210 +               case DB_TBL_USE:
21211 +                       PGRwrite_log_file(StatusFp,"cascade(%s) port(%d) start use",
21212 +                                       cascade->hostName,
21213 +                                       cascade->portNumber);
21214 +                       break;
21215 +               case DB_TBL_ERROR:
21216 +                       PGRwrite_log_file(StatusFp,"cascade(%s) port(%d) error",
21217 +                                       cascade->hostName,
21218 +                                       cascade->portNumber);
21219 +                       break;
21220 +               case DB_TBL_TOP:
21221 +                       PGRwrite_log_file(StatusFp,"cascade(%s) port(%d) become top",
21222 +                                       cascade->hostName,
21223 +                                       cascade->portNumber);
21224 +                       break;
21225 +       }
21226 +}
21227 +
21228 +void
21229 +PGRset_cascade_server_status(ReplicateServerInfo * cascade, int status) /* set cascade->useFlag and log the change */
21230 +{
21231 +       if (cascade == NULL)    /* nothing to update */
21232 +       {
21233 +               return;
21234 +       }
21235 +       if (cascade->useFlag != status) /* write to the status file only on a real transition */
21236 +       {
21237 +               cascade->useFlag = status;
21238 +               write_cascade_status_file(cascade);
21239 +       }
21240 +}
21241 +
21242 +ReplicateServerInfo *
21243 +PGRrecv_cascade_answer(ReplicateServerInfo * cascade,ReplicateHeader * header)  /* read one packet from the given cascade server */
21244 +{
21245 +       ReplicateServerInfo * answer = NULL;    /* result of PGRread_packet(); callers in this file release it with free() */
21246 +        int sock;
21247 +
21248 +       if ((cascade == NULL) || (header == NULL))
21249 +       {
21250 +               return NULL;
21251 +       }
21252 +
21253 +       /* FIXME: ReplicateServerInfo->sock must be removed in cascading. */
21254 +       if(cascade == Cascade_Inf->upper )      /* current upper server: use usock */
21255 +       {
21256 +         sock=usock;
21257 +       }
21258 +       else if (cascade == Cascade_Inf->lower )        /* current lower server: use lsock */
21259 +       {
21260 +         sock=lsock;
21261 +       }
21262 +       else    /* neither: fall back to the socket stored in the entry itself */
21263 +       {
21264 +         show_debug("PGRrecv_cascade_answer:receiving packet from sock not belogs to cascade->upper / lower. maybe missing .");
21265 +         sock=cascade->sock;
21266 +       }
21267 +       answer = (ReplicateServerInfo*)PGRread_packet(sock,header);     /* header is filled in by PGRread_packet() -- presumably; confirm against its definition */
21268 +       return answer;
21269 +}
21270 +
21271 +int
21272 +PGRsend_cascade(int sock , ReplicateHeader * header, char * query)
21273 +{
21274 +       char * func ="PGRsend_cascade()";
21275 +       int s;
21276 +       char * send_ptr;
21277 +       char * buf;
21278 +       int send_size = 0;
21279 +       int buf_size;
21280 +       int header_size;
21281 +       int rtn;
21282 +       fd_set      wmask;
21283 +       struct timeval timeout;
21284 +       int query_size = 0;
21285 +
21286 +       /* check parameter */
21287 +       if ((header == NULL) || (sock == -1))
21288 +       {
21289 +               return STATUS_ERROR;
21290 +       }
21291 +
21292 +#ifdef PRINT_DEBUG
21293 +       show_debug("%s:PGRsend_cascade sock[%d]",func,sock);
21294 +#endif                 
21295 +       query_size = ntohl(header->query_size);
21296 +       header_size = sizeof(ReplicateHeader);
21297 +       buf_size = header_size + query_size + 4;
21298 +       buf = malloc(buf_size);
21299 +       memset(buf,0,buf_size);
21300 +       buf_size -= 4;
21301 +       memcpy(buf,header,header_size);
21302 +       if (query_size > 0)
21303 +       {
21304 +               memcpy((char *)(buf+header_size),query,query_size+1);
21305 +       }
21306 +       send_ptr = buf;
21307 +
21308 +       for (;;)
21309 +       {
21310 +               timeout.tv_sec = 10;
21311 +               timeout.tv_usec = 0;
21312 +
21313 +               /*
21314 +                * Wait for something to happen.
21315 +                */
21316 +               FD_ZERO(&wmask);
21317 +               FD_SET(sock,&wmask);
21318 +               rtn = select(sock+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
21319 +
21320 +               if (rtn < 0)
21321 +               {
21322 +                       if (errno == EINTR || errno == EAGAIN)
21323 +                               continue;
21324 +
21325 +                       show_error("%s:select failed ,errno is %s",func , strerror(errno));
21326 +                       free(buf);
21327 +                       return STATUS_ERROR;
21328 +               }
21329 +
21330 +               if (rtn && FD_ISSET(sock, &wmask))
21331 +               {
21332 +                       s = send(sock,send_ptr + send_size,buf_size - send_size ,0);
21333 +                       if (s < 0)
21334 +                       {
21335 +                               if (errno == EINTR || errno == EAGAIN)
21336 +                                       continue;
21337 +                               else
21338 +                               {
21339 +                                       show_error("%s:send failed: %d(%s)",func, errno, strerror(errno));
21340 +                                       free(buf);
21341 +                                       return STATUS_ERROR;
21342 +                               }
21343 +                       }
21344 +                       else if (s == 0)
21345 +                       {
21346 +                               show_error("%s:unexpected EOF", func);
21347 +                               free(buf);
21348 +                               return STATUS_ERROR;
21349 +                       }
21350 +                       send_size += s;
21351 +                       if (send_size == buf_size)
21352 +                       {
21353 +#ifdef PRINT_DEBUG
21354 +                               show_debug("%s:send[%s] size[%d]",func,query,send_size);
21355 +#endif                 
21356 +                               free(buf);
21357 +                               return STATUS_OK;
21358 +                       }
21359 +               }
21360 +       }
21361 +       return STATUS_OK;
21362 +}
21363 +
21364 +int
21365 +PGRwait_answer_cascade(int  sock)       /* read one packet from sock and discard it */
21366 +{
21367 +       ReplicateHeader header; /* receives the packet header; not examined here */
21368 +       char * answer = NULL;
21369 +
21370 +       answer = PGRread_packet(sock,&header);
21371 +       if (answer != NULL)     /* a packet arrived: only its arrival matters */
21372 +       {
21373 +               free(answer);
21374 +               return STATUS_OK;
21375 +       }
21376 +       return STATUS_ERROR;
21377 +}
21378 +/**
21379 + * fixup_socket_for_cascades closes the current socket and reconnects to target.
21380 + * returns STATUS_OK if succeeded , or STATUS_ERROR if some error occurred.
21381 + * if target is null , only close socket.
21382 + *
21383 + * originally written by tanida@sraoss.co.jp
21384 + */
21385 +static int
21386 +fixup_socket_for_cascades(int *sock, ReplicateServerInfo *target) 
21387 +{
21388 +       if (*sock > 0)  /* NOTE(review): descriptor 0 is not closed here although -1 is the "closed" marker -- confirm this is intended */
21389 +       {
21390 +               close(*sock);   
21391 +               *sock=-1;
21392 +       }
21393 +        if(target!=NULL) {
21394 +              return PGR_Create_Socket_Connect(sock,target->hostName,target->portNumber);      /* result is passed through unchanged */
21395 +        }
21396 +       return STATUS_OK;
21397 +}
21398 +
21399 +
21400 +static int
21401 +notice_cascade_data(int sock)   /* send the ALL_CASCADE server list on sock as a CMD_TYPE_UPDATE_ALL packet */
21402 +{
21403 +       char * func = "notice_cascade_data";
21404 +       ReplicateServerInfo *cascade_data = NULL;       /* list from get_cascade_data(); freed after sending */
21405 +       ReplicateHeader header;
21406 +       int cnt = 0;    /* number of entries in cascade_data */
21407 +       int size = 0;   /* payload size in bytes */
21408 +
21409 +       if (sock <= 0)
21410 +       {
21411 +               return STATUS_ERROR;
21412 +       }
21413 +
21414 +       cascade_data = get_cascade_data(&cnt, ALL_CASCADE );
21415 +       if (cnt <= 0)
21416 +       {
21417 +               show_error("%s:cascade data is wrong",func);
21418 +               return STATUS_ERROR;
21419 +       }
21420 +       size = sizeof (ReplicateServerInfo) * cnt ;
21421 +
21422 +       memset(&header,0,sizeof(ReplicateHeader));
21423 +       header.cmdSys = CMD_SYS_CASCADE ;
21424 +       header.cmdSts = CMD_STS_TO_LOWER ;
21425 +       header.cmdType = CMD_TYPE_UPDATE_ALL;
21426 +       header.query_size = htonl(size);        /* sizes travel in network byte order */
21427 +       PGRsend_cascade(sock, &header, (char *)cascade_data );  /* NOTE(review): the send result is ignored; STATUS_OK is returned even if it fails */
21428 +       if (cascade_data != NULL)
21429 +       {
21430 +               free(cascade_data);
21431 +       }
21432 +       return STATUS_OK;
21433 +}
21434 +
21435 +int
21436 +PGRcascade_main(int sock, ReplicateHeader * header, char * query)       /* dispatch a cascade packet on header->cmdSts; always returns STATUS_OK */
21437 +{
21438 +       switch (header->cmdSts)
21439 +       {
21440 +               case CMD_STS_TO_UPPER:
21441 +                       if (header->cmdType == CMD_TYPE_ADD)    /* other command types are ignored */
21442 +                       {
21443 +                               /* add lower cascade data to myself */
21444 +                               add_cascade_data(header,(ReplicateServerInfo*)query);
21445 +                               /* send cascade data to upper */
21446 +                               /* and receive new cascade data from upper */
21447 +                               PGRstartup_cascade();
21448 +                               /* return to lower with new cascade data */
21449 +                               notice_cascade_data(sock);
21450 +                               /* notifies a cascade server's information to Cluster DBs */
21451 +                               notice_cascade_data_to_cluster_db();
21452 +                       }
21453 +                       break;
21454 +               case CMD_STS_TO_LOWER:
21455 +                       /*
21456 +                        * use for cascading replication; nothing is done here yet
21457 +                        */
21458 +                       break;
21459 +       }
21460 +       return STATUS_OK;       /* results of the calls above are not propagated */
21461 +}
21462 +
21463 +static int
21464 +notice_cascade_data_to_cluster_db(void) /* pass the current lower cascade server's address to PGRnotice_replication_server() */
21465 +{
21466 +       char userName[USERNAME_MAX_LENGTH];     /* copy of $LOGNAME, "" when it is unset */
21467 +       ReplicateServerInfo *s=NULL;
21468 +
21469 +       if (Cascade_Inf->lower == NULL) /* no lower server cached yet: look one up */
21470 +       {
21471 +               Cascade_Inf->lower = PGRget_lower_cascade();    
21472 +       }
21473 +       if (Cascade_Inf->lower == NULL) /* still none: nothing to announce */
21474 +       {
21475 +               return STATUS_ERROR;
21476 +       }
21477 +       s=Cascade_Inf->lower;
21478 +       memset(userName,0,sizeof(userName));    /* guarantees NUL termination after the bounded copy below */
21479 +       if (getenv("LOGNAME") != NULL) strncpy(userName ,getenv("LOGNAME"),sizeof(userName)-1); /* getenv() returns NULL when the variable is unset */
21480 +
21481 +       PGRnotice_replication_server(s->hostName,
21482 +                                                                s->portNumber,
21483 +                                                                s->recoveryPortNumber,
21484 +                                                                s->lifecheckPortNumber,
21485 +                                                                userName);
21486 +
21487 +       return STATUS_OK;
21488 +}
21489 +
21490 +int
21491 +PGRwait_notice_rlog_done(void)  /* read and discard one packet from the lower cascade socket */
21492 +{
21493 +        ReplicateHeader header;        /* receives the packet header; not examined here */
21494 +               if (lsock != -1)
21495 +               {
21496 +                               free(PGRread_packet(lsock,&header));    /* the packet body is not needed; free(NULL) is a no-op */
21497 +                               return STATUS_OK;
21498 +               }
21499 +               return STATUS_ERROR;    /* no lower cascade connection */
21500 +
21501 +}
21502 +
21503 +
21504 +int
21505 +PGRsend_notice_quit(void )      /* send "QUIT_SAFELY" to the lower cascade server, then wait for one packet back */
21506 +{
21507 +               ReplicateHeader header;
21508 +               int size = 0;   /* payload length, without a terminating NUL */
21509 +
21510 +               size = strlen("QUIT_SAFELY");
21511 +               memset(&header,0,sizeof(ReplicateHeader));
21512 +               header.cmdSys = CMD_SYS_CALL ;
21513 +               header.cmdSts = CMD_STS_RESPONSE ;
21514 +               header.cmdType = CMD_TYPE_FRONTEND_CLOSED;
21515 +               header.query_size = htonl(size);        /* sizes travel in network byte order */
21516 +               PGRsend_lower_cascade(&header, "QUIT_SAFELY");  /* result ignored */
21517 +               PGRwait_notice_rlog_done();     /* result ignored */
21518 +               return STATUS_OK;       /* always, whatever the two calls above returned */
21519 +}
21520 +
21521 +int
21522 +PGRsend_notice_rlog_done(int sock)      /* send PGR_QUERY_DONE_NOTICE_CMD on sock as a CMD_SYS_CASCADE response */
21523 +{
21524 +       ReplicateHeader header;
21525 +       int size = 0;   /* payload length, without a terminating NUL */
21526 +
21527 +       if (sock <= 0)
21528 +       {
21529 +               return STATUS_ERROR;
21530 +       }
21531 +
21532 +       size = strlen(PGR_QUERY_DONE_NOTICE_CMD);
21533 +       memset(&header,0,sizeof(ReplicateHeader));
21534 +       header.cmdSys = CMD_SYS_CASCADE ;
21535 +       header.cmdSts = CMD_STS_RESPONSE ;
21536 +       header.cmdType = 0;
21537 +       header.query_size = htonl(size);        /* sizes travel in network byte order */
21538 +       PGRsend_cascade(sock, &header, PGR_QUERY_DONE_NOTICE_CMD);      /* NOTE(review): the send result is ignored */
21539 +       return STATUS_OK;
21540 +
21541 +}
21542 +#endif /* USE_REPLICATION */
21543 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/conf.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/conf.c
21544 --- postgresql-8.2.4/src/pgcluster/pgrp/conf.c  1970-01-01 01:00:00.000000000 +0100
21545 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/conf.c        2007-02-18 22:52:17.000000000 +0100
21546 @@ -0,0 +1,694 @@
21547 +/*--------------------------------------------------------------------
21548 + * FILE:
21549 + *    conf.c
21550 + *    Replication server for PostgreSQL
21551 + *
21552 + * NOTE:
21553 + *    Read and set configuration data in this module.
21554 + *
21555 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
21556 + *--------------------------------------------------------------------
21557 + */
21558 +#include "postgres.h"
21559 +
21560 +#include <stdio.h>
21561 +#include <string.h>
21562 +#include <unistd.h>
21563 +#include <sys/types.h>
21564 +#include <fcntl.h>
21565 +#include <ctype.h>
21566 +#include <sys/stat.h>
21567 +#include <sys/ipc.h>
21568 +#include <sys/shm.h>
21569 +#include <sys/sem.h>
21570 +#include <netdb.h>
21571 +#include <errno.h>
21572 +#include <sys/file.h>
21573 +
21574 +
21575 +
21576 +#include "libpq-fe.h"
21577 +#include "libpq-int.h"
21578 +#include "fe-auth.h"
21579 +
21580 +#include "replicate_com.h"
21581 +#include "pgreplicate.h"
21582 +
21583 +/*--------------------------------------------------------------------
21584 + * SYMBOL
21585 + *    PGRget_Conf_Data()
21586 + * NOTES
21587 + *    Initialize memory and tables
21588 + * ARGS
21589 + *    char * path: path of the setup file (I)
21590 + * RETURN
21591 + *    OK: STATUS_OK
21592 + *    NG: STATUS_ERROR
21593 + *--------------------------------------------------------------------
21594 + */
21595 +int
21596 +PGRget_Conf_Data(char * path)
21597 +{
21598 +       char * func = "PGRget_Conf_Data()";
21599 +       HostTbl host_tbl[MAX_DB_SERVER];
21600 +       ConfDataType * conf = NULL;
21601 +       int cnt = 0;
21602 +       int lb_cnt = 0;
21603 +       int cascade_cnt = 0;
21604 +       int rec_no = 0;
21605 +       int lb_rec_no = 0;
21606 +       int cascade_rec_no = -1;
21607 +       int i = 0;
21608 +       int size = 0;
21609 +       char fname[256];
21610 +       union semun sem_arg;
21611 +
21612 +       /*
21613 +        * open log file
21614 +        */
21615 +       if (path == NULL)
21616 +       {
21617 +               path = ".";
21618 +       }
21619 +       size = sizeof(LogFileInf);
21620 +       LogFileData = (LogFileInf *) malloc(size);
21621 +       if (LogFileData == NULL)
21622 +       {
21623 +               show_error("%s:malloc() failed. reason: %s", func,strerror(errno));
21624 +               return STATUS_ERROR;
21625 +       }
21626 +       memset(LogFileData,0,size);
21627 +
21628 +       snprintf(fname,sizeof(fname),"%s/%s",path,PGREPLICATE_STATUS_FILE);
21629 +       StatusFp = fopen(fname,"a");
21630 +       if (StatusFp == NULL)
21631 +       {
21632 +               show_error("%s:fopen failed: (%s)",func,strerror(errno));
21633 +               return STATUS_ERROR;
21634 +       }
21635 +
21636 +       snprintf(fname,sizeof(fname),"%s/%s",path,PGREPLICATE_RID_FILE);
21637 +       RidFp = fopen(fname,"r+");
21638 +       if (RidFp == NULL)
21639 +       {
21640 +               RidFp = fopen(fname,"w+");
21641 +               if (RidFp == NULL)
21642 +               {
21643 +                       show_error("%s:fopen failed: (%s)",func,strerror(errno));
21644 +                       return STATUS_ERROR;
21645 +               }
21646 +       }
21647 +
21648 +       /*
21649 +        * read configuration file
21650 +        */
21651 +       if (PGR_Get_Conf_Data(path,PGREPLICATE_CONF_FILE) != STATUS_OK)
21652 +       {
21653 +               show_error("%s:PGR_Get_Conf_Data failed",func);
21654 +               return STATUS_ERROR;
21655 +       }
21656 +#ifdef PRINT_DEBUG
21657 +       show_debug("PGR_Get_Conf_Data ok");
21658 +#endif                 
21659 +
21660 +       /* allocate response information table */
21661 +       PGR_Response_Inf = (ResponseInf *)malloc(sizeof(ResponseInf));
21662 +       if (PGR_Response_Inf == NULL)
21663 +       {
21664 +               show_error("%s:malloc() failed. reason: %s", func,strerror(errno));
21665 +               return STATUS_ERROR;
21666 +       }
21667 +       PGR_Response_Inf->response_mode = PGR_NORMAL_MODE;
21668 +       PGR_Response_Inf->current_cluster = 0;
21669 +
21670 +       /*
21671 +        * memory allocate load balance table buffer
21672 +        */
21673 +       LoadBalanceTbl = (RecoveryTbl *)malloc(sizeof(RecoveryTbl)*MAX_DB_SERVER);
21674 +       if (LoadBalanceTbl == (RecoveryTbl *)NULL)
21675 +       {
21676 +               show_error("%s:malloc failed: (%s)",func,strerror(errno));
21677 +               return STATUS_ERROR;
21678 +       }
21679 +#ifdef PRINT_DEBUG
21680 +       show_debug("LoadBalanceTbl allocate ok");
21681 +#endif                 
21682 +
21683 +       /*
21684 +        * memory allocate cascade server table buffer
21685 +        */
21686 +       size = sizeof(ReplicateServerInfo) * MAX_DB_SERVER;
21687 +       CascadeTblShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
21688 +       if (CascadeTblShmid < 0)
21689 +       {
21690 +               show_error("%s:shmget() failed. reason: %s", func,strerror(errno));
21691 +               return STATUS_ERROR;
21692 +       }
21693 +#ifdef PRINT_DEBUG
21694 +       show_debug("%s:CascadeTbl shmget ok",func);
21695 +#endif                 
21696 +       Cascade_Tbl = (ReplicateServerInfo *)shmat(CascadeTblShmid,0,0);
21697 +       if (Cascade_Tbl == (ReplicateServerInfo *)-1)
21698 +       {
21699 +               show_error("%s:shmat() failed. reason: %s", func,strerror(errno));
21700 +               return STATUS_ERROR;
21701 +       }
21702 +#ifdef PRINT_DEBUG
21703 +       show_debug("%s:CascadeTbl shmat ok",func);
21704 +#endif                 
21705 +       memset(Cascade_Tbl , 0 , size );
21706 +
21707 +       /*
21708 +        * memory allocate cascade index 
21709 +        */
21710 +       size = sizeof(CascadeInf);
21711 +       CascadeInfShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
21712 +       if (CascadeInfShmid < 0)
21713 +       {
21714 +               show_error("%s:shmget() failed. reason: %s", func,strerror(errno));
21715 +               return STATUS_ERROR;
21716 +       }
21717 +#ifdef PRINT_DEBUG
21718 +       show_debug("%s:CascadeInf shmget ok",func);
21719 +#endif                 
21720 +       Cascade_Inf = (CascadeInf *)shmat(CascadeInfShmid,0,0);
21721 +       if (Cascade_Inf == (CascadeInf *)-1)
21722 +       {
21723 +               show_error("%s:shmat() failed. reason: %s",func, strerror(errno));
21724 +               return STATUS_ERROR;
21725 +       }
21726 +#ifdef PRINT_DEBUG
21727 +       show_debug("%s:CascadeInf shmat ok",func);
21728 +#endif                 
21729 +       memset(Cascade_Inf , 0 , size );
21730 +
21731 +       /*
21732 +        * memory allocate replication commit log buffer
21733 +        */
21734 +       size = sizeof(CommitLogInf) * MAX_DB_SERVER * MAX_CONNECTIONS;
21735 +       CommitLogShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
21736 +       if (CommitLogShmid < 0)
21737 +       {
21738 +               show_error("%s:shmget() failed. reason: %s", func, strerror(errno));
21739 +               return STATUS_ERROR;
21740 +       }
21741 +#ifdef PRINT_DEBUG
21742 +       show_debug("%s:CommitLog shmget ok",func);
21743 +#endif                 
21744 +       Commit_Log_Tbl = (CommitLogInf *)shmat(CommitLogShmid,0,0);
21745 +       if (Commit_Log_Tbl == (CommitLogInf *)-1)
21746 +       {
21747 +               show_error("%s:shmat() failed. reason: %s",func, strerror(errno));
21748 +               return STATUS_ERROR;
21749 +       }
21750 +#ifdef PRINT_DEBUG
21751 +       show_debug("%s:Commit_Log_Tbl shmat ok",func);
21752 +#endif                 
21753 +       memset(Commit_Log_Tbl , 0 , size );
21754 +       (Commit_Log_Tbl + (MAX_DB_SERVER * MAX_CONNECTIONS) -1)->inf.useFlag = DB_TBL_END;
21755 +
21756 +       /* create semapho */
21757 +       if ((SemID = semget(IPC_PRIVATE,2,IPC_CREAT | IPC_EXCL | 0600)) < 0)
21758 +       {
21759 +               show_error("%s:semget() failed. (%s)",func,strerror(errno));
21760 +               return STATUS_ERROR;
21761 +       }
21762 +       for ( i = 0 ; i < 2 ; i ++)
21763 +       {
21764 +               semctl(SemID, i, GETVAL, sem_arg);
21765 +               sem_arg.val = 1;
21766 +               semctl(SemID, i, SETVAL, sem_arg);
21767 +       }
21768 +
21769 +       /* create semapho */
21770 +       if ((CascadeSemID = semget(IPC_PRIVATE,2,IPC_CREAT | IPC_EXCL | 0600)) < 0)
21771 +       {
21772 +               show_error("%s:semget() failed. (%s)",func,strerror(errno));
21773 +               return STATUS_ERROR;
21774 +       }
21775 +       for ( i = 0 ; i < 2 ; i ++)
21776 +       {
21777 +               semctl(CascadeSemID, i, GETVAL, sem_arg);
21778 +               sem_arg.val = 1;
21779 +               semctl(CascadeSemID, i, SETVAL, sem_arg);
21780 +       }
21781 +
21782 +
21783 +       if ((VacuumSemID = semget(IPC_PRIVATE,2,IPC_CREAT | IPC_EXCL | 0600)) < 0)
21784 +       {
21785 +               show_error("%s:semget() failed. (%s)",func,strerror(errno));
21786 +               return STATUS_ERROR;
21787 +       }
21788 +       for ( i = 0 ; i < 2 ; i ++)
21789 +       {
21790 +               semctl(VacuumSemID, i, GETVAL, sem_arg);
21791 +               sem_arg.val = 1;
21792 +               semctl(VacuumSemID, i, SETVAL, sem_arg);
21793 +       }
21794 +       size = sizeof(ReplicationLogInf);
21795 +       Replicateion_Log = malloc(size);
21796 +       if (Replicateion_Log == NULL)
21797 +       {
21798 +               show_error("%s:malloc failed: (%s)",func,strerror(errno));
21799 +               return STATUS_ERROR;
21800 +       }
21801 +       memset(Replicateion_Log , 0 , size );
21802 +       Replicateion_Log->RLog_Sock_Path = NULL;
21803 +#ifdef PRINT_DEBUG
21804 +       show_debug("%s:RLog Memory Allocation ok",func);
21805 +#endif                 
21806 +
21807 +
21808 +       /*
21809 +        * set each datas into the tables
21810 +        */
21811 +       conf = ConfData_Top;
21812 +       while (conf != (ConfDataType *)NULL) 
21813 +       {
21814 +               show_debug("registering (key,value)=(%s,%s)",conf->key,conf->value);
21815 +               /* get cluster db data */
21816 +               if (!STRCMP(conf->table,CLUSTER_SERVER_TAG))
21817 +               {
21818 +                       rec_no = conf->rec_no;
21819 +                       if (cnt < rec_no)
21820 +                       {
21821 +                               cnt = rec_no;
21822 +                               if (cnt >= MAX_DB_SERVER)
21823 +                               {
21824 +                                       continue;
21825 +                               }
21826 +                       }
21827 +                       if (!STRCMP(conf->key,HOST_NAME_TAG))
21828 +                       {
21829 +                               int ip;
21830 +                               strncpy(host_tbl[rec_no].hostName,conf->value,sizeof(host_tbl[rec_no].hostName));
21831 +                               show_debug("registering hostname %s",host_tbl[rec_no].hostName);
21832 +                               ip=PGRget_ip_by_name(conf->value);
21833 +
21834 +                               sprintf(host_tbl[rec_no].resolvedName,
21835 +                                        "%d.%d.%d.%d",
21836 +                                        (ip      ) & 0xff ,
21837 +                                        (ip >>  8) & 0xff ,
21838 +                                        (ip >> 16) & 0xff ,
21839 +                                        (ip >> 24) & 0xff );
21840 +                               show_debug("resolved name is %s",host_tbl[rec_no].resolvedName);
21841 +
21842 +                               conf = (ConfDataType*)conf->next;
21843 +                               continue;
21844 +                       }
21845 +                       if (!STRCMP(conf->key,PORT_TAG))
21846 +                       {
21847 +                               host_tbl[rec_no].port = atoi(conf->value);
21848 +                               conf = (ConfDataType*)conf->next;
21849 +                               continue;
21850 +                       }
21851 +                       if (!STRCMP(conf->key,RECOVERY_PORT_TAG))
21852 +                       {
21853 +                               host_tbl[rec_no].recoveryPort = atoi(conf->value);
21854 +                               conf = (ConfDataType*)conf->next;
21855 +                               continue;
21856 +                       }
21857 +               }
21858 +               /* get cascade server data */
21859 +               else if (!STRCMP(conf->table, REPLICATION_SERVER_INFO_TAG))
21860 +               {
21861 +                       cascade_rec_no = conf->rec_no ;
21862 +                       if (cascade_cnt < cascade_rec_no)
21863 +                       {
21864 +                               cascade_cnt = cascade_rec_no;
21865 +                               if (cascade_cnt >= MAX_DB_SERVER)
21866 +                               {
21867 +                                       continue;
21868 +                               }
21869 +                       }
21870 +                       if (!STRCMP(conf->key,HOST_NAME_TAG))
21871 +                       {
21872 +                               strncpy((Cascade_Tbl+cascade_rec_no)->hostName,conf->value,sizeof(Cascade_Tbl->hostName));
21873 +                               conf = (ConfDataType*)conf->next;
21874 +                               continue;
21875 +                       }
21876 +                       if (!STRCMP(conf->key,PORT_TAG))
21877 +                       {
21878 +                               if (atoi(conf->value) > 0)
21879 +                               {
21880 +                                       (Cascade_Tbl+cascade_rec_no)->portNumber = atoi(conf->value);
21881 +                               }
21882 +                               else
21883 +                               {
21884 +                                       (Cascade_Tbl+cascade_rec_no)->portNumber = DEFAULT_PGRP_PORT;
21885 +                               }
21886 +                               (Cascade_Tbl+cascade_rec_no)->sock = -1;
21887 +
21888 +                               conf = (ConfDataType*)conf->next;
21889 +                               PGRset_cascade_server_status(Cascade_Tbl+cascade_rec_no,DB_TBL_USE);
21890 +                               if (cascade_rec_no == 0)
21891 +                               {
21892 +                                       Cascade_Inf->top = Cascade_Tbl;
21893 +                               }
21894 +                               continue;
21895 +                       }
21896 +                       if (!STRCMP(conf->key,RECOVERY_PORT_TAG))
21897 +                       {
21898 +                               if (atoi(conf->value) > 0)
21899 +                               {
21900 +                                       (Cascade_Tbl+cascade_rec_no)->recoveryPortNumber = atoi(conf->value);
21901 +                               }
21902 +                               else
21903 +                               {
21904 +                                       (Cascade_Tbl+cascade_rec_no)->recoveryPortNumber = DEFAULT_PGRP_RECOVERY_PORT;
21905 +                               }
21906 +                               (Cascade_Tbl+cascade_rec_no)->rlog_sock=-1;
21907 +                               (Cascade_Tbl+cascade_rec_no +1)->useFlag = DB_TBL_END;
21908 +                               conf = (ConfDataType*)conf->next;
21909 +                               continue;
21910 +                       }
21911 +               }
21912 +               /* get loadbalancer table data */
21913 +               else if (!STRCMP(conf->table,LOAD_BALANCE_SERVER_TAG))
21914 +               {
21915 +                       lb_rec_no = conf->rec_no;
21916 +                       if (lb_cnt < lb_rec_no)
21917 +                       {
21918 +                               lb_cnt = lb_rec_no;
21919 +                               if (lb_cnt >= MAX_DB_SERVER)
21920 +                               {
21921 +                                       continue;
21922 +                               }
21923 +                       }
21924 +                       if (!STRCMP(conf->key,HOST_NAME_TAG))
21925 +                       {
21926 +                               strncpy((LoadBalanceTbl + lb_rec_no)->hostName, conf->value,sizeof(LoadBalanceTbl->hostName));
21927 +                               conf = (ConfDataType*)conf->next;
21928 +                               continue;
21929 +                       }
21930 +                       if (!STRCMP(conf->key,RECOVERY_PORT_TAG))
21931 +                       {
21932 +                               (LoadBalanceTbl + lb_rec_no)->recoveryPort = atoi(conf->value);
21933 +                               (LoadBalanceTbl + lb_rec_no)->sock = -1;
21934 +                               (LoadBalanceTbl + lb_rec_no)->recovery_sock = -1;
21935 +                               conf = (ConfDataType*)conf->next;
21936 +                               continue;
21937 +                       }
21938 +               }
21939 +               /* get logging file data */
21940 +               else if (!STRCMP(conf->table, LOG_INFO_TAG))
21941 +               {
21942 +                       if (!STRCMP(conf->key, FILE_NAME_TAG))
21943 +                       {
21944 +                               strncpy(LogFileData->file_name, conf->value ,sizeof(LogFileData->file_name));
21945 +                               LogFileData->fp = NULL;
21946 +                               conf = (ConfDataType*)conf->next;
21947 +                               continue;
21948 +                       }
21949 +                       if (!STRCMP(conf->key, FILE_SIZE_TAG))
21950 +                       {
21951 +                               int i,len;
21952 +                               char * ptr;
21953 +                               int unit = 1;
21954 +                               len = strlen(conf->value);
21955 +                               ptr = conf->value;
21956 +                               for (i = 0; i < len ; i ++,ptr++)
21957 +                               {
21958 +                                       if ((! isdigit(*ptr)) && (! isspace(*ptr)))
21959 +                                       {
21960 +                                               switch (*ptr)
21961 +                                               {
21962 +                                                       case 'K':
21963 +                                                       case 'k':
21964 +                                                               unit = 1024;
21965 +                                                               break;
21966 +                                                       case 'M':
21967 +                                                       case 'm':
21968 +                                                               unit = 1024*1024;
21969 +                                                               break;
21970 +                                                       case 'G':
21971 +                                                       case 'g':
21972 +                                                               unit = 1024*1024*1024;
21973 +                                                               break;
21974 +                                               }
21975 +                                               *ptr = '\0';
21976 +                                               break;
21977 +                                       }
21978 +                               }
21979 +                               LogFileData->max_size = atoi(conf->value) * unit;
21980 +                               conf = (ConfDataType*)conf->next;
21981 +                               continue;
21982 +                       }
21983 +                       if (!STRCMP(conf->key, LOG_ROTATION_TAG))
21984 +                       {
21985 +                               LogFileData->rotation = atoi(conf->value);
21986 +                               conf = (ConfDataType*)conf->next;
21987 +                               continue;
21988 +                       }
21989 +               }
21990 +               else
21991 +               {
21992 +                       if (!STRCMP(conf->key,HOST_NAME_TAG))
21993 +                       {
21994 +                           int ip;
21995 +                               ip=PGRget_ip_by_name(conf->value);
21996 +                               if (ResolvedName == NULL)
21997 +                               {
21998 +                                       ResolvedName = malloc(ADDRESS_LENGTH);
21999 +                               }
22000 +                               if (ResolvedName == NULL)
22001 +                               {
22002 +                                       continue;
22003 +                               }
22004 +                               else
22005 +                               {
22006 +                                       memset(ResolvedName,0,ADDRESS_LENGTH);
22007 +                               }
22008 +
22009 +                               sprintf(ResolvedName,
22010 +                                        "%d.%d.%d.%d",
22011 +                                        (ip      ) & 0xff ,
22012 +                                        (ip >>  8) & 0xff ,
22013 +                                        (ip >> 16) & 0xff ,
22014 +                                        (ip >> 24) & 0xff );
22015 +                               conf = (ConfDataType*)conf->next;
22016 +                               continue;
22017 +                       }
22018 +                       else if (!STRCMP(conf->key,REPLICATE_PORT_TAG))
22019 +                       {
22020 +                               Port_Number = atoi(conf->value);
22021 +                               conf = (ConfDataType*)conf->next;
22022 +                               continue;
22023 +                       }
22024 +                       /* get port number for recovery cluster db server */
22025 +                       else if (!STRCMP(conf->key,RECOVERY_PORT_TAG))
22026 +                       {
22027 +                               if (atoi(conf->value) > 0)
22028 +                               {
22029 +                                       Recovery_Port_Number = atoi(conf->value);
22030 +                               }
22031 +                               else
22032 +                               {
22033 +                                       Recovery_Port_Number =DEFAULT_PGRP_RECOVERY_PORT;
22034 +                               }
22035 +                               conf = (ConfDataType*)conf->next;
22036 +                               continue;
22037 +                       }
22038 +                       else if (!STRCMP(conf->key,LIFECHECK_PORT_TAG))
22039 +                       {
22040 +                               if (atoi(conf->value) > 0)
22041 +                               {
22042 +                                       LifeCheck_Port_Number = atoi(conf->value);
22043 +                               }
22044 +                               else
22045 +                               {
22046 +                                       LifeCheck_Port_Number = DEFAULT_PGRP_LIFECHECK_PORT;
22047 +                               }
22048 +                               conf = (ConfDataType*)conf->next;
22049 +                               continue;
22050 +                       }
22051 +                       else if (!STRCMP(conf->key,RLOG_PORT_TAG))
22052 +                       {
22053 +                               if (atoi(conf->value) > 0)
22054 +                               {
22055 +                                       Replicateion_Log->RLog_Port_Number = atoi(conf->value);
22056 +                               }
22057 +                               else
22058 +                               {
22059 +                                       Replicateion_Log->RLog_Port_Number = DEFAULT_PGRP_RLOG_PORT;
22060 +                               }
22061 +                               conf = (ConfDataType*)conf->next;
22062 +                               continue;
22063 +                       }
22064 +                       /* get response mode */
22065 +                       else if (!STRCMP(conf->key,RESPONSE_MODE_TAG))
22066 +                       {
22067 +                               if (!STRCMP(conf->value,RESPONSE_MODE_RELIABLE))
22068 +                               {
22069 +                                       PGR_Response_Inf->response_mode = PGR_RELIABLE_MODE;
22070 +                               }
22071 +                               else if (!STRCMP(conf->value,RESPONSE_MODE_FAST))
22072 +                               {
22073 +                                       PGR_Response_Inf->response_mode = PGR_FAST_MODE;
22074 +                               }
22075 +                               else
22076 +                               {
22077 +                                       PGR_Response_Inf->response_mode = PGR_NORMAL_MODE;
22078 +                               }
22079 +                               conf = (ConfDataType*)conf->next;
22080 +                               continue;
22081 +                       }
22082 +                       /* get replication log use or not */
22083 +                       else if (!STRCMP(conf->key,USE_REPLICATION_LOG_TAG))
22084 +                       {
22085 +                               if (!STRCMP(conf->value,"yes"))
22086 +                               {
22087 +                                       PGR_Use_Replication_Log = true;
22088 +                               }
22089 +                               conf = (ConfDataType*)conf->next;
22090 +                               continue;
22091 +                       }
22092 +                       /* get replication timeout */
22093 +                       else if (!STRCMP(conf->key,TIMEOUT_TAG))
22094 +                       {
22095 +                               /* get repliaction timeout */
22096 +                               PGR_Replication_Timeout = PGRget_time_value(conf->value);
22097 +                               if ((PGR_Replication_Timeout < 1) || (PGR_Replication_Timeout > 3600))
22098 +                               {
22099 +                                       fprintf(stderr,"%s is out of range. It should be between 1sec-1hr.\n",TIMEOUT_TAG);
22100 +                                       return STATUS_ERROR;
22101 +                               }
22102 +                               conf = (ConfDataType*)conf->next;
22103 +                               continue;
22104 +                       }
22105 +                       else if (!STRCMP(conf->key,LIFECHECK_TIMEOUT_TAG))
22106 +                       {
22107 +                               /* get lifecheck timeout */
22108 +                               PGR_Lifecheck_Timeout = PGRget_time_value(conf->value);
22109 +                               if ((PGR_Lifecheck_Timeout < 1) || (PGR_Lifecheck_Timeout > 3600))
22110 +                               {
22111 +                                       show_error("%s is out of range. It should be between 1sec-1hr.\n",LIFECHECK_TIMEOUT_TAG);
22112 +                                       return STATUS_ERROR;
22113 +                               }
22114 +                               conf = (ConfDataType*)conf->next;
22115 +                               continue;
22116 +                       }
22117 +                       else if (!STRCMP(conf->key,LIFECHECK_INTERVAL_TAG))
22118 +                       {
22119 +                               /* get lifecheck interval */
22120 +                               PGR_Lifecheck_Interval = PGRget_time_value(conf->value);
22121 +                               if ((PGR_Lifecheck_Interval < 1) || (PGR_Lifecheck_Interval > 3600))
22122 +                               {
22123 +                                       show_error("%s is out of range. It should between 1sec-1hr.\n",LIFECHECK_INTERVAL_TAG);
22124 +                                       return STATUS_ERROR;
22125 +                               }
22126 +                               conf = (ConfDataType*)conf->next;
22127 +                               continue;
22128 +                       }
22129 +               }
22130 +               conf = (ConfDataType*)conf->next;
22131 +       }
22132 +
22133 +       /* create cluster db server table */
22134 +       Host_Tbl_Begin = (HostTbl *)NULL;
22135 +
22136 +       size = sizeof(HostTbl) * MAX_DB_SERVER;
22137 +       HostTblShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
22138 +       if (HostTblShmid < 0)
22139 +       {
22140 +               show_error("%s:shmget() failed. reason: %s", func,strerror(errno));
22141 +               return STATUS_ERROR;
22142 +       }
22143 +#ifdef PRINT_DEBUG
22144 +       show_debug("%s:HostTbl shmget ok",func);
22145 +#endif                 
22146 +       Host_Tbl_Begin = (HostTbl *)shmat(HostTblShmid,0,0);
22147 +       if (Host_Tbl_Begin == (HostTbl *)-1)
22148 +       {
22149 +               show_error("%s:shmat() failed. reason: %s", func, strerror(errno));
22150 +               return STATUS_ERROR;
22151 +       }
22152 +#ifdef PRINT_DEBUG
22153 +       show_debug("%s:HostTbl shmat ok",func);
22154 +#endif                 
22155 +       memset(Host_Tbl_Begin , 0 , size );
22156 +       Host_Tbl_Begin -> useFlag = DB_TBL_END;
22157 +
22158 +       for ( i = 0 ; i <= cnt ; i ++)
22159 +       {
22160 +               PGRadd_HostTbl(&host_tbl[i],DB_TBL_INIT);
22161 +       }
22162 +       /* set load balance table */
22163 +       for ( i = 0 ; i <= lb_cnt ; i ++)
22164 +       {
22165 +               (LoadBalanceTbl + i)->port = -1;
22166 +               (LoadBalanceTbl + i)->sock = -1;
22167 +       }
22168 +       memset((LoadBalanceTbl + i),0,sizeof(RecoveryTbl));
22169 +       PGR_Free_Conf_Data();
22170 +
22171 +       /* allocate result buffer of query */
22172 +       PGR_Result = malloc(PGR_MESSAGE_BUFSIZE);
22173 +       if (PGR_Result == NULL)
22174 +       {
22175 +               show_error("%s:malloc() failed. reason: %s", func, strerror(errno));
22176 +               return STATUS_ERROR;
22177 +       }
22178 +       memset(PGR_Result,0,PGR_MESSAGE_BUFSIZE);
22179 +
22180 +       /* allocate log_data */
22181 +       PGR_Log_Header = malloc(sizeof(ReplicateHeader));
22182 +       if (PGR_Log_Header == NULL)
22183 +       {
22184 +               show_error("%s:malloc() failed. reason: %s", func, strerror(errno));
22185 +               return STATUS_ERROR;
22186 +       }
22187 +       memset(PGR_Log_Header,0,sizeof(ReplicateHeader));
22188 +
22189 +       /* allocate send query id */
22190 +       size = sizeof(unsigned int) * (MAX_DB_SERVER +1);
22191 +       PGR_Send_Query_ID = malloc (size);
22192 +       if (PGR_Send_Query_ID == NULL)
22193 +       {
22194 +               show_error("%s:malloc() failed. reason: %s", func, strerror(errno));
22195 +               return STATUS_ERROR;
22196 +       }
22197 +       memset(PGR_Send_Query_ID, 0, size);
22198 +       for ( i = 0 ; i < MAX_DB_SERVER ; i ++)
22199 +       {
22200 +               StartReplication[i] = true;
22201 +       }
22202 +
22203 +       /* set self data into cascade table */
22204 +
22205 +       cascade_rec_no ++;
22206 +       if (ResolvedName != NULL)
22207 +       {
22208 +               strncpy((Cascade_Tbl+cascade_rec_no)->hostName,ResolvedName,ADDRESS_LENGTH);
22209 +       }
22210 +       else
22211 +       {
22212 +
22213 +               gethostname((Cascade_Tbl+cascade_rec_no)->hostName,sizeof(Cascade_Tbl->hostName));
22214 +       }
22215 +       (Cascade_Tbl+cascade_rec_no)->portNumber = Port_Number;
22216 +       (Cascade_Tbl+cascade_rec_no)->recoveryPortNumber = Recovery_Port_Number;
22217 +       (Cascade_Tbl+cascade_rec_no)->sock = -1;
22218 +
22219 +       PGRset_cascade_server_status(Cascade_Tbl+cascade_rec_no,DB_TBL_USE);
22220 +       /* terminate */
22221 +       (Cascade_Tbl+(cascade_rec_no+1))->useFlag = DB_TBL_END;
22222 +
22223 +       Cascade_Inf->top = Cascade_Tbl;
22224 +       Cascade_Inf->end = Cascade_Tbl+cascade_rec_no;
22225 +       Cascade_Inf->upper = NULL;
22226 +       Cascade_Inf->lower = NULL;
22227 +       if (cascade_rec_no >= 1)
22228 +       {
22229 +               Cascade_Inf->upper = (Cascade_Tbl+cascade_rec_no - 1);
22230 +       }
22231 +       (Cascade_Tbl+(cascade_rec_no+1))->useFlag = DB_TBL_END;
22232 +
22233 +       Cascade_Inf->myself = (Cascade_Tbl+cascade_rec_no);
22234 +       Cascade_Inf->useFlag = DB_TBL_USE;
22235 +
22236 +       PGR_Response_Inf->response_mode = PGR_NORMAL_MODE;
22237 +
22238 +       return STATUS_OK;
22239 +}
22240 +
22241 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/lifecheck.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/lifecheck.c
22242 --- postgresql-8.2.4/src/pgcluster/pgrp/lifecheck.c     1970-01-01 01:00:00.000000000 +0100
22243 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/lifecheck.c   2007-03-01 16:27:15.000000000 +0100
22244 @@ -0,0 +1,276 @@
22245 +/*--------------------------------------------------------------------
22246 + * FILE:
22247 + *     lifecheck.c
22248 + *
22249 + * NOTE:
22250 + *     This file contains the functions that pgreplicate calls
22251 + *     to perform the lifecheck of the cluster DB servers.
22252 + *
22253 + * Portions Copyright (c) 2003-2007, Atsushi Mitani
22254 + *--------------------------------------------------------------------
22255 + */
22256 +#include "postgres.h"
22257 +#include "postgres_fe.h"
22258 +
22259 +#include <pthread.h>
22260 +#include <stdio.h>
22261 +#include <stdarg.h>
22262 +#include <sys/types.h>
22263 +#include <fcntl.h>
22264 +#include <errno.h>
22265 +#include <ctype.h>
22266 +#include <time.h>
22267 +#include <sys/ipc.h>
22268 +#include <sys/shm.h>
22269 +#include <sys/sem.h>
22270 +#include <sys/msg.h>
22271 +#include <signal.h>
22272 +
22273 +/*
22274 +#include "libpq/pqsignal.h"
22275 +#include "utils/guc.h"
22276 +#include "miscadmin.h"
22277 +#include "nodes/nodes.h"
22278 +#include "nodes/parsenodes.h"
22279 +#include "access/xact.h"
22280 +#include "access/xlog.h"
22281 +#include "tcop/tcopprot.h"
22282 +#include "postmaster/postmaster.h"
22283 +*/
22284 +
22285 +#include "libpq-fe.h"
22286 +#include "libpq-int.h"
22287 +#include "fe-auth.h"
22288 +
22289 +#include <sys/socket.h>
22290 +#include <unistd.h>
22291 +#include <netdb.h>
22292 +#include <arpa/inet.h>
22293 +
22294 +#ifdef HAVE_NETINET_TCP_H
22295 +#include <netinet/tcp.h>
22296 +#endif
22297 +
22298 +#ifdef HAVE_SYS_SELECT_H
22299 +#include <sys/select.h>
22300 +#endif
22301 +
22302 +
22303 +#ifdef HAVE_CRYPT_H
22304 +#include <crypt.h>
22305 +#endif
22306 +
22307 +
22308 +#ifdef MULTIBYTE
22309 +#include "mb/pg_wchar.h"
22310 +#endif
22311 +
22312 +#include "access/xact.h"
22313 +#include "lib/dllist.h"
22314 +#include "libpq/pqformat.h"
22315 +#include "replicate_com.h"
22316 +#include "pgreplicate.h"
22317 +
22318 +#define PING_DB                "template1"     /* passed to PGRcreateConn() by lifecheck_loop() */
22319 +#define PING_QUERY     "SELECT 1"      /* query that ping_cluster() sends with PQexec() */
22320 +/* host entry set by lifecheck_loop() before each check; read by the SIGALRM handler set_timeout() */
22321 +static HostTbl * PGR_Cluster_DB_4_Lifecheck = (HostTbl*)NULL;
22322 +
22323 +/*--------------------------------------
22324 + * PROTOTYPE DECLARATION
22325 + *--------------------------------------
22326 + */
22327 +int PGRlifecheck_main(int fork_wait_time);
22328 +
22329 +static bool is_started_replication(void);
22330 +static void set_timeout(SIGNAL_ARGS);
22331 +static int lifecheck_loop(void);
22332 +static int ping_cluster(PGconn * conn);
22333 +static void set_host_status( HostTbl * host_ptr , int status );
22334 +
22335 +/*
22336 + * Fork the lifecheck child.  The parent returns STATUS_OK (STATUS_ERROR
22337 + * if fork() fails); the child loops forever, checking the cluster DBs
22338 + * every PGR_Lifecheck_Interval seconds, and never returns.
22339 + */
22340 +int
22341 +PGRlifecheck_main(int fork_wait_time)
22342 +{
22343 +       pid_t pgid = 0;
22344 +       pid_t pid = 0;
22345 +
22346 +       pgid = getpgid(0);
22347 +       pid = fork();
22348 +       if (pid < 0)
22349 +       {
22350 +               /* fork() failed: report it instead of claiming success */
22351 +               return STATUS_ERROR;
22352 +       }
22353 +       if (pid != 0)
22354 +       {
22355 +               /* parent process: the child does the lifecheck */
22356 +               return STATUS_OK;
22357 +       }
22358 +
22359 +       /* in child process: run the lifecheck in the parent's process group */
22360 +       setpgid(0,pgid);
22361 +
22362 +       PGRsignal(SIGHUP, PGRexit_subprocess);
22363 +       PGRsignal(SIGTERM, PGRexit_subprocess);
22364 +       PGRsignal(SIGINT, PGRexit_subprocess);
22365 +       PGRsignal(SIGQUIT, PGRexit_subprocess);
22366 +       PGRsignal(SIGALRM, set_timeout);
22367 +
22368 +       if (fork_wait_time > 0) {
22369 +               sleep(fork_wait_time);
22370 +       }
22371 +
22372 +       /* fall back to LOGNAME, USER, then "postgres" for the user name */
22373 +       if (PGRuserName == NULL)
22374 +       {
22375 +               PGRuserName = getenv("LOGNAME");
22376 +               if (PGRuserName == NULL)
22377 +               {
22378 +                       PGRuserName = getenv("USER");
22379 +                       if (PGRuserName == NULL)
22380 +                               PGRuserName = "postgres";
22381 +               }
22382 +       }
22383 +
22384 +       for (;;)
22385 +       {
22386 +               /* life check to all cluster dbs once one of them is in use */
22387 +               if (is_started_replication())
22388 +                       lifecheck_loop();
22389 +               /* wait next lifecheck as interval */
22390 +               sleep(PGR_Lifecheck_Interval);
22391 +       }
22392 +       return STATUS_OK;
22393 +}
22394 +
22395 +/* Return true when at least one host table entry is DB_TBL_USE. */
22396 +static bool
22397 +is_started_replication(void)
22398 +{
22399 +       HostTbl * host_ptr = Host_Tbl_Begin;
22400 +
22401 +       /* no host table: nothing is started (lifecheck_loop checks this too) */
22402 +       if (host_ptr == NULL)
22403 +               return false;
22404 +       for ( ; host_ptr->useFlag != DB_TBL_END ; host_ptr ++)
22405 +       {
22406 +               if (host_ptr->useFlag == DB_TBL_USE)
22407 +                       return true;
22408 +       }
22409 +       return false;
22410 +}
22411 +
22412 +static void
22413 +set_timeout(SIGNAL_ARGS)
22414 +{
22415 +       HostTbl * target = PGR_Cluster_DB_4_Lifecheck;
22416 +
22417 +       /* SIGALRM: count one more timeout for the host under check, if any */
22418 +       if ((target != NULL) &&
22419 +               (++target->retry_count > PGR_CONNECT_RETRY_TIME))
22420 +       {
22421 +               set_host_status(target,DB_TBL_ERROR);
22422 +       }
22423 +       PGRsignal(SIGALRM, set_timeout);        /* install the handler again */
22424 +}
22425 +
22426 +/* Ping each cluster DB in use; STATUS_ERROR only when there is no host table. */
22427 +static int
22428 +lifecheck_loop(void)
22429 +{
22430 +       HostTbl * entry = Host_Tbl_Begin;
22431 +       PGconn * db_conn = NULL;
22432 +       char * addr = NULL;
22433 +       char       port_str[8];
22434 +       int alive = 0;
22435 +
22436 +       /* nothing to do without a host table */
22437 +       if (entry == NULL)
22438 +       {
22439 +               return STATUS_ERROR;
22440 +       }
22441 +       /* cancel any alarm that is still pending */
22442 +       alarm(0);
22443 +       for ( ; entry->useFlag != DB_TBL_END ; entry ++)
22444 +       {
22445 +               /*
22446 +                * only the cluster DBs marked DB_TBL_USE are checked
22447 +                */
22448 +               if (entry->useFlag != DB_TBL_USE)
22449 +               {
22450 +                       continue;
22451 +               }
22452 +               addr = (char *)(entry->resolvedName);
22453 +               snprintf(port_str,sizeof(port_str),"%d", entry->port);
22454 +
22455 +               /* publish the host to the SIGALRM handler */
22456 +               PGR_Cluster_DB_4_Lifecheck = entry;
22457 +
22458 +               /* arm SIGALRM with the lifecheck timeout */
22459 +               alarm(PGR_Lifecheck_Timeout);
22460 +
22461 +               /* connect DB, and ping it only when the connection exists */
22462 +               db_conn = PGRcreateConn(addr,port_str, PING_DB ,PGRuserName,"","","");
22463 +               alive = 0;
22464 +               if (db_conn != NULL)
22465 +               {
22466 +                       alive = (ping_cluster(db_conn) == STATUS_OK);
22467 +               }
22468 +               set_host_status(entry, alive ? DB_TBL_USE : DB_TBL_ERROR);
22469 +
22470 +               /* reset alarm */
22471 +               alarm(0);
22472 +
22473 +               PQfinish(db_conn);
22474 +               db_conn = NULL;
22475 +       }
22476 +
22477 +       return STATUS_OK;
22478 +}
22479 +
22480 +static int
22481 +ping_cluster(PGconn * conn)
22482 +{
22483 +       int status = 0;
22484 +       PGresult * res = (PGresult *)NULL;
22485 +
22486 +       res = PQexec(conn, PING_QUERY );
22487 +
22488 +       status = PQresultStatus(res);
22489 +       if (res != NULL)
22490 +       {
22491 +               PQclear(res);
22492 +       }
22493 +       if ((status == PGRES_NONFATAL_ERROR ) ||
22494 +               (status == PGRES_FATAL_ERROR ))
22495 +       {
22496 +               return STATUS_ERROR;
22497 +       }
22498 +       return STATUS_OK;
22499 +}
22500 +
22501 +static void
22502 +set_host_status( HostTbl * host_ptr , int status )
22503 +{
22504 +       if (host_ptr == NULL)
22505 +               return;
22506 +       if (status == DB_TBL_ERROR)
22507 +       {
22508 +               host_ptr->retry_count ++;
22509 +               if (host_ptr->retry_count > PGR_CONNECT_RETRY_TIME )
22510 +               {
22511 +                       PGRset_host_status(host_ptr, status);
22512 +               }
22513 +       }
22514 +       else
22515 +       {
22516 +               host_ptr->retry_count = 0;
22517 +               PGRset_host_status(host_ptr, status);
22518 +       }
22519 +}
22520 +
22521 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/main.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/main.c
22522 --- postgresql-8.2.4/src/pgcluster/pgrp/main.c  1970-01-01 01:00:00.000000000 +0100
22523 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/main.c        2007-02-18 22:52:17.000000000 +0100
22524 @@ -0,0 +1,935 @@
22525 +/*--------------------------------------------------------------------
22526 + * FILE:
22527 + *    main.c
22528 + *    Replication server for PostgreSQL
22529 + *
22530 + * NOTE:
22531 + *    This is the main module of the replication server.
22532 + *
22533 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
22534 + *--------------------------------------------------------------------
22535 + */
22536 +#include "postgres.h"
22537 +
22538 +#include <stdio.h>
22539 +#include <string.h>
22540 +#include <unistd.h>
22541 +#include <sys/time.h>
22542 +#include <signal.h>
22543 +#include <sys/wait.h>
22544 +#include <ctype.h>
22545 +#include <sys/types.h>
22546 +#include <sys/stat.h>
22547 +#include <sys/socket.h>
22548 +#include <sys/ipc.h>
22549 +#include <sys/shm.h>
22550 +#include <netdb.h>
22551 +#include <netinet/in.h>
22552 +#include <errno.h>
22553 +#include <fcntl.h>
22554 +#include <time.h>
22555 +#include <sys/param.h>
22556 +#include <arpa/inet.h>
22557 +#include <sys/file.h>
22558 +#include <pthread.h>
22559 +
22560 +#ifdef HAVE_NETINET_TCP_H
22561 +#include <netinet/tcp.h>
22562 +#endif
22563 +#ifdef HAVE_SYS_SELECT_H
22564 +#include <sys/select.h>
22565 +#endif
22566 +
22567 +#ifdef HAVE_GETOPT_H
22568 +#include <getopt.h>
22569 +#endif
22570 +
22571 +#include "miscadmin.h"
22572 +#include "nodes/nodes.h"
22573 +
22574 +#include "libpq-fe.h"
22575 +#include "libpq/libpq-fs.h"
22576 +#include "libpq-int.h"
22577 +#include "fe-auth.h"
22578 +
22579 +
22580 +#include "access/xact.h"
22581 +#include "replicate_com.h"
22582 +#include "pgreplicate.h"
22583 +
22584 +#ifdef WIN32
22585 +#include "win32.h"
22586 +#endif
22587 +#include <arpa/inet.h>
22588 +#ifdef HAVE_CRYPT_H
22589 +#include <crypt.h>
22590 +#endif
22591 +
22592 +#ifdef MULTIBYTE
22593 +#include "mb/pg_wchar.h"
22594 +#endif
22595 +
22596 +/*--------------------------------------
22597 + * GLOBAL VARIABLE DEFINITIONS
22598 + *--------------------------------------
22599 + */
22600 +/* for replicate_com.h */
22601 +
22602 +ConfDataType * ConfData_Top = (ConfDataType *)NULL;
22603 +ConfDataType * ConfData_End = (ConfDataType *)NULL;
22604 +
22605 +/* replication server data */
22606 +char * ResolvedName = NULL; /* passed to PGR_Create_Socket_Bind(); when set, also used as host name in the start-up notice */
22607 +uint16_t Port_Number = 0; /* port bound by replicate_main() */
22608 +uint16_t LifeCheck_Port_Number = 0;
22609 +uint16_t Recovery_Port_Number = 0; /* included in the start-up notice query */
22610 +bool PGR_Parse_Session_Started = false;
22611 +int PGR_Replication_Timeout = 60; /* select() timeout (tv_sec) in replicate_main() and replicate_loop() */
22612 +int PGR_Lifecheck_Timeout = 3; /* presumably seconds, like the timeout above -- TODO confirm */
22613 +int PGR_Lifecheck_Interval = 15; /* presumably seconds -- TODO confirm */
22614 +
22615 +/* global table data (all start out NULL; none is assigned in this part of the file) */
22616 +HostTbl *Host_Tbl_Begin = NULL;
22617 +Dllist * Transaction_Tbl_Begin = NULL;
22618 +TransactionTbl * Transaction_Tbl_End = NULL;
22619 +RecoveryTbl * LoadBalanceTbl = NULL;
22620 +RecoveryStatusInf * Recovery_Status_Inf = NULL;
22621 +ReplicateHeader * PGR_Log_Header = NULL;
22622 +ReplicateServerInfo * Cascade_Tbl = NULL;
22623 +CommitLogInf * Commit_Log_Tbl = NULL;
22624 +QueryLogType * Query_Log_Top = NULL;
22625 +QueryLogType * Query_Log_End = NULL;
22626 +CascadeInf * Cascade_Inf = NULL; /* ->upper != NULL means this server has an upper server in the cascade */
22627 +ReplicationLogInf * Replicateion_Log = NULL; /* NOTE(review): misspelt name kept; renaming needs every user of the symbol changed */
22628 +/* IPC's id data */
22629 +int RecoveryShmid = 0;
22630 +int ReplicateSerializationShmid=0;
22631 +int RecoveryMsgShmid = 0;
22632 +int *RecoveryMsgid = NULL;
22633 +int HostTblShmid = 0;
22634 +int LockWaitTblShmid = 0;
22635 +int LoadBalanceTblShmid = 0;
22636 +int CascadeTblShmid = 0;
22637 +int CascadeInfShmid = 0;
22638 +int CommitLogShmid = 0;
22639 +int QueryLogMsgid = 0;
22640 +int QueryLogAnsMsgid = 0;
22641 +int PGconnMsgid = 0;
22642 +int MaxBackends = 0;
22643 +char * PGR_Result = NULL;
22644 +int SemID = 0;
22645 +int RecoverySemID= 0;
22646 +int RecovErysemid = 0; /* NOTE(review): differs from RecoverySemID only in letter case -- confirm it is still needed */
22647 +int VacuumSemID = 0;
22648 +int CascadeSemID= 0;
22649 +char * PGR_Data_Path = NULL; /* set in main(): -D argument, else $PGDATA, else "." */
22650 +char * PGR_Write_Path = NULL; /* set in main(): -W argument, else PGR_Data_Path; directory of the pid file */
22651 +int IS_SESSION_AUTHORIZATION = 0;
22652 +ResponseInf * PGR_Response_Inf = NULL; 
22653 +bool StartReplication[MAX_DB_SERVER]; 
22654 +bool PGR_Cascade = false;
22655 +bool PGR_Use_Replication_Log = false; /* when true, main() starts PGR_RLog_Main() */
22656 +bool   PGR_AutoCommit = true;
22657 +unsigned int * PGR_Send_Query_ID = NULL;
22658 +unsigned int PGR_Query_ID = 0;
22659 +volatile bool exit_processing = false; /* polled by the accept loop in replicate_main() */
22660 +int pgreplicate_pid = 0; /* getpid() of the server process, stored by main() */
22661 +
22662 +int ReplicateSock = -1; /* socket bound in replicate_main() and passed to replicate_loop() */
22663 +int exit_signo = SIGTERM;
22664 +
22665 +RecoveryQueueInf RecoveryQueue;
22666 +char * Backend_Socket_Dir = NULL;
22667 +
22668 +unsigned int * PGR_ReplicateSerializationID = NULL;
22669 +
22670 +int Log_Print = 0; /* set to 1 by the -l option */
22671 +int Debug_Print = 0; /* set to 1 by the -v option */
22672 +FILE * LogFp = (FILE *)NULL;
22673 +FILE * StatusFp = (FILE *)NULL;
22674 +FILE * RidFp = (FILE *)NULL;
22675 +FILE * QueueFp = (FILE *)NULL;
22676 +
22677 +extern char *optarg;
22678 +char * PGRuserName = NULL; /* -U argument; startup_replication_server() falls back to $LOGNAME, $USER, "postgres" */
22679 +
22680 +int fork_wait_time = 0; /* -w argument: seconds a forked child sleeps before serving its connection */
22681 +int Idle_Flag = IDLE_MODE;
22682 +volatile bool Exit_Request = false;
22683 +
22684 +pthread_mutex_t transaction_table_mutex; /* initialised and destroyed by each child in replicate_loop() */
22685 +
22686 +/*--------------------------------------
22687 + * PROTOTYPE DECLARATION (file-local functions)
22688 + *--------------------------------------
22689 + */
22690 +static void startup_replication_server(void); /* sends the start-up notice packet */
22691 +static int replicate_loop(int fd); /* accepts one connection and forks a child to serve it */
22692 +static void replicate_main(void); /* binds the replication port and runs the accept loop */
22693 +static void quick_exit(SIGNAL_ARGS); /* signal handler installed in forked children */
22694 +static void daemonize(void);
22695 +static void write_pid_file(void);
22696 +static void stop_pgreplicate(void); /* sends SIGTERM to the pid recorded in the pid file */
22697 +static bool is_exist_pid_file(void);
22698 +static void usage(void);
22699 +static void set_exit_processing(int signo); /* installed by main() for SIGINT, SIGQUIT and SIGTERM */
22700 +
22701 +/*--------------------------------------------------------------------
22702 + * SYMBOL
22703 + *    replicate_loop()
22704 + * NOTES
22705 + *    Accept one connection and serve it in a forked child process.
22706 + * ARGS
22707 + *    int fd : socket handed to PGR_Create_Acception() (I)
22708 + * RETURN
22709 + *    0: parent side, the connection was handed over to a child
22710 + *    1: accept failed
22711 + *--------------------------------------------------------------------
22712 + */
22713 +static int
22714 +replicate_loop(int fd)
22715 +{
22716 +       char * func = "replicate_loop()";
22717 +       pid_t pgid = 0;
22718 +       pid_t pid = 0;
22719 +       int sock = -1;
22720 +       int rtn = 0;
22721 +       int cnt = 0;
22722 +       int result;
22723 +       bool exist_sys_log=false;
22724 +       bool exist_replicate=false;
22725 +       bool clear_connection = false;
22726 +
22727 +       /* accept one pending connection on fd; the new socket is returned in sock */
22728 +       result = PGR_Create_Acception(fd,&sock,"",Port_Number);
22729 +       if (result == STATUS_ERROR)
22730 +       {
22731 +               show_error("%s: accept failed (%s)", func, strerror(errno));
22732 +               if (sock != -1)
22733 +                       close(sock);
22734 +               return 1;
22735 +       }
22736 +
22737 +       pgid = getpgid(0);
22738 +       pid = fork();
22739 +       if (pid <0)
22740 +       {
22741 +               show_error("%s:fork failed (%s)",func,strerror(errno));
22742 +               PGRreplicate_exit(0);
22743 +       }
22744 +       if (pid == 0)
22745 +       {
22746 +               int status = LOOP_CONTINUE;
22747 +               bool PGR_Cascade = false;       /* NOTE(review): shadows the file-scope PGR_Cascade and is never read -- confirm intended */
22748 +               ReplicateHeader  header;
22749 +               ReplicateHeader  header_save_for_recovering;
22750 +               char * query = NULL;
22751 +
22752 +               if (fork_wait_time > 0) {
22753 +                       sleep(fork_wait_time);
22754 +               }
22755 +
22756 +               close(fd);
22757 +
22758 +               PGRsignal(SIGHUP, quick_exit);
22759 +               PGRsignal(SIGINT, quick_exit);
22760 +               PGRsignal(SIGQUIT, quick_exit);
22761 +               PGRsignal(SIGTERM, quick_exit);
22762 +               PGRsignal(SIGALRM, quick_exit);
22763 +               PGRsignal(SIGPIPE, SIG_IGN);
22764 +               setpgid(0,pgid);
22765 +
22766 +               if (PGRinit_transaction_table() != STATUS_OK)
22767 +               {
22768 +                       show_error("transaction table memory allocate failed");
22769 +                       PGR_Close_Sock(&sock);
22770 +                       exit(1);
22771 +               }
22772 +
22773 +               pthread_mutex_init(&transaction_table_mutex, NULL);
22774 +
22775 +               /* child loop */
22776 +               for (;;)
22777 +               {
22778 +                       fd_set    rmask;
22779 +                       struct timeval timeout;
22780 +
22781 +                       timeout.tv_sec = PGR_Replication_Timeout;
22782 +                       timeout.tv_usec = 0;
22783 +
22784 +                       if (query != NULL)
22785 +                       {
22786 +                               free(query);
22787 +                               query = NULL;
22788 +                       }
22789 +                       /*
22790 +                        * Wait for something to happen.
22791 +                        */
22792 +                       FD_ZERO(&rmask);
22793 +                       FD_SET(sock,&rmask);
22794 +                       rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
22795 +                       if (rtn < 0)
22796 +                       {
22797 +                               if (errno == EINTR)
22798 +                                       continue;
22799 +                       }
22800 +
22801 +                       if (rtn && FD_ISSET(sock, &rmask))
22802 +                       {
22803 +                               query = NULL;
22804 +                               query = PGRread_packet(sock,&header);
22805 +                               if ((query == NULL) || (header.cmdSts == 0))
22806 +                               {
22807 +                                       /* no packet, or an empty cmdSts: wind the session down and leave the loop */
22808 +                                       if (exist_sys_log)
22809 +                                       {
22810 +                                               show_error("%s:upper cascade closed? , errno=%d(%s)",func,errno,strerror(errno));
22811 +                                               memset(&header, 0, sizeof(ReplicateHeader));
22812 +                                               header.cmdSys = CMD_SYS_CALL;
22813 +                                               header.cmdSts = CMD_STS_QUERY_SUSPEND;
22814 +                                               header.query_size = htonl(0);
22815 +                                               PGRsend_rlog_to_local(&header, NULL);
22816 +                                               exist_sys_log = false;
22817 +                                       }
22818 +                                       else
22819 +                                       {
22820 +                                               if (exist_replicate)
22821 +                                               {
22822 +                                                       PGRclear_connections();
22823 +                                                       clear_connection = true;
22824 +                                                       header_save_for_recovering.cmdSts=CMD_TYPE_OTHER;       /* NOTE(review): a CMD_TYPE_* value stored in cmdSts -- confirm */
22825 +                                                       header_save_for_recovering.cmdType=CMD_TYPE_CONNECTION_CLOSE;
22826 +                                                       header_save_for_recovering.query_size = htonl(21);      /* strlen("PGR_CLOSE_CONNECTION") + 1 */
22827 +                                                       PGRdo_replicate(sock,&header_save_for_recovering,"PGR_CLOSE_CONNECTION");
22828 +                                               }
22829 +                                               PGRsend_notice_quit();
22830 +                                       }
22831 +                                       break;
22832 +                               }
22833 +                               cnt = 0;
22834 +                               switch (header.cmdSys)
22835 +                               {
22836 +                               case CMD_SYS_LIFECHECK:
22837 +                                       PGRreturn_result(sock,"1", PGR_NOWAIT_ANSWER);
22838 +                                       break;
22839 +                               case CMD_SYS_PREREPLICATE:      /* primary == no upper server in the cascade */
22840 +                                       if(Cascade_Inf == NULL ||
22841 +                                               Cascade_Inf->upper == NULL)
22842 +                                       {
22843 +                                               /* 1 means "I am primary replicate server." */
22844 +                                               PGRreturn_result(sock,"1", PGR_NOWAIT_ANSWER);
22845 +                                       }
22846 +                                       else
22847 +                                       {
22848 +                                               /* 0 means "I am not primary replicate server." */
22849 +                                               PGRreturn_result(sock,"0", PGR_NOWAIT_ANSWER);
22850 +                                       }
22851 +                                       break;
22852 +                               case CMD_SYS_REPLICATE:
22853 +                                       if (exist_replicate == false)
22854 +                                       {
22855 +                                               /* keep the first replicate header; it is reused for the close notice above */
22856 +                                               exist_replicate=true;
22857 +                                               memcpy(&header_save_for_recovering,
22858 +                                                       &header, sizeof(ReplicateHeader));
22859 +                                       }
22860 +                                       status = PGRdo_replicate(sock,&header,query);
22861 +                                       break;
22862 +                               case CMD_SYS_LOG:
22863 +                                       exist_sys_log = true;
22864 +                                       PGRsend_rlog_to_local(&header, query);
22865 +                                       /* set own replicate id by rlog */
22866 +                                       PGRset_replication_id(ntohl(header.replicate_id));
22867 +                                       PGRsend_notice_rlog_done(sock);
22868 +                                       break;
22869 +                               case CMD_SYS_CASCADE:
22870 +                                       PGR_Cascade = true;
22871 +                                       PGRcascade_main(sock,&header,query);
22872 +                                       break;
22873 +                               case CMD_SYS_CALL:
22874 +                                       if (header.cmdSts == CMD_STS_TRANSACTION_ABORT)
22875 +                                       {
22876 +                                               PGRreconfirm_commit(sock,&header);
22877 +                                       }
22878 +                                       else if (header.cmdSts == CMD_STS_NOTICE)
22879 +                                       {
22880 +                                               /* notices need no action here */
22881 +                                       }
22882 +                                       else if (header.cmdSts == CMD_STS_RESPONSE)
22883 +                                       {
22884 +                                               if (header.cmdType == CMD_TYPE_FRONTEND_CLOSED)
22885 +                                               {
22886 +                                                       PGRsend_notice_rlog_done(sock);
22887 +                                                       status = LOOP_END;
22888 +                                               }
22889 +                                       }
22890 +                                       break;
22891 +                               default:
22892 +                                       show_error("WARNING: unknown Header->cmdSys %c",header.cmdSys);
22893 +                               }
22894 +                       }
22895 +                       if (status == LOOP_END)
22896 +                       {
22897 +                               break;
22898 +                       }
22899 +               }
22900 +
22901 +               PGR_Close_Sock(&sock);
22902 +               if (query != NULL)
22903 +               {
22904 +                       free(query);
22905 +                       query = NULL;
22906 +               }
22907 +               if (!clear_connection)
22908 +                       PGRclear_connections();
22909 +               PGRdestroy_transaction_table();
22910 +               pthread_mutex_destroy(&transaction_table_mutex);
22911 +               exit(0);
22912 +       }
22913 +       else
22914 +       {
22915 +               PGR_Close_Sock(&sock);
22916 +               return 0;
22917 +       }
22918 +}
22919 +
22920 +static void
22921 +startup_replication_server(void)
22922 +{
22923 +       ReplicateHeader  notice;
22924 +       char host[HOSTNAME_MAX_LENGTH];
22925 +       char user[USERNAME_MAX_LENGTH];
22926 +       char sql[256];
22927 +
22928 +       /*
22929 +        * Send a CMD_STS_NOTICE packet whose query calls
22930 +        * PGR_SYSTEM_COMMAND_FUNC with this server's host name and ports.
22931 +        * The login name falls back to $LOGNAME, then $USER, then "postgres".
22932 +        */
22933 +       if (PGRuserName == NULL)
22934 +               PGRuserName = getenv("LOGNAME");
22935 +       if (PGRuserName == NULL)
22936 +               PGRuserName = getenv("USER");
22937 +       if (PGRuserName == NULL)
22938 +               PGRuserName = "postgres";
22939 +
22940 +       memset(&notice,0,sizeof(ReplicateHeader));
22941 +       memset(sql,0,sizeof(sql));
22942 +       memset(host,0,sizeof(host));
22943 +       memset(user,0,sizeof(user));
22944 +
22945 +       /* ResolvedName when it is set, otherwise the local host name */
22946 +       if (ResolvedName == NULL)
22947 +               gethostname(host,sizeof(host)-1);
22948 +       else
22949 +               strncpy(host,ResolvedName,ADDRESS_LENGTH);
22950 +       strncpy(user ,PGRuserName,sizeof(user)-1);
22951 +       snprintf(sql,sizeof(sql)-1,"SELECT %s(%d,'%s',%d,%d)",
22952 +                       PGR_SYSTEM_COMMAND_FUNC,
22953 +                       PGR_STARTUP_REPLICATION_SERVER_FUNC_NO,
22954 +                       host,
22955 +                       Port_Number,
22956 +                       Recovery_Port_Number);
22957 +       notice.cmdSts = CMD_STS_NOTICE;
22958 +       notice.query_id = htonl(PGRget_next_query_id());
22959 +       notice.query_size = htonl(strlen(sql));
22960 +       memcpy(notice.from_host,host,sizeof(notice.from_host));
22961 +       memcpy(notice.userName,user,sizeof(notice.userName));
22962 +       strcpy(notice.dbName,"template1");
22963 +       replicate_packet_send_internal( &notice, sql,-1,PGRget_recovery_status(),true);
22964 +}
22965 +
22966 +/*--------------------------------------------------------------------
22967 + * SYMBOL
22968 + *    replicate_main()
22969 + * NOTES
22970 + *    Bind the replication port, send the start-up notice and dispatch connections
22971 + * ARGS
22972 + *    void
22973 + * RETURN
22974 + *    none
22975 + *--------------------------------------------------------------------
22976 + */
22977 +static void
22978 +replicate_main(void)
22979 +{
22980 +       /* declared unconditionally: the show_debug() calls that sit outside
22981 +        * the PRINT_DEBUG guards below reference it */
22982 +       char * func = "replicate_main()";
22983 +       int status;
22984 +       int rtn;
22985 +       show_debug ("%s:entering replicate_main",func);
22986 +
22987 +       /* cascade start up notice (only when an upper server is configured) */
22988 +       if (Cascade_Inf != NULL && Cascade_Inf->upper != NULL)
22989 +       {
22990 +               show_debug("initialize cascade information");
22991 +               PGRstartup_cascade();
22992 +       }
22993 +
22994 +       status = PGR_Create_Socket_Bind(&ReplicateSock, ResolvedName, Port_Number);
22995 +
22996 +       if (status != STATUS_OK)
22997 +       {
22998 +               show_debug("%s %d port bind failed. quit.",func,Port_Number);
22999 +               stop_pgreplicate();
23000 +               PGRreplicate_exit(0);
23001 +       }
23002 +#ifdef PRINT_DEBUG
23003 +       show_debug("%s %d port bind OK",func,Port_Number);
23004 +#endif
23005 +
23006 +
23007 +       /* replication start up notice */
23008 +       startup_replication_server();
23009 +
23010 +       for (;;)
23011 +       {
23012 +               fd_set    rmask;
23013 +               struct timeval timeout;
23014 +
23015 +               if (exit_processing == true)
23016 +                       PGRreplicate_exit(0);
23017 +
23018 +               timeout.tv_sec = PGR_Replication_Timeout;
23019 +               timeout.tv_usec = 0;
23020 +
23021 +
23022 +               /*
23023 +                * Wait for something to happen.
23024 +                */
23025 +               FD_ZERO(&rmask);
23026 +               FD_SET(ReplicateSock,&rmask);
23027 +               rtn = select(ReplicateSock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
23028 +               if (rtn < 0)
23029 +                       continue;
23030 +
23031 +               if (rtn && FD_ISSET(ReplicateSock, &rmask))
23032 +               {
23033 +                       /*
23034 +                        * get recovery status.
23035 +                        */
23036 +                       PGRcheck_recovered_host();
23037 +
23038 +                       if (exit_processing == true)
23039 +                               break;
23040 +
23041 +                       /*
23042 +                        * call replication module
23043 +                        */
23044 +                       replicate_loop(ReplicateSock);
23045 +               }
23046 +       }
23047 +}
23048 +
23049 +/*--------------------------------------------------------------------
23050 + * SYMBOL
23051 + *    quick_exit()
23052 + * NOTES
23053 + *    Signal handler of the forked children: terminate with exit(0)
23054 + * ARGS
23055 + *    SIGNAL_ARGS: received signal number(I)
23056 + * RETURN
23057 + *    none (does not return)
23058 + *--------------------------------------------------------------------
23059 + */
23060 +static void
23061 +quick_exit(SIGNAL_ARGS)
23062 +{
23063 +#ifdef PRINT_DEBUG
23064 +       show_debug("quick_exit:signo = %d", postgres_signal_arg);
23065 +#endif
23066 +       exit(0); /* NOTE(review): exit() runs atexit handlers and flushes stdio from signal context */
23067 +}
23068 +
23069 +/*--------------------------------------------------------------------
23070 + * SYMBOL
23071 + *    daemonize()
23072 + * NOTES
23073 + *    Daemonize this process
23074 + * ARGS
23075 + *    void
23076 + * RETURN
23077 + *    none
23078 + *--------------------------------------------------------------------
23079 + */
23080 +static void 
23081 +daemonize(void)
23082 +{
23083 +       char * func = "daemonize()";
23084 +       int             i;
23085 +       pid_t           pid;
23086 +
23087 +       pid = fork();
23088 +       if (pid == (pid_t) -1)
23089 +       {
23090 +               show_error("%s:fork() failed. reason: %s",func, strerror(errno));
23091 +               exit(1);
23092 +               return;                                 /* not reached */
23093 +       }
23094 +       else if (pid > 0)
23095 +       {                       /* parent */
23096 +               exit(0);
23097 +       }
23098 +
23099 +#ifdef HAVE_SETSID
23100 +       if (setsid() < 0)
23101 +       {
23102 +               show_error("%s:setsid() failed. reason:%s", func,strerror(errno));
23103 +               exit(1);
23104 +       }
23105 +#endif
23106 +
23107 +       i = open("/dev/null", O_RDWR);
23108 +       dup2(i, 0);
23109 +       dup2(i, 1);
23110 +       dup2(i, 2);
23111 +       close(i);
23112 +}
23113 +
23114 +/*--------------------------------------------------------------------
23115 + * SYMBOL
23116 + *    write_pid_file()
23117 + * NOTES
23118 + *    Write this process's ID to the pid file under PGR_Write_Path.
23119 + *    Exits with status 1 when the file cannot be opened or closed.
23120 + * ARGS
23121 + *    void
23122 + * RETURN
23123 + *    none
23124 + *--------------------------------------------------------------------
23125 + */
23126 +static void
23127 +write_pid_file(void)
23128 +{
23129 +       char * func = "write_pid_file()";
23130 +       char pid_path[256];
23131 +       FILE *fp;
23132 +
23133 +       snprintf(pid_path, sizeof(pid_path), "%s/%s", PGR_Write_Path, PGREPLICATE_PID_FILE);
23134 +       fp = fopen(pid_path, "w");
23135 +       if (fp == NULL)
23136 +       {
23137 +               show_error("%s:could not open pid file as %s. reason: %s",
23138 +                                  func, pid_path, strerror(errno));
23139 +               exit(1);
23140 +       }
23141 +
23142 +       /* decimal pid, no trailing newline */
23143 +       fprintf(fp, "%d", getpid());
23144 +       if (fclose(fp) != 0)
23145 +       {
23146 +               show_error("%s:could not write pid file as %s. reason: %s",
23147 +                                  func,pid_path, strerror(errno));
23148 +               exit(1);
23149 +       }
23150 +}
23151 +
23152 +/*--------------------------------------------------------------------
23153 + * SYMBOL
23154 + *    stop_pgreplicate()
23155 + * NOTES
23156 + *    Send SIGTERM to the pid in the pid file; exit(1) if unreadable, no positive pid, or kill() fails
23157 + * ARGS
23158 + *    void
23159 + * RETURN
23160 + *    none
23161 + *--------------------------------------------------------------------
23162 + */
23163 +static void
23164 +stop_pgreplicate(void)
23165 +{
23166 +       char * func = "stop_pgreplicate()";
23167 +       FILE *fd;
23168 +       char fname[256];
23169 +       char pidbuf[128];
23170 +       pid_t pid;
23171 +
23172 +       snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGREPLICATE_PID_FILE);
23173 +       fd = fopen(fname, "r");
23174 +       if (!fd)
23175 +       {
23176 +               show_error("%s:could not open pid file as %s. reason: %s",
23177 +                                  func,fname, strerror(errno));
23178 +               exit(1);
23179 +       }
23180 +       memset(pidbuf,0,sizeof(pidbuf));
23181 +       fread(pidbuf, 1, sizeof(pidbuf) - 1, fd);       /* leave the final NUL in place for atoi() */
23182 +       fclose(fd);
23183 +       pid = atoi(pidbuf);
23184 +       errno = EINVAL;         /* reported below when the file holds no usable pid; kill() overwrites it on failure */
23185 +       if (pid <= 0 || kill (pid,SIGTERM) == -1)       /* pid <= 0 would make kill() signal a process group */
23186 +       {
23187 +               show_error("%s:could not stop pid: %d, reason: %s",func,pid,strerror(errno));
23188 +               exit(1);
23189 +       }
23190 +}
23191 +
23192 +/*--------------------------------------------------------------------
23193 + * SYMBOL
23194 + *    is_exist_pid_file()
23195 + * NOTES
23196 + *    Report whether the pgreplicate pid file can be stat()ed.
23197 + * ARGS
23198 + *    void
23199 + * RETURN
23200 + *    true:  stat() on the pid file succeeded
23201 + *    false: stat() failed for any reason
23202 + *--------------------------------------------------------------------
23203 + */
23204 +static bool
23205 +is_exist_pid_file(void)
23206 +{
23207 +       struct stat st;
23208 +       char pid_path[256];
23209 +
23210 +       snprintf(pid_path, sizeof(pid_path), "%s/%s", PGR_Write_Path, PGREPLICATE_PID_FILE);
23211 +
23212 +       /*
23213 +        * The stat() result doubles as the existence test; the file's
23214 +        * attributes themselves are not examined.
23215 +        */
23216 +       if (stat(pid_path,&st) != 0)
23217 +       {
23218 +               return false;
23219 +       }
23220 +       return true;
23221 +}
23222 +
23223 +/*--------------------------------------------------------------------
23224 + * SYMBOL
23225 + *    child_wait()
23226 + * NOTES
23227 + *    Reap every terminated child without blocking; errno is preserved
23228 + * ARGS
23229 + *    int signal_args: signal number (expecting the SIGCHLD)
23230 + * RETURN
23231 + *    none
23232 + *--------------------------------------------------------------------
23233 + */
23234 +void
23235 +child_wait(SIGNAL_ARGS)
23236 +{
23237 +       int save_errno = errno; /* interrupted code may be about to test errno (e.g. for EINTR) */
23238 +       int ret;
23239 +
23240 +       while (waitpid(-1,&ret,WNOHANG) > 0)
23241 +               ;               /* keep going until no exited child is left */
23242 +       errno = save_errno;
23243 +}
23244 +
23245 +/*--------------------------------------------------------------------
23246 + * SYMBOL
23247 + *    usage()
23248 + * NOTES
23249 + *    print the pgreplicate version and command line help on stderr
23250 + * ARGS
23251 + *    void
23252 + * RETURN
23253 + *    none
23254 + *--------------------------------------------------------------------
23255 + */
23256 +static void
23257 +usage(void)
23258 +{
23259 +       char * path = getenv("PGDATA"); /* same default the option parser starts from */
23260 +
23261 +       if (path == NULL)
23262 +               path = ".";
23263 +       fprintf(stderr,"PGReplicate version [%s]\n",PGREPLICATE_VERSION);
23264 +       fprintf(stderr,"A replication server for cluster DB servers (based on PostgreSQL)\n\n");
23265 +       fprintf(stderr,"usage: pgreplicate [-D path_of_config_file] [-W path_of_work_files] [-U login user][-w wait_seconds][-l][-n][-v][-h][stop]\n");
23266 +       fprintf(stderr,"    config file default path: %s/%s\n",path, PGREPLICATE_CONF_FILE);
23267 +       fprintf(stderr,"    -w: seconds each forked child waits before serving its connection.\n");
23268 +       fprintf(stderr,"    -l: print error logs in the log file.\n");
23269 +       fprintf(stderr,"    -n: don't run in daemon mode.\n");
23270 +       fprintf(stderr,"    -v: debug mode. need '-n' flag\n");
23271 +       fprintf(stderr,"    -h: print this help\n");
23272 +       fprintf(stderr,"    stop: stop pgreplicate\n");
23273 +}
23274 +
23275 +/*--------------------------------------------------------------------
23276 + * SYMBOL
23277 + *    main()
23278 + * NOTES
23279 + *    main module of pgreplicate
23280 + * ARGS
23281 + *    int argc: number of parameter
23282 + *    char ** argv: value of parameter
23283 + * RETURN
23284 + *    none
23285 + *--------------------------------------------------------------------
23286 + */
23287 +int
23288 +main(int argc, char * argv[])
23289 +{
23290 +       char * func = "main()";
23291 +       int opt = 0;
23292 +       char * r_path = NULL;
23293 +       char * w_path = NULL;
23294 +       bool detach = true;
23295 +       pid_t rlog_pid;
23296 +
23297 +       r_path = getenv("PGDATA");
23298 +       if (r_path == NULL)
23299 +               r_path = ".";
23300 +       while ((opt = getopt(argc, argv, "U:D:W:w:lvnh")) != -1)
23301 +       {
23302 +               switch (opt)
23303 +               {
23304 +                       case 'U':
23305 +                               if (!optarg)
23306 +                               {
23307 +                                       usage();
23308 +                                       exit(1);
23309 +                               }
23310 +                               PGRuserName = strdup(optarg);
23311 +                               break;
23312 +                       case 'D':
23313 +                               if (!optarg)
23314 +                               {
23315 +                                       usage();
23316 +                                       exit(1);
23317 +                               }
23318 +                               r_path = optarg;
23319 +                               break;
23320 +                       case 'W':
23321 +                               if (!optarg)
23322 +                               {
23323 +                                       usage();
23324 +                                       exit(1);
23325 +                               }
23326 +                               w_path = optarg;
23327 +                               break;
23328 +                       case 'w':
23329 +                               fork_wait_time = atoi(optarg);
23330 +                               if (fork_wait_time < 0)
23331 +                                       fork_wait_time = 0;
23332 +                               break;
23333 +                       case 'l':
23334 +                               Log_Print = 1;
23335 +                               break;
23336 +                       case 'v':
23337 +                               Debug_Print = 1;
23338 +                               break;
23339 +                       case 'n':
23340 +                               detach = false;
23341 +                               break;
23342 +                       case 'h':
23343 +                               usage();
23344 +                               exit(0);
23345 +                               break;
23346 +                       default:
23347 +                               usage();
23348 +                               exit(1);
23349 +               }
23350 +       }
23351 +       PGR_Data_Path = r_path;
23352 +       if (w_path == NULL)
23353 +       {
23354 +               PGR_Write_Path = PGR_Data_Path;
23355 +       }
23356 +       else
23357 +       {
23358 +               PGR_Write_Path = w_path;
23359 +       }
23360 +
23361 +       if (optind == (argc-1) && !strncasecmp(argv[optind],"stop",4))
23362 +       {
23363 +               stop_pgreplicate();
23364 +               exit(0);
23365 +       }
23366 +       else if (optind == argc)
23367 +       {
23368 +               if (is_exist_pid_file())
23369 +               {
23370 +                       fprintf(stderr,"pid file %s/%s found. is another pgreplicate running?", PGR_Write_Path, PGREPLICATE_PID_FILE);
23371 +                       exit(1);
23372 +               }
23373 +       }
23374 +       else if (optind < argc)
23375 +       {
23376 +               usage();
23377 +               exit(1);
23378 +       }
23379 +
23380 +       if (detach)
23381 +       {
23382 +               daemonize();
23383 +       }
23384 +
23385 +       PGR_Under_Replication_Server = true;
23386 +       write_pid_file();
23387 +       pgreplicate_pid = getpid();
23388 +
23389 +       PGRsignal(SIGINT, set_exit_processing);
23390 +       PGRsignal(SIGQUIT, set_exit_processing);
23391 +       PGRsignal(SIGTERM, set_exit_processing);
23392 +       PGRsignal(SIGCHLD, child_wait);
23393 +       PGRsignal(SIGPIPE, SIG_IGN);
23394 +
23395 +       if (PGRget_Conf_Data(PGR_Data_Path) != STATUS_OK)
23396 +       {
23397 +               show_error("%s:PGRget_Conf_Data error",func);
23398 +               PGRreplicate_exit(0);
23399 +       }
23400 +       if (PGRinit_recovery() != STATUS_OK)
23401 +       {
23402 +               show_error("%s:PGRinit_recovery error",func);
23403 +               PGRreplicate_exit(0);
23404 +       }
23405 +       if (PGRload_replication_id() != STATUS_OK)
23406 +       {
23407 +               show_error("%s:PGRload_replication_id error",func);
23408 +               PGRreplicate_exit(0);
23409 +       }
23410 +
23411 +       if ( PGR_Use_Replication_Log == true )
23412 +       {
23413 +#ifdef PRINT_DEBUG
23414 +               show_debug("Use Replication Log. Start PGR_RLog_Main()");
23415 +#endif
23416 +               rlog_pid = PGR_RLog_Main();
23417 +               if (rlog_pid < 0)
23418 +               {
23419 +                       show_error("%s:PGR_RLog_Main failed",func);
23420 +                       PGRreplicate_exit(0);
23421 +               }
23422 +       }
23423 +
23424 +       /*
23425 +        * fork recovery process
23426 +        */
23427 +       PGRrecovery_main(fork_wait_time);
23428 +
23429 +       /*
23430 +        * fork lifecheck process
23431 +        */
23432 +       PGRlifecheck_main(fork_wait_time);
23433 +
23434 +       /*
23435 +        * call replicate module
23436 +        */
23437 +       Replicateion_Log->r_log_sock =-1;
23438 +
23439 +       if (fork_wait_time > 0) {
23440 +#ifdef PRINT_DEBUG
23441 +               show_debug("replicate process: wait fork(): pid = %d", getpid());
23442 +#endif         
23443 +               sleep(fork_wait_time);
23444 +       }
23445 +
23446 +       replicate_main();
23447 +
23448 +       PGRreplicate_exit(0);
23449 +       return STATUS_OK;
23450 +}
23451 +
23452 +static void
23453 +set_exit_processing(int signo)  /* installed by main() for SIGINT, SIGQUIT and SIGTERM */
23454 +{
23455 +       exit_signo = signo;             /* record which signal requested the exit */
23456 +       exit_processing = true;         /* set the global exit_processing flag */
23457 +       PGRsignal(signo, SIG_IGN);      /* re-register this signal with SIG_IGN (presumably ignores repeats -- confirm in PGRsignal) */
23458 +}
23459 +
23460 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/pgreplicate.conf.sample pgcluster-1.7.0rc7/src/pgcluster/pgrp/pgreplicate.conf.sample
23461 --- postgresql-8.2.4/src/pgcluster/pgrp/pgreplicate.conf.sample 1970-01-01 01:00:00.000000000 +0100
23462 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/pgreplicate.conf.sample       2007-02-18 22:52:17.000000000 +0100
23463 @@ -0,0 +1,113 @@
23464 +#=============================================================
23465 +#  PGReplicate configuration file
23466 +#-------------------------------------------------------------
23467 +# file: pgreplicate.conf
23468 +#-------------------------------------------------------------
23469 +# This file controls:
23470 +#       o which hosts & ports are cluster servers
23471 +#       o which port is used for replication requests from cluster servers
23472 +#=============================================================
23473 +#
23474 +#-------------------------------------------------------------
23475 +# A setup of Cluster DB(s)
23476 +#
23477 +#              o Host_Name :           The host name of Cluster DB.
23478 +#                                      Please write a host name by FQDN.
23479 +#                                      DO NOT write IP address.
23480 +#              o Port :                The connection port with postmaster.
23481 +#              o Recovery_Port :       The connection port at the time of 
23482 +#                                      a recovery sequence.
23483 +#-------------------------------------------------------------
23484 +#<Cluster_Server_Info>
23485 +#    <Host_Name>               master.pgcluster.org            </Host_Name>
23486 +#    <Port>                    5432                            </Port>
23487 +#    <Recovery_Port>           7001                            </Recovery_Port>
23488 +#</Cluster_Server_Info>
23489 +#<Cluster_Server_Info>
23490 +#    <Host_Name>               clusterdb2.pgcluster.org        </Host_Name>
23491 +#    <Port>                    5432                            </Port>
23492 +#    <Recovery_Port>           7001                            </Recovery_Port>
23493 +#</Cluster_Server_Info>
23494 +#<Cluster_Server_Info>
23495 +#    <Host_Name>               cluster3.pgcluster.org          </Host_Name>
23496 +#    <Port>                    5432                            </Port>
23497 +#    <Recovery_Port>           7001                            </Recovery_Port>
23498 +#</Cluster_Server_Info>
23499 +#
23500 +#-------------------------------------------------------------
23501 +# A setup of Load Balance Server
23502 +#
23503 +#              o Host_Name :           The host name of a load balance server.
23504 +#                                      Please write a host name by FQDN or IP address.
23505 +#              o Recovery_Port :       The connection port at the time of 
23506 +#                                      a recovery sequence .
23507 +#-------------------------------------------------------------
23508 +#<LoadBalance_Server_Info>
23509 +#      <Host_Name>             loadbalancer.pgcluster.org      </Host_Name>
23510 +#      <Recovery_Port>         6001                            </Recovery_Port>
23511 +#</LoadBalance_Server_Info>
23512 +#
23513 +#------------------------------------------------------------
23514 +# A setup of the cascade connection between replication servers.
23515 +# When you do not use RLOG recovery, you can skip this setup
23516 +#
23517 +#              o Host_Name :   The host name of the upper replication server.
23518 +#                              Please write a host name by FQDN or IP address.
23519 +#              o Port :        The connection port with postmaster.
23520 +#              o Recovery_Port : The connection port at the time of
23521 +#                                a recovery sequence .
23522 +#------------------------------------------------------------
23523 +#<Replicate_Server_Info>
23524 +#      <Host_Name>             upper_replicate.pgcluster.org   </Host_Name>
23525 +#      <Port>                  8002                            </Port>
23526 +#      <Recovery_Port>         8102                            </Recovery_Port>
23527 +#</Replicate_Server_Info>
23528 +#
23529 +#-------------------------------------------------------------
23530 +# A setup of a replication server
23531 +#
23532 +#              o Host_Name :           The host name of this replication server.
23533 +#                                      Please write a host name by FQDN or IP address.
23534 +#              o Replication_Port :    Connection port for replication
23535 +#              o Recovery_Port :       Connection port for recovery
23536 +#              o RLOG_Port :           Connection port for replication log
23537 +#              o Response_Mode :       When a response is returned
23538 +#                                      normal   -- return the result of the DB which received the query
23539 +#                                      reliable -- return the result after waiting for the responses of
23540 +#                                                  all Cluster DBs.
23541 +#              o Use_Replication_Log : Use replication log
23542 +#                                       [yes/no]. default : no
23543 +#              o Replication_Timeout : Timeout of each replication response
23544 +#              o LifeCheck_Timeout :   Timeout of the lifecheck response
23545 +#              o LifeCheck_Interval :  Interval time of the lifecheck
23546 +#                                      (range 1s - 1h)
23547 +#                                      10s   -- 10 seconds
23548 +#                                      10min -- 10 minutes
23549 +#                                      1h    -- 1 hour
23550 +#-------------------------------------------------------------
23551 +<Host_Name>                    replicate.pgcluster.org         </Host_Name>
23552 +<Replication_Port>             8001                            </Replication_Port>
23553 +<Recovery_Port>                8101                            </Recovery_Port>
23554 +<RLOG_Port>                    8301                            </RLOG_Port>
23555 +<Response_Mode>                normal                          </Response_Mode>
23556 +<Use_Replication_Log>          no                              </Use_Replication_Log>
23557 +<Replication_Timeout>          1min                            </Replication_Timeout>
23558 +<LifeCheck_Timeout>            3s                              </LifeCheck_Timeout>
23559 +<LifeCheck_Interval>           15s                             </LifeCheck_Interval>
23560 +#-------------------------------------------------------------
23561 +# A setup of log files
23562 +#
23563 +#              o File_Name :   Log file name with full path
23564 +#              o File_Size :   Maximum size of each log files
23565 +#                              Please specify in a number and unit(K or M)
23566 +#                                10  -- 10 Byte
23567 +#                                10K -- 10 KByte
23568 +#                                10M -- 10 MByte
23569 +#              o Rotate :      Rotation times
23570 +#                              If specified 0, old versions are removed.
23571 +#-------------------------------------------------------------
23572 +<Log_File_Info>
23573 +       <File_Name>             /tmp/pgreplicate.log    </File_Name>
23574 +       <File_Size>             1M                      </File_Size>
23575 +       <Rotate>                3                       </Rotate>
23576 +</Log_File_Info>
23577 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/pgreplicate.h pgcluster-1.7.0rc7/src/pgcluster/pgrp/pgreplicate.h
23578 --- postgresql-8.2.4/src/pgcluster/pgrp/pgreplicate.h   1970-01-01 01:00:00.000000000 +0100
23579 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/pgreplicate.h 2007-03-01 16:27:56.000000000 +0100
23580 @@ -0,0 +1,425 @@
23581 +/*--------------------------------------------------------------------
23582 + * FILE:
23583 + *     pgreplicate.h
23584 + *
23585 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
23586 + *--------------------------------------------------------------------
23587 + */
23588 +#ifndef PGREPLICATE_H
23589 +#define PGREPLICATE_H
23590 +
23591 +#define PGREPLICATE_VERSION            "1.7.0rc7"
23592 +
23593 +#include "lib/dllist.h"
23594 +#include "lib/stringinfo.h"
23595 +#include "../libpgc/libpgc.h"
23596 +
23597 +/* cascade packet id */
23598 +#define        CMD_SYS_CASCADE 'C'
23599 +#define CMD_STS_TO_UPPER 'U'
23600 +#define CMD_STS_TO_LOWER 'L'
23601 +#define CMD_TYPE_ADD 'A'
23602 +#define CMD_TYPE_DELTE 'D'
23603 +#define CMD_TYPE_UPDATE_ALL 'A'
23604 +
23605 +/* log packet id */
23606 +#define        CMD_SYS_LOG     'L'
23607 +#define CMD_STS_DELETE_QUERY 'q'
23608 +#define CMD_STS_DELETE_TRANSACTION 't'
23609 +#define CMD_STS_UPDATE_QUERY 'r'
23610 +#define CMD_STS_UPDATE_TRANSACTION 'u'
23611 +
23612 +#define INIT_TRANSACTION_TBL_NUM (12)
23613 +#define FILENAME_MAX_LENGTH    (256)
23614 +#define MAX_DB_SERVER  (32)
23615 +#define        MAX_CONNECTIONS (128)
23616 +#define MAX_QUEUE_FILE_SIZE (0x40000000)
23617 +#define PGR_MAX_TICKETS (0x7FFFFFFF)
23618 +#define PGR_MAX_QUERY_ID (0x7FFFFFFF)
23619 +#define PGR_CONNECT_RETRY_TIME  (3)
23620 +#define PGR_EXEC_RETRY_TIME  (5)
23621 +#define DB_TBL_FREE    (0)
23622 +#define DB_TBL_INIT    (1)
23623 +#define DB_TBL_USE     (2)
23624 +#define DB_TBL_ERROR   (-1)
23625 +#define DB_TBL_TOP     (10)
23626 +#define DB_TBL_END     (11)
23627 +#define RECOVERY_FILE_MTYPE    (1)
23628 +#define QUERY_LOG_MTYPE (2)
23629 +#define PGREPLICATE_CONF_FILE  "pgreplicate.conf"
23630 +#define PGREPLICATE_LOG_FILE   "pgreplicate.log"
23631 +#define PGREPLICATE_STATUS_FILE        "pgreplicate.sts"
23632 +#define PGREPLICATE_PID_FILE   "pgreplicate.pid"
23633 +#define PGREPLICATE_RID_FILE   "pgreplicate.rid"
23634 +#define RECOVERY_QUEUE_FILE    "pgr_recovery"
23635 +/* setup data tag of the configuration file */
23636 +#define CLUSTER_SERVER_TAG     "Cluster_Server_Info"
23637 +#define LOAD_BALANCE_SERVER_TAG        "LoadBalance_Server_Info"
23638 +#define REPLICATE_PORT_TAG     "Replication_Port"
23639 +#define RECOVERY_PORT_TAG      "Recovery_Port"
23640 +#define LIFECHECK_PORT_TAG     "LifeCheck_Port"
23641 +#define RLOG_PORT_TAG          "RLOG_Port"
23642 +#define RESPONSE_MODE_TAG      "Response_Mode"
23643 +#define        RESPONSE_MODE_FAST      "fast"
23644 +#define        RESPONSE_MODE_NORMAL    "normal"
23645 +#define        RESPONSE_MODE_RELIABLE  "reliable"
23646 +#define        USE_REPLICATION_LOG_TAG "Use_Replication_Log"
23647 +#define        RESERVED_CONNECTIONS_TAG        "Reserved_Connections"
23648 +/* semaphore number of recovery queue */
23649 +#define SEM_NUM_OF_RECOVERY    (1)
23650 +#define SEM_NUM_OF_RECOVERY_QUEUE      (2)
23651 +/* semaphore number of lock tickets */
23652 +#define SEM_NUM_OF_LOCK        (1)
23653 +#define STATUS_LOCK_CONFLICT (2)
23654 +#define STATUS_DEADLOCK_DETECT (3)
23655 +#define STATUS_ABORTED (4)
23656 +#define STATUS_NOT_YET_REPLICATE (5)
23657 +#define STATUS_ALREADY_REPLICATED (6)
23658 +#define STATUS_SKIP_REPLICATE (7)
23659 +#define PGR_NOWAIT_ANSWER (0)
23660 +#define PGR_WAIT_ANSWER (1)
23661 +#define LOOP_CONTINUE  (0)
23662 +#define LOOP_END       (1)
23663 +#define LOWER_CASCADE  (1)
23664 +#define UPPER_CASCADE  (2)
23665 +#define ALL_CASCADE    (3)
23666 +#define NOTICE_SYSTEM_CALL_TYPE (10)
23667 +#define RECOVERY_QUERY_TYPE (20)
23668 +
23669 +#define PGR_TIME_OUT   (60)
23670 +#define PGR_SEND_RETRY_CNT (100)
23671 +#define PGR_SEND_WAIT_MSEC (500)
23672 +#define PGR_RECV_RETRY_CNT (100)
23673 +#define PGR_RECV_WAIT_MSEC (500)
23674 +#define PGR_SEM_UNLOCK_WAIT_MSEC (100)
23675 +#define PGR_SEM_LOCK_WAIT_MSEC (500)
23676 +#define PGR_RECOVERY_RETRY_CNT (6000)
23677 +#define PGR_RECOVERY_WAIT_MSEC (500)
23678 +#define PGR_CHECK_POINT    (300)
23679 +
23680 +#define PGR_RECOVERY_1ST_STAGE (1)
23681 +#define PGR_RECOVERY_2ND_STAGE (2)
23682 +
23683 +#define IDLE_MODE      (0)
23684 +#define BUSY_MODE      (1)
23685 +
23686 +/*
23687 + * connection table for transaction query
23688 + */
23689 +typedef struct {
23690 +       int useFlag;                    /* presumably one of the DB_TBL_* states -- TODO confirm */
23691 +       int lock;
23692 +       int transaction_count;
23693 +       unsigned short port;
23694 +       unsigned short pid;             /* NOTE(review): if this holds a process id, values above USHRT_MAX are truncated -- confirm */
23695 +       unsigned int hostIP;
23696 +       unsigned int srcHostIP;
23697 +       char host[HOSTNAME_MAX_LENGTH];
23698 +       char srcHost[HOSTNAME_MAX_LENGTH];
23699 +       char dbName[DBNAME_MAX_LENGTH];
23700 +       PGconn  * conn;                 /* libpq connection handle for this entry */
23701 +       bool in_transaction;
23702 +       bool exec_copy;
23703 +}TransactionTbl;
23704 +
23705 +/*
23706 + * cluster server table
23707 + */
23708 +typedef struct {
23709 +       int useFlag;
23710 +       char hostName[HOSTNAME_MAX_LENGTH];
23711 +       char resolvedName[24];
23712 +       int port;
23713 +       int recoveryPort;
23714 +       int hostNum;
23715 +       int transaction_count;
23716 +       int retry_count;
23717 +}HostTbl;
23718 +
23719 +
23720 +typedef struct {
23721 +       FILE * queue_fp;
23722 +       int current_queue_no;
23723 +} RecoveryQueueInf;
23724 +
23725 +
23726 +/*
23727 + * host table for recovery request
23728 + */
23729 +typedef struct {
23730 +       char hostName[HOSTNAME_MAX_LENGTH];
23731 +       char resolvedName[24];
23732 +       int port;
23733 +       int recoveryPort;
23734 +       int sock;
23735 +       int recovery_sock;
23736 +} RecoveryTbl;
23737 +
23738 +/*
23739 + * status table for recovery
23740 + */
23741 +typedef struct {
23742 +       int useFlag;
23743 +       int transaction_count;
23744 +       int recovery_status;
23745 +       unsigned int replication_id;
23746 +       HostTbl target_host;
23747 +       int read_queue_no;
23748 +       int write_queue_no;
23749 +       int check_point;
23750 +       unsigned int file_size;
23751 +       char write_file[FILENAME_MAX_LENGTH];
23752 +       char read_file[FILENAME_MAX_LENGTH];
23753 +} RecoveryStatusInf;
23754 +
23755 +typedef struct {                /* NOTE(review): long mtype + data looks like a System V msgbuf layout -- confirm */
23756 +       long mtype;             /* presumably RECOVERY_FILE_MTYPE -- TODO confirm */
23757 +       char mdata[1];          /* presumably first byte of a variable-length payload -- TODO confirm */
23758 +} RecoveryQueueFile;
23759 +
23760 +typedef struct {                /* NOTE(review): long mtype first, like a System V msgbuf -- confirm */
23761 +       long mtype;             /* presumably QUERY_LOG_MTYPE -- TODO confirm */
23762 +       unsigned int replicationId;
23763 +       char mdata[1];          /* presumably first byte of a variable-length payload -- TODO confirm */
23764 +} RecoveryQueueQuery;
23765 +
23766 +typedef struct {
23767 +       unsigned int entry_ticket;
23768 +       unsigned int lock_wait_queue_length;
23769 +       int overflow;
23770 +} LockWaitInf;
23771 +
23772 +typedef struct {
23773 +       int response_mode;
23774 +       int current_cluster;
23775 +} ResponseInf;
23776 +
23777 +typedef struct {
23778 +       ReplicateHeader * header;
23779 +       char * query;
23780 +       char * next;
23781 +       char * last;
23782 +} QueryLogType;
23783 +
23784 +typedef struct {
23785 +       ReplicateServerInfo * top;
23786 +       ReplicateServerInfo * end;
23787 +       ReplicateServerInfo * lower;
23788 +       ReplicateServerInfo * upper;
23789 +       ReplicateServerInfo * myself;
23790 +       int useFlag;
23791 +} CascadeInf;
23792 +
23793 +typedef struct {
23794 +       union                           /* useFlag and commit_log_num share the same storage */
23795 +       {
23796 +               int useFlag;
23797 +               int commit_log_num;
23798 +       } inf;
23799 +       ReplicateHeader header;         /* embedded by value, not a pointer */
23800 +} CommitLogInf;
23801 +
23802 +typedef struct {
23803 +       int useFlag;
23804 +       char * RLog_Sock_Path;
23805 +       uint16_t RLog_Port_Number;
23806 +       int r_log_sock;
23807 +       ReplicateHeader * header;
23808 +       char * query;
23809 +} ReplicationLogInf;
23810 +
23811 +typedef struct {
23812 +       char hostName[HOSTNAME_MAX_LENGTH];
23813 +       uint16_t port;
23814 +       uint16_t pid;
23815 +       uint32_t request_id;
23816 +} QueryLogID; 
23817 +
23818 +typedef struct {
23819 +       QueryLogID query_log_id;
23820 +       char * last;
23821 +       char * next;
23822 +} ConfirmQueryList;
23823 +
23824 +typedef struct {
23825 +       ReplicateHeader * header;
23826 +       char * query;
23827 +       int dest;
23828 +       int current_cluster;
23829 +       int transaction_count;
23830 +       HostTbl * host_ptr;
23831 +       TransactionTbl *transaction_tbl;
23832 +} ThreadArgInf;
23833 +
23834 +/* replication server data */
23835 +extern char * ResolvedName;
23836 +extern uint16_t Port_Number;
23837 +extern uint16_t LifeCheck_Port_Number;
23838 +extern uint16_t Recovery_Port_Number;
23839 +extern int Reserved_Connections;
23840 +extern bool PGR_Parse_Session_Started;
23841 +extern int PGR_Replication_Timeout;
23842 +
23843 +/* global tables */
23844 +extern HostTbl * Host_Tbl_Begin;
23845 +extern Dllist * Transaction_Tbl_Begin;
23846 +extern TransactionTbl * Transaction_Tbl_End;
23847 +extern RecoveryTbl * LoadBalanceTbl;
23848 +extern RecoveryStatusInf * Recovery_Status_Inf;
23849 +extern LockWaitInf * Lock_Wait_Tbl;
23850 +extern ReplicateHeader * PGR_Log_Header;
23851 +extern ReplicateServerInfo * Cascade_Tbl;
23852 +extern CascadeInf * Cascade_Inf;
23853 +extern CommitLogInf * Commit_Log_Tbl;
23854 +extern QueryLogType * Query_Log_Top;
23855 +extern QueryLogType * Query_Log_End;
23856 +extern ReplicationLogInf * Replicateion_Log;
23857 +extern int RecoveryShmid;
23858 +extern int ReplicateSerializationShmid;
23859 +extern int RecoveryMsgShmid;
23860 +extern int *RecoveryMsgid;
23861 +extern int HostTblShmid;
23862 +extern int LockWaitTblShmid;
23863 +extern int CascadeTblShmid;
23864 +extern int CascadeInfShmid;
23865 +extern int CommitLogShmid;
23866 +extern int MaxBackends;
23867 +extern char * PGR_Result;
23868 +extern int SemID;
23869 +extern int RecoverySemID;
23870 +extern int CascadeSemID;
23871 +extern int LockSemID;
23872 +extern int VacuumSemID;
23873 +extern char * PGR_Data_Path;
23874 +extern char * PGR_Write_Path;
23875 +extern FILE * LogFp;
23876 +extern FILE * StatusFp;
23877 +extern FILE * RidFp;
23878 +extern FILE * QueueFp;
23879 +extern int Log_Print;
23880 +extern int Debug_Print;
23881 +extern char * Function;
23882 +extern int IS_SESSION_AUTHORIZATION;
23883 +extern ResponseInf * PGR_Response_Inf;
23884 +extern bool StartReplication[MAX_DB_SERVER];
23885 +extern bool PGR_Cascade;
23886 +extern bool    PGR_Use_Replication_Log;
23887 +extern bool    PGR_AutoCommit;
23888 +extern unsigned int * PGR_ReplicateSerializationID;
23889 +extern unsigned int * PGR_Send_Query_ID;
23890 +extern unsigned int PGR_Query_ID;
23891 +extern volatile bool exit_processing;
23892 +extern RecoveryQueueInf RecoveryQueue;
23893 +extern int pgreplicate_pid;
23894 +extern char * PGRuserName;
23895 +extern int exit_signo;
23896 +
23897 +extern int ReplicateSock;
23898 +
23899 +/* smart shutdown */
23900 +extern int Idle_Flag;
23901 +extern volatile bool Exit_Request;
23902 +
23903 +/*
23904 + * external prototype in main.c
23905 + */
23906 +extern void child_wait(SIGNAL_ARGS);
23907 +
23908 +/*
23909 + * external prototype in conf.c
23910 + */
23911 +extern int PGRget_Conf_Data(char * path);
23912 +
23913 +/*
23914 + * external prototype in replicate.c
23915 + */
23916 +extern int PGRset_replication_id(uint32_t rid);
23917 +extern bool PGRis_same_host(char * host1, unsigned short port1 , char * host2, unsigned short port2);
23918 +extern HostTbl * PGRadd_HostTbl(HostTbl *  conf_data, int useFlag);
23919 +extern HostTbl * PGRget_master(void);
23920 +extern void PGRset_recovery_status(int status);
23921 +extern int PGRget_recovery_status(void);
23922 +extern int PGRcheck_recovered_host(void);
23923 +extern int PGRset_recovered_host(HostTbl * target,int useFlag);
23924 +extern int PGRinit_recovery(void);
23925 +extern void PGRexit_subprocess(int signo);
23926 +extern void PGRreplicate_exit(int exit_status);
23927 +extern int PGRsend_replicate_packet_to_server( HostTbl * host_ptr, ReplicateHeader * header, char *query , char * result,unsigned int replicationId, bool recovery);
23928 +extern int PGRreplicate_packet_send_each_server( HostTbl * ptr, bool return_response, ReplicateHeader * header, char * query,int dest);
23929 +extern HostTbl * PGRget_HostTbl(char * hostName,int port);
23930 +extern int PGRset_queue(ReplicateHeader * header,char * query);
23931 +extern int PGRset_host_status(HostTbl * host_ptr,int status);
23932 +extern void PGRclear_connections(void);
23933 +extern void PGRdestroy_transaction_table(void);
23934 +extern void PGRsem_unlock( int semid, short sem_num );
23935 +extern void PGRsem_lock( int semid, short sem_num );
23936 +extern int PGRdo_replicate(int sock,ReplicateHeader *header, char * query);
23937 +extern int PGRreturn_result(int dest, char * result, int wait);
23938 +extern int PGRreplicate_packet_send( ReplicateHeader * header, char * query,int dest,int recovery_status);
23939 +extern char * PGRread_packet(int sock, ReplicateHeader *header);
23940 +extern void PGRnotice_replication_server(char * hostName, unsigned short portNumber,unsigned short recoveryPortNumber, unsigned short lifecheckPortNumber, char * userName);
23941 +extern char * PGRread_query(int sock, ReplicateHeader *header);
23942 +extern int PGRsync_oid(ReplicateHeader *header);
23943 +extern unsigned int PGRget_next_query_id(void);
23944 +extern int PGRinit_transaction_table(void);
23945 +extern int replicate_packet_send_internal(ReplicateHeader * header, char * query,int dest,int recovery_status,bool isHeldLock);
23946 +extern int PGRsync_oid(ReplicateHeader *header);
23947 +extern int PGRload_replication_id(void);
23948 +extern PGconn * PGRcreateConn( char * host, char * port,char * database, char * userName, char * password, char * md5Salt, char * cryptSalt );
23949 +/*
23950 + * external prototype in recovery.c
23951 + */
23952 +extern int PGRsend_load_balance_packet(RecoveryPacket * packet);
23953 +extern void PGRrecovery_main(int fork_wait_time);
23954 +extern FILE * PGRget_recovery_queue_file_for_write(void);
23955 +extern FILE * PGRget_recovery_queue_file_for_read(int next);
23956 +
23957 +/*
23958 + * external prototype in rlog.c
23959 + */
23960 +extern int PGRwrite_rlog(ReplicateHeader * header, char * query);
23961 +extern ReplicateHeader * PGRget_requested_query(ReplicateHeader * header);
23962 +extern void PGRreconfirm_commit(int sock, ReplicateHeader * header);
23963 +extern void PGRset_rlog(ReplicateHeader * header, char * query);
23964 +extern void PGRunset_rlog(ReplicateHeader * header, char * query);
23965 +extern int PGRresend_rlog_to_db(void);
23966 +extern void PGRreconfirm_query(int sock, ReplicateHeader * header);
23967 +extern pid_t  PGR_RLog_Main(void);
23968 +extern int PGRcreate_send_rlog_socket(void);
23969 +extern int PGRsend_rlog_packet(int sock,ReplicateHeader * header, const char * query_string);
23970 +extern int PGRrecv_rlog_result(int sock,void * result, int size);
23971 +extern int PGRsend_rlog_to_local(ReplicateHeader * header,char * query);
23972 +extern int PGRget_rlog_header(ReplicateHeader * header);
23973 +
23974 +/*
23975 + * external prototype in cascade.c
23976 + */
23977 +extern int PGRstartup_cascade(void);
23978 +extern int PGRsend_lower_cascade(ReplicateHeader * header, char * query);
23979 +extern int PGRsend_upper_cascade(ReplicateHeader * header, char * query);
23980 +extern int PGRwait_answer_cascade(int  sock);
23981 +extern ReplicateServerInfo * PGRget_lower_cascade(void);
23982 +extern ReplicateServerInfo * PGRget_upper_cascade(void);
23983 +extern void PGRset_cascade_server_status(ReplicateServerInfo * cascade, int status);
23984 +extern ReplicateServerInfo * PGRrecv_cascade_answer(ReplicateServerInfo * cascade,ReplicateHeader * header);
23985 +extern int PGRsend_cascade(int sock , ReplicateHeader * header, char * query);
23986 +extern int PGRcascade_main(int sock, ReplicateHeader * header, char * query);
23987 +extern int PGRwait_notice_rlog_done(void);
23988 +extern int PGRsend_notice_rlog_done(int sock);
23989 +extern int PGRsend_notice_quit(void);
23990 +
23991 +/*
23992 + * external prototype in pqformat.c
23993 + */
23994 +extern const char * pq_getmsgstring(StringInfo msg);
23995 +extern unsigned int pq_getmsgint(StringInfo msg, int b);
23996 +extern void pq_copymsgbytes(StringInfo msg, char *buf, int datalen);
23997 +extern const char * pq_getmsgbytes(StringInfo msg, int datalen);
23998 +extern int pq_getmsgbyte(StringInfo msg);
23999 +
24000 +/*
24001 + * external prototype in lifecheck.c
24002 + */
24003 +extern int PGRlifecheck_main(int fork_wait_time);
24004 +
24005 +#endif /* PGREPLICATE_H */
24006 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/pqformat.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/pqformat.c
24007 --- postgresql-8.2.4/src/pgcluster/pgrp/pqformat.c      1970-01-01 01:00:00.000000000 +0100
24008 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/pqformat.c    2007-02-18 22:52:17.000000000 +0100
24009 @@ -0,0 +1,173 @@
24010 +/*-------------------------------------------------------------------------
24011 + * pqformat.c
24012 + *             Routines for formatting and parsing frontend/backend messages
24013 + *
24014 + * These routines were copied from src/backend/libpq/pqformat.c.
24015 + * The originals depend on shared backend modules and macros, which
24016 + * makes them difficult to link into the replication server directory.
24017 + * Therefore, these routines were customized
24018 + * (the shared modules and macros were removed).
24019 + *
24020 + * Original source code is under the following copyright
24021 + * 
24022 + * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
24023 + * Portions Copyright (c) 1994, Regents of the University of California
24024 + *
24025 + *-------------------------------------------------------------------------
24026 + */
24027 +/*
24028 + * INTERFACE ROUTINES
24029 + * Message parsing after input:
24030 + *             pq_getmsgbyte   - get a raw byte from a message buffer
24031 + *             pq_getmsgint    - get a binary integer from a message buffer
24032 + *             pq_getmsgbytes  - get raw data from a message buffer
24033 + *             pq_copymsgbytes - copy raw data from a message buffer
24034 + *             pq_getmsgstring - get a null-terminated text string (no conversion)
24035 + */
24036 +
24037 +/* --------------------------------
24038 + *             pq_getmsgstring - get a null-terminated text string
24039 + *
24040 + *             Returns a pointer directly into the message buffer (no encoding
24041 + *             conversion is performed), or NULL on a NULL or unterminated message.
24042 + * --------------------------------
24043 + */
24044 +
24045 +#include "postgres.h"
24046 +#include <errno.h>
24047 +#include <sys/types.h>
24048 +#include <sys/param.h>
24049 +#include <netinet/in.h>
24050 +#include <arpa/inet.h>
24051 +#ifdef HAVE_ENDIAN_H
24052 +#include <endian.h>
24053 +#endif
24054 +
24055 +#include "mb/pg_wchar.h"
24056 +
24057 +#include "libpq-fe.h"
24058 +#include "libpq-int.h"
24059 +#include "fe-auth.h"
24060 +#include "replicate_com.h"
24061 +#include "pgreplicate.h"
24062 +
24063 +const char * pq_getmsgstring(StringInfo msg);
24064 +unsigned int pq_getmsgint(StringInfo msg, int b);
24065 +void pq_copymsgbytes(StringInfo msg, char *buf, int datalen);
24066 +const char * pq_getmsgbytes(StringInfo msg, int datalen);
24067 +int pq_getmsgbyte(StringInfo msg);
24068 +
24069 +const char *
24070 +pq_getmsgstring(StringInfo msg)
24071 +{
24072 +       char       *text;
24073 +       int                     text_len;
24074 +
24075 +       if (!msg)
24076 +       {
24077 +               return NULL;
24078 +       }
24079 +       text = msg->data + msg->cursor;
24080 +       /*
24081 +        * A StringInfo always carries a trailing null byte, so strlen() is
24082 +        * usable here.  We still have to verify that the terminator we found
24083 +        * lies inside the message itself.
24084 +        */
24085 +       text_len = strlen(text);
24086 +       if (msg->len <= msg->cursor + text_len)
24087 +       {
24088 +               return NULL;
24089 +       }
24090 +       msg->cursor += text_len + 1;
24091 +
24092 +       return text;
24093 +}
24094 +
24095 +
24096 +/* --------------------------------
24097 + *             pq_getmsgint    - get a binary integer from a message buffer
24098 + *             b is the width in bytes (1, 2 or 4); 2- and 4-byte values are
24099 + *             converted from network order.  Values are treated as unsigned.
24100 + * --------------------------------
24101 + */
24102 +unsigned int
24103 +pq_getmsgint(StringInfo msg, int b)
24104 +{
24105 +       unsigned int result;
24106 +       unsigned char n8 = 0;           /* zero-init: pq_copymsgbytes() leaves its */
24107 +       uint16          n16 = 0;        /* buffer untouched when the message is too */
24108 +       uint32          n32 = 0;        /* short, so a short read now yields 0 */
24109 +
24110 +       switch (b)
24111 +       {
24112 +               case 1:
24113 +                       pq_copymsgbytes(msg, (char *) &n8, 1);
24114 +                       result = n8;
24115 +                       break;
24116 +               case 2:
24117 +                       pq_copymsgbytes(msg, (char *) &n16, 2);
24118 +                       result = ntohs(n16);
24119 +                       break;
24120 +               case 4:
24121 +                       pq_copymsgbytes(msg, (char *) &n32, 4);
24122 +                       result = ntohl(n32);
24123 +                       break;
24124 +               default:
24125 +                       result = 0;                     /* unsupported width */
24126 +                       break;
24127 +       }
24128 +       return result;
24129 +}
24130 +
24131 +/* --------------------------------
24132 + *             pq_copymsgbytes - copy raw data from a message buffer
24133 + *
24134 + *             Same as above, except data is copied to caller's buffer.
24135 + * --------------------------------
24136 + */
24137 +void
24138 +pq_copymsgbytes(StringInfo msg, char *buf, int datalen)
24139 +{
24140 +       if (datalen < 0 || datalen > (msg->len - msg->cursor))
24141 +       {
24142 +               return;
24143 +       }
24144 +       memcpy(buf, &msg->data[msg->cursor], datalen);
24145 +       msg->cursor += datalen;
24146 +}
24147 +
24148 +
24149 +/* --------------------------------
24150 + *             pq_getmsgbytes  - get raw data from a message buffer
24151 + *
24152 + *             Returns a pointer directly into the message buffer; note this
24153 + *             may not have any particular alignment.
24154 + * --------------------------------
24155 + */
24156 +const char *
24157 +pq_getmsgbytes(StringInfo msg, int datalen)
24158 +{
24159 +       const char *result;
24160 +
24161 +       if (datalen < 0 || datalen > (msg->len - msg->cursor))
24162 +       {
24163 +               return NULL;
24164 +       }
24165 +       result = &msg->data[msg->cursor];
24166 +       msg->cursor += datalen;
24167 +       return result;
24168 +}
24169 +
24170 +/* --------------------------------
24171 + *             pq_getmsgbyte   - get a raw byte from a message buffer
24172 + * --------------------------------
24173 + */
24174 +int
24175 +pq_getmsgbyte(StringInfo msg)
24176 +{
24177 +       if (msg->cursor >= msg->len)
24178 +       {
24179 +               return 0;
24180 +       }
24181 +       return (unsigned char) msg->data[msg->cursor++];
24182 +}
24183 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/recovery.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/recovery.c
24184 --- postgresql-8.2.4/src/pgcluster/pgrp/recovery.c      1970-01-01 01:00:00.000000000 +0100
24185 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/recovery.c    2007-02-18 22:52:17.000000000 +0100
24186 @@ -0,0 +1,1143 @@
24187 +/*--------------------------------------------------------------------
24188 + * FILE:
24189 + *     recovery.c
24190 + *
24191 + * NOTE:
24192 + *     This file is composed of the functions to call with the source
24193 + *     at pgreplicate for the recovery.
24194 + *
24195 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
24196 + *--------------------------------------------------------------------
24197 + */
24198 +#include "postgres.h"
24199 +
24200 +#include <stdio.h>
24201 +#include <unistd.h>
24202 +#include <signal.h>
24203 +#include <sys/wait.h>
24204 +#include <sys/types.h>
24205 +#include <sys/stat.h>
24206 +#include <sys/socket.h>
24207 +#include <sys/ipc.h>
24208 +#include <sys/shm.h>
24209 +#include <sys/msg.h>
24210 +#include <netdb.h>
24211 +#include <netinet/in.h>
24212 +#include <errno.h>
24213 +#include <fcntl.h>
24214 +#include <time.h>
24215 +#include <arpa/inet.h>
24216 +#include <sys/param.h>
24217 +#include <sys/file.h>
24218 +
24219 +#ifdef HAVE_SYS_SELECT_H
24220 +#include <sys/select.h>
24221 +#endif
24222 +
24223 +#ifdef HAVE_CRYPT_H
24224 +#include <crypt.h>
24225 +#endif
24226 +
24227 +#include "miscadmin.h"
24228 +#include "nodes/nodes.h"
24229 +
24230 +#include "libpq-fe.h"
24231 +#include "libpq/libpq-fs.h"
24232 +#include "libpq-int.h"
24233 +#include "fe-auth.h"
24234 +
24235 +#include "access/xact.h"
24236 +#include "replicate_com.h"
24237 +#include "pgreplicate.h"
24238 +
24239 +
24240 +#ifdef WIN32
24241 +#include "win32.h"
24242 +#else
24243 +#ifdef HAVE_NETINET_TCP_H
24244 +#include <netinet/tcp.h>
24245 +#endif
24246 +#include <arpa/inet.h>
24247 +#endif
24248 +
24249 +#ifdef HAVE_CRYPT_H
24250 +#include <crypt.h>
24251 +#endif
24252 +
24253 +#ifdef MULTIBYTE
24254 +#include "mb/pg_wchar.h"
24255 +#endif
24256 +#include "pgreplicate.h"
24257 +
24258 +
24259 +/*--------------------------------------
24260 + * GLOBAL VARIABLE DECLARATION
24261 + *--------------------------------------
24262 + */
24263 +RecoveryPacket MasterPacketData;
24264 +RecoveryTbl Master;
24265 +RecoveryTbl Target;
24266 +
24267 +
24268 +/*--------------------------------------
24269 + * PROTOTYPE DECLARATION
24270 + *--------------------------------------
24271 + */
24272 +static int read_packet(int sock,RecoveryPacket * packet);
24273 +static int read_packet_from_master( RecoveryTbl * host, RecoveryPacket * packet );
24274 +static int send_recovery_packet(int  sock, RecoveryPacket * packet);
24275 +static int send_packet(RecoveryTbl * host, RecoveryPacket * packet );
24276 +static void start_recovery_prepare(void);
24277 +static void reset_recovery_prepare(void);
24278 +static void finish_recovery(void);
24279 +static bool first_setup_recovery(int * sock, RecoveryPacket * packet);
24280 +static int wait_transaction_count_clear(void);
24281 +static bool second_setup_recovery (RecoveryPacket * packet);
24282 +static void pgrecovery_loop(int fd);
24283 +static int PGRsend_queue(RecoveryTbl * master, RecoveryTbl * target);
24284 +static int send_vacuum(HostTbl *host, char * userName, int stage);
24285 +static char * read_queue_file(FILE * fp, ReplicateHeader * header, char * query);
24286 +
24287 +#ifdef PRINT_DEBUG
24288 +static void show_recovery_packet(RecoveryPacket * packet);
24289 +#endif                 
24290 +
24291 +int PGRsend_load_balance_packet(RecoveryPacket * packet);
24292 +void PGRrecovery_main(int fork_wait_time);
24293 +
24294 +/*-----------------------------------------------------------
24295 + * SYMBOL
24296 + *    read_packet()
24297 + * NOTES
24298 + *    Read recovery packet data 
24299 + * ARGS
24300 + *    int sock : socket
24301 + *    RecoveryPacket * packet : read packet buffer
24302 + * RETURN
24303 + *    -1 : error
24304 + *    >0 : read size
24305 + *-----------------------------------------------------------
24306 + */
24307 +static int
24308 +read_packet(int sock,RecoveryPacket * packet)
24309 +{
24310 +#ifdef PRINT_DEBUG
24311 +       char * func = "read_packet()";
24312 +#endif                 
24313 +       int r = 0;
24314 +       char * read_ptr = NULL;
24315 +       int read_size = 0;
24316 +       int packet_size = 0;
24317 +
24318 +       if (packet == NULL)
24319 +       {
24320 +               return -1;
24321 +       }
24322 +       read_ptr = (char*)packet;
24323 +       packet_size = sizeof(RecoveryPacket);
24324 +       for (;;)
24325 +       {
24326 +               r = recv(sock,read_ptr + read_size ,packet_size - read_size, MSG_WAITALL);
24327 +               if (r < 0)
24328 +               {
24329 +                       if (errno == EINTR || errno == EAGAIN)
24330 +                               continue;
24331 +                       else
24332 +                       {
24333 +                               show_error("%s:recv failed: %d(%s)",func, errno, strerror(errno));
24334 +                               return -1;
24335 +                       }
24336 +               }
24337 +               else if (r > 0)
24338 +               {
24339 +                       read_size += r;
24340 +                       if (read_size == packet_size)
24341 +                       {
24342 +#ifdef PRINT_DEBUG
24343 +                               show_debug("%s:receive packet",func);
24344 +                               show_recovery_packet(packet);
24345 +#endif                 
24346 +                               return read_size;
24347 +                       }
24348 +               }
24349 +               else /* r == 0 */
24350 +               {
24351 +                       show_error("%s:unexpected EOF", func);
24352 +                       return -1;
24353 +               }
24354 +       }
24355 +       return -1;
24356 +}
24357 +
24358 +static int
24359 +read_packet_from_master( RecoveryTbl * host, RecoveryPacket * packet )
24360 +{
24361 +       int read_size = 0;
24362 +       int rtn;
24363 +       fd_set    rmask;
24364 +       struct timeval timeout;
24365 +
24366 +       for(;;)
24367 +       {
24368 +               timeout.tv_sec = RECOVERY_TIMEOUT;
24369 +               timeout.tv_usec = 0;
24370 +
24371 +               /*
24372 +                * Wait for something to happen.
24373 +                */
24374 +               FD_ZERO(&rmask);
24375 +               FD_SET(host->recovery_sock,&rmask);
24376 +               rtn = select(host->recovery_sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
24377 +               
24378 +               if (rtn == 0) /* timeout */
24379 +               {
24380 +                       return -1;
24381 +               }
24382 +
24383 +               if (rtn && FD_ISSET(host->recovery_sock, &rmask))
24384 +               {
24385 +                       read_size = read_packet(host->recovery_sock, packet);
24386 +                       return read_size;
24387 +               }
24388 +       }
24389 +}
24390 +
24391 +static int
24392 +send_recovery_packet(int  sock, RecoveryPacket * packet)
24393 +{
24394 +       char *func = "send_recovery_packet";
24395 +       char * send_ptr;
24396 +       int send_size= 0;
24397 +       int buf_size = 0;
24398 +       int s;
24399 +       
24400 +       send_ptr = (char *)packet;
24401 +       buf_size = sizeof(RecoveryPacket);
24402 +
24403 +       for (;;)
24404 +       {
24405 +               s = send(sock, send_ptr + send_size,buf_size - send_size ,0);
24406 +               if (s < 0)
24407 +               {
24408 +                       if (errno == EINTR || errno == EAGAIN)
24409 +                               continue;
24410 +
24411 +                       show_error("%s:send error: %d(%s)", func, errno, strerror(errno));
24412 +                       return STATUS_ERROR;
24413 +               }
24414 +               else if (s == 0)
24415 +               {
24416 +                       show_error("%s:unexpected EOF", func);
24417 +                       return STATUS_ERROR;
24418 +               }
24419 +
24420 +               send_size += s;
24421 +               if (send_size == buf_size)
24422 +                       return STATUS_OK;
24423 +       }
24424 +}
24425 +
24426 +static int
24427 +send_packet(RecoveryTbl * host, RecoveryPacket * packet )
24428 +{
24429 +       char * func = "send_packet()";
24430 +       int count = 0;
24431 +
24432 +       if (host->recovery_sock == -1)
24433 +       {
24434 +               while(PGR_Create_Socket_Connect(&(host->recovery_sock), host->hostName , host->recoveryPort) != STATUS_OK )
24435 +               {
24436 +                       if (count > MAX_RETRY_TIMES )
24437 +                       {
24438 +                               show_error("%s:host[%s] port[%d]PGR_Create_Socket_Connect failed",func,host->hostName, host->recoveryPort);
24439 +                               return STATUS_ERROR;
24440 +                       }
24441 +                       count ++;
24442 +               }
24443 +       }
24444 +       count = 0;
24445 +       while (send_recovery_packet(host->recovery_sock,packet) != STATUS_OK)
24446 +       {
24447 +               close(host->recovery_sock);
24448 +               host->recovery_sock = -1;
24449 +               PGR_Create_Socket_Connect(&(host->recovery_sock), host->hostName , host->recoveryPort);
24450 +#ifdef PRINT_DEBUG
24451 +               show_debug("%s:PGR_Create_Socket_Connectsock[%d] host[%s] port[%d]",
24452 +                       func,host->recovery_sock,host->hostName,host->recoveryPort);
24453 +#endif
24454 +               if (count > PGR_CONNECT_RETRY_TIME )
24455 +               {
24456 +
24457 +                       show_error("%s:send failed and PGR_Create_Socket_Connect failed",func);
24458 +                       return STATUS_ERROR;
24459 +               }
24460 +               count ++;
24461 +       }
24462 +       return STATUS_OK;
24463 +}
24464 +
24465 +static void
24466 +start_recovery_prepare(void)
24467 +{
24468 +       PGRset_recovery_status (RECOVERY_PREPARE_START);        
24469 +}
24470 +
24471 +static void
24472 +reset_recovery_prepare(void)
24473 +{
24474 +       PGRset_recovery_status (RECOVERY_INIT);
24475 +}
24476 +
24477 +static void
24478 +finish_recovery(void)
24479 +{
24480 +       PGRset_recovery_status (RECOVERY_INIT);
24481 +}
24482 +
24483 +int
24484 +PGRsend_load_balance_packet(RecoveryPacket * packet)
24485 +{
24486 +       char * func = "PGRsend_load_balance_packet()";
24487 +       RecoveryTbl * lbp;
24488 +       int status;
24489 +
24490 +       lbp = LoadBalanceTbl;
24491 +       if (lbp == (RecoveryTbl *)NULL)
24492 +       {
24493 +               show_error("%s:recovery table is NULL",func);
24494 +               return STATUS_ERROR;
24495 +       }
24496 +       while (lbp->hostName[0] != 0)
24497 +       {
24498 +               if (lbp->recovery_sock != -1)
24499 +               {
24500 +                       close(lbp->recovery_sock);
24501 +                       lbp->recovery_sock = -1;
24502 +               }
24503 +#ifdef PRINT_DEBUG
24504 +       show_debug("%s:host[%s] port[%d]",func,lbp->hostName,lbp->recoveryPort);
24505 +#endif
24506 +               status = send_packet(lbp,packet);
24507 +               if (lbp->recovery_sock != -1)
24508 +               {
24509 +                       close(lbp->recovery_sock);
24510 +                       lbp->recovery_sock = -1;
24511 +               }
24512 +               lbp ++;
24513 +       }
24514 +       return STATUS_OK;
24515 +}
24516 +
24517 +static int
24518 +send_vacuum(HostTbl *host, char * userName, int stage)
24519 +{
24520 +       int rtn = STATUS_OK;
24521 +       ReplicateHeader header;
24522 +       char * query = NULL;
24523 +
24524 +       if (stage == PGR_RECOVERY_1ST_STAGE)
24525 +       {
24526 +               query = strdup("VACUUM");
24527 +       }
24528 +       else
24529 +       {
24530 +               query = strdup("VACUUM FULL");
24531 +       }
24532 +       memset(&header,0,sizeof(header));
24533 +       header.query_size = strlen(query) + 1;
24534 +       strncpy(header.dbName,"template1",sizeof(header.dbName));
24535 +       strncpy(header.userName,userName,sizeof(header.userName));
24536 +       header.cmdSys = CMD_SYS_REPLICATE;
24537 +       header.cmdSts = CMD_STS_QUERY;
24538 +       header.cmdType = CMD_TYPE_VACUUM;
24539 +       header.pid = getpid();
24540 +       header.query_id = getpid();
24541 +       header.isAutoCommit=1;
24542 +       rtn = PGRsend_replicate_packet_to_server(host,&header,query,PGR_Result,0, true);
24543 +       if (query !=NULL)
24544 +               free(query);
24545 +       return rtn;     
24546 +}
24547 +
24548 +static bool
24549 +first_setup_recovery(int * sock, RecoveryPacket * packet)
24550 +{
24551 +       char * func = "first_setup_recovery()";
24552 +       int status;
24553 +       HostTbl * master = (HostTbl *)NULL;
24554 +       bool loop_end = false;
24555 +       HostTbl host_tbl;
24556 +       char * userName = NULL;
24557 +       int ip;
24558 +
24559 +       memset(Target.hostName,0,sizeof(Target.hostName));
24560 +       strncpy(Target.hostName,packet->hostName,sizeof(Target.hostName));
24561 +       ip = PGRget_ip_by_name(Target.hostName);
24562 +       sprintf(Target.resolvedName,
24563 +                "%d.%d.%d.%d",
24564 +                (ip      ) & 0xff ,
24565 +                (ip >>  8) & 0xff ,
24566 +                (ip >> 16) & 0xff ,
24567 +                (ip >> 24) & 0xff );
24568 +       Target.port = ntohs(packet->port);
24569 +       Target.recoveryPort = ntohs(packet->recoveryPort);
24570 +       Target.sock = *sock;
24571 +       Target.recovery_sock = *sock;
24572 +#ifdef PRINT_DEBUG
24573 +       show_debug("%s:1st setup target %s",func,Target.hostName);
24574 +       show_debug("%s:1st setup port %d",func,Target.port);
24575 +#endif                 
24576 +       /*
24577 +        * check another recovery process 
24578 +        */
24579 +       if (PGRget_recovery_status() != RECOVERY_INIT)
24580 +       {
24581 +               /*
24582 +                * recovery process is already running
24583 +                */
24584 +#ifdef PRINT_DEBUG
24585 +               show_debug("%s:already recovery job runing",func);
24586 +#endif                 
24587 +               memset(packet,0,sizeof(packet));
24588 +               PGRset_recovery_packet_no(packet, RECOVERY_ERROR_OCCUPIED) ;
24589 +               status = send_packet(&Target,packet);
24590 +               loop_end = true;
24591 +               return loop_end;
24592 +       }
24593 +       /*
24594 +        * add recovery target to host table
24595 +        */
24596 +#ifdef PRINT_DEBUG
24597 +       show_debug("%s:add recovery target to host table",func);
24598 +#endif                 
24599 +       memcpy(host_tbl.hostName,Target.hostName,sizeof(host_tbl.hostName));
24600 +       memcpy(host_tbl.resolvedName,Target.resolvedName,sizeof(host_tbl.resolvedName));
24601 +       host_tbl.port = Target.port;
24602 +       host_tbl.recoveryPort = Target.recoveryPort;
24603 +       PGRset_recovered_host(&host_tbl,DB_TBL_INIT);
24604 +       PGRadd_HostTbl(&host_tbl,DB_TBL_INIT);
24605 +       /*
24606 +        * send prepare recovery to load balancer
24607 +        */
24608 +       PGRsend_load_balance_packet(packet);
24609 +       userName = strdup(packet->userName);
24610 +
24611 +       /*
24612 +        * set RECOVERY_PGDATA_REQ packet data
24613 +        */
24614 +#ifdef PRINT_DEBUG
24615 +       show_debug("%s:set RECOVERY_PGDATA_REQ packet data",func);
24616 +#endif                 
24617 +       memset(packet,0,sizeof(RecoveryPacket));
24618 +       PGRset_recovery_packet_no(packet, RECOVERY_PGDATA_REQ );
24619 +
24620 +retry_connect_master:
24621 +       master = PGRget_master();
24622 +       if (master == (HostTbl *)NULL)
24623 +       {
24624 +               /*
24625 +                * connection error , master may be down
24626 +                */
24627 +               show_error("%s:get master info error , master may be down",func);
24628 +               PGRset_recovery_packet_no(packet, RECOVERY_ERROR_TARGET_ONLY);
24629 +               status = send_packet(&Target, packet);
24630 +               reset_recovery_prepare();
24631 +               loop_end = true;
24632 +               if (userName != NULL)
24633 +                       free(userName);
24634 +               return loop_end;
24635 +       }
24636 +       /* send vauum command to master server */
24637 +       status = send_vacuum(master, userName, PGR_RECOVERY_1ST_STAGE );
24638 +       if (status != STATUS_OK)
24639 +       {
24640 +               PGRset_host_status(master, DB_TBL_ERROR);
24641 +               goto retry_connect_master;
24642 +       }
24643 +
24644 +       memcpy(Master.hostName,master->hostName,sizeof(Master.hostName));
24645 +       memcpy(Master.resolvedName,master->resolvedName,sizeof(Master.resolvedName));
24646 +       Master.sock = -1;
24647 +       Master.recovery_sock = -1;
24648 +       Master.port = master->port;
24649 +       Master.recoveryPort = master->recoveryPort;
24650 +
24651 +#ifdef PRINT_DEBUG
24652 +       show_debug("%s:send packet to master %s recoveryPort %d",func, Master.hostName, Master.recoveryPort);
24653 +#endif                 
24654 +       status = send_packet(&Master, packet);
24655 +       if (status != STATUS_OK)
24656 +       {
24657 +               /*
24658 +                * connection error , master may be down
24659 +                */
24660 +               show_error("%s:connection error , master may be down",func);
24661 +               PGRset_host_status(master,DB_TBL_ERROR);
24662 +               goto retry_connect_master ;
24663 +       }
24664 +       
24665 +       /*
24666 +        * start prepare of recovery
24667 +        *     set recovery status to "prepare start"
24668 +        *     start transaction count up
24669 +        */
24670 +       start_recovery_prepare();
24671 +       /*
24672 +        * wait answer from master server 
24673 +        */
24674 +#ifdef PRINT_DEBUG
24675 +       show_debug("%s:wait answer from master server",func);
24676 +#endif                 
24677 +       memset(packet,0,sizeof(RecoveryPacket));
24678 +       read_packet_from_master(&Master, packet);
24679 +#ifdef PRINT_DEBUG
24680 +       show_debug("%s:get answer from master:no[%d]",func,ntohs(packet->packet_no));
24681 +#endif                 
24682 +       if (ntohs(packet->packet_no) == RECOVERY_PGDATA_ANS)
24683 +       {
24684 +               /*
24685 +                * send a packet to load balancer that is stopped master's 
24686 +                * load balancing until all recovery process is finished
24687 +                */
24688 +               PGRsend_load_balance_packet(packet);
24689 +               memcpy((char *)&MasterPacketData,packet,sizeof(RecoveryPacket));
24690 +
24691 +               /*
24692 +                * prepare answer from master DB
24693 +                */
24694 +               PGRset_recovery_packet_no(packet, RECOVERY_PREPARE_ANS );
24695 +               memcpy(packet->hostName,Master.hostName,sizeof(packet->hostName));
24696 +               status = send_packet(&Target, packet);
24697 +               if (status != STATUS_OK)
24698 +               {
24699 +                       show_error("%s:no[%d] send_packet to target error",func,ntohs(packet->packet_no));
24700 +                       PGRset_recovery_packet_no(packet, RECOVERY_ERROR_TARGET_ONLY);
24701 +                       status = send_packet(&Master,packet);
24702 +                       reset_recovery_prepare();
24703 +                       loop_end = true;
24704 +               }
24705 +       }
24706 +       if (userName != NULL)
24707 +               free(userName);
24708 +
24709 +
24710 +       return loop_end;
24711 +}
24712 +
24713 +static int
24714 +wait_transaction_count_clear(void)
24715 +{
24716 +       char * func ="wait_transaction_count_clear()";
24717 +       HostTbl * master = (HostTbl *)NULL;
24718 +       int cnt = 0;
24719 +       int recovery_status = PGRget_recovery_status();
24720 +
24721 +       while (recovery_status != RECOVERY_CLEARED)
24722 +       {
24723 +               master = PGRget_master();
24724 +               if (master == (HostTbl *)NULL)
24725 +               {
24726 +                       show_error("%s:get master info error , master may be down",func);
24727 +                       continue;
24728 +               }
24729 +               if ((recovery_status == RECOVERY_PREPARE_START) &&
24730 +                       (master->transaction_count==0))
24731 +               {
24732 +                       PGRset_recovery_status(RECOVERY_CLEARED);
24733 +                       break;
24734 +               }
24735 +
24736 +               sleep(1);
24737 +#ifdef PRINT_DEBUG
24738 +               show_debug("now, waiting clear every transaction for recovery");
24739 +#endif
24740 +               cnt ++;
24741 +               if (cnt > RECOVERY_TIMEOUT * 60 )
24742 +               {
24743 +                       show_error("sorry, it is  timeout for waiting clear transaction");
24744 +                       return STATUS_ERROR;
24745 +               }
24746 +               recovery_status = PGRget_recovery_status();
24747 +       }
24748 +       return STATUS_OK;
24749 +}
24750 +
24751 +static bool
24752 +second_setup_recovery (RecoveryPacket * packet)
24753 +{
24754 +       char * func = "second_setup_recovery()";
24755 +       HostTbl * master = (HostTbl *)NULL;
24756 +       int status;
24757 +       bool loop_end = false;
24758 +       char * userName = NULL;
24759 +       int recovery_status = 0;
24760 +
24761 +       /* send vauum command to master server */
24762 +       while ((master = PGRget_master()) != NULL)
24763 +       {
24764 +               /*
24765 +                * wait until all started transactions are going to finish
24766 +                */
24767 +               status = wait_transaction_count_clear();
24768 +               if (status != STATUS_OK)
24769 +               {
24770 +                       show_error("%s:transaction is too busy, please try again after",func);
24771 +                       PGRset_recovery_packet_no(packet, RECOVERY_ERROR_TARGET_ONLY);
24772 +                       status = send_packet(&Target,packet);
24773 +                       status = send_packet(&Master,packet);
24774 +                       reset_recovery_prepare();
24775 +                       return true;
24776 +               }
24777 +               userName = strdup(packet->userName);
24778 +               status = send_vacuum(master, userName, PGR_RECOVERY_2ND_STAGE );
24779 +               if (status != STATUS_OK)
24780 +               {
24781 +                       PGRset_host_status(master, DB_TBL_ERROR);
24782 +                       if (userName != NULL)
24783 +                       {
24784 +                               free(userName);
24785 +                               userName = NULL;
24786 +                       }
24787 +                       continue;
24788 +               }
24789 +               break;
24790 +       }
24791 +
24792 +       if (master == NULL)
24793 +       {
24794 +               show_error("%s:vacuum error , master may be down",func);
24795 +               PGRset_recovery_packet_no(packet, RECOVERY_ERROR_TARGET_ONLY);
24796 +               status = send_packet(&Target,packet);
24797 +               status = send_packet(&Master,packet);
24798 +               reset_recovery_prepare();
24799 +
24800 +               return true;            
24801 +       }
24802 +
24803 +       recovery_status = PGRget_recovery_status();
24804 +       if ((recovery_status != RECOVERY_PREPARE_START) &&
24805 +               (recovery_status != RECOVERY_WAIT_CLEAN) &&
24806 +               (recovery_status != RECOVERY_CLEARED))
24807 +       {
24808 +               show_error("%s:queue set failed. stop to recovery",func);
24809 +               PGRset_recovery_packet_no(packet, RECOVERY_ERROR_CONNECTION);
24810 +               status = send_packet(&Target,packet);
24811 +               status = send_packet(&Master,packet);
24812 +               reset_recovery_prepare();
24813 +               if (userName != NULL)
24814 +                       free(userName);
24815 +               return true;
24816 +       }
24817 +
24818 +       /*
24819 +        * then, send fsync request to master DB
24820 +        */
24821 +       PGRset_recovery_packet_no(packet, RECOVERY_FSYNC_REQ );
24822 +       status = send_packet(&Master,packet);
24823 +       if (status != STATUS_OK)
24824 +       {
24825 +               /*
24826 +                * connection error , master may be down
24827 +                */
24828 +               show_error("%s:connection error , master may be down",func);
24829 +               PGRset_recovery_packet_no(packet, RECOVERY_ERROR_CONNECTION);
24830 +               status = send_packet(&Target,packet);
24831 +               status = send_packet(&Master,packet);
24832 +               reset_recovery_prepare();
24833 +               if (userName != NULL)
24834 +                       free(userName);
24835 +               return true;
24836 +       }
24837 +
24838 +       recovery_status = PGRget_recovery_status();
24839 +       if ((recovery_status != RECOVERY_PREPARE_START) &&
24840 +               (recovery_status != RECOVERY_WAIT_CLEAN) &&
24841 +               (recovery_status != RECOVERY_CLEARED))
24842 +       {
24843 +               show_error("%s:queue set failed. stop to recovery",func);
24844 +               PGRset_recovery_packet_no(packet, RECOVERY_ERROR_CONNECTION);
24845 +               status = send_packet(&Target,packet);
24846 +               status = send_packet(&Master,packet);
24847 +               reset_recovery_prepare();
24848 +               if (userName != NULL)
24849 +                       free(userName);
24850 +               return true;
24851 +       }
24852 +
24853 +       /*
24854 +        * wait answer from master server 
24855 +        */
24856 +       memset(packet,0,sizeof(RecoveryPacket));
24857 +       read_packet_from_master(&Master,packet);
24858 +       if (ntohs(packet->packet_no) == RECOVERY_FSYNC_ANS )
24859 +       {
24860 +               /*
24861 +                * master DB finished fsync
24862 +                */
24863 +               PGRset_recovery_packet_no(packet, RECOVERY_START_ANS );
24864 +               memcpy(packet->hostName,Master.hostName,sizeof(packet->hostName));
24865 +               status = send_packet(&Target,packet);
24866 +               if (status != STATUS_OK)
24867 +               {
24868 +                       finish_recovery();
24869 +                       loop_end = true;
24870 +               }
24871 +       }
24872 +       else
24873 +       {
24874 +               show_error("%s:failure answer returned",func);
24875 +               PGRset_recovery_packet_no(packet, RECOVERY_ERROR_CONNECTION);
24876 +               status = send_packet(&Target,packet);
24877 +               status = send_packet(&Master,packet);
24878 +               reset_recovery_prepare();
24879 +               loop_end = true;
24880 +       }
24881 +       if (userName != NULL)
24882 +               free(userName);
24883 +       return loop_end;
24884 +}
24885 +
24886 +static char *
24887 +read_queue_file(FILE * fp, ReplicateHeader * header, char *query)
24888 +{
24889 +       char * func = "read_queue_file()";
24890 +       int size = 0;
24891 +
24892 +       if (fp == NULL)
24893 +       {
24894 +               return NULL;
24895 +       }
24896 +       if (fread((char*)header,sizeof(ReplicateHeader),1,fp) < 1)
24897 +       {
24898 +               return NULL;
24899 +       }
24900 +       size = ntohl(header->query_size);
24901 +       if (size >= 0)
24902 +       {
24903 +               query = malloc(size+4);
24904 +               if (query == NULL)
24905 +               {
24906 +                       show_error("%s:malloc failed:(%s)",func,strerror(errno));
24907 +               }
24908 +               memset(query,0,size+4);
24909 +               if (size > 0)
24910 +               {
24911 +                       if (fread(query,size,1,fp) < 1)
24912 +                       {
24913 +                               return NULL;
24914 +                       }
24915 +               }
24916 +               return query;
24917 +       }
24918 +       return NULL;
24919 +}
24920 +
24921 +/**
24922 + * Replay every queued recovery file to the master and, when given, the target.
24923 + *
24924 + * return
24925 + *   STATUS_OK - queue drained (the per-query send result is not checked)
24926 + *   STATUS_ERROR - no master/target entry, malloc failure or unreadable queue file
24927 + */
24928 +static int
24929 +PGRsend_queue(RecoveryTbl * master, RecoveryTbl * target)
24930 +{
24931 +       char * func = "PGRsend_queue()";
24932 +       HostTbl * master_ptr = NULL;
24933 +       HostTbl * target_ptr = NULL;
24934 +       RecoveryQueueFile * msg = NULL;
24935 +       FILE * rfp = NULL;
24936 +       ReplicateHeader header;
24937 +       char * query = NULL;
24938 +       int size = 0;
24939 +       int status = 0;
24940 +       int query_size = 0;
24941 +       int rtn=0;
24942 +
24943 +       if (master == (RecoveryTbl *)NULL)
24944 +       {
24945 +               show_error("%s:there is no master ",func);
24946 +               return STATUS_ERROR;
24947 +       }
24948 +#ifdef PRINT_DEBUG
24949 +       show_debug("%s:master %s - %d",func,master->hostName,master->port);
24950 +#endif
24951 +       master_ptr = PGRget_HostTbl(master->resolvedName,master->port);
24952 +       if (master_ptr == (HostTbl *)NULL)
24953 +       {
24954 +               show_error("%s:master table is null",func);
24955 +               return STATUS_ERROR;
24956 +       }
24957 +       if (target != (RecoveryTbl *)NULL)
24958 +       {
24959 +#ifdef PRINT_DEBUG
24960 +               show_debug("%s:target %s - %d",func,target->hostName,target->port);
24961 +#endif
24962 +               target_ptr = PGRget_HostTbl(target->resolvedName,target->port);
24963 +               if (target_ptr == (HostTbl *)NULL)
24964 +               {
24965 +                       show_error("%s:target table is null",func);
24966 +                       return STATUS_ERROR;
24967 +               }
24968 +       }
24969 +
24970 +       size = sizeof(RecoveryQueueFile) + FILENAME_MAX_LENGTH;
24971 +       msg = (RecoveryQueueFile *)malloc(size+4);
24972 +       if (msg == NULL)
24973 +       {
24974 +               show_error("%s:malloc() failed. reason: %s",func, strerror(errno));
24975 +               return STATUS_ERROR;
24976 +       }
24977 +       memset(msg,0,size+4);
24978 +       status = STATUS_OK;
24979 +       /* each message taken from the queue carries the name of one queue file */
24980 +       while (msgrcv(*RecoveryMsgid , msg, FILENAME_MAX_LENGTH, 0, IPC_NOWAIT) > 0 )
24981 +       {
24982 +               strncpy(Recovery_Status_Inf->read_file,(char *)(msg->mdata),FILENAME_MAX_LENGTH);
24983 +               PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
24984 +               if (!strncmp(Recovery_Status_Inf->write_file,Recovery_Status_Inf->read_file,sizeof(Recovery_Status_Inf->write_file)))
24985 +               {
24986 +                       memset(Recovery_Status_Inf->write_file,0,sizeof(Recovery_Status_Inf->write_file));
24987 +               }
24988 +               PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
24989 +               rfp = fopen(Recovery_Status_Inf->read_file,"r");
24990 +               if (rfp == NULL)
24991 +               {
24992 +                       show_error("%s:queue file [%s] can not be opened:(%s)",func,Recovery_Status_Inf->read_file,strerror(errno));
24993 +                       free(msg);      /* do not leak the message buffer on the error path */
24994 +                       return STATUS_ERROR;
24995 +               }
24996 +               /* NOTE(review): the previous buffer is handed back to read_queue_file(); confirm it releases it */
24997 +               while ((query = read_queue_file(rfp, &header,query)) != NULL)
24998 +               {
24999 +                       query_size = ntohl(header.query_size);
25000 +                       if (query_size < 0)
25001 +                       {
25002 +                               free(query);    /* query is non-NULL inside the loop */
25003 +                               query = NULL;
25004 +                               break;
25005 +                       }
25006 +                       PGR_Response_Inf->current_cluster = 0;
25007 +                       rtn=PGRsend_replicate_packet_to_server(master_ptr,&header,query,PGR_Result,ntohl(header.replicate_id), true);
25008 +                       if (target_ptr != NULL)
25009 +                       {
25010 +                               PGR_Response_Inf->current_cluster = 1;
25011 +                               rtn=PGRsend_replicate_packet_to_server(target_ptr,&header,query,PGR_Result,ntohl(header.replicate_id), true);
25012 +                       }
25013 +               }
25014 +               if (query != NULL)
25015 +               {
25016 +                       free(query);
25017 +                       query = NULL;
25018 +               }
25019 +               if (rfp != NULL)
25020 +               {
25021 +                       fclose(rfp);
25022 +                       rfp = NULL;
25023 +                       /* the file has been replayed: remove it and forget its name */
25024 +                       unlink(Recovery_Status_Inf->read_file);
25025 +                       memset(Recovery_Status_Inf->read_file,0,sizeof(Recovery_Status_Inf->read_file));
25026 +               }
25027 +       }
25028 +       free(msg);      /* the receive buffer is only needed while draining the queue */
25029 +#ifdef PRINT_DEBUG
25030 +       show_debug("%s:send_queue return status %d",func,status);
25031 +#endif
25032 +       return status;
25033 +}
25034 +
25035 +/*
25036 + * Accept one connection on the recovery port and handle the recovery
25037 + * packets read from it.  Returns when a packet ends the session, when
25038 + * select() times out or fails, or when the connection cannot be accepted.
25039 + */
25040 +static void
25041 +pgrecovery_loop(int fd)
25042 +{
25043 +       char * func = "pgrecovery_loop()";
25044 +       int count;
25045 +       int sock = -1;  /* defined value for PGR_Close_Sock() after a failed accept */
25046 +       int status;
25047 +       bool loop_end = false;
25048 +       RecoveryPacket packet;
25049 +       HostTbl new_host;
25050 +       RecoveryTbl * lbp;
25051 +
25052 +       lbp = LoadBalanceTbl;
25053 +       if (lbp == (RecoveryTbl *)NULL)
25054 +       {
25055 +               show_error("%s:recovery table is NULL",func);
25056 +               return ;
25057 +       }
25058 +#ifdef PRINT_DEBUG
25059 +       show_debug("%s:recovery accept port %d",func, Recovery_Port_Number);
25060 +#endif
25061 +       count = 0;
25062 +       while ((status = PGR_Create_Acception(fd,&sock,"",Recovery_Port_Number)) != STATUS_OK)
25063 +       {
25064 +               show_error("%s:PGR_Create_Acception failed",func);
25065 +               PGR_Close_Sock(&sock);
25066 +               sock = -1;
25067 +               if ( count > PGR_CONNECT_RETRY_TIME)
25068 +               {
25069 +                       return;
25070 +               }
25071 +               count ++;
25072 +       }
25073 +       if(sock==-1) {
25074 +                       show_error("can't create recovery socket.exit.");
25075 +                       PGRreplicate_exit(1);
25076 +       }
25077 +       for(;;)
25078 +       {
25079 +               int read_size = 0;
25080 +               int rtn;
25081 +               fd_set    rmask;
25082 +               struct timeval timeout;
25083 +
25084 +               timeout.tv_sec = RECOVERY_TIMEOUT;
25085 +               timeout.tv_usec = 0;
25086 +
25087 +               /* wait for a packet from the target cluster server */
25088 +               FD_ZERO(&rmask);
25089 +               FD_SET(sock,&rmask);
25090 +               rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
25091 +
25092 +               if (rtn == 0 || (rtn < 0 && errno != EINTR))
25093 +               {
25094 +                       /* timeout or select() failure: release the accepted socket before leaving */
25095 +                       PGR_Close_Sock(&sock);
25096 +                       return;
25097 +               }
25098 +               if (rtn > 0 && FD_ISSET(sock, &rmask))
25099 +               {
25100 +                       /* NOTE(review): read_size is never checked; confirm read_packet() fills packet on failure */
25101 +                       read_size = read_packet(sock, &packet);
25102 +               }
25103 +               else
25104 +               {
25105 +                       continue;       /* interrupted by a signal: wait again */
25106 +               }
25107 +
25108 +#ifdef PRINT_DEBUG
25109 +               show_debug("%s:receive packet no:%d",func,ntohs(packet.packet_no));
25110 +#endif
25111 +
25112 +               switch (ntohs(packet.packet_no))
25113 +               {
25114 +                       case RECOVERY_PREPARE_REQ :
25115 +                               /* start prepare of recovery */
25116 +
25117 +#ifdef PRINT_DEBUG
25118 +                               show_debug("%s:1st master %s - %d",
25119 +                                       func,Master.hostName,Master.port);
25120 +                               show_debug("%s:1st target %s - %d",
25121 +                                       func,Target.hostName,Target.port);
25122 +#endif
25123 +
25124 +                               loop_end = first_setup_recovery(&sock, &packet);
25125 +#ifdef PRINT_DEBUG
25126 +                               show_debug("%s:first_setup_recovery end:%d ",func,loop_end);
25127 +#endif
25128 +                               break;
25129 +                       case RECOVERY_START_REQ : 
25130 +                               /*
25131 +                                * now, recovery process will start
25132 +                                *    stop the transaction count up
25133 +                                *    start queueing and stop send all queries for master DB
25134 +                                */
25135 +#ifdef PRINT_DEBUG
25136 +                               show_debug("%s:2nd master %s - %d",
25137 +                                       func, Master.hostName,Master.port);
25138 +                               show_debug("%s:2nd target %s - %d",
25139 +                                       func, Target.hostName,Target.port);
25140 +#endif
25141 +                               loop_end = second_setup_recovery (&packet);
25142 +#ifdef PRINT_DEBUG
25143 +                               show_debug("%s:second_setup_recovery end :%d ",
25144 +                                       func,loop_end);
25145 +#endif
25146 +                               break;
25147 +                       case RECOVERY_QUEUE_DATA_REQ : 
25148 +                               /* send all queries in queue */
25149 +
25150 +#ifdef PRINT_DEBUG
25151 +                               show_debug("%s:last master %s - %d",
25152 +                                       func, Master.hostName,Master.port);
25153 +                               show_debug("%s:last target %s - %d",
25154 +                                       func, Target.hostName,Target.port);
25155 +#endif
25156 +                               status = PGRsend_queue(&Master,&Target);
25157 +                               if (status == STATUS_OK)
25158 +                               {
25159 +                                       memset(&new_host,0,sizeof(new_host));   /* no stack garbage in the unset fields */
25160 +                                       memcpy(new_host.hostName,Target.hostName,sizeof(new_host.hostName));
25161 +                                       memcpy(new_host.resolvedName,Target.resolvedName,sizeof(new_host.resolvedName));
25162 +                                       new_host.port = Target.port;
25163 +                                       new_host.recoveryPort = Target.recoveryPort;
25164 +                                       PGRset_recovered_host(&new_host,DB_TBL_USE);
25165 +                                       PGRadd_HostTbl(&new_host,DB_TBL_USE);
25166 +                                       PGRset_recovery_packet_no(&packet, RECOVERY_QUEUE_DATA_ANS );
25167 +                                       status = send_packet(&Target, &packet);
25168 +                                       if (status != STATUS_OK)
25169 +                                       {
25170 +                                               finish_recovery();
25171 +                                       }
25172 +                               }
25173 +                               else
25174 +                               {
25175 +                                       /* connection error , master or target may be down */
25176 +                                       show_error("%s:PGRsend_queue failed",func);
25177 +                                       PGRset_recovery_packet_no(&packet, RECOVERY_ERROR_CONNECTION);
25178 +                                       status = send_packet(&Target,&packet);
25179 +                                       finish_recovery();
25180 +                               }
25181 +                               loop_end = true;
25182 +                               break;
25183 +                       case RECOVERY_FINISH : 
25184 +                               /*
25185 +                                * finished rsync DB datas from master to target:
25186 +                                * stop queueing, and re-initialize recovery status
25187 +                                */
25188 +                               finish_recovery();
25189 +                               loop_end = true;
25190 +                               /*
25191 +                                * send finish recovery to load balancer
25192 +                                */
25193 +                               if (Master.recovery_sock != -1)
25194 +                               {
25195 +                                       close(Master.recovery_sock);
25196 +                                       Master.recovery_sock = -1;
25197 +                               }
25198 +                               if (Target.recovery_sock != -1)
25199 +                               {
25200 +                                       close(Target.recovery_sock);
25201 +                                       Target.recovery_sock = -1;
25202 +                               }
25203 +                               send_packet(&Master, &packet);
25204 +                               MasterPacketData.packet_no = packet.packet_no;
25205 +                               PGRsend_load_balance_packet(&MasterPacketData);
25206 +                               PGRsend_load_balance_packet(&packet);
25207 +                               memset((char *)&MasterPacketData,0,sizeof(RecoveryPacket));
25208 +                               break;
25209 +                       case RECOVERY_ERROR_ANS : 
25210 +#ifdef PRINT_DEBUG
25211 +                               show_debug("%s:recovery error accept. top queueing and initiarse recovery status",func);
25212 +#endif
25213 +                               status = PGRsend_queue(&Master,NULL);
25214 +                               memset(&packet,0,sizeof(RecoveryPacket));
25215 +                               PGRset_recovery_packet_no(&packet, RECOVERY_ERROR_ANS);
25216 +                               send_packet(&Master, &packet);
25217 +                               finish_recovery();
25218 +                               loop_end = true;
25219 +                               PGRset_recovery_packet_no(&MasterPacketData, RECOVERY_FINISH );
25220 +                               PGRsend_load_balance_packet(&MasterPacketData);
25221 +                               memset((char *)&MasterPacketData,0,sizeof(RecoveryPacket));
25222 +                               break;
25223 +                       default:
25224 +                               show_error("%s:unknown packet. abort to parse",func);
25225 +                               loop_end=true;
25226 +                               break;
25227 +               }
25228 +               if (loop_end)
25229 +               {
25230 +                       /* forget closed descriptors so a later session cannot close them again */
25231 +                       if (Master.sock != -1)
25232 +                       {
25233 +                               close (Master.sock);
25234 +                               Master.sock = -1;
25235 +                       }
25236 +                       if (Master.recovery_sock != -1)
25237 +                       {
25238 +                               close (Master.recovery_sock);
25239 +                               Master.recovery_sock = -1;
25240 +                       }
25241 +                       PGR_Close_Sock(&sock);
25242 +                       return;
25243 +               }
25244 +       }
25245 +}
25246 +
25247 +/*
25248 + * Fork the recovery process.  The parent returns right after fork(); the
25249 + * child binds the recovery port and serves recovery sessions forever.
25250 + */
25251 +void
25252 +PGRrecovery_main(int fork_wait_time)
25253 +{
25254 +       char * func = "PGRrecovery_main()";
25255 +       int status;
25256 +       int fd = -1;
25257 +       int rtn;
25258 +       pid_t pgid = 0;
25259 +       pid_t pid = 0;
25260 +
25261 +       pgid = getpgid(0);
25262 +       pid = fork();
25263 +       if (pid < 0)    /* no child was created: report it instead of failing silently */
25264 +               show_error("%s:fork() failed:(%s)",func,strerror(errno));
25265 +       if (pid != 0)
25266 +       {
25267 +               return;
25268 +       }
25269 +
25270 +       /* child process from here on: call recovery module */
25271 +       PGRsignal(SIGCHLD, SIG_DFL);
25272 +       PGRsignal(SIGHUP, PGRexit_subprocess);
25273 +       PGRsignal(SIGINT, PGRexit_subprocess);
25274 +       PGRsignal(SIGQUIT, PGRexit_subprocess);
25275 +       PGRsignal(SIGTERM, PGRexit_subprocess);
25276 +       PGRsignal(SIGPIPE, SIG_IGN);
25277 +       setpgid(0,pgid);
25278 +
25279 +       if (fork_wait_time > 0) {
25280 +#ifdef PRINT_DEBUG
25281 +               show_debug("recovery process: wait fork(): pid = %d", getpid());
25282 +#endif
25283 +               sleep(fork_wait_time);
25284 +       }
25285 +
25286 +#ifdef PRINT_DEBUG
25287 +       show_debug("%s:PGRrecovery_main bind port %d",func,Recovery_Port_Number);
25288 +#endif
25289 +       status = PGR_Create_Socket_Bind(&fd, "", Recovery_Port_Number);
25290 +       if (status != STATUS_OK)
25291 +       {
25292 +               show_error("%s:PGR_Create_Socket_Bind failed",func);
25293 +               exit(1);
25294 +       }
25295 +       memset(&MasterPacketData,0,sizeof(RecoveryPacket));
25296 +       memset(&Master,0,sizeof(RecoveryTbl));
25297 +       memset(&Target,0,sizeof(RecoveryTbl));
25298 +       for (;;)
25299 +       {
25300 +               fd_set    rmask;
25301 +               struct timeval timeout;
25302 +
25303 +               timeout.tv_sec = RECOVERY_TIMEOUT;
25304 +               timeout.tv_usec = 0;
25305 +               /* wait for activity on the bound recovery socket */
25306 +               FD_ZERO(&rmask);
25307 +               FD_SET(fd,&rmask);
25308 +               rtn = select(fd+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
25309 +               if (rtn > 0 && FD_ISSET(fd, &rmask))    /* rtn < 0 is an error, not readiness */
25310 +               {
25311 +                       pgrecovery_loop(fd);
25312 +               }
25313 +       }
25314 +}
25315 +
25316 +#ifdef PRINT_DEBUG
25317 +/* Print each field of a recovery packet through show_debug(). */
25318 +static void
25319 +show_recovery_packet(RecoveryPacket * packet)
25320 +{
25321 +       /* the numeric fields are passed through ntohs() before printing */
25322 +       show_debug("no = %d",ntohs(packet->packet_no));
25323 +       show_debug("max_connect = %d",ntohs(packet->max_connect));
25324 +       show_debug("port = %d",ntohs(packet->port));
25325 +       show_debug("recoveryPort = %d",ntohs(packet->recoveryPort));
25326 +       if (packet->hostName != NULL) { show_debug("hostName = %s",packet->hostName); }
25327 +       if (packet->pg_data != NULL) { show_debug("pg_data = %s",packet->pg_data); }
25328 +}
25329 +#endif
25330 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/replicate.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/replicate.c
25331 --- postgresql-8.2.4/src/pgcluster/pgrp/replicate.c     1970-01-01 01:00:00.000000000 +0100
25332 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/replicate.c   2007-03-01 16:27:15.000000000 +0100
25333 @@ -0,0 +1,4088 @@
25334 +/*--------------------------------------------------------------------
25335 + * FILE:
25336 + *     replicate.c
25337 + *
25338 + * NOTE:
25339 + *     This file is composed of the functions to call with the source
25340 + *     at pgreplicate for the replication.
25341 + *
25342 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
25343 + *--------------------------------------------------------------------
25344 + */
25345 +#include "postgres.h"
25346 +#include "postgres_fe.h"
25347 +
25348 +#include <pthread.h>
25349 +#include <stdio.h>
25350 +#include <stdarg.h>
25351 +#include <sys/types.h>
25352 +#include <fcntl.h>
25353 +#include <errno.h>
25354 +#include <ctype.h>
25355 +#include <time.h>
25356 +#include <sys/ipc.h>
25357 +#include <sys/shm.h>
25358 +#include <sys/sem.h>
25359 +#include <sys/msg.h>
25360 +#include <signal.h>
25361 +
25362 +
25363 +#include "libpq-fe.h"
25364 +#include "libpq-int.h"
25365 +#include "fe-auth.h"
25366 +
25367 +#include <sys/socket.h>
25368 +#include <unistd.h>
25369 +#include <netdb.h>
25370 +#include <arpa/inet.h>
25371 +
25372 +#ifdef HAVE_NETINET_TCP_H
25373 +#include <netinet/tcp.h>
25374 +#endif
25375 +
25376 +#ifdef HAVE_SYS_SELECT_H
25377 +#include <sys/select.h>
25378 +#endif
25379 +
25380 +
25381 +#ifdef HAVE_CRYPT_H
25382 +#include <crypt.h>
25383 +#endif
25384 +
25385 +
25386 +#ifdef MULTIBYTE
25387 +#include "mb/pg_wchar.h"
25388 +#endif
25389 +
25390 +#include "access/xact.h"
25391 +#include "lib/dllist.h"
25392 +#include "libpq/pqformat.h"
25393 +#include "replicate_com.h"
25394 +#include "pgreplicate.h"
25395 +
25396 +
25397 +#define IPC_NMAXSEM (32)
25398 +
25399 +/*--------------------------------------
25400 + * PROTOTYPE DECLARATION
25401 + *--------------------------------------
25402 + */
25403 +static TransactionTbl * setTransactionTbl(HostTbl * host_ptr, ReplicateHeader * header);
25404 +static TransactionTbl * insertTransactionTbl( HostTbl * host_ptr, TransactionTbl * datap);
25405 +static TransactionTbl * getTransactionTbl( HostTbl * host_ptr, ReplicateHeader * header);
25406 +static void deleteTransactionTbl(HostTbl * host_ptr,ReplicateHeader * header);
25407 +
25408 +static HostTbl * deleteHostTbl(HostTbl * ptr);
25409 +static bool is_master_in_recovery(char * host, int port,int recovery_status);
25410 +static void sem_quit(int semid);
25411 +static int send_cluster_status_to_load_balance(HostTbl * host_ptr,int status);
25412 +static void set_transaction_status(int status);
25413 +static void check_transaction_status(ReplicateHeader * header,TransactionTbl *transaction);
25414 +static HostTbl * check_host_transaction_status(ReplicateHeader * header,HostTbl *host );
25415 +static void clearHostTbl(void);
25416 +static bool is_need_sync_time(ReplicateHeader * header);
25417 +static bool is_need_wait_answer(ReplicateHeader * header);
25418 +static void write_host_status_file(HostTbl * host_ptr);
25419 +
25420 +static void delete_template(HostTbl * ptr, ReplicateHeader * header);
25421 +static char * check_copy_command(char * query);
25422 +static int read_answer(int dest);
25423 +static bool is_autocommit_off(char * query);
25424 +static bool is_autocommit_on(char * query);
25425 +static unsigned int get_host_ip_from_tbl(char * host);
25426 +static unsigned int get_srcHost_ip_from_tbl(char * srcHost);
25427 +
25428 +static int next_replication_id(void);
25429 +static void check_replication_id(void);
25430 +static bool is_need_use_rlog(ReplicateHeader * header);
25431 +static bool is_need_queue_jump( ReplicateHeader * header,char * query);
25432 +static int check_delete_transaction (HostTbl * host_ptr, ReplicateHeader * header);
25433 +
25434 +static bool is_executed_query_in_origin( ReplicateHeader *header );
25435 +static bool is_executed_query( PGconn *conn,ReplicateHeader *header );
25436 +
25437 +static void * thread_send_source(void * arg);
25438 +static void * thread_send_cluster(void * arg);
25439 +
25440 +static int send_replicate_packet_to_server( TransactionTbl * transaction_tbl, int current_cluster, HostTbl * host_ptr, ReplicateHeader * header, char *query , char * result,unsigned int replicationId, bool recovery);
25441 +static int check_result( PGresult * res );
25442 +static bool compare_results(int *results, int size, int source_id);
25443 +
25444 +static int send_func(HostTbl * host_ptr,ReplicateHeader * header, char * func,char * result);
25445 +static uint32_t get_oid(HostTbl * host_ptr,ReplicateHeader * header);
25446 +static int set_oid(HostTbl * host_ptr,ReplicateHeader * header, uint32_t oid);
25447 +static int replicate_lo( PGconn * conn, ReplicateHeader * header, LOArgs * query);
25448 +static int notice_abort(HostTbl * host_ptr,ReplicateHeader * header);
25449 +static FILE * create_queue_file(void);
25450 +static int add_queue_file(char * data, int size);
25451 +
25452 +static int send_p_parse (PGconn * conn, StringInfo input_message);
25453 +static int send_p_bind (PGconn * conn, StringInfo input_message);
25454 +static int send_p_describe (PGconn * conn, StringInfo input_message);
25455 +static int send_p_execute (PGconn * conn, StringInfo input_message);
25456 +static int send_p_sync (PGconn * conn, StringInfo input_message);
25457 +static int send_p_close (PGconn * conn, StringInfo input_message);
25458 +static void set_string_info(StringInfo input_message, ReplicateHeader * header, char * query);
25459 +
25460 +int replicate_packet_send_internal(ReplicateHeader * header, char * query,int dest,int recovery_status,bool isHeldLock);
25461 +bool PGRis_same_host(char * host1, unsigned short port1 , char * host2, unsigned short port2);
25462 +HostTbl * PGRadd_HostTbl(HostTbl *  conf_data, int useFlag);
25463 +HostTbl * PGRget_master(void);
25464 +void PGRset_recovery_status(int status);
25465 +int PGRget_recovery_status(void);
25466 +int PGRcheck_recovered_host(void);
25467 +int PGRset_recovered_host(HostTbl * target,int useFlag);
25468 +int PGRinit_recovery(void);
25469 +void PGRexit_subprocess(int signo);
25470 +void PGRreplicate_exit(int exit_status);
25471 +int PGRsend_replicate_packet_to_server( HostTbl * host_ptr, ReplicateHeader * header, char *query , char * result,unsigned int replicationId, bool recovery);
25472 +HostTbl * PGRget_HostTbl(char * resolvedName,int port);
25473 +int PGRset_queue(ReplicateHeader * header,char * query);
25474 +int PGRset_host_status(HostTbl * host_ptr,int status);
25475 +void PGRclear_transactions(void);
25476 +void PGRclear_connections();
25477 +int PGRset_replication_id(uint32_t id);
25478 +int PGRdo_replicate(int sock,ReplicateHeader *header, char * query);
25479 +int PGRreturn_result(int dest, char * result,int wait);
25480 +int PGRreplicate_packet_send( ReplicateHeader * header, char * query,int dest,int recovery_status);
25481 +char * PGRread_packet(int sock, ReplicateHeader *header);
25482 +char * PGRread_query(int sock, ReplicateHeader *header);
25483 +PGconn * PGRcreateConn( char * host, char * port,char * database, char * userName, char * password, char * md5Salt, char * cryptSalt );
25484 +
25485 +unsigned int PGRget_next_query_id(void);
25486 +int PGRinit_transaction_table(void);
25487 +int PGRsync_oid(ReplicateHeader *header);
25488 +int PGRload_replication_id(void);
25489 +extern pthread_mutex_t transaction_table_mutex;
25490 +
25491 +/*
25492 + * Tell whether host1:port1 and host2:port2 refer to the same server: the ports
25493 + * must be equal and PGRget_ip_by_name() must give the same value for both names.
25494 + */
25495 +bool
25496 +PGRis_same_host(char * host1, unsigned short port1 , char * host2, unsigned short port2)
25497 +{
25498 +#ifdef PRINT_DEBUG
25499 +       char * func = "PGRis_same_host()";
25500 +#endif
25501 +       unsigned int addr1, addr2;
25502 +
25503 +       /* an empty host name or differing ports can never match */
25504 +       if (!((host1[0] != '\0') && (host2[0] != '\0') && (port1 == port2)))
25505 +       {
25506 +#ifdef PRINT_DEBUG
25507 +               show_debug("%s:target host",func);
25508 +#endif
25509 +               return false;
25510 +       }
25511 +       /* the ports are known to be equal here, so only the addresses decide */
25512 +       addr1 = PGRget_ip_by_name( host1);
25513 +       addr2 = PGRget_ip_by_name( host2);
25514 +       return (addr1 == addr2);
25515 +}
25516 +
25517 +/* Open a libpq connection to host:port, retrying while the status is CONNECTION_BAD.
25518 + * Returns NULL once the retry counter exceeds PGR_CONNECT_RETRY_TIME. */
25519 +PGconn *
25520 +PGRcreateConn( char * host, char * port,char * database, char * userName, char * password, char * md5Salt, char * cryptSalt )
25521 +{
25522 +       char * func = "PGRcreateConn()";
25523 +       int cnt = 0;
25524 +       PGconn * conn = NULL;
25525 +       char pwd[256];
25526 +
25527 +       memset(pwd,0,sizeof(pwd));
25528 +       if (*password != '\0')
25529 +       {
25530 +               if ((strncmp(password,"md5",3) == 0) && (md5Salt != NULL))
25531 +               {
25532 +                       /* bounded write: password comes from the caller and pwd has a fixed size */
25533 +                       snprintf(pwd,sizeof(pwd),"%s(%d)(%d)(%d)(%d)",password,
25534 +                               *md5Salt,*(md5Salt+1),*(md5Salt+2),*(md5Salt+3));
25535 +               }
25536 +               else
25537 +               {
25538 +                       strncpy(pwd,password,sizeof(pwd)-1);    /* pwd is zero-filled, so it stays terminated */
25539 +               }
25540 +       }
25541 +       conn = PQsetdbLogin(host, port, NULL, NULL, database, userName, pwd);
25542 +       /* check to see that the backend Connection was successfully made */
25543 +       while (PQstatus(conn) == CONNECTION_BAD)
25544 +       {
25545 +               if (conn != NULL)
25546 +               {
25547 +                       PQfinish(conn);
25548 +                       conn = NULL;
25549 +               }
25550 +               conn = PQsetdbLogin(host, port, NULL, NULL, database, userName, pwd);
25551 +               if (cnt > PGR_CONNECT_RETRY_TIME )
25552 +               {
25553 +                       if (conn != NULL)
25554 +                       {
25555 +                               PQfinish(conn);
25556 +                               conn = NULL;
25557 +                       }
25558 +                       return (PGconn *)NULL;
25559 +               }
25560 +               if(PQstatus(conn) == CONNECTION_BAD && h_errno==2)
25561 +               {
25562 +                       show_error("gethostbyname() failed. sleep and retrying...");
25563 +                       usleep(PGR_SEND_WAIT_MSEC);
25564 +                       cnt ++;
25565 +               }
25566 +               else if(!strncasecmp(PQerrorMessage(conn),"FATAL:  Sorry, too many clients already",30) ||
25567 +                       !strncasecmp(PQerrorMessage(conn),"FATAL:  Non-superuser connection limit",30) ) 
25568 +               {
25569 +                       usleep(PGR_SEND_WAIT_MSEC);
25570 +                       show_error("Connection overflow. sleep and retrying...");
25571 +                       cnt ++;
25572 +               }
25573 +               else if(!strncasecmp(PQerrorMessage(conn),"FATAL:  The database system is starting up",40)   )
25574 +               {
25575 +#ifdef PRINT_DEBUG
25576 +                       show_debug("waiting for starting up...");
25577 +#endif
25578 +                       usleep(PGR_SEND_WAIT_MSEC);     /* cnt is not incremented in this branch */
25579 +               }
25580 +               else
25581 +               {
25582 +#ifdef PRINT_DEBUG
25583 +                       show_error("%s:Retry. h_errno is %d,reason is '%s'",func,h_errno,PQerrorMessage(conn));
25584 +#endif
25585 +                       usleep(PGR_SEND_WAIT_MSEC);
25586 +                       cnt ++;
25587 +               }
25588 +       }
25589 +       return conn;
25590 +}
25591 +
25592 +/* Look up, or create, the transaction entry for host_ptr/header and open a connection for it. */
25593 +static TransactionTbl *
25594 +setTransactionTbl(HostTbl * host_ptr, ReplicateHeader * header)
25595 +{
25596 +       char * func = "setTransactionTbl()";
25597 +       TransactionTbl * ptr = NULL;
25598 +       TransactionTbl work ;
25599 +       char port[8];
25600 +       char * hostName = NULL;
25601 +       char * dbName = NULL;
25602 +       char * userName = NULL;
25603 +       char * password = NULL;
25604 +       char * md5Salt = NULL;
25605 +       char * cryptSalt = NULL;
25606 +
25607 +       if ((host_ptr == NULL) || (header == NULL))
25608 +       {
25609 +               return (TransactionTbl *)NULL;
25610 +       }
25611 +       dbName = (char *)header->dbName;
25612 +       snprintf(port,sizeof(port),"%d", host_ptr->port);
25613 +       userName = (char *)(header->userName);
25614 +       password = (char *)(header->password);
25615 +       md5Salt = (char *)(header->md5Salt);
25616 +       cryptSalt = (char *)(header->cryptSalt);
25617 +       hostName = (char *)(host_ptr->resolvedName);
25618 +       ptr = getTransactionTbl(host_ptr,header);
25619 +       if (ptr != NULL)
25620 +       {
25621 +               /* NOTE(review): the old ptr->conn is overwritten without PQfinish(); confirm it is already closed */
25622 +               ptr->transaction_count = 0;
25623 +               ptr->conn = PGRcreateConn(hostName,port,dbName,userName,password,md5Salt,cryptSalt);
25624 +               if (ptr->conn == NULL)
25625 +               {
25626 +                       show_error("%s:Transaction is pooling but PGRcreateConn failed",func);
25627 +                       deleteTransactionTbl(host_ptr, header);
25628 +                       PGRset_host_status(host_ptr,DB_TBL_ERROR);
25629 +                       ptr = NULL;
25630 +               }
25631 +               return ptr;
25632 +       }
25633 +
25634 +       memset(&work,0,sizeof(work));   /* zero-fill: the size-1 copies below stay NUL-terminated */
25635 +       strncpy(work.host, hostName, sizeof(work.host)-1);
25636 +       strncpy(work.srcHost, header->from_host, sizeof(work.srcHost)-1);
25637 +       work.hostIP = PGRget_ip_by_name(hostName);
25638 +       work.port = host_ptr->port;
25639 +       work.srcHostIP = PGRget_ip_by_name(header->from_host);
25640 +       work.pid = ntohs(header->pid);
25641 +       strncpy(work.dbName,header->dbName,sizeof(work.dbName)-1);
25642 +       work.conn = PGRcreateConn(hostName,port,dbName,userName,password,md5Salt,cryptSalt);
25643 +       if (work.conn == NULL)
25644 +       {
25645 +#ifdef PRINT_DEBUG
25646 +               show_debug("%s: %s@%s is not ready",func,port,hostName);
25647 +#endif
25648 +               return (TransactionTbl *)NULL;
25649 +       }
25650 +       work.useFlag = DB_TBL_USE ;
25651 +       work.in_transaction = false;
25652 +       work.transaction_count = 0;
25653 +       ptr = insertTransactionTbl(host_ptr,&work);
25654 +       if (ptr == (TransactionTbl *)NULL)
25655 +       {
25656 +               /* NOTE(review): work.conn stays open on this path; release it if the table did not keep it */
25657 +               show_error("%s:insertTransactionTbl failed",func);
25658 +       }
25659 +       return ptr;     /* NULL when the insert failed */
25660 +}
25660 +
25661 +static TransactionTbl * /* returns the newly linked heap entry, or NULL on bad arguments, list-init failure or out of memory */
25662 +insertTransactionTbl( HostTbl * host_ptr, TransactionTbl * datap) /* appends a heap copy of *datap to the transaction list */
25663 +{
25664 +       char * func = "insertTransactionTbl()";
25665 +       TransactionTbl * workp = NULL;
25666 +
25667 +       pthread_mutex_lock(&transaction_table_mutex); /* held until every return path below */
25668 +       if ((host_ptr == (HostTbl *)NULL) || (datap == (TransactionTbl*)NULL))
25669 +       {
25670 +               show_error("%s:host table or transaction table is NULL",func);
25671 +               pthread_mutex_unlock(&transaction_table_mutex);
25672 +               return (TransactionTbl *)NULL;
25673 +       }
25674 +       if (Transaction_Tbl_Begin == NULL) /* list is created on first insert */
25675 +       {
25676 +               if (PGRinit_transaction_table() != STATUS_OK)
25677 +               {
25678 +                       pthread_mutex_unlock(&transaction_table_mutex);
25679 +                       return (TransactionTbl *)NULL;
25680 +               }
25681 +       }
25682 +       workp = (TransactionTbl *)calloc(1,sizeof(TransactionTbl)); /* zero-filled, replaces malloc()+memset() */
25683 +       if (workp == (TransactionTbl *)NULL) /* previously a failed allocation was passed straight to memset() */
25684 +       {
25685 +               show_error("%s:calloc failed. (%s)",func,strerror(errno));
25686 +               pthread_mutex_unlock(&transaction_table_mutex);
25687 +               return (TransactionTbl *)NULL;
25688 +       }
25689 +       Transaction_Tbl_End = workp; /* global tracks the most recently inserted entry */
25690 +       workp->hostIP = datap->hostIP;
25691 +       workp->port = datap->port;
25692 +       workp->pid = datap->pid;
25693 +       workp->srcHostIP = datap->srcHostIP;
25694 +       strncpy(workp->host,datap->host,sizeof(workp->host)); /* NOTE(review): not NUL-terminated when the source fills the whole buffer */
25695 +       strncpy(workp->srcHost,datap->srcHost,sizeof(workp->srcHost));
25696 +       strncpy(workp->dbName,datap->dbName,sizeof(workp->dbName));
25697 +       workp->conn = datap->conn; /* the connection pointer is shared with *datap, not duplicated */
25698 +       workp->useFlag = DB_TBL_USE; /* always marked in use, whatever datap->useFlag holds */
25699 +       workp->lock = STATUS_OK;
25700 +       workp->in_transaction =datap->in_transaction;
25701 +       workp->transaction_count =datap->transaction_count;
25702 +       DLAddTail(Transaction_Tbl_Begin, DLNewElem(workp));
25703 +       pthread_mutex_unlock(&transaction_table_mutex);
25704 +       return workp;
25705 +}
25706 +
25707 +static TransactionTbl * /* returns the matching in-use entry of the transaction list, or NULL when there is none */
25708 +getTransactionTbl( HostTbl * host_ptr, ReplicateHeader * header) /* match key: target host IP + port, source host IP, dbName, pid */
25709 +{
25710 +       Dlelem * ptr = NULL;
25711 +       unsigned int host_ip,srcHost_ip;
25712 +       unsigned short pid = 0;
25713 +
25714 +       if (Transaction_Tbl_Begin == (Dllist *) NULL) /* list not created yet: nothing to find */
25715 +       {
25716 +               return (TransactionTbl * )NULL;
25717 +       }
25718 +       if ((host_ptr == (HostTbl *)NULL) ||
25719 +               (header == (ReplicateHeader *)NULL))
25720 +       {
25721 +               return (TransactionTbl * )NULL;
25722 +       }
25723 +       host_ip = get_host_ip_from_tbl(host_ptr->resolvedName);
25724 +       if (host_ip == 0) /* table lookup gave 0: fall back to PGRget_ip_by_name() */
25725 +       {
25726 +               host_ip = PGRget_ip_by_name(host_ptr->resolvedName);
25727 +       }
25728 +       srcHost_ip = get_srcHost_ip_from_tbl(header->from_host);
25729 +       if (srcHost_ip == 0) /* same fallback for the source host */
25730 +       {
25731 +               srcHost_ip = PGRget_ip_by_name(header->from_host);
25732 +       }
25733 +       pid = ntohs(header->pid); /* header->pid is converted with ntohs() before comparison */
25734 +
25735 +       pthread_mutex_lock(&transaction_table_mutex); /* only the list walk itself runs under the mutex */
25736 +
25737 +       ptr = DLGetHead(Transaction_Tbl_Begin);
25738 +       while (ptr)
25739 +       {
25740 +               TransactionTbl *transaction = DLE_VAL(ptr);
25741 +               if ((transaction->useFlag == DB_TBL_USE) &&
25742 +                       (transaction->hostIP == host_ip) &&
25743 +                       (transaction->port == host_ptr->port) &&
25744 +                       (transaction->srcHostIP == srcHost_ip) &&
25745 +                       (!strncasecmp(transaction->dbName,header->dbName,sizeof(transaction->dbName))) && /* database name compared case-insensitively */
25746 +                       (transaction->pid == pid))
25747 +               {
25748 +                       pthread_mutex_unlock(&transaction_table_mutex);
25749 +                       return transaction; /* NOTE(review): the entry is handed out after the mutex has been released */
25750 +               }
25751 +               ptr = DLGetSucc(ptr);
25752 +       }
25753 +       pthread_mutex_unlock(&transaction_table_mutex);
25754 +
25755 +       return (TransactionTbl * )NULL;
25756 +}
25757 +
25758 +static void /* closes the connection of the entry matching (host_ptr, header) and unlinks/frees that entry */
25759 +deleteTransactionTbl(HostTbl * host_ptr,ReplicateHeader * header)
25760 +{
25761 +       TransactionTbl *ptr = NULL;
25762 +       Dlelem *elem;
25763 +
25764 +       ptr = getTransactionTbl(host_ptr,header); /* takes and releases the mutex on its own; NOTE(review): lookup and removal are two separate critical sections */
25765 +      
25766 +       pthread_mutex_lock(&transaction_table_mutex);
25767 +
25768 +       if (ptr != NULL)
25769 +       {
25770 +               /*
25771 +               if (ptr->in_transaction)
25772 +               {
25773 +                       if (host_ptr->transaction_count > 0)
25774 +                               host_ptr->transaction_count--;
25775 +               }
25776 +               */
25777 +
25778 +               if (ptr->conn != NULL)
25779 +               {
25780 +                       PQfinish(ptr->conn); /* NOTE(review): ptr->conn is left pointing at the closed connection if the entry is not found below */
25781 +               }
25782 +               elem = DLGetHead(Transaction_Tbl_Begin);
25783 +               while (elem) /* find the list element whose value is this entry */
25784 +               {
25785 +                       TransactionTbl *transaction = DLE_VAL(elem);
25786 +                       if (transaction == ptr) {
25787 +                               free(ptr); /* frees the entry; the list element is released separately below */
25788 +                               DLRemove(elem);
25789 +                               DLFreeElem(elem);
25790 +                               pthread_mutex_unlock(&transaction_table_mutex);
25791 +                               return;
25792 +                       }
25793 +                       elem = DLGetSucc(elem);
25794 +               }
25795 +       }
25796 +       pthread_mutex_unlock(&transaction_table_mutex);
25797 +}
25798 +
25799 +static HostTbl * /* zeroes *ptr and returns the following table slot; NULL in, NULL out */
25800 +deleteHostTbl(HostTbl * ptr)
25801 +{
25802 +       if (ptr == (HostTbl*)NULL)
25803 +               return (HostTbl*)NULL; /* was "++ptr" on NULL: undefined arithmetic yielding a bogus non-NULL pointer */
25804 +       memset(ptr,0,sizeof(HostTbl));
25805 +       /* hand back the next slot so the caller can keep walking the table */
25806 +       return ptr + 1;
25807 +}
25808 +
25809 +HostTbl * /* returns the existing or newly filled host entry, or NULL when the table is full */
25810 +PGRadd_HostTbl(HostTbl *conf_data, int useFlag) /* NOTE(review): conf_data and Host_Tbl_Begin are dereferenced without a NULL check */
25811 +{
25812 +       HostTbl * ptr = NULL;
25813 +       int cnt = 0;
25814 +
25815 +       ptr = PGRget_HostTbl(conf_data->resolvedName, conf_data->port);
25816 +       if (ptr != (HostTbl*)NULL) /* already registered: only the status is updated */
25817 +       {
25818 +               PGRset_host_status(ptr,useFlag);
25819 +               return ptr;
25820 +       }
25821 +
25822 +       ptr = Host_Tbl_Begin;
25823 +       cnt = 1; /* 1-based position of ptr in the table */
25824 +       while (ptr->useFlag != DB_TBL_END) /* stop at the first free slot or at the end marker */
25825 +       {
25826 +               if (ptr->useFlag == DB_TBL_FREE)
25827 +               {
25828 +                       break;
25829 +               }
25830 +               ptr ++;
25831 +               cnt ++;
25832 +       }
25833 +       if (cnt >= MAX_DB_SERVER)
25834 +       {
25835 +               return (HostTbl*)NULL;
25836 +       }
25837 +       if (ptr->useFlag == DB_TBL_END) /* taking over the end marker's slot: move the marker one slot further */
25838 +       {
25839 +               (ptr + 1) -> useFlag = DB_TBL_END;
25840 +       }
25841 +       memset(ptr,0,sizeof(HostTbl));
25842 +       ptr->hostNum = cnt;
25843 +       memcpy(ptr->hostName,conf_data->hostName,sizeof(ptr->hostName));
25844 +       memcpy(ptr->resolvedName,conf_data->resolvedName,sizeof(ptr->resolvedName));
25845 +       ptr->port = conf_data->port;
25846 +       ptr->recoveryPort = conf_data->recoveryPort;
25847 +       ptr->transaction_count = 0;
25848 +       PGRset_host_status(ptr,useFlag);
25849 +
25850 +       return ptr;
25851 +}
25852 +
25853 +HostTbl * /* returns the first host entry whose useFlag is DB_TBL_USE, or NULL when none precedes the end marker */
25854 +PGRget_master(void)
25855 +{
25856 +       HostTbl * host_tbl = NULL;
25857 +
25858 +       host_tbl = Host_Tbl_Begin; /* NOTE(review): dereferenced below without a NULL check */
25859 +       while(host_tbl->useFlag != DB_TBL_END)
25860 +       {
25861 +               if (host_tbl->useFlag == DB_TBL_USE)
25862 +               {
25863 +                       return host_tbl;
25864 +               }
25865 +               host_tbl ++;
25866 +       }
25867 +       return (HostTbl *)NULL;
25868 +}
25869 +
25870 +void /* stores status in Recovery_Status_Inf->recovery_status while holding the recovery semaphore */
25871 +PGRset_recovery_status(int status)
25872 +{
25873 +       if (RecoverySemID <= 0) /* no semaphore set available: silently do nothing */
25874 +               return;
25875 +       PGRsem_lock(RecoverySemID,SEM_NUM_OF_RECOVERY);
25876 +       if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
25877 +       {
25878 +               Recovery_Status_Inf->recovery_status = status;
25879 +               
25880 +       }
25881 +       PGRsem_unlock(RecoverySemID,SEM_NUM_OF_RECOVERY);
25882 +}
25883 +
25884 +int /* returns Recovery_Status_Inf->recovery_status, or -1 when the semaphore id or the status area is unavailable */
25885 +PGRget_recovery_status(void)
25886 +{
25887 +       int status = -1; /* result when Recovery_Status_Inf is NULL */
25888 +
25889 +       if (RecoverySemID <= 0)
25890 +               return -1;
25891 +       PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25892 +       if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
25893 +       {
25894 +               status = Recovery_Status_Inf->recovery_status;
25895 +       }
25896 +       PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25897 +       return status;
25898 +
25899 +}
25900 +
25901 +static void /* stores status in Recovery_Status_Inf->recovery_status while holding the recovery semaphore */
25902 +set_transaction_status(int status) /* NOTE(review): writes the same field as PGRset_recovery_status() - confirm this is intended */
25903 +{
25904 +       if (RecoverySemID <= 0)
25905 +               return ;
25906 +       PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25907 +       if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
25908 +       {
25909 +               Recovery_Status_Inf->recovery_status = status;
25910 +       }
25911 +       PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25912 +}
25913 +
25914 +#if 0 /* compiled out: reader counterpart of set_transaction_status(), kept for reference only */
25915 +static int
25916 +get_transaction_status(void)
25917 +{
25918 +       int status = 0;
25919 +
25920 +       if (RecoverySemID <= 0)
25921 +               return 0;
25922 +       PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25923 +       if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
25924 +       {
25925 +               status = Recovery_Status_Inf->recovery_status;
25926 +               PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25927 +               return status;
25928 +       }
25929 +       PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25930 +       return 0;
25931 +}
25932 +#endif
25933 +
25934 +int /* returns STATUS_OK, or STATUS_ERROR when the semaphore is unavailable or PGRadd_HostTbl() fails */
25935 +PGRcheck_recovered_host(void) /* moves a pending target_host from Recovery_Status_Inf into the host table */
25936 +{
25937 +       char * func = "PGRcheck_recovered_host()";
25938 +       HostTbl * ptr = NULL;
25939 +       int rtn = STATUS_OK;
25940 +
25941 +       if (RecoverySemID <= 0)
25942 +               return STATUS_ERROR;
25943 +       PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25944 +       if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
25945 +       {
25946 +               if (Recovery_Status_Inf->useFlag != DB_TBL_FREE) /* a host is pending only while useFlag is not DB_TBL_FREE */
25947 +               {
25948 +                       ptr = PGRadd_HostTbl((HostTbl *)&(Recovery_Status_Inf->target_host),Recovery_Status_Inf->useFlag);
25949 +                       if (ptr == (HostTbl *) NULL)
25950 +                       {
25951 +                               show_error("%s:PGRadd_HostTbl failed",func);
25952 +                               rtn = STATUS_ERROR;
25953 +                       }
25954 +                       Recovery_Status_Inf->useFlag = DB_TBL_FREE; /* the pending slot is cleared even when the add failed */
25955 +                       memset((HostTbl *)&(Recovery_Status_Inf->target_host),0,sizeof(HostTbl));
25956 +
25957 +               }
25958 +       }
25959 +       PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25960 +       return rtn;
25961 +}
25962 +
25963 +int /* returns -1 when RecoverySemID <= 0, otherwise 0 */
25964 +PGRset_recovered_host(HostTbl * target, int useFlag) /* records useFlag and, when target is non-NULL, a copy of *target in Recovery_Status_Inf */
25965 +{
25966 +       if (RecoverySemID <= 0)
25967 +               return -1;
25968 +       PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25969 +       if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
25970 +       {
25971 +               Recovery_Status_Inf->useFlag = useFlag;
25972 +               if (target != (HostTbl*)NULL)
25973 +               {
25974 +                       memcpy((HostTbl *)&(Recovery_Status_Inf->target_host),target,sizeof(HostTbl));
25975 +                       PGRset_host_status(target,useFlag); /* the caller's entry receives the same status */
25976 +               }
25977 +
25978 +       }
25979 +       PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25980 +       return 0;
25981 +}
25982 +
25983 +static bool /* true only when the recovery status is RECOVERY_CLEARED and (host, port) is the current master */
25984 +is_master_in_recovery(char * host , int port,int recovery_status) /* the recovery_status argument is not used in the body */
25985 +{
25986 +       HostTbl * master = NULL;
25987 +
25988 +       int status = PGRget_recovery_status(); /* status is read from the shared area, not taken from the argument */
25989 +       if (status == RECOVERY_CLEARED)
25990 +       {
25991 +               master = PGRget_master();
25992 +               if (master == (HostTbl *)NULL)
25993 +               {
25994 +                       return false;
25995 +               }
25996 +               return (PGRis_same_host(host, port , master->hostName, master->port));
25997 +       }
25998 +       return false;
25999 +}
26000 +
26001 +int /* returns STATUS_OK, or STATUS_ERROR as soon as one IPC object cannot be created or attached */
26002 +PGRinit_recovery(void) /* creates the recovery semaphores, shared memory areas and message queue */
26003 +{
26004 +       char * func = "PGRinit_recovery()";
26005 +       int size = 0;
26006 +       union semun sem_arg;
26007 +       int i = 0;
26008 +
26009 +       if ((RecoverySemID = semget(IPC_PRIVATE,4,IPC_CREAT | IPC_EXCL | 0600)) < 0)
26010 +       {
26011 +               show_error("%s:semget() failed. (%s)",func,strerror(errno));
26012 +               return STATUS_ERROR;
26013 +       }
26014 +       for ( i = 0 ; i < 4 ; i ++) /* every semaphore of the set starts at 1 */
26015 +       {
26016 +               semctl(RecoverySemID, i, GETVAL, sem_arg);
26017 +               sem_arg.val = 1;
26018 +               semctl(RecoverySemID, i, SETVAL, sem_arg);
26019 +       }
26020 +
26021 +       size = sizeof(RecoveryStatusInf);
26022 +       RecoveryShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
26023 +       if (RecoveryShmid < 0)
26024 +       {
26025 +               show_error("%s:shmget() failed. (%s)",func,strerror(errno));
26026 +               return STATUS_ERROR;
26027 +       }
26028 +       Recovery_Status_Inf = (RecoveryStatusInf *)shmat(RecoveryShmid,0,0);
26029 +       if (Recovery_Status_Inf == (RecoveryStatusInf *)-1)
26030 +       {
26031 +               show_error("%s:shmat() failed. (%s)",func,strerror(errno));
26032 +               return STATUS_ERROR;
26033 +       }
26034 +       memset(Recovery_Status_Inf,0,size);
26035 +       Recovery_Status_Inf->check_point = PGR_CHECK_POINT ;
26036 +
26037 +       size = sizeof(unsigned int); /* this size is reused for the message-queue id area further down */
26038 +       ReplicateSerializationShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
26039 +       if (ReplicateSerializationShmid < 0)
26040 +       {
26041 +               show_error("%s:shmget() failed. (%s)",func,strerror(errno));
26042 +               return STATUS_ERROR;
26043 +       }
26044 +
26045 +       PGR_ReplicateSerializationID = (unsigned int *)shmat(ReplicateSerializationShmid,0,0); 
26046 +       if( PGR_ReplicateSerializationID == (unsigned int *)-1) {
26047 +               show_error("%s:shmat() failed. (%s)",func,strerror(errno));
26048 +               return STATUS_ERROR;
26049 +       }
26050 +       memset(PGR_ReplicateSerializationID,0,size);
26051 +       PGRset_recovery_status(RECOVERY_INIT);
26052 +       PGRset_recovered_host((HostTbl *)NULL, DB_TBL_FREE);
26053 +       set_transaction_status(0);
26054 +
26055 +       /*
26056 +        * create the shared area that holds the message queue id, then the queue itself
26057 +        */
26058 +       RecoveryMsgShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
26059 +       if (RecoveryMsgShmid < 0)
26060 +       {
26061 +               show_error("%s:shmget() failed. (%s)",func,strerror(errno));
26062 +               return STATUS_ERROR;
26063 +       }
26064 +
26065 +       RecoveryMsgid = (int *)shmat(RecoveryMsgShmid,0,0);
26066 +       if( RecoveryMsgid == (int *)-1) { /* shmat() reports failure as (void *)-1; the old "< 0" pointer test never matched it */
26067 +               show_error("%s:shmat() failed. (%s)",func,strerror(errno));
26068 +               return STATUS_ERROR;
26069 +       }
26070 +       *RecoveryMsgid = msgget (IPC_PRIVATE, 00666 | IPC_CREAT );
26071 +       if (*RecoveryMsgid < 0)
26072 +       {
26073 +               show_error("%s:msgget() failed. (%s)",func,strerror(errno));
26074 +               return STATUS_ERROR;
26075 +       }
26076 +
26077 +
26078 +       return STATUS_OK;
26079 +}
26080 +
26081 +static void /* zeroes every host table entry that precedes the DB_TBL_END marker */
26082 +clearHostTbl(void)
26083 +{
26084 +
26085 +       HostTbl * ptr = NULL;
26086 +
26087 +       if (Host_Tbl_Begin == NULL)
26088 +               return;
26089 +       /* wipe the entries one by one; deleteHostTbl() returns the next slot */
26090 +       ptr = Host_Tbl_Begin;
26091 +       while(ptr && ptr->useFlag != DB_TBL_END)
26092 +       {
26093 +               ptr = deleteHostTbl(ptr);
26094 +       }       
26095 +}
26096 +
26097 +void /* records signo in exit_signo and terminates through PGRreplicate_exit(1); presumably installed as a signal handler - TODO confirm */
26098 +PGRexit_subprocess(int signo)
26099 +{
26100 +       exit_signo = signo; /* PGRreplicate_exit() forwards this signal with kill(0, exit_signo) */
26101 +       PGRreplicate_exit(1);
26102 +}
26103 +
26104 +void /* never returns: releases files, sockets, IPC objects and heap buffers, then calls exit(exit_status) */
26105 +PGRreplicate_exit(int exit_status)
26106 +{
26107 +       char fname[256];
26108 +       int rtn = 0;
26109 +       sigset_t mask;
26110 +
26111 +       sigemptyset(&mask); /* block the termination signals while cleaning up */
26112 +       sigaddset(&mask, SIGTERM);
26113 +       sigaddset(&mask, SIGINT);
26114 +       sigaddset(&mask, SIGQUIT);
26115 +       sigaddset(&mask, SIGCHLD);
26116 +       sigprocmask(SIG_BLOCK, &mask, NULL);
26117 +
26118 +       kill (0, exit_signo); /* forward the recorded signal to the whole process group */
26119 +
26120 +       child_wait(0);
26121 +
26122 +       if (RidFp != NULL) /* persist the current replication id before closing the file */
26123 +       {
26124 +               rewind(RidFp);
26125 +               if (Recovery_Status_Inf != NULL)
26126 +               {
26127 +                       PGRwrite_log_file(RidFp,"%u",Recovery_Status_Inf->replication_id);
26128 +               }
26129 +               fflush(RidFp);
26130 +               fclose(RidFp);
26131 +               RidFp = NULL;
26132 +       }
26133 +
26134 +       if (ReplicateSock > 0)
26135 +               close(ReplicateSock);
26136 +
26137 +       /* recovery status clear (only when the status area exists; it was dereferenced unchecked before) */
26138 +       if ((RecoverySemID > 0) && (Recovery_Status_Inf != (RecoveryStatusInf *)NULL))
26139 +               Recovery_Status_Inf->recovery_status = RECOVERY_INIT;
26140 +
26141 +       /* wipe the host table entries */
26142 +       clearHostTbl();
26143 +
26144 +       if (Host_Tbl_Begin != (HostTbl *)NULL) /* each shared area below is detached and its segment marked for removal */
26145 +       {
26146 +               rtn = shmdt((char *)Host_Tbl_Begin);
26147 +               shmctl(HostTblShmid,IPC_RMID,(struct shmid_ds *)NULL);
26148 +       }
26149 +
26150 +       if (Cascade_Tbl != (ReplicateServerInfo *)NULL)
26151 +       {
26152 +               rtn = shmdt((char *)Cascade_Tbl);
26153 +               shmctl(CascadeTblShmid,IPC_RMID,(struct shmid_ds *)NULL);
26154 +       }
26155 +
26156 +       if (Cascade_Inf != (CascadeInf *)NULL)
26157 +       {
26158 +               rtn = shmdt((char *)Cascade_Inf);
26159 +               shmctl(CascadeInfShmid,IPC_RMID,(struct shmid_ds *)NULL);
26160 +       }
26161 +
26162 +       if (Commit_Log_Tbl != (CommitLogInf *)NULL)
26163 +       {
26164 +               rtn = shmdt((char *)Commit_Log_Tbl);
26165 +               shmctl(CommitLogShmid,IPC_RMID,(struct shmid_ds *)NULL);
26166 +       }
26167 +
26168 +       if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
26169 +       {
26170 +               rtn = shmdt((char *)Recovery_Status_Inf);
26171 +               shmctl(RecoveryShmid,IPC_RMID,(struct shmid_ds *)NULL);
26172 +       }
26173 +       if (PGR_ReplicateSerializationID!=NULL) 
26174 +       {
26175 +           shmdt(PGR_ReplicateSerializationID);
26176 +           shmctl(ReplicateSerializationShmid,IPC_RMID,(struct shmid_ds *)NULL);
26177 +       }
26178 +
26179 +       if (RecoveryMsgid) /* remove the message queue first, then the area that stores its id */
26180 +       {
26181 +               if (*RecoveryMsgid >= 0)
26182 +                       msgctl(*RecoveryMsgid,IPC_RMID,(struct msqid_ds *)NULL);
26183 +
26184 +               shmdt(RecoveryMsgid);
26185 +               shmctl(RecoveryMsgShmid, IPC_RMID, NULL);
26186 +       }
26187 +
26188 +       if (StatusFp != NULL)
26189 +       {
26190 +               fflush(StatusFp);
26191 +               fclose(StatusFp);
26192 +               StatusFp = NULL;
26193 +       }
26194 +       if (LogFp != NULL)
26195 +       {
26196 +               fflush(LogFp);
26197 +               fclose(LogFp);
26198 +               LogFp = NULL;
26199 +       }
26200 +
26201 +       if (PGR_Result != NULL) /* heap buffers */
26202 +       {
26203 +               free(PGR_Result);
26204 +               PGR_Result = NULL;
26205 +       }
26206 +       if (PGR_Response_Inf != NULL)
26207 +       {
26208 +               free(PGR_Response_Inf);
26209 +               PGR_Response_Inf = NULL;
26210 +       }
26211 +
26212 +       if (LoadBalanceTbl != NULL)
26213 +       {
26214 +               free(LoadBalanceTbl);
26215 +               LoadBalanceTbl = NULL;
26216 +       }
26217 +
26218 +       if (PGR_Log_Header != NULL)
26219 +       {
26220 +               free(PGR_Log_Header);
26221 +               PGR_Log_Header = NULL;
26222 +       }
26223 +
26224 +       if (PGR_Send_Query_ID != NULL)
26225 +       {
26226 +               free(PGR_Send_Query_ID);
26227 +               PGR_Send_Query_ID = NULL;
26228 +       }
26229 +
26230 +       if (CascadeSemID > 0) /* semaphore sets */
26231 +       {
26232 +               sem_quit(CascadeSemID);
26233 +               CascadeSemID = 0;
26234 +       }
26235 +       if (SemID > 0)
26236 +       {
26237 +               sem_quit(SemID);
26238 +               SemID = 0;
26239 +       }
26240 +       if (RecoverySemID > 0)
26241 +       {
26242 +               sem_quit(RecoverySemID);
26243 +               RecoverySemID = 0;
26244 +       }
26245 +       if (VacuumSemID > 0)
26246 +       {
26247 +               sem_quit(VacuumSemID);
26248 +       }
26249 +
26250 +       snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGREPLICATE_PID_FILE);
26251 +       unlink(fname); /* remove the pid file */
26252 +
26253 +       /* close socket between rlog process (skipped when Replicateion_Log itself is NULL) */
26254 +       
26255 +       if ((Replicateion_Log != NULL) && (Replicateion_Log->r_log_sock >= 0))
26256 +       {
26257 +               close(Replicateion_Log->r_log_sock);
26258 +               Replicateion_Log->r_log_sock = -1;
26259 +       }
26260 +       if ((Replicateion_Log != NULL) && (Replicateion_Log->RLog_Sock_Path != NULL))
26261 +       {
26262 +               unlink(Replicateion_Log->RLog_Sock_Path);
26263 +               free(Replicateion_Log->RLog_Sock_Path);
26264 +               Replicateion_Log->RLog_Sock_Path = NULL;
26265 +       }
26266 +
26267 +       if (ResolvedName != NULL)
26268 +       {
26269 +               free(ResolvedName);
26270 +               ResolvedName = NULL;
26271 +       }
26272 +       exit(exit_status);
26273 +}
26274 +
26275 +static int /* returns whatever PGRsend_load_balance_packet() returns */
26276 +send_cluster_status_to_load_balance(HostTbl * host_ptr,int status) /* reports status for host_ptr's hostName/port in a RecoveryPacket */
26277 +{
26278 +       RecoveryPacket packet;
26279 +       int rtn = 0;
26280 +
26281 +       memset(&packet,0,sizeof(RecoveryPacket));
26282 +       packet.packet_no = htons(status); /* status and port are sent through htons() */
26283 +       strncpy(packet.hostName,host_ptr->hostName,sizeof(packet.hostName)); /* NOTE(review): not NUL-terminated when hostName fills the whole field */
26284 +       packet.port = htons(host_ptr->port);
26285 +       rtn = PGRsend_load_balance_packet(&packet);
26286 +       return rtn;
26287 +}
26288 +
26289 +int /* returns STATUS_ERROR for a NULL host_ptr, otherwise STATUS_OK */
26290 +PGRset_host_status(HostTbl * host_ptr,int status) /* updates host_ptr->useFlag and logs the change */
26291 +{
26292 +       if (host_ptr == NULL)
26293 +       {
26294 +               return STATUS_ERROR;
26295 +       }
26296 +       if (host_ptr->useFlag != status) /* nothing is sent or logged when the status is unchanged */
26297 +       {
26298 +               host_ptr->useFlag = status;
26299 +               if (status == DB_TBL_ERROR )
26300 +               {
26301 +                       host_ptr->transaction_count = 0;
26302 +                       send_cluster_status_to_load_balance(host_ptr,RECOVERY_ERROR_CONNECTION); /* return value is not checked */
26303 +               }
26304 +               write_host_status_file(host_ptr);
26305 +       }
26306 +       return STATUS_OK;
26307 +}
26308 +
26309 +static void /* writes one port/host line describing host_ptr->useFlag to StatusFp through PGRwrite_log_file() */
26310 +write_host_status_file(HostTbl * host_ptr)
26311 +{
26312 +       switch( host_ptr->useFlag) /* no default case: any other useFlag value writes nothing */
26313 +       {
26314 +               case DB_TBL_FREE:
26315 +                       PGRwrite_log_file(StatusFp,"port(%d) host:%s free",
26316 +                                       host_ptr->port,
26317 +                                       host_ptr->hostName);
26318 +                       break;
26319 +               case DB_TBL_INIT:
26320 +                       PGRwrite_log_file(StatusFp,"port(%d) host:%s initialize",
26321 +                                       host_ptr->port,
26322 +                                       host_ptr->hostName);
26323 +                       break;
26324 +               case DB_TBL_USE:
26325 +                       PGRwrite_log_file(StatusFp,"port(%d) host:%s start use",
26326 +                                       host_ptr->port,
26327 +                                       host_ptr->hostName);
26328 +                       break;
26329 +               case DB_TBL_ERROR:
26330 +                       PGRwrite_log_file(StatusFp,"port(%d) host:%s error",
26331 +                                       host_ptr->port,
26332 +                                       host_ptr->hostName);
26333 +                       break;
26334 +               case DB_TBL_END:
26335 +                       PGRwrite_log_file(StatusFp,"port(%d) host:%s end",
26336 +                                       host_ptr->port,
26337 +                                       host_ptr->hostName); 
26338 +                       break;
26339 +       }
26340 +}
26341 +
26342 +static int /* STATUS_ERROR when PQresultStatus(res) is PGRES_NONFATAL_ERROR or PGRES_FATAL_ERROR, otherwise STATUS_OK */
26343 +check_result( PGresult * res )
26344 +{
26345 +       int status = 0;
26346 +
26347 +       status = PQresultStatus(res);
26348 +       if ((status == PGRES_NONFATAL_ERROR ) ||
26349 +               (status == PGRES_FATAL_ERROR ))
26350 +       {
26351 +               return STATUS_ERROR;
26352 +       }
26353 +       return STATUS_OK;
26354 +}
26355 +
26356 +static bool /* true when all results[i] with i != source_id are equal (also true when there is no such element) */
26357 +compare_results(int *results, int size, int source_id) /* results has size elements; index source_id is ignored */
26358 +{
26359 +       int i, prev = 0;
26360 +
26361 +       for (i = 0; i < size; i++) /* find the first index that is not source_id and take its value as reference */
26362 +       {
26363 +               if (i != source_id)
26364 +               {
26365 +                       prev = results[i];
26366 +                       break;
26367 +               }
26368 +       }
26369 +
26370 +       for (; i < size; i++) /* continue from that index and compare each remaining value with its predecessor */
26371 +       {
26372 +               if (i == source_id)
26373 +                       continue;
26374 +               if (prev != results[i])
26375 +                       return false;
26376 +               prev = results[i];
26377 +       }
26378 +       return true;
26379 +}
26380 +
26381 +/*--------------------------------------------------
26382 + * SYMBOL
26383 + *     PGRsend_replicate_packet_to_server()
26384 + * NOTES
26385 + *     Send query data to the cluster DB and recieve result data. 
26386 + * ARGS
26387 + *     HostTbl * host_ptr: the record of cluster DB table (target)
26388 + *     ReplicateHeader * header: header data
26389 + *     char *query: query data 
26390 + *     char * result: returned result data 
26391 + * RETURN
26392 + *     STATUS_OK: OK
26393 + *     STATUS_ERROR: NG
26394 + *     STATUS_LOCK_CONFLICT: Lock conflicted
26395 + *---------------------------------------------------
26396 + */
26397 +int
26398 +PGRsend_replicate_packet_to_server( HostTbl * host_ptr, ReplicateHeader * header, char *query , char * result,unsigned int replicationId, bool recovery)
26399 +{
26400 +       char * func = "PGRsend_replicate_packet_to_server()";
26401 +       TransactionTbl * transaction_tbl = NULL;
26402 +       char *database = NULL;
26403 +       char port[8];
26404 +       char *userName = NULL;
26405 +       char * password = NULL;
26406 +       char * host = NULL;
26407 +       char * md5Salt = NULL;
26408 +       char * cryptSalt = NULL;
26409 +       int rtn = 0;
26410 +       int current_cluster = 0;
26411 +       int query_size = 0;
26412 +
26413 +       if ((query == NULL) || (header == NULL))
26414 +       {
26415 +               show_error("%s: query is broken",func);
26416 +               return STATUS_ERROR;
26417 +       }
26418 +       query_size = ntohl(header->query_size);
26419 +       if (query_size < 0)
26420 +       {
26421 +               show_error("%s: query size is broken",func);
26422 +               return STATUS_ERROR;
26423 +       }
26424 +       if (host_ptr == NULL)
26425 +       {
26426 +               return STATUS_ERROR;
26427 +       }
26428 +
26429 +       if (PGR_Response_Inf != NULL)
26430 +       {
26431 +               current_cluster = PGR_Response_Inf->current_cluster;
26432 +       }
26433 +
26434 +       /*
26435 +        * set up the connection
26436 +        */
26437 +       database = (char *)header->dbName;
26438 +       snprintf(port,sizeof(port),"%d", host_ptr->port);
26439 +       userName = (char *)(header->userName);
26440 +       password = (char *)(header->password);
26441 +       md5Salt = (char *)(header->md5Salt);
26442 +       cryptSalt = (char *)(header->cryptSalt);
26443 +       host = (char *)(host_ptr->resolvedName);
26444 +       /*
26445 +        * get the transaction table data
26446 +        * it has the connection data with each cluster DB
26447 +        */
26448 +       transaction_tbl = getTransactionTbl(host_ptr,header);
26449 +       /*
26450 +        * if the transaction process is new one, 
26451 +        * create connection data and add the transaction table
26452 +        */
26453 +       if (transaction_tbl == (TransactionTbl *)NULL)
26454 +       {
26455 +               if (recovery == true)
26456 +               {
26457 +                       int cnt = 0;
26458 +                       while(transaction_tbl == (TransactionTbl *)NULL)
26459 +                       {
26460 +                               transaction_tbl = setTransactionTbl(host_ptr, header);
26461 +                               if (cnt > RECOVERY_TIMEOUT)
26462 +                               {
26463 +                                       break;
26464 +                               }
26465 +                               cnt ++;
26466 +                               sleep(1);
26467 +                       }
26468 +               }
26469 +               else
26470 +               {
26471 +                       transaction_tbl = setTransactionTbl(host_ptr, header);
26472 +               }
26473 +               if (transaction_tbl == (TransactionTbl *)NULL)
26474 +               {
26475 +                       show_error("%s:setTransactionTbl failed",func);
26476 +                       if ( header->cmdSts != CMD_STS_NOTICE )
26477 +                       {
26478 +                               PGRset_host_status(host_ptr,DB_TBL_ERROR);
26479 +                       }
26480 +                       return STATUS_ERROR;
26481 +               }
26482 +               StartReplication[current_cluster] = true;
26483 +       }
26484 +       else
26485 +       {
26486 +               /*
26487 +                * re-use the connection data
26488 +                */
26489 +               if ((transaction_tbl->conn != (PGconn *)NULL) &&
26490 +                       (transaction_tbl->conn->sock > 0))
26491 +               {
26492 +                       StartReplication[current_cluster] = false;
26493 +               }
26494 +               else
26495 +               {
26496 +                       if (transaction_tbl->conn != (PGconn *)NULL)
26497 +                       {
26498 +                               PQfinish(transaction_tbl->conn);
26499 +                               transaction_tbl->conn = NULL;
26500 +                       }
26501 +                       transaction_tbl->conn = PGRcreateConn(host,port,database,userName,password,md5Salt,cryptSalt);
26502 +                       StartReplication[current_cluster] = true;
26503 +               }
26504 +       }
26505 +       if(header->cmdSts==CMD_STS_OTHER &&
26506 +          header->cmdType==CMD_TYPE_CONNECTION_CLOSE) 
26507 +       {
26508 +               check_delete_transaction(host_ptr, header);
26509 +               return STATUS_OK;
26510 +       }
26511 +#ifdef PRINT_DEBUG
26512 +       show_debug("%s:connect db:%s port:%s user:%s host:%s query:%s",
26513 +               func, database,port,userName,host,query);
26514 +#endif                 
26515 +        rtn = send_replicate_packet_to_server( transaction_tbl, current_cluster, host_ptr, header, query ,result ,replicationId, recovery);
26516 +       return rtn;
26517 +}
26518 +
26519 +static int /* Executes one replicated request on the server connection held in transaction_tbl. */
26520 +send_replicate_packet_to_server( TransactionTbl * transaction_tbl, int current_cluster, HostTbl * host_ptr, ReplicateHeader * header, char *query , char * result,unsigned int replicationId, bool recovery)
26521 +{
26522 +       char * func = "send_replicate_packet_to_server()";
26523 +       PGconn * conn = (PGconn *)NULL;
26524 +       PGresult * res = (PGresult *)NULL;
26525 +       char sync_command[256]; /* scratch buffer for the "SELECT <system func>(...)" statements built below */
26526 +       bool sync_command_flg = false; /* set when the current-time statement has to precede the query */
26527 +       char * str = NULL;
26528 +       int rtn = 0;
26529 +       int query_size = 0;
26530 +       int hostNum = 0;
26531 +       StringInfoData input_message;
26532 +
26533 +       if (( transaction_tbl == (TransactionTbl *)NULL) ||
26534 +               ( host_ptr == (HostTbl *) NULL) ||
26535 +               (header == (ReplicateHeader *) NULL) ||
26536 +               (query == NULL) ||
26537 +               ( result == NULL))
26538 +       {
26539 +               show_error("%s:unexpected NULL variable",func);
26540 +               return STATUS_ERROR;
26541 +       }
26542 +
26543 +       query_size = ntohl(header->query_size); /* the header field is converted from network byte order */
26544 +       if (query_size < 0)
26545 +       {
26546 +               show_error("%s: query size is broken",func);
26547 +               return STATUS_ERROR;
26548 +       }
26549 +
26550 +/*
26551 + * Returns STATUS_OK, STATUS_ERROR, STATUS_LOCK_CONFLICT or
26552 + * STATUS_DEADLOCK_DETECT.  The 'recovery' argument is not used here.
26553 + *
26554 + * CMD_TYPE_CONNECTION_CLOSE needs no handling at this point: the caller
26555 + * visible just above this function deals with it and returns before
26556 + * calling in (the same check used to be repeated here).
26557 + */
26558 +       conn = transaction_tbl->conn;
26559 +       if (conn == NULL)
26560 +       {
26561 +               show_error("%s:[%d@%s] may be down",func,host_ptr->port,host_ptr->hostName);
26562 +               if ( header->cmdSts != CMD_STS_NOTICE ) /* NOTICE requests never change the host status */
26563 +               {
26564 +                       PGRset_host_status(host_ptr,DB_TBL_ERROR);
26565 +               }
26566 +               return STATUS_ERROR;
26567 +       }
26568 +       hostNum = host_ptr->hostNum;
26569 +
26570 +       /*
26571 +        * Decide whether the current-time statement is wanted: only for the
26572 +        * request kinds accepted by is_need_sync_time(), and only while
26573 +        * transaction_count is not above 1.
26574 +        */
26575 +       if (is_need_sync_time(header) == true)
26576 +       {
26577 +               if (transaction_tbl->transaction_count >1 )
26578 +               {
26579 +                       sync_command_flg = false;
26580 +               }
26581 +               else
26582 +               {
26583 +                       sync_command_flg = true;
26584 +               }
26585 +       }
26586 +       if ((header->cmdSts == CMD_STS_TRANSACTION ) || /* the inner test needs CMD_STS_TRANSACTION, so ... */
26587 +               (header->cmdSts == CMD_STS_SET_SESSION_AUTHORIZATION )) /* ... this alternative changes nothing */
26588 +       {
26589 +               if ((header->cmdSts == CMD_STS_TRANSACTION ) &&
26590 +                       ((header->cmdType != CMD_TYPE_BEGIN)     ||
26591 +                       (transaction_tbl->transaction_count >1 )))
26592 +               {
26593 +                       sync_command_flg = false; /* only a BEGIN with transaction_count <= 1 keeps the flag */
26594 +               }
26595 +       }
26596 +
26597 +       /*
26598 +        * execute query
26599 +        */
26600 +
26601 +       if (header->rlog > 0 )
26602 +       {
26603 +
26604 +               if (is_executed_query( conn, header) == true) /* already executed on this server: nothing to send */
26605 +               {
26606 +                       return STATUS_OK;
26607 +               }
26608 +               else 
26609 +               {
26610 +#ifdef PRINT_DEBUG
26611 +                 show_debug("%s:check replication log issue , id=%d,rlog=%d,query=%s status=not_replicated",func,ntohl(header->replicate_id),header->rlog,query);
26612 +#endif
26613 +               }
26614 +       }
26615 +       if (( header->cmdSts != CMD_STS_NOTICE ) && 
26616 +               ( header->cmdSts != CMD_STS_PREPARE ) &&
26617 +               ((sync_command_flg == true)           ||
26618 +                (StartReplication[current_cluster] == true)))
26619 +       {
26620 +               snprintf(sync_command,sizeof(sync_command), /* builds the PGR_SET_CURRENT_TIME_FUNC_NO call from the header timestamp */
26621 +                       "SELECT %s(%d,%u,%u,%u,%d,%u) ",
26622 +                       PGR_SYSTEM_COMMAND_FUNC,
26623 +                       PGR_SET_CURRENT_TIME_FUNC_NO,
26624 +                       (unsigned int)ntohl(header->tv.tv_sec),
26625 +                       (unsigned int)ntohl(header->tv.tv_usec),
26626 +                       (unsigned int)ntohl(PGR_Log_Header->replicate_id),
26627 +                       PGR_Response_Inf->response_mode,
26628 +                        *PGR_ReplicateSerializationID);
26629 +#ifdef PRINT_DEBUG
26630 +               show_debug("%s:sync_command(%s)",func,sync_command);
26631 +#endif                 
26632 +               res = PQexec(conn, sync_command); /* the outcome of this statement is not inspected */
26633 +               if (res != NULL)
26634 +                       PQclear(res);
26635 +               StartReplication[current_cluster] = false;
26636 +       }
26637 +
26638 +       res = NULL;
26639 +       if ((header->cmdType == CMD_TYPE_COPY_DATA) ||
26640 +               (header->cmdType == CMD_TYPE_COPY_DATA_END))
26641 +       {
26642 +               /* copy data replication */
26643 +               rtn =PQputnbytes(conn, query,query_size); /* NOTE(review): this return value is never checked */
26644 +               if (header->cmdType == CMD_TYPE_COPY_DATA_END)
26645 +               {
26646 +                       rtn = PQendcopy(conn);
26647 +                       if (rtn == 1) /* failed */
26648 +                       {
26649 +                               if (transaction_tbl->conn != NULL)
26650 +                               {
26651 +                                       PQfinish(transaction_tbl->conn);
26652 +                                       transaction_tbl->conn = (PGconn *)NULL;
26653 +                                       StartReplication[current_cluster] = true;
26654 +                               }
26655 +                       }
26656 +               }
26657 +               *(PGR_Send_Query_ID + hostNum ) = ntohl(header->query_id);
26658 +               return STATUS_OK; /* NOTE(review): STATUS_OK is returned even when PQendcopy() failed above - confirm */
26659 +       }
26660 +       else if (header->cmdSts == CMD_STS_LARGE_OBJECT)
26661 +       {
26662 +               replicate_lo(conn, header,(LOArgs *)query); /* the query buffer carries a LOArgs record; result ignored */
26663 +               return STATUS_OK;
26664 +       }
26665 +
26666 +       else if (header->cmdSts == CMD_STS_PREPARE)
26667 +       {
26668 +
26669 +               if ( !PGR_Parse_Session_Started) /* NOTE(review): this function only ever clears the flag; it is set elsewhere */
26670 +               {
26671 +                       snprintf(sync_command,sizeof(sync_command),
26672 +                               "SELECT %s(%d,%u,%u,%u,%d,%u) ",
26673 +                               PGR_SYSTEM_COMMAND_FUNC,
26674 +                               PGR_SET_CURRENT_TIME_FUNC_NO,
26675 +                               (unsigned int)ntohl(header->tv.tv_sec),
26676 +                               (unsigned int)ntohl(header->tv.tv_usec),
26677 +                               (unsigned int)ntohl(PGR_Log_Header->replicate_id),
26678 +                               PGR_Response_Inf->response_mode,
26679 +                                *PGR_ReplicateSerializationID);
26680 +                       res = PQexec(conn, sync_command);
26681 +                       if (res != NULL)
26682 +                       {
26683 +                               PQclear(res);
26684 +                               res = NULL;
26685 +                       }
26686 +                       while ((res = PQgetResult(conn)) != NULL) /* drain every pending result before the protocol messages */
26687 +                       {
26688 +                               if (res->resultStatus == PGRES_COPY_IN)
26689 +                               {
26690 +                                       PQclear(res);
26691 +                                       return STATUS_ERROR;
26692 +                               }
26693 +                               else if (res->resultStatus == PGRES_COPY_OUT)
26694 +                               {
26695 +                                       conn->asyncStatus = PGASYNC_BUSY; /* writes libpq's connection state directly */
26696 +                               }
26697 +                               else if (conn->status == CONNECTION_BAD)
26698 +                               {
26699 +                                       PQclear(res);
26700 +                                       return STATUS_ERROR;
26701 +                               }
26702 +                               PQclear(res);
26703 +                       }
26704 +               }
26705 +               set_string_info(&input_message,header,query);
26706 +               switch (header->cmdType) /* one sender per message type; every failure clears the parse-session flag */
26707 +               {
26708 +                       case CMD_TYPE_P_PARSE :
26709 +                               if (send_p_parse(conn, &input_message) != STATUS_OK)
26710 +                               {
26711 +                                       pqHandleSendFailure(conn);
26712 +                                       PGR_Parse_Session_Started = false;
26713 +                                       return STATUS_ERROR;
26714 +                               }
26715 +                               break;
26716 +                       case CMD_TYPE_P_BIND :
26717 +                               if (send_p_bind(conn, &input_message) != STATUS_OK)
26718 +                               {
26719 +                                       pqHandleSendFailure(conn);
26720 +                                       PGR_Parse_Session_Started = false;
26721 +                                       return STATUS_ERROR;
26722 +                               }
26723 +                               break;
26724 +                       case CMD_TYPE_P_DESCRIBE :
26725 +                               if (send_p_describe(conn, &input_message) != STATUS_OK)
26726 +                               {
26727 +                                       pqHandleSendFailure(conn);
26728 +                                       PGR_Parse_Session_Started = false;
26729 +                                       return STATUS_ERROR;
26730 +                               }
26731 +                               break;
26732 +                       case CMD_TYPE_P_EXECUTE :
26733 +                               if (send_p_execute(conn,&input_message) != STATUS_OK)
26734 +                               {
26735 +                                       pqHandleSendFailure(conn);
26736 +                                       PGR_Parse_Session_Started = false;
26737 +                                       return STATUS_ERROR;
26738 +                               }
26739 +                               break;
26740 +                       case CMD_TYPE_P_SYNC :
26741 +                               if (send_p_sync(conn, &input_message) != STATUS_OK)
26742 +                               {
26743 +                                       pqHandleSendFailure(conn);
26744 +                                       PGR_Parse_Session_Started = false;
26745 +                                       return STATUS_ERROR;
26746 +                               }
26747 +                               break;
26748 +                       case CMD_TYPE_P_CLOSE :
26749 +                               if (send_p_close(conn, &input_message) != STATUS_OK)
26750 +                               {
26751 +                                       pqHandleSendFailure(conn);
26752 +                                       PGR_Parse_Session_Started = false;
26753 +                                       return STATUS_ERROR;
26754 +                               }
26755 +                               break;
26756 +                       default : /* any other command type is accepted without sending anything */
26757 +                               break;
26758 +               }
26759 +               return STATUS_OK;
26760 +       }
26761 +       else
26762 +       {
26763 +               if (transaction_tbl->lock != STATUS_OK) /* forget a lock status left by an earlier request */
26764 +               {
26765 +#ifdef PRINT_DEBUG
26766 +       show_debug("%s:[%d]transaction_tbl->lock is [%d]",func,current_cluster,transaction_tbl->lock );
26767 +#endif
26768 +                       transaction_tbl->lock = STATUS_OK;
26769 +               }
26770 +               snprintf(sync_command,sizeof(sync_command), /* hands replicationId to the server before the real query */
26771 +                       "SELECT %s(%d,%u,%u,%d) ",
26772 +                       PGR_SYSTEM_COMMAND_FUNC,
26773 +                       PGR_SET_CURRENT_REPLICATION_QUERY_ID_NO,
26774 +                       replicationId,
26775 +                       0,
26776 +                       PGR_Response_Inf->response_mode);
26777 +               res = PQexec(conn, sync_command);
26778 +               if (res != NULL)
26779 +               {
26780 +                       PQclear(res);
26781 +                       res = NULL;
26782 +               }       
26783 +               res = PQexec(conn, query);
26784 +               rtn = check_result(res); /* NOTE(review): rtn is not read again after this assignment */
26785 +#ifdef PRINT_DEBUG
26786 +       show_debug("%s:PQexec send :%s",func,query);
26787 +#endif 
26788 +
26789 +       }
26790 +
26791 +       if (res == NULL)
26792 +       {
26793 +               StartReplication[current_cluster] = true; /* the time statement is sent again with the next request */
26794 +               return STATUS_ERROR;
26795 +       }
26796 +               
26797 +       str = PQcmdStatus(res);
26798 +#ifdef PRINT_DEBUG
26799 +       show_debug("%s:PQexec returns :%s",func,str);
26800 +#endif 
26801 +       if ((str == NULL) || (*str == '\0')) /* no command status: report 'E' followed by the error text */
26802 +       {
26803 +               if ((result != NULL) && (res != NULL) && (res->errMsg != NULL)) /* result and res were both checked non-NULL above */
26804 +               {
26805 +                       snprintf(result,PGR_MESSAGE_BUFSIZE,"E%s",res->errMsg);
26806 +               }
26807 +               else
26808 +               {
26809 +                       strcpy(result,"E");
26810 +               }
26811 +               StartReplication[current_cluster] = true;
26812 +       }
26813 +       else
26814 +       {
26815 +               if (!strncasecmp(str,PGR_LOCK_CONFLICT_NOTICE_CMD,strlen(PGR_LOCK_CONFLICT_NOTICE_CMD))) /* case-insensitive prefix test */
26816 +               {
26817 +#ifdef PRINT_DEBUG
26818 +                       show_debug("%s:LOCK CONFLICT from PQexec",func);
26819 +#endif                 
26820 +                       if (res != NULL)
26821 +                               PQclear(res);
26822 +                       
26823 +                       transaction_tbl->lock = STATUS_LOCK_CONFLICT;
26824 +                       return STATUS_LOCK_CONFLICT;
26825 +               }
26826 +               else if (!strncasecmp(str,PGR_DEADLOCK_DETECT_NOTICE_CMD,strlen(PGR_DEADLOCK_DETECT_NOTICE_CMD)))
26827 +               {
26828 +#ifdef PRINT_DEBUG
26829 +                       show_debug("%s:DEADLOCK DETECTED from PQexec",func);
26830 +#endif                 
26831 +                       if (res != NULL)
26832 +                               PQclear(res);
26833 +                       transaction_tbl->lock = STATUS_DEADLOCK_DETECT;
26834 +                       return STATUS_DEADLOCK_DETECT;
26835 +               }
26836 +               snprintf(result,PGR_MESSAGE_BUFSIZE,"C%s",str); /* success: 'C' followed by the command status */
26837 +       }
26838 +       if (res != NULL)
26839 +               PQclear(res);
26840 +
26841 +       /* set send query id */
26842 +       *(PGR_Send_Query_ID + hostNum ) = ntohl(header->query_id);
26843 +
26844 +       /*
26845 +        * if the query is end transaction process...
26846 +        */
26847 +       check_delete_transaction(host_ptr,header);
26848 +
26849 +       return STATUS_OK;
26850 +}
26849 +
26850 +/* Post-request cleanup: drops the transaction entry on a connection-close
26851 + * request, then lets delete_template() look at the database name. */
26852 +static int
26853 +check_delete_transaction (HostTbl * host_ptr, ReplicateHeader * header)
26854 +{
26855 +       bool closing;
26856 +
26857 +       if (!host_ptr || !header)
26858 +               return STATUS_ERROR;
26859 +       /* connection close is signalled as CMD_STS_OTHER + CMD_TYPE_CONNECTION_CLOSE */
26860 +       closing = (header->cmdSts == CMD_STS_OTHER) &&
26861 +                         (header->cmdType == CMD_TYPE_CONNECTION_CLOSE);
26862 +       if (closing)
26863 +       {
26864 +               notice_abort(host_ptr, header);
26865 +               deleteTransactionTbl(host_ptr, header);
26866 +       }
26867 +       delete_template(host_ptr, header);
26868 +       return STATUS_OK;
26869 +}
26870 +
26871 +/*
26872 + * Update the bookkeeping of one transaction table entry from the command
26873 + * carried by the header:
26874 + *   - BEGIN bumps transaction_count and sets in_transaction,
26875 + *   - COMMIT/ROLLBACK lowers the count and clears in_transaction at zero,
26876 + *   - COPY / COPY_DATA_END toggle exec_copy (non-transaction commands only).
26877 + * Does nothing when either argument is NULL.
26878 + */
26879 +static void
26880 +check_transaction_status(ReplicateHeader * header,
26881 +                                                TransactionTbl *transaction)
26882 +{
26883 +       /* every branch below only writes into *transaction, so bail out early */
26884 +       if ((header == (ReplicateHeader *)NULL) || (transaction == NULL))
26885 +               return;
26886 +
26887 +       if (header->cmdSts != CMD_STS_TRANSACTION)
26888 +       {
26889 +               /* outside transaction control only COPY start/end are tracked */
26890 +               if (header->cmdType == CMD_TYPE_COPY)
26891 +                       transaction->exec_copy = true;
26892 +               else if (header->cmdType == CMD_TYPE_COPY_DATA_END)
26893 +                       transaction->exec_copy = false;
26894 +               return;
26895 +       }
26896 +
26897 +       if (header->cmdType == CMD_TYPE_BEGIN)
26898 +       {
26899 +               /* one more BEGIN seen on this entry */
26900 +               transaction->in_transaction = true;
26901 +               transaction->transaction_count ++;
26902 +               return;
26903 +       }
26904 +
26905 +       if ((header->cmdType != CMD_TYPE_COMMIT) &&
26906 +               (header->cmdType != CMD_TYPE_ROLLBACK))
26907 +       {
26908 +               /* other transaction-status commands leave the counters alone */
26909 +               return;
26910 +       }
26911 +
26912 +       /* COMMIT / ROLLBACK: unwind one level, never below zero */
26913 +       if (transaction->transaction_count > 0)
26914 +       {
26915 +               transaction->transaction_count --;
26916 +       }
26917 +       if (transaction->transaction_count == 0)
26918 +       {
26919 +               /* nothing left open on this entry */
26920 +               transaction->in_transaction = false;
26921 +       }
26922 +}
26923 +
26924 +/*
26925 + * Maintain the per-host open-transaction counter and move the recovery
26926 + * status forward where it depends on that counter.  Returns host, or NULL
26927 + * when either argument is NULL.
26928 + */
26929 +static HostTbl *
26930 +check_host_transaction_status(ReplicateHeader * header,
26931 +                                                HostTbl *host)
26932 +{
26933 +       int state;
26934 +
26935 +       if (!header || !host)
26936 +               return NULL;
26937 +
26938 +       /* count BEGINs up and COMMIT/ROLLBACKs down, never below zero */
26939 +       if (header->cmdType == CMD_TYPE_BEGIN)
26940 +               host->transaction_count++;
26941 +       else if (((header->cmdType == CMD_TYPE_COMMIT) ||
26942 +                         (header->cmdType == CMD_TYPE_ROLLBACK)) &&
26943 +                        (host->transaction_count > 0))
26944 +               host->transaction_count--;
26945 +
26946 +       state = PGRget_recovery_status();
26947 +       if (state == RECOVERY_PREPARE_START)
26948 +       {
26949 +               /* WAIT_CLEAN while the counter is positive, CLEARED at zero */
26950 +               if (host->transaction_count > 0)
26951 +                       PGRset_recovery_status(RECOVERY_WAIT_CLEAN);
26952 +               else if (host->transaction_count == 0)
26953 +                       PGRset_recovery_status(RECOVERY_CLEARED);
26954 +       }
26955 +       else if ((state == RECOVERY_WAIT_CLEAN) &&
26956 +                        (host->transaction_count == 0))
26957 +       {
26958 +               /* the counter has dropped back to zero */
26959 +               PGRset_recovery_status(RECOVERY_CLEARED);
26960 +       }
26961 +       return host;
26962 +}
26963 +
26964 +/* Creates a uniquely named recovery queue file, announces its name on the
26965 + * *RecoveryMsgid message queue and records it as the current write file.
26966 + * Returns the stream opened in append mode, or NULL on any failure. */
26967 +static FILE *
26968 +create_queue_file(void)
26969 +{
26970 +       char * func = "create_queue_file()";
26971 +       FILE * fp = NULL;
26972 +       struct timeval tv;
26973 +       char fname[FILENAME_MAX_LENGTH];
26974 +       int size = 0;
26975 +       RecoveryQueueFile * msg = NULL;
26976 +
26977 +       if (*RecoveryMsgid < 0)
26978 +               return (FILE *)NULL;
26979 +       /* create uniq file name */
26980 +       gettimeofday(&tv,NULL);
26981 +       memset(fname,0,sizeof(fname));
26982 +       snprintf(fname,sizeof(fname),"%s/%s_%u.%u",
26983 +               PGR_Data_Path,
26984 +               RECOVERY_QUEUE_FILE,
26985 +               (uint32_t)tv.tv_sec,
26986 +               (uint32_t)tv.tv_usec);
26987 +
26988 +       size = sizeof(fname) + sizeof(RecoveryQueueFile);
26989 +       msg = (RecoveryQueueFile *)malloc(size);
26990 +       if (msg == NULL)
26991 +       {
26992 +               show_error("%s:malloc() failed. reason: %s", func, strerror(errno));
26993 +               return (FILE *)NULL;
26994 +       }
26995 +       memset(msg,0,size);
26996 +       msg->mtype = RECOVERY_FILE_MTYPE;
26997 +       strncpy(msg->mdata,fname,sizeof(fname));
26998 +
26999 +       fp = fopen(fname,"a");
27000 +       if (fp == NULL)
27001 +       {
27002 +               show_error("%s:fopen failed: (%s)",func,strerror(errno));
27003 +               free(msg);      /* the message buffer must not outlive this call */
27004 +               return (FILE *)NULL;
27005 +       }
27006 +       if (msgsnd(*RecoveryMsgid, msg, sizeof(fname), IPC_NOWAIT) < 0)
27007 +       {
27008 +               show_error("%s:msgsnd failed. reason: %s", func, strerror(errno));
27009 +               free(msg);
27010 +               fclose(fp);     /* nobody will get this stream, so close it here */
27011 +               msgctl(*RecoveryMsgid, IPC_RMID, NULL); /* drop the queue and get a fresh one */
27012 +               *RecoveryMsgid = msgget (IPC_PRIVATE, 00666 | IPC_CREAT );
27013 +               return (FILE *)NULL;
27014 +       }
27015 +       free(msg);              /* msgsnd() has copied the message into the queue */
27016 +       strncpy(Recovery_Status_Inf->write_file,fname,sizeof(Recovery_Status_Inf->write_file));
27017 +       return fp;
27018 +}
27019 +
27020 +/* Appends size bytes to QueueFp; when a write fails the queue file is replaced
27021 + * by a fresh one and the write retried a bounded number of times. */
27022 +static int
27023 +add_queue_file(char * data,int size)
27024 +{
27025 +       int cnt = 0;
27026 +
27027 +       if ((QueueFp == NULL) || (data == NULL) || (size < 0))
27028 +               return STATUS_ERROR;
27029 +       if (size == 0)  /* nothing to write; fwrite() would report 0 items and look like a failure */
27030 +               return STATUS_OK;
27031 +       while ((QueueFp == NULL) || (fwrite(data, size,1,QueueFp) != 1))
27032 +       {
27033 +               if (QueueFp != NULL)    /* NULL here means the previous re-creation failed */
27034 +                       fclose(QueueFp);
27035 +               QueueFp = NULL;
27036 +               if (cnt > MAX_RETRY_TIMES)
27037 +                       return STATUS_ERROR;    /* QueueFp is left NULL on this path */
27038 +               QueueFp = create_queue_file();  /* may return NULL; the loop test copes with that */
27039 +               cnt ++;
27040 +       }
27041 +       Recovery_Status_Inf->file_size += size;
27042 +       return STATUS_OK;
27043 +}
27044 +
27045 +/*
27046 + * Append one replication request (header, then its query bytes) to the
27047 + * current recovery queue file, starting a new file when none exists yet or
27048 + * when the current one would grow past MAX_QUEUE_FILE_SIZE.  Runs under the
27049 + * recovery-queue semaphore.  Returns STATUS_OK or STATUS_ERROR.
27050 + */
27051 +int
27052 +PGRset_queue(ReplicateHeader * header,char * query)
27053 +{
27054 +       char * func = "PGRset_queue()";
27055 +       int header_size = sizeof(ReplicateHeader);
27056 +       int query_size = 0;
27057 +       int rtn = STATUS_OK;
27058 +
27059 +       if ((Recovery_Status_Inf == NULL) || (header == NULL))
27060 +       {
27061 +               show_error("%s:header is null",func);
27062 +               return STATUS_ERROR;
27063 +       }
27064 +       query_size = ntohl(header->query_size);
27065 +       if (query_size < 0)
27066 +       {
27067 +               show_error("%s:query size less than 0",func);
27068 +               return STATUS_ERROR;
27069 +       }
27070 +       if (RecoverySemID <= 0)
27071 +       {
27072 +               show_error("%s:RecoverySemID is not initialized",func);
27073 +               return STATUS_ERROR;
27074 +       }
27075 +       PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
27076 +       /* check existance of queue file */
27077 +       if (Recovery_Status_Inf->write_file[0] == '\0')
27078 +       {
27079 +               /* create new queue file */
27080 +               Recovery_Status_Inf->file_size = 0;
27081 +               QueueFp = create_queue_file();
27082 +       }
27083 +       else if (Recovery_Status_Inf->file_size + header_size + query_size > MAX_QUEUE_FILE_SIZE)
27084 +       {
27085 +               /* if the file size is over the limit, create new queue file */
27086 +               memset(Recovery_Status_Inf->write_file,0,sizeof(Recovery_Status_Inf->write_file));
27087 +               /* QueueFp is reset to NULL whenever it is closed, so this closes only
27088 +                * a stream that is really open; closing the stale handle of an earlier
27089 +                * call again would be a double fclose(). */
27090 +               if (QueueFp != NULL)
27091 +                       fclose(QueueFp);
27092 +               Recovery_Status_Inf->file_size = 0;
27093 +               QueueFp = create_queue_file();
27094 +       }
27095 +       else
27096 +       {
27097 +               QueueFp= fopen(Recovery_Status_Inf->write_file,"a");
27098 +       }
27099 +       if (QueueFp == (FILE *)NULL)
27100 +       {
27101 +               PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
27102 +               show_error("%s:QueueFp open failed. error is %s",func,strerror(errno));
27103 +               return STATUS_ERROR;
27104 +       }
27105 +       header->replicate_id = htonl(*PGR_ReplicateSerializationID);
27106 +       if (add_queue_file((char *)header,header_size) != STATUS_OK)
27107 +       {
27108 +               show_error("%s:header add failed into queue file",func);
27109 +               rtn = STATUS_ERROR;
27110 +       }
27111 +       else if ((query_size > 0) &&
27112 +                        (add_queue_file((char *)query,query_size) != STATUS_OK))
27113 +       {
27114 +               show_error("%s:queue add failed into queue file",func);
27115 +               rtn = STATUS_ERROR;
27116 +       }
27117 +       /* close (which also flushes) a stream that is still open, on success and on failure */
27118 +       if (QueueFp != NULL)
27119 +       {
27120 +               fclose(QueueFp);
27121 +               QueueFp = NULL; /* never keep a closed handle around */
27122 +       }
27123 +       PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
27124 +       return rtn;
27125 +}
27126 +
27127 +/*
27128 + * Look up the host table entry whose resolvedName and port both match.
27129 + * The name has to match as a whole string (not merely as a prefix), compared
27130 + * over at most sizeof(resolvedName) bytes.  Returns NULL when there is no
27131 + * table, no name, or no matching entry.
27132 + */
27133 +HostTbl *
27134 +PGRget_HostTbl(char * resolvedName, int port)
27135 +{
27136 +       HostTbl * ptr = NULL;
27137 +
27138 +       if ((Host_Tbl_Begin == NULL) || (resolvedName == NULL))
27139 +       {
27140 +               return NULL;
27141 +       }
27142 +       /* the scan ends at the entry whose useFlag is DB_TBL_END */
27143 +       for (ptr = Host_Tbl_Begin; ptr->useFlag != DB_TBL_END; ptr++)
27144 +       {
27145 +               /* strncmp also compares the terminator, so "host1" does not match "host10" */
27146 +               if ((ptr->port == port) &&
27147 +                       (strncmp(ptr->resolvedName,resolvedName,sizeof(ptr->resolvedName)) == 0))
27148 +               {
27149 +                       return ptr;
27150 +               }
27151 +       }
27152 +       return (HostTbl*)NULL;
27153 +}
27154 +
27155 +/* Removes the semaphore set identified by semid (semctl IPC_RMID); the result is ignored. */
27156 +static void sem_quit(int semid)
27157 +{
27158 +       (void) semctl(semid, 0, IPC_RMID);
27159 +}
27160 +
27161 +/* Closes the connection of every transaction table entry (the entries stay
27162 + * listed).  A destroyed table (Transaction_Tbl_Begin == NULL) is a no-op. */
27163 +void
27164 +PGRclear_connections(void)
27165 +{
27166 +       Dlelem *ptr = NULL;
27167 +       pthread_mutex_lock(&transaction_table_mutex);
27168 +       if (Transaction_Tbl_Begin != NULL)      /* set to NULL by PGRdestroy_transaction_table() */
27169 +               ptr = DLGetHead(Transaction_Tbl_Begin);
27170 +       for (; ptr != NULL; ptr = DLGetSucc(ptr))
27171 +       {
27172 +               TransactionTbl *transaction = DLE_VAL(ptr);
27173 +               if ((transaction == NULL) || (transaction->conn == NULL))
27174 +                       continue;
27175 +               PQfinish(transaction->conn);
27176 +               transaction->conn = NULL;       /* lets later code see that the connection is gone */
27177 +       }
27178 +       pthread_mutex_unlock(&transaction_table_mutex);
27179 +}
27180 +
27181 +/* Frees every list element and the list itself; does nothing when the table is already NULL. */
27182 +void
27183 +PGRdestroy_transaction_table(void)
27184 +{
27185 +       Dlelem *ptr = NULL, *next;
27186 +       pthread_mutex_lock(&transaction_table_mutex);
27187 +       for (ptr = (Transaction_Tbl_Begin != NULL) ? DLGetHead(Transaction_Tbl_Begin) : NULL; ptr != NULL; ptr = next)
27188 +       {
27189 +               next = DLGetSucc(ptr);  /* fetch the successor before ptr is unlinked and freed */
27190 +               DLRemove(ptr);
27191 +               DLFreeElem(ptr);        /* NOTE(review): the value stored in the element is not released here - confirm ownership */
27192 +       }
27193 +       if (Transaction_Tbl_Begin != NULL)      /* a repeated call must not hand NULL to DLFreeList() */
27194 +               DLFreeList(Transaction_Tbl_Begin);
27195 +       Transaction_Tbl_Begin = NULL;
27196 +       pthread_mutex_unlock(&transaction_table_mutex);
27197 +}
27198 +
27199 +/*
27200 + * Tells whether a request is of a kind for which
27201 + * send_replicate_packet_to_server() may send the current-time system
27202 + * command first.  Evaluated in this order:
27203 + *
27204 + *   - INSERT/UPDATE/DELETE/SET/EXECUTE with CMD_STS_QUERY: true
27205 + *   - CMD_STS_TRANSACTION commands other than BEGIN: false
27206 + *   - COPY, SELECT, VACUUM, ANALYZE, BEGIN: true
27207 + *   - anything else, e.g. COPY_DATA / COPY_DATA_END: false
27208 + * header must not be NULL (it is dereferenced unconditionally).
27209 + */
27210 +static bool
27211 +is_need_sync_time(ReplicateHeader * header)
27212 +{
27213 +       /* statements that are accepted only when they arrive
27214 +        * with the plain query status */
27215 +       if ((header->cmdSts == CMD_STS_QUERY) &&
27216 +               ((header->cmdType == CMD_TYPE_INSERT) ||
27217 +                (header->cmdType == CMD_TYPE_UPDATE) ||
27218 +                (header->cmdType == CMD_TYPE_DELETE) ||
27219 +                (header->cmdType == CMD_TYPE_SET) ||
27220 +                (header->cmdType == CMD_TYPE_EXECUTE)))
27221 +       {
27222 +               return true;
27223 +       }
27224 +
27225 +       /* with the transaction status only BEGIN can qualify;
27226 +        * it is looked at by the final test below */
27227 +       if ((header->cmdSts == CMD_STS_TRANSACTION) &&
27228 +               (header->cmdType != CMD_TYPE_BEGIN))
27229 +       {
27230 +               return false;
27231 +       }
27232 +
27233 +       /* everything else is decided by the command type alone,
27234 +        * whatever the status is */
27235 +       return ((header->cmdType == CMD_TYPE_COPY) ||
27236 +                       (header->cmdType == CMD_TYPE_SELECT) ||
27237 +                       (header->cmdType == CMD_TYPE_VACUUM) ||
27238 +                       (header->cmdType == CMD_TYPE_ANALYZE) ||
27239 +                       (header->cmdType == CMD_TYPE_BEGIN));
27240 +}
27241 +
27242 +/*
27243 + * Tells whether an answer should be awaited for this kind of request:
27244 + *   - CMD_STS_PREPARE, COPY, COPY_DATA, COPY_DATA_END: no
27245 + *   - INSERT/UPDATE/DELETE/VACUUM/ANALYZE/EXECUTE with CMD_STS_QUERY: yes
27246 + *   - CMD_STS_TRANSACTION, _SET_SESSION_AUTHORIZATION, _TEMP_TABLE, SELECT: yes
27247 + *   - everything else: no
27248 + */
27249 +static bool
27250 +is_need_wait_answer(ReplicateHeader * header)
27251 +{
27252 +       /* the prepare status and the whole COPY family are ruled out first */
27253 +       if ((header->cmdSts == CMD_STS_PREPARE) ||
27254 +               (header->cmdType == CMD_TYPE_COPY) ||
27255 +               (header->cmdType == CMD_TYPE_COPY_DATA) ||
27256 +               (header->cmdType == CMD_TYPE_COPY_DATA_END))
27257 +       {
27258 +               return false;
27259 +       }
27260 +       /* statements accepted when they arrive with the plain query status */
27261 +       if ((header->cmdSts == CMD_STS_QUERY) &&
27262 +               ((header->cmdType == CMD_TYPE_INSERT) ||
27263 +                (header->cmdType == CMD_TYPE_UPDATE) ||
27264 +                (header->cmdType == CMD_TYPE_DELETE) ||
27265 +                (header->cmdType == CMD_TYPE_VACUUM) ||
27266 +                (header->cmdType == CMD_TYPE_ANALYZE) ||
27267 +                (header->cmdType == CMD_TYPE_EXECUTE)))
27268 +       {
27269 +               return true;
27270 +       }
27271 +       /* three statuses qualify as a whole, and SELECT whatever its status */
27272 +       return ((header->cmdSts == CMD_STS_TRANSACTION) ||
27273 +                       (header->cmdSts == CMD_STS_SET_SESSION_AUTHORIZATION) ||
27274 +                       (header->cmdSts == CMD_STS_TEMP_TABLE) ||
27275 +                       (header->cmdType == CMD_TYPE_SELECT));
27276 +}
27277 +
27278 +/* Deletes the transaction table entry of a request made against template0 or
27279 + * template1, unless the request has the transaction, session-authorization
27280 + * or temp-table status.  NULL arguments are ignored. */
27281 +static void
27282 +delete_template(HostTbl * ptr, ReplicateHeader * header)
27283 +{
27284 +       if (!ptr || !header)
27285 +               return;
27286 +
27287 +       /* only database names beginning with "template1"/"template0" qualify */
27288 +       if (strncmp(header->dbName,"template1",9) &&
27289 +               strncmp(header->dbName,"template0",9))
27290 +               return;
27291 +       if ((header->cmdSts == CMD_STS_TRANSACTION) ||
27292 +               (header->cmdSts == CMD_STS_SET_SESSION_AUTHORIZATION) ||
27293 +               (header->cmdSts == CMD_STS_TEMP_TABLE))
27294 +               return;
27295 +
27296 +       deleteTransactionTbl(ptr,header);
27297 +}
27298 +
27299 +/*--------------------------------------------------------------------
27300 + * SYMBOL
27301 + *    check_copy_command()
27302 + * NOTES
27303 + *    Rewrite a 'COPY ... FROM <source>' query to read from stdin: the
27304 + *    word following the first "FROM"/"from" is replaced by 'stdin'.
27305 + * ARGS
27306 + *    char * query: query string (I); it is truncated after FROM in place
27307 + * RETURN
27308 + *    FROM found : malloc'ed rewritten query (the caller must free it)
27309 + *    otherwise  : NULL (also for NULL input or allocation failure)
27310 + *--------------------------------------------------------------------
27311 + */
27312 +static char *
27313 +check_copy_command(char * query)
27314 +{
27315 +       char * p;
27316 +       char * p1, *p2, *wp;
27317 +       char * buf;
27318 +       int size;
27319 +
27320 +       if (query == NULL)
27321 +               return NULL;
27322 +       size = strlen(query) + strlen("  stdin  ");     /* room for the inserted " stdin " plus the terminator */
27323 +       p = p1 = query;
27324 +       wp = strstr(p,"FROM");          /* only these two exact spellings are recognized */
27325 +       if (wp == NULL)
27326 +               wp = strstr(p,"from");
27327 +       
27328 +       if (wp != NULL)
27329 +       {
27330 +               p = wp + strlen("FROM");
27331 +               if (*p != '\0')         /* FROM may be the last word: never step past the terminator */
27332 +                       *p++ = '\0';    /* cut the query right after FROM so p1 is the head part */
27333 +               while (isspace((unsigned char)*p)) p++;         /* blanks before the source name; stops at '\0' */
27334 +               while ((*p != '\0') && (!isspace((unsigned char)*p))) p++;      /* the source name itself */
27335 +               p2 = p;                 /* whatever follows the source name is kept */
27336 +               buf = malloc(size);
27337 +               if (buf == NULL)
27338 +               {
27339 +                       return NULL;
27340 +               }
27341 +               snprintf(buf,size,"%s stdin %s",p1,p2);
27342 +               return buf;
27343 +       }
27344 +       return NULL;
27345 +}
27346 +
27347 +static int
27348 +next_replication_id(void)
27349 +{
27350 +       /* Step the replication id (counting check_point down by one) and return it; -1 when Recovery_Status_Inf is NULL. */
27351 +       if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
27352 +       {
27353 +               Recovery_Status_Inf->replication_id += 1;
27354 +               Recovery_Status_Inf->check_point -= 1;
27355 +               return (Recovery_Status_Inf->replication_id);
27356 +       }
27357 +
27358 +       show_error("%s: Recovery_Status_Inf is NULL","next_replication_id()");
27359 +       return -1;
27360 +}
27361 +
27362 +static void
27363 +check_replication_id(void)
27364 +{
27365 +       /* Once check_point has dropped below zero, reset it to PGR_CHECK_POINT
27366 +        * and rewrite RidFp from its start with replication_id + PGR_CHECK_POINT.
27367 +        */
27368 +       if (Recovery_Status_Inf == (RecoveryStatusInf *)NULL)
27369 +       {
27370 +               show_error("%s: Recovery_Status_Inf is NULL","check_replication_id()");
27371 +               return ;
27372 +       }
27373 +       if (Recovery_Status_Inf->check_point >= 0)
27374 +               return ;        /* counter not used up yet */
27375 +       Recovery_Status_Inf->check_point = PGR_CHECK_POINT ;
27376 +       rewind(RidFp);
27377 +       PGRwrite_log_file(RidFp,"%u",Recovery_Status_Inf->replication_id + PGR_CHECK_POINT );
27378 +}
27379 +
27380 +int
27381 +PGRset_replication_id(uint32_t id)
27382 +{      /* Store id as the current replication id and return the stored value. */
27383 +       if (Recovery_Status_Inf == (RecoveryStatusInf *)NULL) return -1;        /* nothing to store into; same failure value as next_replication_id() */
27384 +       return (Recovery_Status_Inf->replication_id = id);
27385 +}
27386 +
27387 +int 
27388 +PGRdo_replicate(int sock,ReplicateHeader *header, char * query)
27389 +{
27390 +       /* Stamp header (autocommit, time, query id), send the packet, map the send status to LOOP_CONTINUE/LOOP_END. */
27391 +       char * func = "PGRdo_replicate()";
27392 +
27393 +       struct timeval tv;
27394 +       int status = STATUS_OK;
27395 +       int recovery_status = 0;
27396 +       char * query_string = NULL;
27397 +
27398 +       if (header->cmdType == CMD_TYPE_COPY)
27399 +       {
27400 +               query_string = check_copy_command(query);       /* malloc'ed rewrite, freed after the send below */
27401 +               if (query_string == NULL)
27402 +               {
27403 +                       return LOOP_CONTINUE;   /* no rewrite available: nothing is sent for this packet */
27404 +               }
27405 +       }
27406 +       else
27407 +       {
27408 +               query_string = query;   /* sent as received; not owned by this function */
27409 +               if (header->cmdType == CMD_TYPE_SET)
27410 +               {
27411 +                       if (is_autocommit_off(query_string) == true)    /* a SET may switch the global autocommit flag */
27412 +                       {
27413 +                               PGR_AutoCommit = false;
27414 +                       }
27415 +                       else if (is_autocommit_on(query_string) == true)
27416 +                       {
27417 +                               PGR_AutoCommit = true;
27418 +                       }
27419 +               }
27420 +       }
27421 +       header->isAutoCommit=PGR_AutoCommit ? 1 : 0;
27422 +       gettimeofday(&tv,NULL);
27423 +       header->tv.tv_sec = htonl(tv.tv_sec);   /* header fields are stored in network byte order */
27424 +       header->tv.tv_usec = htonl(tv.tv_usec);
27425 +#ifdef PRINT_DEBUG
27426 +       show_debug("%s:query :: %s",func,query_string);
27427 +#endif                 
27428 +
27429 +       /* set query id */
27430 +       header->query_id = htonl(PGRget_next_query_id());
27431 +
27432 +       /* save header for logging */
27433 +       if (is_need_sync_time(header) == true)
27434 +       {
27435 +               if (PGR_Log_Header != NULL)
27436 +               {
27437 +                       memcpy(PGR_Log_Header,header,sizeof(ReplicateHeader));
27438 +                       if (header->rlog == 0)
27439 +                       {
27440 +                               PGR_Log_Header->replicate_id = htonl(next_replication_id());    /* only the saved copy gets the new id */
27441 +                       }
27442 +               }
27443 +       }
27444 +       /* check rlog */
27445 +       if (header->rlog == CONNECTION_SUSPENDED_TYPE )
27446 +       {
27447 +               if (PGRget_rlog_header(header) == STATUS_OK)
27448 +               {
27449 +                       header->rlog = CONNECTION_SUSPENDED_TYPE;       /* presumably the call above rewrote header - TODO confirm */
27450 +                       
27451 +               }
27452 +       }
27453 +       
27454 +       /* check recovery mode */
27455 +
27456 +       recovery_status = PGRget_recovery_status();
27457 +       PGRcheck_recovered_host();
27458 +
27459 +       /* send replication packet */
27460 +       status = PGRreplicate_packet_send( header,query_string,sock,recovery_status);
27461 +
27462 +       if ((header->cmdType == CMD_TYPE_COPY) &&       /* only the COPY rewrite was allocated here */
27463 +               (query_string != NULL))
27464 +       {
27465 +               free(query_string);
27466 +               query_string = NULL;
27467 +       }
27468 +       /* aborted and deadlock statuses end the caller's loop; every other status continues it */
27469 +       if (status == STATUS_ABORTED )
27470 +       {
27471 +#ifdef PRINT_DEBUG
27472 +               show_debug("%s:status is STATUS_ABORTED",func);
27473 +#endif                 
27474 +               return LOOP_END;
27475 +       }
27476 +       if (status == STATUS_DEADLOCK_DETECT) 
27477 +       {
27478 +#ifdef PRINT_DEBUG
27479 +               show_debug("%s:status is STATUS_DEADLOCK_DETECT",func);
27480 +#endif                 
27481 +               return LOOP_END;
27482 +       }
27483 +       return LOOP_CONTINUE;
27484 +}
27485 +
27486 +/*--------------------------------------------------------------------
27487 + * SYMBOL
27488 + *    PGRreturn_result()
27489 + * NOTES
27490 + *    Send the PGR_MESSAGE_BUFSIZE-byte result buffer to dest; optionally wait for the answer
27491 + * ARGS
27492 + *    int dest: socket of destination server (I)
27493 + *    char *result: result data, PGR_MESSAGE_BUFSIZE bytes are sent (I); zeroed on send failure
27494 + *    int wait: PGR_WAIT_ANSWER to return the status of read_answer() (I)
27495 + * RETURN
27496 + *    OK: STATUS_OK
27497 + *    NG: STATUS_ERROR
27498 + *    NG: STATUS_LOCK_CONFLICT, STATUS_DEADLOCK_DETECT
27499 + *    NG: STATUS_ABORTED (the last three are passed on from read_answer())
27500 + *--------------------------------------------------------------------
27501 + */
27502 +int
27503 +PGRreturn_result(int dest, char * result, int wait)
27504 +{
27505 +       char * func = "PGRreturn_result()";
27506 +       fd_set    wmask;
27507 +       struct timeval timeout;
27508 +       int rtn = 0;
27509 +       char * send_ptr = NULL;
27510 +       int send_size= 0;       /* bytes sent so far */
27511 +       int buf_size = 0;       /* total number of bytes to send */
27512 +       int s = 0;
27513 +       int status = 0;
27514 +       int flag = 0;           /* send() flags, filled in below where the platform defines them */
27515 +       
27516 +       if (result == NULL)
27517 +       {
27518 +               show_error("%s:result is not initialize",func);
27519 +               return STATUS_ERROR;
27520 +       }
27521 +       if (dest < 0)
27522 +       {
27523 +               return STATUS_ERROR;
27524 +       }
27525 +       send_ptr = result;
27526 +       buf_size = PGR_MESSAGE_BUFSIZE; /* the whole fixed-size buffer is sent, whatever its content */
27527 +       if (buf_size < 1)
27528 +               buf_size = 1;
27529 +
27530 +       /*
27531 +        * Build the send() flags: non-blocking, and no SIGPIPE, where available.
27532 +        */
27533 +#ifdef MSG_DONTWAIT
27534 +       flag |= MSG_DONTWAIT;
27535 +#endif
27536 +#ifdef MSG_NOSIGNAL
27537 +       flag |= MSG_NOSIGNAL;
27538 +#endif
27539 +       /* NOTE(review): a select() timeout (rtn == 0) matches no branch below, so the loop retries without limit */
27540 +       for (;;)
27541 +       {
27542 +               timeout.tv_sec = PGR_Replication_Timeout;       /* re-armed on every pass */
27543 +               timeout.tv_usec = 0;
27544 +
27545 +               FD_ZERO(&wmask);
27546 +               FD_SET(dest,&wmask);
27547 +
27548 +               rtn = select(dest+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
27549 +               if (rtn < 0)
27550 +               {
27551 +                       if (errno == EINTR || errno == EAGAIN)
27552 +                               continue;
27553 +
27554 +                       show_error("%s:select failed ,errno is %s",func , strerror(errno));
27555 +                       return STATUS_ERROR;
27556 +               }
27557 +               else if (rtn && FD_ISSET(dest, &wmask))
27558 +               {
27559 +                       s = send(dest,send_ptr + send_size,buf_size - send_size ,flag);         /* resume after the bytes already sent */
27560 +                       if (s < 0)
27561 +                       {
27562 +                               if (errno == EINTR || errno == EAGAIN)
27563 +                                       continue;
27564 +                               else
27565 +                               {
27566 +                                       show_error("%s:send error: %d(%s)", func, errno, strerror(errno));
27567 +                                       memset(send_ptr, 0, PGR_MESSAGE_BUFSIZE);       /* wipes the caller's buffer */
27568 +                                       return STATUS_ERROR;
27569 +                               }
27570 +                       }
27571 +                       else if (s > 0)
27572 +                       {
27573 +                               send_size += s;
27574 +                               if (send_size == buf_size)
27575 +                               {
27576 +                                       /* everything is out: finish here, after the answer if one was requested */
27577 +                                       status = STATUS_OK;
27578 +                                       if (wait == PGR_WAIT_ANSWER)
27579 +                                       {
27580 +                                               status = read_answer(dest);
27581 +                                       }
27582 +                                       return status;
27583 +                               }
27584 +                       }
27585 +                       else /* s == 0 */
27586 +                       {
27587 +                               show_error("%s:unexpected EOF", func);
27588 +                               memset(send_ptr, 0, PGR_MESSAGE_BUFSIZE);
27589 +                               return STATUS_ERROR;
27590 +                       }
27591 +               }
27592 +       }
27593 +       memset(send_ptr, 0, PGR_MESSAGE_BUFSIZE);       /* not reached: the loop above is only left through return */
27594 +       return STATUS_ERROR;
27595 +}
27596 +
27597 +/*--------------------------------------------------------------------
27598 + * SYMBOL
27599 + *    read_answer()
27600 + * NOTES
27601 + *    Wait for one response/notice packet on dest and map its text to a status
27602 + * ARGS
27603 + *    int dest: socket of destination server (I)
27604 + * RETURN
27605 + *    OK: STATUS_OK
27606 + *    NG: STATUS_ERROR (read failure, other packet kind, unknown answer text)
27607 + *    NG: STATUS_ABORTED, STATUS_LOCK_CONFLICT
27608 + *    NG: STATUS_DEADLOCK_DETECT
27609 + *--------------------------------------------------------------------
27610 + */
27611 +static int
27612 +read_answer(int dest)
27613 +{
27614 +       char * func = "read_answer()";
27615 +       fd_set    rmask;
27616 +       struct timeval timeout;
27617 +       int rtn;
27618 +       ReplicateHeader header;
27619 +       char * answer = NULL;   /* packet body returned by PGRread_packet(); freed here */
27620 +       int status = STATUS_ERROR;      /* stays STATUS_ERROR unless a known answer text is matched */
27621 +       /* NOTE(review): a select() timeout (rtn == 0) matches no branch below, so the loop retries without limit */
27622 +       for(;;)
27623 +       {
27624 +               if (answer != NULL)
27625 +               {
27626 +                       free(answer);
27627 +                       answer = NULL;
27628 +               }
27629 +               timeout.tv_sec = PGR_Replication_Timeout;
27630 +               timeout.tv_usec = 0;
27631 +               FD_ZERO(&rmask);
27632 +               FD_SET(dest,&rmask);
27633 +               rtn = select(dest+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
27634 +               if (rtn < 0)
27635 +               {
27636 +                       if (errno == EINTR || errno == EAGAIN)
27637 +                               continue;
27638 +
27639 +                       show_error("%s:select failed ,errno is %s",func , strerror(errno));
27640 +                       return STATUS_ERROR;
27641 +               }
27642 +               else if (rtn && FD_ISSET(dest, &rmask))
27643 +               {
27644 +                       memset(&header,0,sizeof(ReplicateHeader));
27645 +                       answer = PGRread_packet(dest,&header);
27646 +                       if (answer == NULL)
27647 +                       {
27648 +                               status = STATUS_ERROR;
27649 +                               break;
27650 +                       }
27651 +                       if ((header.cmdSts != CMD_STS_RESPONSE) &&      /* only response and notice packets are accepted */
27652 +                               (header.cmdSts != CMD_STS_NOTICE))
27653 +                       {
27654 +                               show_error("%s:none response packet received",func);
27655 +                               free(answer);
27656 +                               answer = NULL;
27657 +                               status = STATUS_ERROR;
27658 +                               break;
27659 +                       }
27660 +#ifdef PRINT_DEBUG
27661 +                       show_debug("%s:answer[%s]",func,answer);
27662 +#endif                 
27663 +                       if (answer != NULL)     /* the answer is matched by prefix, ignoring case */
27664 +                       {
27665 +                               if (!strncasecmp(answer,PGR_QUERY_DONE_NOTICE_CMD,strlen(PGR_QUERY_DONE_NOTICE_CMD)))
27666 +                               {
27667 +#ifdef PRINT_DEBUG
27668 +                                       show_debug("%s:QUERY DONE",func);
27669 +#endif                 
27670 +                                       status = STATUS_OK;
27671 +                               }
27672 +                               else if (!strncasecmp(answer,PGR_QUERY_ABORTED_NOTICE_CMD,strlen(PGR_QUERY_ABORTED_NOTICE_CMD)))
27673 +                               {
27674 +#ifdef PRINT_DEBUG
27675 +                                       show_debug("%s:QUERY ABORTED",func);
27676 +#endif                 
27677 +                                       status = STATUS_ABORTED;
27678 +                               }
27679 +                               else if (!strncasecmp(answer,PGR_LOCK_CONFLICT_NOTICE_CMD,strlen(PGR_LOCK_CONFLICT_NOTICE_CMD)))
27680 +                               {
27681 +#ifdef PRINT_DEBUG
27682 +                                       show_debug("%s:LOCK CONFLICT !!",func);
27683 +#endif                 
27684 +                                       status = STATUS_LOCK_CONFLICT;
27685 +                               }
27686 +                               else if (!strncasecmp(answer,PGR_DEADLOCK_DETECT_NOTICE_CMD,strlen(PGR_DEADLOCK_DETECT_NOTICE_CMD)))
27687 +                               {
27688 +#ifdef PRINT_DEBUG
27689 +                                       show_debug("%s:DEADLOCK DETECT !!",func);
27690 +#endif                 
27691 +                                       status = STATUS_DEADLOCK_DETECT;
27692 +                               }
27693 +                               free(answer);
27694 +                               answer = NULL;
27695 +                       }
27696 +                       return status;  /* exactly one packet is consumed per call */
27697 +               }
27698 +       }
27699 +       return status;  /* reached only through the two breaks above, i.e. with STATUS_ERROR */
27700 +}
27701 +
27702 +/*--------------------------------------------------
27703 + * SYMBOL
27704 + *     PGRreplicate_packet_send()
27705 + * NOTES
27706 + *     Send the query to each cluster DB server (the semaphore is taken by the callee).
27707 + * ARGS 
27708 + *     ReplicateHeader * header : packet header (I)
27709 + *     char * query : query for replication (I)
27710 + *     int dest : destination socket for return result (I); int recovery_status : recovery state (I)
27711 + * RETURN
27712 + *     OK : STATUS_OK
27713 + *     NG : STATUS_ERROR, STATUS_SKIP_REPLICATE (rlog query not yet executed at its origin)
27714 + *     DEADLOCK : STATUS_DEADLOCK_DETECT (NOTE(review): no visible path returns it - confirm)
27715 + *---------------------------------------------------
27716 + */
27717 +int
27718 +PGRreplicate_packet_send( ReplicateHeader * header, char * query,int dest,int recovery_status) {
27719 +       return replicate_packet_send_internal(header,query,dest,recovery_status,false); /* false: lock not held yet */
27720 +}
27721 +
27722 +/* Replicate one packet to every usable cluster DB and to its source host (worker of PGRreplicate_packet_send()). */
27723 +int
27724 +replicate_packet_send_internal(ReplicateHeader * header, char * query,int dest,int recovery_status,bool isHeldLock)
27725 +{
27726 +       char * func = "replicate_packet_send_internal()";
27727 +       HostTbl * host_ptr = (HostTbl*)NULL;
27728 +       HostTbl * source_host_ptr = (HostTbl*)NULL;
27729 +       int status = STATUS_OK;
27730 +       int sem_cnt = 0;
27731 +       int sem_id = 0;
27732 +       char       *database = NULL;
27733 +       char       port[8];
27734 +       char       *userName = NULL;
27735 +       char       *password = NULL;
27736 +       char * md5Salt = NULL;
27737 +       char * cryptSalt = NULL;
27738 +       char * host = NULL;
27739 +       char result[PGR_MESSAGE_BUFSIZE];
27740 +
27741 +       pthread_attr_t attr;
27742 +       int rc = 0;
27743 +       int t = 0;
27744 +       int t_cnt = 0;                  /* number of thread_arg slots in use */
27745 +       int source_t_cnt = -1;          /* slot of the source host, -1 while none was found */
27746 +       int transaction_count = 0;
27747 +       int *results_from_thread;
27748 +       bool reliable_mode = true;      /* false once the source host was already served (SELECT) */
27749 +
27750 +       pthread_t thread[MAX_DB_SERVER];
27751 +       ThreadArgInf thread_arg[MAX_DB_SERVER];
27752 +       /* isHeldLock: true when the caller already holds the semaphore; it is then neither taken nor released here. */
27753 +       /* Returns STATUS_SKIP_REPLICATE, STATUS_ERROR (no host table) or STATUS_OK. */
27754 +#ifdef PRINT_DEBUG
27755 +       show_debug("cmdSts=%c",header->cmdSts);
27756 +       if(header->cmdType!='\0')
27757 +               show_debug("cmdType=%c",header->cmdType);
27758 +       show_debug("rlog=%d",header->rlog);
27759 +       show_debug("port=%d",ntohs(header->port));
27760 +       show_debug("pid=%d",ntohs(header->pid));
27761 +       show_debug("from_host=%s",header->from_host);
27762 +       show_debug("dbName=%s",header->dbName);
27763 +       show_debug("userName=%s",header->userName);
27764 +       show_debug("recieve sec=%u",ntohl(header->tv.tv_sec));
27765 +       show_debug("recieve usec=%u",ntohl(header->tv.tv_usec));
27766 +       show_debug("query_size=%d",ntohl(header->query_size));
27767 +       show_debug("request_id=%d",ntohl(header->request_id));
27768 +       show_debug("replicate_id=%d",ntohl(header->replicate_id));
27769 +       show_debug("recovery_status=%d",recovery_status);
27770 +       if (header->cmdSts != CMD_STS_PREPARE)
27771 +               show_debug("query=%s",query);
27772 +
27773 +#endif
27774 +
27775 +       /* check rlog type */
27776 +       if (header->rlog == FROM_R_LOG_TYPE)
27777 +       {
27778 +               if (is_executed_query_in_origin(header) == false)
27779 +               {
27780 +#ifdef PRINT_DEBUG
27781 +                       show_debug("this query is not yet done in source cluster db. so it wait for receive re-replicate request");
27782 +#endif
27783 +                       /* wait re-replicate request */
27784 +                       return STATUS_SKIP_REPLICATE;
27785 +               }
27786 +       }
27787 +       /*
27788 +        * loop while registrated cluster DB exist 
27789 +        */
27790 +       if (Host_Tbl_Begin == NULL)
27791 +       {
27792 +               return STATUS_ERROR;
27793 +       }
27794 +       host_ptr = Host_Tbl_Begin;
27795 +       PGR_Response_Inf->current_cluster = 0;
27796 +       memset(result,0,sizeof(result));
27797 +       sem_cnt = 1;
27798 +       /* queue-jumping packets are serialized on their own semaphore */
27799 +       if (is_need_queue_jump(header,query) == false)
27800 +       {
27801 +               sem_id = SemID;
27802 +       }
27803 +       else
27804 +       {
27805 +               sem_id = VacuumSemID;
27806 +       }
27807 +       if(!isHeldLock) {
27808 +#ifdef PRINT_DEBUG
27809 +               show_debug("sem_lock [%d] req",sem_cnt);
27810 +#endif
27811 +
27812 +               PGRsem_lock(sem_id,sem_cnt);
27813 +#ifdef PRINT_DEBUG
27814 +               show_debug("sem_lock [%d] got it",sem_cnt);
27815 +#endif
27816 +       }               
27817 +       ++*PGR_ReplicateSerializationID;
27818 +
27819 +       /* set replication log */
27820 +       if (is_need_use_rlog(header) == true)
27821 +       {
27822 +               PGRset_rlog(header,query);
27823 +       }
27824 +
27825 +       pthread_attr_init(&attr);
27826 +       pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
27827 +       PGR_Response_Inf->current_cluster = 0;
27828 +       t_cnt = 0;
27829 +       while(host_ptr->useFlag != DB_TBL_END)  /* NOTE(review): assumes at most MAX_DB_SERVER usable hosts - confirm */
27830 +       {
27831 +               /*
27832 +                * check the status of the cluster DB
27833 +                */
27834 +               if ((host_ptr->useFlag != DB_TBL_USE) &&
27835 +                       (host_ptr->useFlag != DB_TBL_INIT))
27836 +               {
27837 +                       host_ptr ++;
27838 +                       continue;
27839 +               }
27840 +               /*
27841 +                * skip loop during recover and the host name is master DB
27842 +                */
27843 +               if (is_master_in_recovery(host_ptr->hostName, host_ptr->port,recovery_status) == true)
27844 +               {
27845 +                       if (PGRset_queue(header,query) != STATUS_OK)
27846 +                       {
27847 +                               show_error("%s:failed to put query to queue.abort to recovery",func);
27848 +                               PGRset_recovery_status(RECOVERY_INIT);
27849 +                       }
27850 +#ifdef PRINT_DEBUG
27851 +                       show_debug("%s master is using for recovery",func);
27852 +#endif
27853 +                       host_ptr ++;
27854 +                       continue;
27855 +               }
27856 +               host_ptr = check_host_transaction_status(header, host_ptr);
27857 +               /*
27858 +                *  fill the argument slot for this host
27859 +                */
27860 +               thread_arg[t_cnt].header = header;
27861 +               thread_arg[t_cnt].query = query;
27862 +               thread_arg[t_cnt].dest = dest;
27863 +               thread_arg[t_cnt].host_ptr = host_ptr;
27864 +               thread_arg[t_cnt].current_cluster = t_cnt;
27865 +               thread_arg[t_cnt].transaction_tbl = (TransactionTbl *)NULL;
27866 +
27867 +               if (PGRis_same_host(header->from_host,ntohs(header->port),host_ptr->resolvedName, host_ptr->port) == true)
27868 +               {
27869 +#ifdef PRINT_DEBUG
27870 +                       show_debug("source host");
27871 +#endif
27872 +                       /* replication to source cluster db */
27873 +                       source_host_ptr = host_ptr;
27874 +                       source_t_cnt = t_cnt;
27875 +
27876 +                       if (header->rlog == FROM_R_LOG_TYPE )
27877 +                       {
27878 +#ifdef PRINT_DEBUG
27879 +                               show_debug("%s: This simple query was suspended. Therefore this query is not re-replicated to source cluster db.",func);
27880 +#endif
27881 +                       }
27882 +                       check_transaction_status(header, thread_arg[t_cnt].transaction_tbl);
27883 +                       t_cnt++;        /* the slot is reserved; the source gets no thread and is served after the joins */
27884 +               }
27885 +               /* replication to other cluster db */
27886 +               else
27887 +               {
27888 +                       if ((header->rlog == CONNECTION_SUSPENDED_TYPE ) &&
27889 +                           (header->cmdSts == CMD_STS_TRANSACTION) )
27890 +                       {
27891 +#ifdef PRINT_DEBUG
27892 +                               show_debug("%s: This transaction query was suspended. Therefore this query is not replicated to other cluster dbs.",func);
27893 +#endif
27894 +                       }
27895 +                       else
27896 +                       {
27897 +                               /*
27898 +                                * get the transaction table data
27899 +                                * it has the connection data with each cluster DB
27900 +                                */
27901 +                               thread_arg[t_cnt].transaction_tbl = getTransactionTbl(host_ptr,header);
27902 +                               /*
27903 +                                * if the transaction process is new one, 
27904 +                                * create connection data and add the transaction table
27905 +                                */
27906 +                               if (thread_arg[t_cnt].transaction_tbl == (TransactionTbl *)NULL)
27907 +                               {
27908 +                                       thread_arg[t_cnt].transaction_tbl = setTransactionTbl(host_ptr, header);
27909 +                                       if (thread_arg[t_cnt].transaction_tbl == (TransactionTbl *)NULL)
27910 +                                       {
27911 +                                               show_error("%s:setTransactionTbl failed",func);
27912 +                                               if ( header->cmdSts != CMD_STS_NOTICE )
27913 +                                               {
27914 +                                                       PGRset_host_status(host_ptr,DB_TBL_ERROR);
27915 +                                               }
27916 +                                               host_ptr ++;
27917 +                                               continue;
27918 +                                       }
27919 +                                       StartReplication[t_cnt] = true;
27920 +                               }
27921 +                               else
27922 +                               {
27923 +                                       /*
27924 +                                        * re-use the connection data
27925 +                                        */
27926 +                                       if ((thread_arg[t_cnt].transaction_tbl->conn != (PGconn *)NULL) &&
27927 +                                           (thread_arg[t_cnt].transaction_tbl->conn->sock > 0))
27928 +                                       {
27929 +                                               /*
27930 +                                                 memset(thread_arg[t_cnt].transaction_tbl->conn->inBuffer,0,thread_arg[t_cnt].transaction_tbl->conn->inBufSize);
27931 +                                                 memset(thread_arg[t_cnt].transaction_tbl->conn->outBuffer,0,thread_arg[t_cnt].transaction_tbl->conn->outBufSize);
27932 +                                               */
27933 +                                               StartReplication[t_cnt] = false;
27934 +                                       }
27935 +                                       else
27936 +                                       {
27937 +                                               if (thread_arg[t_cnt].transaction_tbl->conn != (PGconn *)NULL)
27938 +                                               {
27939 +                                                       PQfinish(thread_arg[t_cnt].transaction_tbl->conn);
27940 +                                                       thread_arg[t_cnt].transaction_tbl->conn = NULL;
27941 +                                               }
27942 +                                               /* the stored connection is unusable: open a fresh one */
27943 +                                               database = (char *)(header->dbName);
27944 +                                               snprintf(port,sizeof(port),"%d", host_ptr->port);
27945 +                                               userName = (char *)(header->userName);
27946 +                                               password = (char *)(header->password);
27947 +                                               md5Salt = (char *)(header->md5Salt);
27948 +                                               cryptSalt = (char *)(header->cryptSalt);
27949 +                                               host = (char *)(host_ptr->hostName);
27950 +
27951 +                                               thread_arg[t_cnt].transaction_tbl->conn = PGRcreateConn(host,port,database,userName,password,md5Salt,cryptSalt);
27952 +                                               StartReplication[t_cnt] = true;
27953 +#ifdef PRINT_DEBUG
27954 +                                               show_debug("%s:connect db:%s port:%s user:%s host:%s query:%s",
27955 +                                                          func, database,port,userName,host,query);
27956 +#endif
27957 +                                       }
27958 +                               }
27959 +                               check_transaction_status(header, thread_arg[t_cnt].transaction_tbl);
27960 +                               transaction_count = thread_arg[t_cnt].transaction_tbl->transaction_count;
27961 +                               rc = pthread_create(&thread[t_cnt], &attr, thread_send_cluster, (void*)&thread_arg[t_cnt]);
27962 +                               /* count the slot only if a thread exists: every counted slot is joined below */
27963 +                               if (rc)
27964 +                                       show_error("pthread_create error");     /* NOTE(review): this host misses the query */
27965 +                               else
27966 +                                       t_cnt++;
27967 +                               /* (after a failure the slot is reused for the next host) */
27968 +                       }
27969 +               }
27970 +               /*
27971 +                * the host has been handled: make sure it is marked as in use
27972 +                */
27973 +               if (host_ptr->useFlag != DB_TBL_USE) 
27974 +               {
27975 +                       PGRset_host_status(host_ptr,DB_TBL_USE);
27976 +               }
27977 +
27978 +               host_ptr++;
27979 +               PGR_Response_Inf->current_cluster ++;
27980 +               status = STATUS_OK;
27981 +       }    
27982 +
27983 +       /* When the query is SELECT, source cluster would not need to wait other cluster's result */
27984 +       if ((source_t_cnt >= 0) && (header->cmdType == CMD_TYPE_SELECT) && (header->cmdSts != CMD_STS_PREPARE))
27985 +       {
27986 +               thread_send_source( (void*)&thread_arg[source_t_cnt]);  /* never index thread_arg with -1 */
27987 +               reliable_mode = false;
27988 +       }
27989 +
27990 +       pthread_attr_destroy(&attr);
27991 +
27992 +       results_from_thread = calloc((t_cnt > 0) ? t_cnt : 1, sizeof(int));     /* zeroed, so the source slot is defined */
27993 +       if (results_from_thread == NULL)
27994 +               show_error("%s:calloc failed; thread results are not compared",func);
27995 +       for ( t = 0 ; t < t_cnt; t++)   /* the threads are joined even if the array is missing */
27996 +       {
27997 +               /* pthread_join() stores a void *: an int target is overrun on LP64; a joined thread is not detached */
27998 +               void * thread_rtn = NULL;
27999 +               if (t == source_t_cnt)
28000 +                       continue;       /* the source slot never had a thread */
28001 +               /* the error code is the return value; errno is not set and EINTR cannot occur, so no retry */
28002 +               rc = pthread_join(thread[t], &thread_rtn);
28003 +               if (rc != 0)
28004 +               {
28005 +                       show_error("%s:pthread_join failed: %d(%s)",func,rc,strerror(rc));
28006 +                       thread_rtn = (void *)(long) STATUS_ERROR;       /* record a defined value instead of garbage */
28007 +               }
28008 +               if (results_from_thread != NULL)
28009 +                       results_from_thread[t] = (int)(long) thread_rtn;
28010 +       }
28011 +       if ((results_from_thread != NULL) &&
28012 +               (compare_results(results_from_thread, t_cnt, source_t_cnt) == false))
28013 +               show_error("query results discrepancy between cluster servers: %s", query);
28014 +       free(results_from_thread);
28015 +
28016 +       /*
28017 +        * send replication query to source cluster server.
28018 +        */
28019 +       if ((source_t_cnt >= 0) && ( reliable_mode == true ))
28020 +       {
28021 +               thread_arg[source_t_cnt].transaction_count = transaction_count; /* only valid with a source slot */
28022 +               thread_send_source( (void*)&thread_arg[source_t_cnt]);
28023 +       }
28024 +       /* unset replication log */
28025 +       if (is_need_use_rlog(header) == true)
28026 +       {
28027 +               PGRunset_rlog(header,query);
28028 +       }
28029 +
28030 +       check_replication_id();
28031 +       if (header->cmdSts == CMD_STS_PREPARE)
28032 +       {
28033 +               if (header->cmdType != CMD_TYPE_P_SYNC)
28034 +               {
28035 +                       if (PGR_Parse_Session_Started == false)
28036 +                       {
28037 +                               PGR_Parse_Session_Started = true;
28038 +                       }
28039 +               }
28040 +       }
28041 +       else
28042 +       {
28043 +               PGR_Parse_Session_Started = false;
28044 +       }
28045 +
28046 +       if(!isHeldLock) {
28047 +#ifdef PRINT_DEBUG
28048 +               show_debug("sem_unlock[%d]",sem_cnt);
28049 +#endif
28050 +               PGRsem_unlock(sem_id,sem_cnt);
28051 +       }
28052 +
28053 +       return status;
28054 +}
28055 +
28056 +static void *
28057 +thread_send_source(void * arg)
28058 +{
28059 +       char * func = "thread_send_source()";
28060 +       ThreadArgInf * thread_arg = NULL;
28061 +       ReplicateHeader * header = (ReplicateHeader*)NULL;
28062 +       char * query = NULL;
28063 +       int dest = 0;
28064 +       HostTbl * host_ptr = (HostTbl*)NULL;
28065 +       int status = STATUS_OK;
28066 +       int transaction_count = 0;
28067 +       char result[PGR_MESSAGE_BUFSIZE];
28068 +       bool sync_command_flg = false;
28069 +
28070 +       if (arg == NULL)
28071 +       {
28072 +               show_error("%s:arg is NULL",func);
28073 +               status = STATUS_ERROR;
28074 +               pthread_exit((void *) status);
28075 +       }
28076 +       thread_arg = (ThreadArgInf *)arg;
28077 +       header = thread_arg->header;
28078 +       query = thread_arg->query;
28079 +       dest = thread_arg->dest;
28080 +       host_ptr = thread_arg->host_ptr;
28081 +       transaction_count = thread_arg->transaction_count;
28082 +
28083 +       if(header->cmdSts==CMD_STS_OTHER &&
28084 +          header->cmdType==CMD_TYPE_CONNECTION_CLOSE) 
28085 +       {
28086 +                       return (void *)0;
28087 +       }
28088 +
28089 +       if (header->rlog == FROM_R_LOG_TYPE )
28090 +       {
28091 +               /* It is not necessary to return rlog to source DB. */
28092 +#ifdef PRINT_DEBUG
28093 +               show_debug("%s: It is not necessary to return rlog to source DB",func);
28094 +#endif
28095 +               status = STATUS_OK;
28096 +               return (void *)status;
28097 +       }
28098 +
28099 +       /**
28100 +        * NOTE: 
28101 +        * We can use PGR_ReplicateSerializationID here , because 
28102 +        * all queries from cluster server isn't recovery query.
28103 +        *
28104 +        */
28105 +       if (is_need_sync_time(header) == true)
28106 +       {
28107 +               if (transaction_count >1 )
28108 +               {
28109 +                       sync_command_flg = false;
28110 +               }
28111 +               else
28112 +               {
28113 +                       sync_command_flg = true;
28114 +               }
28115 +       }
28116 +       if (sync_command_flg == true)
28117 +       {
28118 +               snprintf(result,PGR_MESSAGE_BUFSIZE,
28119 +                       "%d,%u,%u,%u,%d,%u", 
28120 +                       PGR_SET_CURRENT_TIME_FUNC_NO,
28121 +                       (unsigned int)ntohl(header->tv.tv_sec),
28122 +                       (unsigned int)ntohl(header->tv.tv_usec),
28123 +                       (unsigned int)ntohl(PGR_Log_Header->replicate_id),
28124 +                       PGR_Response_Inf->response_mode,
28125 +                       *PGR_ReplicateSerializationID);
28126 +       }
28127 +       else
28128 +       {
28129 +               snprintf(result,PGR_MESSAGE_BUFSIZE,
28130 +                       "%d,%u,%u,%d", 
28131 +                       PGR_SET_CURRENT_REPLICATION_QUERY_ID_NO,
28132 +                       *PGR_ReplicateSerializationID,
28133 +                       0,
28134 +                       PGR_Response_Inf->response_mode);
28135 +       }
28136 +       /* execute query in the exceptional host */
28137 +       /* it is not use replication */
28138 +       if (is_need_wait_answer(header) == true)
28139 +       {
28140 +               status = PGRreturn_result(dest,result, PGR_WAIT_ANSWER);
28141 +       }
28142 +       else
28143 +       {
28144 +               status = PGRreturn_result(dest, result, PGR_NOWAIT_ANSWER);
28145 +       }
28146 +
28147 +       /*
28148 +       if (status == STATUS_ERROR )
28149 +       {
28150 +               show_error("%s: %s[%d] should be down ",func,host_ptr->hostName,host_ptr->port);
28151 +               PGRset_host_status(host_ptr,DB_TBL_ERROR);
28152 +       }
28153 +       */
28154 +
28155 +       /* delete server table when query use template db */
28156 +       if (PGR_Response_Inf->response_mode != PGR_RELIABLE_MODE)
28157 +       {
28158 +               delete_template(host_ptr,header);
28159 +       }
28160 +#ifdef PRINT_DEBUG
28161 +       show_debug("end thread_send_source()");
28162 +#endif
28163 +       return (void *)0;
28164 +}
28165 +/* thread_send_cluster(): send one replicated query to a cluster server with send_replicate_packet_to_server(); arg is a ThreadArgInf *. */
28166 +static void *
28167 +thread_send_cluster(void * arg)
28168 +{
28169 +       char * func = "thread_send_cluster()";
28170 +       ThreadArgInf * thread_arg = NULL;
28171 +       ReplicateHeader * header = (ReplicateHeader*)NULL;
28172 +       char * query = NULL;
28173 +       int dest = 0;
28174 +       HostTbl * host_ptr = (HostTbl*)NULL;
28175 +       int rtn = 0;
28176 +       int status = STATUS_OK;
28177 +       TransactionTbl * transaction_tbl = (TransactionTbl *)NULL;
28178 +       int current_cluster = 0;
28179 +       char result[PGR_MESSAGE_BUFSIZE];
28180 +
28181 +#ifdef PRINT_DEBUG
28182 +       show_debug("start thread_send_cluster()");
28183 +#endif
28184 +       if (arg == NULL)
28185 +       {
28186 +               show_error("%s:arg is NULL",func);
28187 +               status = STATUS_ERROR;
28188 +               pthread_exit((void *) status);  /* exit value is STATUS_ERROR */
28189 +       }
28190 +
28191 +       thread_arg = (ThreadArgInf *)arg;
28192 +       header = thread_arg->header;
28193 +       query = thread_arg->query;
28194 +       dest = thread_arg->dest;
28195 +       host_ptr = thread_arg->host_ptr;
28196 +       transaction_tbl = thread_arg->transaction_tbl;
28197 +       current_cluster = thread_arg->current_cluster;
28198 +
28199 +       
28200 +       if(header->cmdSts==CMD_STS_OTHER &&
28201 +          header->cmdType==CMD_TYPE_CONNECTION_CLOSE) 
28202 +       {
28203 +               check_delete_transaction(host_ptr, header);     /* connection close: nothing is sent to the server */
28204 +               return (void *)0;
28205 +       }
28206 +
28207 +       rtn = send_replicate_packet_to_server( transaction_tbl, current_cluster, host_ptr, header, query ,  result,*PGR_ReplicateSerializationID, false);
28208 +
28209 +#ifdef PRINT_DEBUG
28210 +       show_debug("%s:return value from send_replicate_packet_to_server() is %d",func,rtn);
28211 +#endif 
28212 +       if (rtn == STATUS_ABORTED)
28213 +       {
28214 +               snprintf(result,PGR_MESSAGE_BUFSIZE,"%d", PGR_NOTICE_ABORT_FUNC_NO);    /* replaces whatever the send stored in result */
28215 +               status = PGRreturn_result(dest, result, PGR_NOWAIT_ANSWER);     /* this return value is overwritten on the next line */
28216 +               status = STATUS_ABORTED;
28217 +               pthread_exit((void *) status);
28218 +       }
28219 +       /* delete server table when query use template db */
28220 +       delete_template(host_ptr,header);
28221 +#ifdef PRINT_DEBUG
28222 +       show_debug("%s:pthread_exit[%d]",func,current_cluster );
28223 +#endif 
28224 +
28225 +       pthread_exit((void *) rtn);     /* exit value is the send result, cast to a pointer */
28226 +}
28227 +
28228 +/*--------------------------------------------------
28229 + * SYMBOL
28230 + *     PGRreplicate_packet_send_each_server()
28231 + * NOTES
28232 + *     Send query to a cluster DB server through PGRsend_replicate_packet_to_server(), using the global PGR_Result buffer.
28233 + * ARGS 
28234 + *     HostTbl * ptr : cluster server info table (I); dereferenced without a NULL check
28235 + *     bool return_response : flag for return result(I); not referenced in the body, false is always passed on
28236 + *     ReplicateHeader * header: header data (I)
28237 + *     char * query : query data (I)
28238 + *     int dest : socket of destination server(I)
28239 + * RETURN
28240 + *     the value returned by PGRsend_replicate_packet_to_server()
28241 + *     NG : STATUS_ERROR when PGR_Result is NULL
28242 + *---------------------------------------------------
28243 + */
28244 +int
28245 +PGRreplicate_packet_send_each_server( HostTbl * ptr, bool return_response, ReplicateHeader * header, char * query,int dest)
28246 +{
28247 +       char * func = "PGRreplicate_packet_send_each_server()";
28248 +       char * host;
28249 +       int rtn;
28250 +
28251 +       host = ptr->hostName;   /* assigned but never read afterwards */
28252 +       /*
28253 +        * send query to cluster DB
28254 +        */
28255 +       if (PGR_Result == NULL)
28256 +       {
28257 +               show_error("%s:PGR_Result is not initialize",func);
28258 +               return STATUS_ERROR;
28259 +       }
28260 +
28261 +       rtn = PGRsend_replicate_packet_to_server( ptr, header,query,PGR_Result, dest, false);
28262 +
28263 +       return rtn;
28264 +}
28265 +
28266 +/*--------------------------------------------------
28267 + * SYMBOL
28268 + *     PGRread_packet()
28269 + * NOTES
28270 + *     Read the fixed-size packet header from the socket, then read
28271 + *     the query data that follows it with PGRread_query().
28272 + * ARGS 
28273 + *     int sock : socket (I)
28274 + *     ReplicateHeader *header : header data (O)
28275 + * RETURN
28276 + *     OK: pointer of read query (the buffer returned by PGRread_query())
28277 + *     NG: NULL (header is NULL, select/recv failed, or recv returned 0)
28278 + *---------------------------------------------------
28279 + */
28280 +char *
28281 +PGRread_packet(int sock, ReplicateHeader *header)
28282 +{
28283 +       char * func = "PGRread_packet()";
28284 +       int r =0;
28285 +       int cnt = 0;
28286 +       char * read_ptr = NULL;
28287 +       int read_size = 0;
28288 +       int header_size = 0;
28289 +       char * query = NULL;
28290 +       fd_set      rmask;
28291 +       struct timeval timeout;
28292 +       int rtn;
28293 +
28294 +       if (header == NULL)
28295 +       {
28296 +               return NULL;
28297 +       }
28298 +       memset(header,0,sizeof(ReplicateHeader));
28299 +       read_ptr = (char*)header;
28300 +       header_size = sizeof(ReplicateHeader);
28301 +       cnt = 0;
28302 +
28303 +       for (;;){
28304 +               /*
28305 +                * read header data
28306 +                */
28307 +
28308 +               timeout.tv_sec = 1;     /* reset on every pass before calling select() again */
28309 +               timeout.tv_usec = 0;
28310 +
28311 +               /*
28312 +                * Wait for something to happen.
28313 +                */
28314 +               FD_ZERO(&rmask);
28315 +               FD_SET(sock,&rmask);
28316 +               rtn = select(sock+1,  &rmask, (fd_set *)NULL,(fd_set *)NULL, &timeout);
28317 +
28318 +               if (rtn < 0)
28319 +               {
28320 +                       if (errno == EINTR || errno == EAGAIN)
28321 +                               continue;
28322 +
28323 +                       show_error("%s:select failed ,errno is %s",func , strerror(errno));
28324 +                       return NULL;
28325 +               }
28326 +
28327 +               if (rtn && FD_ISSET(sock, &rmask))
28328 +               {
28329 +                       r = recv(sock,read_ptr + read_size ,header_size - read_size, MSG_WAITALL);
28330 +                       /*
28331 +                         r = recv(sock,read_ptr + read_size ,header_size - read_size, 0);
28332 +                       */
28333 +                       if (r < 0)
28334 +                       {
28335 +                               /* test errno before logging: a logging call in between could overwrite it, and a retry is not an error */
28336 +                               if (errno == EINTR || errno == EAGAIN)
28337 +                                       continue;
28338 +                               else
28339 +                               {
28340 +                                       show_error("%s:recv failed: (%s)",func,strerror(errno));
28341 +                                       return NULL;
28342 +                               }
28343 +                       }
28344 +                       else if (r > 0)
28345 +                       {
28346 +                               read_size += r;
28347 +                               if ( read_size == header_size)
28348 +                               {
28349 +                                       query = PGRread_query(sock,header);     /* header complete: read the query that follows */
28350 +                                       return query;
28351 +                               }
28352 +                       }
28353 +                       else if (r == 0)
28354 +                       {
28355 +                               return NULL;    /* recv() returned 0: give up on this packet */
28356 +                       }
28357 +               }
28358 +       }
28359 +       return NULL;    /* not reached: the loop above only leaves through return */
28360 +}
28361 +/* PGRread_query(): read header->query_size bytes (network byte order) from sock into a newly malloc'ed, zero-filled buffer; returns it, or NULL on failure. */
28362 +char *
28363 +PGRread_query(int sock, ReplicateHeader *header)
28364 +{
28365 +       char * func = "PGRread_query()";
28366 +       int r =0;
28367 +       int cnt = 0;
28368 +       char * read_ptr;
28369 +       int read_size = 0;
28370 +       int query_size = 0;
28371 +       char * query = NULL;
28372 +
28373 +       query_size = ntohl(header->query_size);
28374 +       if (query_size < 0)
28375 +       {
28376 +               show_error("%s:receive size less than 0",func);
28377 +               return NULL;
28378 +       }
28379 +       query = malloc(query_size+4);   /* 4 spare bytes; with the memset below the data is always NUL-terminated */
28380 +       if (query == NULL)
28381 +       {
28382 +               /*
28383 +                * buffer allocation failed
28384 +                */
28385 +               show_error("%s:malloc failed: (%s)",func,strerror(errno));
28386 +               return NULL;
28387 +       }
28388 +       memset(query,0,query_size+4);
28389 +       if (query_size == 0)
28390 +       {
28391 +               return query;   /* empty query: the zero-filled buffer is returned as is */
28392 +       }
28393 +       read_size = 0;
28394 +       cnt = 0;
28395 +       read_ptr = (char *)query;
28396 +       for (;;)
28397 +       {
28398 +               /*
28399 +                * read query data
28400 +                */
28401 +
28402 +               /*r = recv(sock,read_ptr + read_size ,query_size - read_size, MSG_WAITALL); */
28403 +               r = recv(sock,read_ptr + read_size ,query_size - read_size, 0); 
28404 +               if (r < 0)
28405 +               {
28406 +                       if (errno == EINTR || errno == EAGAIN)
28407 +                               continue;
28408 +                       else
28409 +                       {
28410 +                               show_error("%s:recv failed: (%s)",func,strerror(errno));
28411 +                               free(query);
28412 +                               query = NULL;
28413 +                               return NULL;
28414 +                       }
28415 +               }
28416 +               else if (r > 0)
28417 +               {
28418 +                       read_size += r;
28419 +                       if ( read_size == query_size)
28420 +                       {
28421 +                               return query;   /* complete: the returned buffer came from malloc() above */
28422 +                       }
28423 +               }
28424 +               else /* r == 0 */
28425 +               {
28426 +                       show_error("%s:unexpected EOF", func);
28427 +                       free(query);
28428 +                       query = NULL;
28429 +                       return NULL;
28430 +               }
28431 +       }
28432 +       free(query);    /* not reached: the loop above only leaves through return */
28433 +       query = NULL;
28434 +       return NULL;
28435 +}
28436 +/* is_autocommit_off(): true when the upper-cased start of query (at most 254 characters) contains both "AUTOCOMMIT" and "OFF"; false for NULL. */
28437 +static bool
28438 +is_autocommit_off(char * query)
28439 +{
28440 +       int i;
28441 +       char buf[256];
28442 +       char * p = NULL;
28443 +
28444 +       if (query == NULL)
28445 +               return false;
28446 +       memset(buf,0,sizeof(buf));
28447 +       p = query;
28448 +       i = 0;
28449 +       while ( *p != '\0' )
28450 +       {
28451 +               buf[i++] = toupper((unsigned char)*p);  /* toupper() is undefined for negative char values */
28452 +               p++;
28453 +               if (i >= (sizeof(buf) -2))
28454 +                       break;  /* stop early so the zero-filled tail keeps buf terminated */
28455 +       }
28456 +       p = strstr(buf,"AUTOCOMMIT");
28457 +       if ( p == NULL)
28458 +       {
28459 +               return false;
28460 +       }
28461 +       p = strstr(buf,"OFF");  /* searched over the whole copy, not only after the keyword */
28462 +       if ( p == NULL )
28463 +       {
28464 +               return false;
28465 +       }
28466 +       return true;
28467 +}
28468 +/* is_autocommit_on(): true when the upper-cased start of query (at most 254 characters) contains both "AUTOCOMMIT" and "ON"; false for NULL. */
28469 +static bool
28470 +is_autocommit_on(char * query)
28471 +{
28472 +       int i;
28473 +       char buf[256];
28474 +       char * p = NULL;
28475 +
28476 +       if (query == NULL)
28477 +               return false;
28478 +       memset(buf,0,sizeof(buf));
28479 +       p = query;
28480 +       i = 0;
28481 +       while ( *p != '\0' )
28482 +       {
28483 +               buf[i++] = toupper((unsigned char)*p);  /* toupper() is undefined for negative char values */
28484 +               p++;
28485 +               if (i >= (sizeof(buf) -2))
28486 +                       break;  /* stop early so the zero-filled tail keeps buf terminated */
28487 +       }
28488 +       p = strstr(buf,"AUTOCOMMIT");
28489 +       if ( p == NULL)
28490 +       {
28491 +               return false;
28492 +       }
28493 +       p = strstr(buf,"ON");   /* searched over the whole copy, not only after the keyword */
28494 +       if ( p == NULL )
28495 +       {
28496 +               return false;
28497 +       }
28498 +       return true;
28499 +}
28500 +/* get_host_ip_from_tbl(): return hostIP of the first transaction table entry whose host matches (case-insensitive), or 0 when none does. */
28501 +static unsigned int 
28502 +get_host_ip_from_tbl(char * host)
28503 +{
28504 +       Dlelem * ptr = NULL;
28505 +       unsigned int ip = 0;    /* result, copied out while the mutex is held */
28506 +       pthread_mutex_lock(&transaction_table_mutex);
28507 +       if (Transaction_Tbl_Begin == NULL)
28508 +       {
28509 +               pthread_mutex_unlock(&transaction_table_mutex);
28510 +               return 0;
28511 +       }
28512 +       ptr = DLGetHead(Transaction_Tbl_Begin);
28513 +       while (ptr)
28514 +       {
28515 +               TransactionTbl *transaction = DLE_VAL(ptr);
28516 +               if (!strncasecmp(transaction->host,host,sizeof(transaction->host)))
28517 +               {
28518 +                       ip = transaction->hostIP;       /* read the entry before unlocking, not after */
28519 +                       break;
28520 +               }
28521 +               ptr = DLGetSucc(ptr);
28522 +       }
28523 +       pthread_mutex_unlock(&transaction_table_mutex);
28524 +
28525 +       return ip;
28526 +}
28527 +/* get_srcHost_ip_from_tbl(): return srcHostIP of the first transaction table entry whose srcHost matches (case-insensitive), or 0 when none does. */
28528 +static unsigned int 
28529 +get_srcHost_ip_from_tbl(char * srcHost)
28530 +{
28531 +       Dlelem * ptr = NULL;
28532 +       unsigned int ip = 0;    /* result, copied out while the mutex is held */
28533 +       pthread_mutex_lock(&transaction_table_mutex);
28534 +
28535 +       if (Transaction_Tbl_Begin == NULL)
28536 +       {
28537 +               pthread_mutex_unlock(&transaction_table_mutex);
28538 +
28539 +               return 0;
28540 +       }
28541 +       ptr = DLGetHead(Transaction_Tbl_Begin);
28542 +       while (ptr)
28543 +       {
28544 +               TransactionTbl *transaction = DLE_VAL(ptr);
28545 +               if (!strncasecmp(transaction->srcHost,srcHost,sizeof(transaction->srcHost)))
28546 +               {
28547 +                       ip = transaction->srcHostIP;    /* read the entry before unlocking, not after */
28548 +
28549 +                       break;
28550 +               }
28551 +               ptr = DLGetSucc(ptr);
28552 +       }
28553 +       pthread_mutex_unlock(&transaction_table_mutex);
28554 +
28555 +       return ip;
28556 +}
28557 +/* PGRget_next_query_id(): advance the global PGR_Query_ID and return the new value. */
28558 +unsigned int
28559 +PGRget_next_query_id(void)
28560 +{
28561 +       if (PGR_Query_ID >= PGR_MAX_QUERY_ID)
28562 +       {
28563 +               PGR_Query_ID = 0;       /* wrap around; the increment below makes the next id 1 */
28564 +       }
28565 +       PGR_Query_ID ++;        /* no locking is done here */
28566 +       return PGR_Query_ID;
28567 +}
28568 +
28569 +
28570 +void
28571 +PGRnotice_replication_server(char * hostName, unsigned short portNumber,unsigned short recoveryPortNumber, unsigned short lifecheckPortNumber, char * userName)
28572 +{
28573 +       char * func ="PGRnotice_replication_server()";
28574 +       ReplicateHeader  header;
28575 +       char query[PGR_MESSAGE_BUFSIZE];
28576 +
28577 +       if (((hostName == NULL) || (*hostName == 0)) ||
28578 +               ((userName == NULL) || (*userName == 0)) ||
28579 +               ((portNumber == 0) || (recoveryPortNumber == 0)))
28580 +       {
28581 +#ifdef PRINT_DEBUG
28582 +               show_debug("%s: can not connect server[%s][%s][%d][%d]",func,hostName,userName,portNumber,recoveryPortNumber);
28583 +#endif                 
28584 +               return;
28585 +       }
28586 +       memset(&header,0,sizeof(ReplicateHeader));
28587 +       memset(query,0,sizeof(query));
28588 +       snprintf(query,sizeof(query)-1,"SELECT %s(%d,'%s',%d,%d,%d)",
28589 +                       PGR_SYSTEM_COMMAND_FUNC,
28590 +                       PGR_STARTUP_REPLICATION_SERVER_FUNC_NO,
28591 +                       hostName,
28592 +                       portNumber,
28593 +                       recoveryPortNumber,
28594 +                       lifecheckPortNumber);
28595 +       header.cmdSys = CMD_SYS_CALL;
28596 +       header.cmdSts = CMD_STS_NOTICE;
28597 +       header.query_size = htonl(strlen(query));
28598 +       header.query_id = htonl(PGRget_next_query_id());
28599 +       strncpy(header.from_host,hostName,sizeof(header.from_host));
28600 +       strncpy(header.userName,userName,sizeof(header.userName));
28601 +       strcpy(header.dbName,"template1");
28602 +       PGRreplicate_packet_send( &header, query, NOTICE_SYSTEM_CALL_TYPE ,RECOVERY_INIT);
28603 +}
28604 +/* is_need_use_rlog(): true when the replication log applies to this header: INSERT/UPDATE/DELETE/EXECUTE queries and transaction COMMIT. */
28605 +static bool
28606 +is_need_use_rlog(ReplicateHeader * header)
28607 +{
28608 +       bool rtn = false;
28609 +       if ((Cascade_Inf->useFlag != DB_TBL_USE) ||
28610 +               (PGR_Use_Replication_Log != true)  ||
28611 +               (header->rlog > 0))
28612 +       {
28613 +               rtn=false;      /* cascade entry not in use, log switched off, or header->rlog already set */
28614 +       }
28615 +       else if ((header->cmdSts == CMD_STS_QUERY ) &&
28616 +               ((header->cmdType == CMD_TYPE_INSERT) || 
28617 +                (header->cmdType == CMD_TYPE_UPDATE) || 
28618 +                (header->cmdType == CMD_TYPE_DELETE) || 
28619 +                (header->cmdType == CMD_TYPE_EXECUTE)))
28620 +       {
28621 +               rtn = true;     
28622 +       }
28623 +       else 
28624 +       {
28625 +               if ((header->cmdSts == CMD_STS_TRANSACTION ) &&
28626 +                       (header->cmdType == CMD_TYPE_COMMIT))
28627 +               {
28628 +                       rtn = true;     /* COMMIT is the only transaction command that qualifies */
28629 +               }
28630 +       }
28631 +       return rtn;
28632 +}
28633 +/* PGRinit_transaction_table(): free any existing transaction list and install a new one from DLNewList(); always returns STATUS_OK. */
28634 +int
28635 +PGRinit_transaction_table(void)
28636 +{
28637 +       if (Transaction_Tbl_Begin != NULL)
28638 +       {
28639 +               DLFreeList(Transaction_Tbl_Begin);      /* NOTE(review): transaction_table_mutex is not taken here - confirm callers are single-threaded */
28640 +       }
28641 +
28642 +       Transaction_Tbl_Begin = DLNewList();    /* the result is not checked for NULL */
28643 +
28644 +       return STATUS_OK;
28645 +} 
28646 +/* is_need_queue_jump(): true for a NULL header, and for VACUUM/ANALYZE queries whose text contains neither "full" nor "FULL"; false otherwise. */
28647 +static bool
28648 +is_need_queue_jump( ReplicateHeader * header,char *query)
28649 +{
28650 +       if (header == NULL)
28651 +       {
28652 +               return true;
28653 +       }
28654 +
28655 +       if (header->cmdSts == CMD_STS_QUERY)
28656 +       {
28657 +               if ((header->cmdType  == CMD_TYPE_VACUUM ) ||
28658 +                       (header->cmdType  == CMD_TYPE_ANALYZE ))
28659 +               {
28660 +                       if ((strstr(query,"full") == NULL) &&   /* query is not NULL-checked; mixed case such as "Full" is not matched */
28661 +                               (strstr(query,"FULL") == NULL))
28662 +                       {
28663 +                               return true;
28664 +                       }
28665 +               }
28666 +       }
28667 +       return false;
28668 +}
28669 +
28670 +/* is_executed_query_in_origin(): find the in-use host matching header->from_host/port and ask it, via is_executed_query(), whether the query already ran. */
28671 +static bool
28672 +is_executed_query_in_origin( ReplicateHeader *header )
28673 +{
28674 +       char *database = NULL;
28675 +       char port[8];
28676 +       char *userName = NULL;
28677 +       char *password = NULL;
28678 +       char * md5Salt = NULL;
28679 +       char * cryptSalt = NULL;
28680 +       char * host = NULL;
28681 +       HostTbl * host_ptr = (HostTbl*)NULL;
28682 +       TransactionTbl * transaction_tbl = (TransactionTbl*)NULL;
28683 +       PGconn * conn = (PGconn *)NULL;
28684 +       bool result = false;
28685 +
28686 +       if (Host_Tbl_Begin == NULL)
28687 +       {
28688 +               return false;   /* was STATUS_ERROR, which a bool caller sees as true whenever that macro is non-zero */
28689 +       }
28690 +       host_ptr = Host_Tbl_Begin;
28691 +       while(host_ptr->useFlag != DB_TBL_END)
28692 +       {
28693 +               /*
28694 +                * check the status of the cluster DB
28695 +                */
28696 +               if (host_ptr->useFlag != DB_TBL_USE)
28697 +               {
28698 +                       host_ptr ++;
28699 +                       continue;
28700 +               }
28701 +               if (PGRis_same_host(header->from_host,ntohs(header->port),host_ptr->hostName, host_ptr->port) == true)
28702 +               {
28703 +                       break;  /* host_ptr now points at the originating server */
28704 +               }
28705 +               host_ptr ++;
28706 +       }
28707 +       if (host_ptr->useFlag == DB_TBL_END)
28708 +       {
28709 +               return false;   /* no in-use host matched the origin */
28710 +       }
28711 +       /*
28712 +        * set up the connection
28713 +        */
28714 +       transaction_tbl = getTransactionTbl(host_ptr,header);
28715 +       if (transaction_tbl == (TransactionTbl *)NULL)
28716 +       {
28717 +               transaction_tbl = setTransactionTbl(host_ptr, header);
28718 +               if (transaction_tbl == (TransactionTbl *)NULL)
28719 +               {
28720 +                       return false;
28721 +               }
28722 +       }
28723 +       else
28724 +       {
28725 +               if ((transaction_tbl->conn == (PGconn *)NULL) ||
28726 +                       (transaction_tbl->conn->sock <= 0))
28727 +               {
28728 +                       database = (char *)(header->dbName);    /* existing entry has no usable connection: open a new one */
28729 +                       snprintf(port,sizeof(port),"%d", host_ptr->port);
28730 +                       userName = (char *)(header->userName);
28731 +                       password = (char *)(header->password);
28732 +                       md5Salt = (char *)(header->md5Salt);
28733 +                       cryptSalt = (char *)(header->cryptSalt);
28734 +                       host = (char *)(host_ptr->hostName);
28735 +                       transaction_tbl->conn = PGRcreateConn(host,port,database,userName,password,md5Salt,cryptSalt);
28736 +               }
28737 +       }
28738 +       conn = transaction_tbl->conn;
28739 +       if (conn == NULL)
28740 +       {
28741 +               return false;
28742 +       }
28743 +
28744 +       result = is_executed_query( conn, header);
28745 +       deleteTransactionTbl(host_ptr,header);  /* the table entry is dropped once the answer is known */
28746 +       return result;
28747 +}
28748 +/* is_executed_query(): run the query-confirm system call on conn; true when the command status starts with PGR_ALREADY_REPLICATED_NOTICE_CMD. */
28749 +static bool
28750 +is_executed_query( PGconn *conn, ReplicateHeader * header)
28751 +{
28752 +       PGresult * res = (PGresult *)NULL;      /* automatic, not static: nothing is carried over between calls */
28753 +       char sync_command[PGR_MESSAGE_BUFSIZE];
28754 +       char * str = NULL;
28755 +       
28756 +       snprintf(sync_command,sizeof(sync_command),
28757 +               "SELECT %s(%d,%u,%u,%u,%d) ",
28758 +               PGR_SYSTEM_COMMAND_FUNC,
28759 +               PGR_QUERY_CONFIRM_ANSWER_FUNC_NO,
28760 +               (unsigned int)ntohl(header->tv.tv_sec),
28761 +               (unsigned int)ntohl(header->tv.tv_usec),
28762 +               (unsigned int)ntohl(header->replicate_id),
28763 +               PGR_Response_Inf->response_mode);
28764 +       
28765 +       res = PQexec(conn, sync_command);
28766 +       if (res != NULL)
28767 +       {
28768 +               str = PQcmdStatus(res);
28769 +               if ((str != NULL) &&
28770 +                       (!strncasecmp(str,PGR_ALREADY_REPLICATED_NOTICE_CMD,strlen(PGR_ALREADY_REPLICATED_NOTICE_CMD))))
28771 +               {
28772 +                       PQclear(res);
28773 +                       return true;
28774 +               }
28775 +               PQclear(res);
28776 +
28777 +       }
28778 +       return false;   /* no result, or a status other than the already-replicated notice */
28779 +}
28780 +/* replicate_lo(): replay one large-object call, selected by header->cmdType, on conn; STATUS_OK on success, STATUS_ERROR on failure or NULL arguments. */
28781 +static int
28782 +replicate_lo( PGconn * conn, ReplicateHeader * header, LOArgs * query)
28783 +{
28784 +       int status = STATUS_OK;
28785 +       int mode = 0;
28786 +       Oid lobjId = 0;
28787 +       int fd = 0;
28788 +       char * buf = NULL;
28789 +       char * filename = NULL;
28790 +       size_t len = 0;
28791 +       int offset = 0;
28792 +       int whence = 0;
28793 +
28794 +       if ((conn == (PGconn *)NULL) || (query == (LOArgs *)NULL) || (header == (ReplicateHeader *)NULL))
28795 +       {
28796 +               return STATUS_ERROR;
28797 +       }
28798 +       switch (header->cmdType)        /* numeric arguments are converted with ntohl() before use */
28799 +       {
28800 +               case CMD_TYPE_LO_IMPORT :
28801 +                       filename = query->buf;
28802 +                       if (lo_import(conn, filename) > 0 )
28803 +                       {
28804 +                               status = STATUS_OK;
28805 +                       }
28806 +                       else
28807 +                       {
28808 +                               status = STATUS_ERROR;
28809 +                       }
28810 +                       break;
28811 +               case CMD_TYPE_LO_CREATE :
28812 +                       mode = (int)ntohl(query->arg1);
28813 +                       if (lo_creat(conn, mode) > 0)
28814 +                       {
28815 +                               status = STATUS_OK;
28816 +                       }
28817 +                       else
28818 +                       {
28819 +                               status = STATUS_ERROR;
28820 +                       }
28821 +                       break;
28822 +               case CMD_TYPE_LO_OPEN :
28823 +                       lobjId = (Oid)ntohl(query->arg1);
28824 +                       mode = (int)ntohl(query->arg2);
28825 +                       if (lo_open(conn, lobjId, mode) >= 0)   /* descriptor 0 is valid; libpq reports failure as -1 */
28826 +                       {
28827 +                               status = STATUS_OK;
28828 +                       }
28829 +                       else
28830 +                       {
28831 +                               status = STATUS_ERROR;
28832 +                       }
28833 +                       break;
28834 +               case CMD_TYPE_LO_WRITE :
28835 +                       fd = (int)ntohl(query->arg1);
28836 +                       len = (int)ntohl(query->arg2);
28837 +                       buf = query->buf;
28838 +                       if (lo_write(conn, fd, buf, len) == len )       /* a short write counts as failure */
28839 +                       {
28840 +                               status = STATUS_OK;
28841 +                       }
28842 +                       else
28843 +                       {
28844 +                               status = STATUS_ERROR;
28845 +                       }
28846 +                       break;
28847 +               case CMD_TYPE_LO_LSEEK :
28848 +                       fd = (int)ntohl(query->arg1);
28849 +                       offset = (int)ntohl(query->arg2);
28850 +                       whence = (int)ntohl(query->arg3);
28851 +                       if (lo_lseek(conn, fd, offset, whence) >= 0)
28852 +                       {
28853 +                               status = STATUS_OK;
28854 +                       }
28855 +                       else
28856 +                       {
28857 +                               status = STATUS_ERROR;
28858 +                       }
28859 +                       break;
28860 +               case CMD_TYPE_LO_CLOSE :
28861 +                       fd = (int)ntohl(query->arg1);
28862 +                       if (lo_close(conn, fd) == 0)
28863 +                       {
28864 +                               status = STATUS_OK;
28865 +                       }
28866 +                       else
28867 +                       {
28868 +                               status = STATUS_ERROR;
28869 +                       }
28870 +                       break;
28871 +               case CMD_TYPE_LO_UNLINK :
28872 +                       lobjId = (Oid)ntohl(query->arg1);
28873 +                       if (lo_unlink(conn,lobjId) >= 0)
28874 +                       {
28875 +                               status = STATUS_OK;
28876 +                       }
28877 +                       else
28878 +                       {
28879 +                               status = STATUS_ERROR;
28880 +                       }
28881 +                       break;
28882 +               default :
28883 +                       break;  /* any other command type leaves status at STATUS_OK */
28884 +       }
28885 +       return status;
28886 +}
28887 +/* send_func: run the SQL text "func" on the cluster DB host_ptr over its per-transaction connection and copy the command status string into "result". */
28888 +static int /* STATUS_OK when a non-empty command status was copied into result, STATUS_ERROR otherwise */
28889 +send_func(HostTbl * host_ptr,ReplicateHeader * header, char * func,char * result)
28890 +{
28891 +       char * f ="send_func()";        /* prefix for error messages */
28892 +       char       *database = NULL;
28893 +       char       port[8];     /* host_ptr->port as decimal text */
28894 +       char       *userName = NULL;
28895 +       char       *password = NULL;
28896 +       char * md5Salt = NULL;
28897 +       char * cryptSalt = NULL;
28898 +       char * host = NULL;
28899 +       char * str = NULL;      /* command status from PQcmdStatus() */
28900 +       TransactionTbl * transaction_tbl = (TransactionTbl *)NULL;
28901 +       PGresult * res = (PGresult *)NULL;
28902 +       PGconn * conn = (PGconn *)NULL;
28903 +       int rtn = 0;
28904 +       int current_cluster = 0;        /* index into StartReplication[]; stays 0 when PGR_Response_Inf is NULL */
28905 +
28906 +       if ((host_ptr == (HostTbl *)NULL)               ||
28907 +               (header == (ReplicateHeader *)NULL)     ||
28908 +               (func == NULL)                                          ||
28909 +               (result == NULL))
28910 +       {
28911 +               return STATUS_ERROR;
28912 +       }
28913 +       /*
28914 +        * collect the connection parameters from the header and the host entry
28915 +        */
28916 +       database = (char *)header->dbName;
28917 +       snprintf(port,sizeof(port),"%d", host_ptr->port);
28918 +       userName = (char *)(header->userName);
28919 +       password = (char *)(header->password);
28920 +       md5Salt = (char *)(header->md5Salt);
28921 +       cryptSalt = (char *)(header->cryptSalt);
28922 +       host = (char *)(host_ptr->hostName);
28923 +       if (PGR_Response_Inf != NULL)
28924 +       {
28925 +               current_cluster = PGR_Response_Inf->current_cluster;
28926 +       }
28927 +
28928 +       /*
28929 +        * look up the transaction table entry for this host and header;
28930 +        * the entry holds the connection to that cluster DB
28931 +        */
28932 +       transaction_tbl = getTransactionTbl(host_ptr,header);
28933 +       /*
28934 +        * no entry yet: create one with setTransactionTbl() and
28935 +        * give up (flagging the host unless this is a notice) when that fails
28936 +        */
28937 +       if (transaction_tbl == (TransactionTbl *)NULL)
28938 +       {
28939 +               transaction_tbl = setTransactionTbl(host_ptr, header);
28940 +               if (transaction_tbl == (TransactionTbl *)NULL)
28941 +               {
28942 +                       StartReplication[current_cluster] = true;
28943 +                       show_error("%s:setTransactionTbl failed",f);
28944 +                       if ( header->cmdSts != CMD_STS_NOTICE )
28945 +                       {
28946 +                               PGRset_host_status(host_ptr,DB_TBL_ERROR);
28947 +                       }
28948 +                       return STATUS_ERROR;
28949 +               }
28950 +       }
28951 +       else
28952 +       {
28953 +               /*
28954 +                * re-use the stored connection while it has a socket (> 0); otherwise close it and connect again
28955 +                */
28956 +               if ((transaction_tbl->conn != (PGconn *)NULL) &&
28957 +                       (transaction_tbl->conn->sock > 0))      /* a socket value of 0 counts as not connected here */
28958 +               {
28959 +                       StartReplication[current_cluster] = false;
28960 +               }
28961 +               else
28962 +               {
28963 +                       if (transaction_tbl->conn != (PGconn *)NULL)
28964 +                       {
28965 +                               PQfinish(transaction_tbl->conn);
28966 +                       }
28967 +                       transaction_tbl->conn = PGRcreateConn(host,port,database,userName,password,md5Salt,cryptSalt);
28968 +                       StartReplication[current_cluster] = true;
28969 +               }
28970 +       }
28971 +       conn = transaction_tbl->conn;
28972 +
28973 +       if (conn == NULL)       /* no connection could be obtained */
28974 +       {
28975 +               show_error("%s:[%d@%s] may be down",f,host_ptr->port,host_ptr->hostName);
28976 +               if ( header->cmdSts != CMD_STS_NOTICE )
28977 +               {
28978 +                       StartReplication[current_cluster] = true;
28979 +                       PGRset_host_status(host_ptr,DB_TBL_ERROR);
28980 +               }
28981 +               return STATUS_ERROR;
28982 +       }
28983 +       res = PQexec(conn, func);
28984 +       if (res == NULL)
28985 +       {
28986 +               StartReplication[current_cluster] = true;
28987 +               return STATUS_ERROR;
28988 +       }
28989 +       str = PQcmdStatus(res);
28990 +       if ((str == NULL) || (*str == '\0'))    /* an empty command status is reported as failure */
28991 +       {
28992 +               rtn = STATUS_ERROR;
28993 +       }
28994 +       else
28995 +       {
28996 +               snprintf(result, PGR_MESSAGE_BUFSIZE, "%s",str);        /* assumes result holds PGR_MESSAGE_BUFSIZE bytes, as the callers in this file provide -- confirm for others */
28997 +               rtn = STATUS_OK;
28998 +       }
28999 +       if (res != NULL)        /* always true here: the NULL case returned above */
29000 +               PQclear(res);
29001 +       return rtn;     
29002 +}
29003 +
29004 +/* get_oid: run the PGR_GET_OID_FUNC_NO system command on one cluster DB and parse the reply as an unsigned decimal; 0 when send_func() fails. */
29005 +static uint32_t
29006 +get_oid(HostTbl * host_ptr,ReplicateHeader * header)
29007 +{
29008 +       char reply[PGR_MESSAGE_BUFSIZE];
29009 +       char sql[PGR_MESSAGE_BUFSIZE];
29010 +
29011 +       memset(reply, 0, sizeof(reply));
29012 +       snprintf(sql, sizeof(sql), "SELECT %s(%d)",
29013 +               PGR_SYSTEM_COMMAND_FUNC, PGR_GET_OID_FUNC_NO);
29014 +       if (send_func(host_ptr, header, sql, reply) != STATUS_OK)
29015 +       {
29016 +               return 0;
29017 +       }
29018 +       return (strtoul(reply, NULL, 10));
29019 +}
29020 +
29021 +/* set_oid: run the PGR_SET_OID_FUNC_NO system command with the given oid on one cluster DB; returns the send_func() status. */
29022 +static int
29023 +set_oid(HostTbl * host_ptr,ReplicateHeader * header, uint32_t oid)
29024 +{
29025 +       char reply[PGR_MESSAGE_BUFSIZE];
29026 +       char sql[PGR_MESSAGE_BUFSIZE];
29027 +       int status;
29028 +
29029 +       memset(reply, 0, sizeof(reply));
29030 +       snprintf(sql, sizeof(sql), "SELECT %s(%d,%u)",
29031 +               PGR_SYSTEM_COMMAND_FUNC, PGR_SET_OID_FUNC_NO, oid);
29032 +       status = send_func(host_ptr, header, sql, reply);
29033 +       return status;
29034 +}
29035 +
29036 +/*
29037 + * PGRsync_oid: align the OID counter of every participating cluster DB.
29038 + *
29039 + * Pass 1 queries each host with get_oid() and keeps the largest value;
29040 + * pass 2 sends that value to the same hosts with set_oid().
29041 + *
29042 + * A host participates when its useFlag is DB_TBL_USE and
29043 + * is_master_in_recovery() does not return true for it.  The recovery
29044 + * status is read once and used for both passes.
29045 + *
29046 + * Returns STATUS_ERROR when the host table is missing or when no host
29047 + * reported a non-zero OID, STATUS_OK otherwise.  The results of the
29048 + * individual set_oid() calls are not examined.
29049 + */
29050 +int
29051 +PGRsync_oid(ReplicateHeader *header)
29052 +{
29053 +       HostTbl * cur;
29054 +       uint32_t highest = 0;
29055 +       uint32_t reported = 0;
29056 +       int rec_sts = 0;
29057 +
29058 +       cur = Host_Tbl_Begin;
29059 +       if (cur == (HostTbl *)NULL)
29060 +       {
29061 +               return STATUS_ERROR;
29062 +       }
29063 +       rec_sts = PGRget_recovery_status();
29064 +
29065 +       /* pass 1: find the largest oid among the participating hosts */
29066 +       for (; cur->useFlag != DB_TBL_END; cur++)
29067 +       {
29068 +               /* entries that are not in use are skipped */
29069 +               if (cur->useFlag != DB_TBL_USE)
29070 +               {
29071 +                       continue;
29072 +               }
29073 +               /* so is the master DB while a recovery is going on */
29074 +               if (is_master_in_recovery(cur->hostName, cur->port, rec_sts) == true)
29075 +               {
29076 +                       continue;
29077 +               }
29078 +               reported = get_oid(cur, header);
29079 +               if (reported > highest)
29080 +               {
29081 +                       highest = reported;
29082 +               }
29083 +       }
29084 +       /* highest is unsigned, so this test only catches zero */
29085 +       if (highest <= 0)
29086 +       {
29087 +               return STATUS_ERROR;
29088 +       }
29089 +
29090 +       /* pass 2: hand that oid to the same set of hosts */
29091 +       for (cur = Host_Tbl_Begin; cur->useFlag != DB_TBL_END; cur++)
29092 +       {
29093 +               /* same two filters as in pass 1 */
29094 +               if (cur->useFlag != DB_TBL_USE)
29095 +               {
29096 +                       continue;
29097 +               }
29098 +               if (is_master_in_recovery(cur->hostName, cur->port, rec_sts) == true)
29099 +               {
29100 +                       continue;
29101 +               }
29102 +               /* the status of each set_oid() call is deliberately not checked */
29103 +               set_oid(cur, header, highest);
29104 +       }
29105 +
29106 +       return STATUS_OK;
29107 +}
29108 +
29109 +/*
29110 + * PGRload_replication_id: reload Recovery_Status_Inf->replication_id from
29111 + * the first line of the replication id file (RidFp).  The id is the
29112 + * unsigned decimal number after the last blank of that line; a missing
29113 + * line or a line without a blank gives id 0.  Returns STATUS_ERROR only
29114 + * when Recovery_Status_Inf or RidFp is not set up, STATUS_OK otherwise.
29115 + */
29116 +int
29117 +PGRload_replication_id(void)
29118 +{
29119 +       char * func = "PGRload_replication_id()";
29120 +       char buf[256];
29121 +       char *p = NULL;
29122 +       uint32_t id = 0;
29123 +       if (Recovery_Status_Inf == (RecoveryStatusInf *)NULL)
29124 +       {
29125 +               show_error("%s: Recovery_Status_Inf is NULL",func);
29126 +               return STATUS_ERROR;
29127 +       }
29128 +       if (RidFp == (FILE *)NULL)
29129 +       {
29130 +               show_error("%s: replication id file is not open",func);
29131 +               return STATUS_ERROR;
29132 +       }
29133 +       rewind(RidFp);
29134 +       if (fgets(buf,sizeof(buf),RidFp) != NULL)
29135 +       {
29136 +               p = strrchr(buf,' ');
29137 +       }
29138 +       if (p != NULL)
29139 +       {
29140 +               /* strtoul spans the whole uint32_t range; signed atol() is undefined for ids that do not fit in a long */
29141 +               id = (uint32_t) strtoul(p + 1, NULL, 10);
29142 +       }
29143 +       Recovery_Status_Inf->replication_id = id;
29144 +       return STATUS_OK;
29145 +}
29146 +
29147 +/* notice_abort: run the PGR_NOTICE_ABORT_FUNC_NO system command on one cluster DB; returns the send_func() status. */
29148 +static int
29149 +notice_abort(HostTbl * host_ptr,ReplicateHeader * header)
29150 +{
29151 +       char reply[PGR_MESSAGE_BUFSIZE];
29152 +       char sql[PGR_MESSAGE_BUFSIZE];
29153 +       int status;
29154 +
29155 +       memset(reply, 0, sizeof(reply));
29156 +       snprintf(sql, sizeof(sql), "SELECT %s(%d)", PGR_SYSTEM_COMMAND_FUNC, PGR_NOTICE_ABORT_FUNC_NO);
29157 +       status = send_func(host_ptr, header, sql, reply);
29158 +       return status;
29159 +}
29160 +
29161 +/*
29162 + * send_p_parse: read the body of a Parse message from input_message and
29163 + * queue the same message (type 'P') on conn.  Returns STATUS_ERROR as soon
29164 + * as a libpq put call fails, STATUS_OK once the message has been ended.
29165 + */
29166 +static int
29167 +send_p_parse (PGconn * conn, StringInfo input_message)
29168 +{
29169 +       const char *name;
29170 +       const char *sql;
29171 +       int                     nparams;
29172 +       int                     left;
29173 +       Oid                     type_oid;
29174 +
29175 +       /* statement name, then query text */
29176 +       name = pq_getmsgstring(input_message);
29177 +       sql = pq_getmsgstring(input_message);
29178 +       if (pqPutMsgStart('P', false, conn) < 0)
29179 +               return STATUS_ERROR;
29180 +       if (pqPuts(name, conn) < 0)
29181 +               return STATUS_ERROR;
29182 +       if (pqPuts(sql, conn) < 0)
29183 +               return STATUS_ERROR;
29184 +
29185 +       /* two-byte parameter count */
29186 +       nparams = pq_getmsgint(input_message, 2);
29187 +       if (pqPutInt(nparams, 2, conn) < 0)
29188 +               return STATUS_ERROR;
29189 +
29190 +       /* one four-byte type oid per parameter */
29191 +       for (left = nparams; left > 0; left--)
29192 +       {
29193 +               type_oid = pq_getmsgint(input_message, 4);
29194 +               if (pqPutInt(type_oid, 4, conn) < 0)
29195 +               {
29196 +                       return STATUS_ERROR;
29197 +               }
29198 +       }
29199 +       /* close the message */
29200 +       if (pqPutMsgEnd(conn) < 0)
29201 +               return STATUS_ERROR;
29202 +       return STATUS_OK;
29203 +}
29204 +/* send_p_bind: copy the body of a Bind message from input_message, field by field, into a 'B' message queued on conn; STATUS_ERROR as soon as a libpq put call fails, else STATUS_OK. */
29205 +static int
29206 +send_p_bind (PGconn * conn, StringInfo input_message)
29207 +{
29208 +       const char *portal_name;
29209 +       const char *stmt_name;
29210 +       int                     numPFormats;
29211 +       int16           pformats;       /* one parameter format code at a time */
29212 +       int                     numParams;
29213 +       int                     numRFormats;
29214 +       int16           rformats;       /* one result format code at a time */
29215 +       int                     i;
29216 +
29217 +       /* Read and forward the fixed part of the message: portal name, statement name */
29218 +       portal_name = pq_getmsgstring(input_message);
29219 +       stmt_name = pq_getmsgstring(input_message);
29220 +       if (pqPutMsgStart('B', false, conn) < 0 ||
29221 +               pqPuts(portal_name, conn) < 0 ||
29222 +               pqPuts(stmt_name, conn) < 0)
29223 +       {
29224 +               return STATUS_ERROR;
29225 +       }
29226 +
29227 +       /* Read and forward the parameter format codes (two-byte count, then two bytes each) */
29228 +       numPFormats = pq_getmsgint(input_message, 2);
29229 +       if (pqPutInt(numPFormats, 2, conn) < 0)
29230 +       {
29231 +               return STATUS_ERROR;
29232 +       }
29233 +       if (numPFormats > 0)
29234 +       {
29235 +               for (i = 0; i < numPFormats; i++)
29236 +               {
29237 +                       pformats = pq_getmsgint(input_message, 2);
29238 +                       if (pqPutInt(pformats, 2, conn) < 0)
29239 +                       {
29240 +                               return STATUS_ERROR;
29241 +                       }
29242 +               }
29243 +       }
29244 +
29245 +       /* Read and forward the parameter values (two-byte count, then length-prefixed values) */
29246 +       numParams = pq_getmsgint(input_message, 2);
29247 +       if (pqPutInt(numParams, 2, conn) < 0)
29248 +       {
29249 +               return STATUS_ERROR;
29250 +       }
29251 +       if (numParams > 0)
29252 +       {
29253 +               int32       plength;    /* four-byte length that precedes each value */
29254 +               for (i = 0 ; i < numParams ; i ++)
29255 +               {
29256 +                       plength = pq_getmsgint(input_message, 4);
29257 +                       if (plength != -1)      /* any length other than -1 is followed by that many value bytes */
29258 +                       {
29259 +                               const char *pvalue = pq_getmsgbytes(input_message, plength);
29260 +                               if (pqPutInt(plength, 4, conn) < 0 ||
29261 +                                       pqPutnchar(pvalue, plength, conn) < 0)
29262 +                               {
29263 +                                       return STATUS_ERROR;
29264 +                               }
29265 +                       }
29266 +                       else    /* length -1: only the length itself is forwarded, no value bytes */
29267 +                       {
29268 +                               if (pqPutInt(plength, 4, conn) < 0)
29269 +                               {
29270 +                                       return STATUS_ERROR;
29271 +                               }
29272 +                       }
29273 +               }
29274 +       }
29275 +
29276 +       /* Read and forward the result format codes (two-byte count, then two bytes each) */
29277 +       numRFormats = pq_getmsgint(input_message, 2);
29278 +       if (pqPutInt(numRFormats, 2, conn) < 0 )
29279 +       {
29280 +               return STATUS_ERROR;
29281 +       }
29282 +       if (numRFormats > 0)
29283 +       {
29284 +               for (i = 0; i < numRFormats; i++)
29285 +               {
29286 +                       rformats = pq_getmsgint(input_message, 2);
29287 +                       if (pqPutInt(rformats, 2, conn) < 0)
29288 +                       {
29289 +                               return STATUS_ERROR;
29290 +                       }
29291 +               }
29292 +       }
29293 +       if (pqPutMsgEnd(conn) < 0)      /* finish the 'B' message started above */
29294 +       {
29295 +               return STATUS_ERROR;
29296 +       }
29297 +       return STATUS_OK;
29298 +}
29299 +
29300 +/* send_p_describe: read the type byte and target name of a Describe message from
29301 + * input_message and queue the same message (type 'D') on conn. */
29302 +static int
29303 +send_p_describe (PGconn * conn, StringInfo input_message)
29304 +{
29305 +       int                     kind;
29306 +       const char *target;
29307 +
29308 +       kind = pq_getmsgbyte(input_message);
29309 +       target = pq_getmsgstring(input_message);
29310 +
29311 +       /* queue the Describe message; stop at the first put call that fails */
29312 +       if (pqPutMsgStart('D', false, conn) < 0)
29313 +               return STATUS_ERROR;
29314 +       if (pqPutc(kind, conn) < 0)
29315 +               return STATUS_ERROR;
29316 +       if (pqPuts(target, conn) < 0)
29317 +               return STATUS_ERROR;
29318 +       return (pqPutMsgEnd(conn) < 0) ? STATUS_ERROR : STATUS_OK;
29319 +}
29320 +
29321 +/* send_p_execute: read portal name and row limit of an Execute message from input_message and queue the same message (type 'E') on conn. */
29322 +static int
29323 +send_p_execute (PGconn * conn, StringInfo input_message)
29324 +{
29325 +       const char *portal;
29326 +       int                     row_limit;
29327 +
29328 +       portal = pq_getmsgstring(input_message);
29329 +       row_limit = pq_getmsgint(input_message, 4);
29330 +
29331 +       if (pqPutMsgStart('E', false, conn) < 0)
29332 +               return STATUS_ERROR;
29333 +       if (pqPuts(portal, conn) < 0)
29334 +               return STATUS_ERROR;
29335 +       if (pqPutInt(row_limit, 4, conn) < 0)
29336 +               return STATUS_ERROR;
29337 +       return (pqPutMsgEnd(conn) < 0) ? STATUS_ERROR : STATUS_OK;
29338 +}
29339 +/* send_p_sync: queue a Sync ('S') message on conn, flush it, then read and discard every result; input_message is not used. STATUS_ERROR only when queueing or flushing fails. */
29340 +static int
29341 +send_p_sync (PGconn * conn, StringInfo input_message)
29342 +{
29343 +       PGresult   *result;
29344 +       PGresult   *lastResult;
29345 +
29346 +       /* construct the Sync message (it has no body) */
29347 +       if (pqPutMsgStart('S', false, conn) < 0 ||
29348 +               pqPutMsgEnd(conn) < 0)
29349 +       {
29350 +               return STATUS_ERROR;
29351 +       }
29352 +       /* remember we are using extended query protocol */
29353 +       conn->queryclass = PGQUERY_EXTENDED;
29354 +
29355 +       /*
29356 +        * Give the data a push.  In nonblock mode, don't complain if we're unable
29357 +        * to send it all; PQgetResult() will do any additional flushing needed.
29358 +        */
29359 +       if (pqFlush(conn) < 0)
29360 +       {
29361 +               return STATUS_ERROR;
29362 +       }
29363 +
29364 +       /* OK, it's launched! */
29365 +       conn->asyncStatus = PGASYNC_BUSY;
29366 +
29367 +       lastResult = NULL;
29368 +       while ((result = PQgetResult(conn)) != NULL)    /* drain results until libpq reports none left */
29369 +       {
29370 +               if (lastResult)
29371 +               {
29372 +                       if (lastResult->resultStatus == PGRES_FATAL_ERROR &&
29373 +                               result->resultStatus == PGRES_FATAL_ERROR)
29374 +                       {
29375 +                               PQclear(result);        /* two fatal errors in a row: keep the earlier one */
29376 +                               result = lastResult;
29377 +                       }
29378 +                       else
29379 +                               PQclear(lastResult);
29380 +               }
29381 +               lastResult = result;
29382 +               if (result->resultStatus == PGRES_COPY_IN ||
29383 +                       result->resultStatus == PGRES_COPY_OUT ||
29384 +                       conn->status == CONNECTION_BAD)
29385 +                       break;  /* stop draining on a COPY state or a bad connection */
29386 +       }
29387 +       if (lastResult != NULL)
29388 +       {
29389 +               PQclear(lastResult);    /* the final result is discarded without looking at its status */
29390 +       }
29391 +       return STATUS_OK;       /* returned even when the last result was an error */
29392 +}
29393 +
29394 +/* send_p_close: read the type byte and target name of a Close message from input_message and queue the same message (type 'C') on conn. */
29395 +static int
29396 +send_p_close (PGconn * conn, StringInfo input_message)
29397 +{
29398 +       int                     kind;
29399 +       const char *target;
29400 +
29401 +       kind = pq_getmsgbyte(input_message);
29402 +       target = pq_getmsgstring(input_message);
29403 +
29404 +       if (pqPutMsgStart('C', false, conn) < 0)
29405 +               return STATUS_ERROR;
29406 +       if (pqPutc(kind, conn) < 0)
29407 +               return STATUS_ERROR;
29408 +       if (pqPuts(target, conn) < 0)
29409 +               return STATUS_ERROR;
29410 +       return (pqPutMsgEnd(conn) < 0) ? STATUS_ERROR : STATUS_OK;
29411 +}
29412 +/* set_string_info: point input_message at query (no copy), sized from header->query_size, with the read cursor at 0. */
29413 +static void
29414 +set_string_info(StringInfo input_message, ReplicateHeader * header, char * query)
29415 +{
29416 +       const int total = ntohl(header->query_size);
29417 +       input_message->cursor = 0;
29418 +       input_message->data = query;
29419 +       input_message->len = total - 1;
29420 +       input_message->maxlen = total;
29421 +}
29422 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/rlog.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/rlog.c
29423 --- postgresql-8.2.4/src/pgcluster/pgrp/rlog.c  1970-01-01 01:00:00.000000000 +0100
29424 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/rlog.c        2007-02-18 22:52:17.000000000 +0100
29425 @@ -0,0 +1,1260 @@
29426 +/*--------------------------------------------------------------------
29427 + * FILE:
29428 + *     rlog.c
29429 + *
29430 + * NOTE:
29431 + *     This file contains the functions that pgreplicate calls to
29432 + *     maintain the replicate ahead log.
29433 + *
29434 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
29435 + *--------------------------------------------------------------------
29436 + */
29437 +#ifdef USE_REPLICATION
29438 +
29439 +#include "postgres.h"
29440 +#include "postgres_fe.h"
29441 +
29442 +#include <stdio.h>
29443 +#include <sys/types.h>
29444 +#include <sys/stat.h>
29445 +#include <sys/un.h>
29446 +#include <unistd.h>
29447 +#ifdef HAVE_FCNTL_H
29448 +#include <fcntl.h>
29449 +#endif
29450 +#include <errno.h>
29451 +#include <ctype.h>
29452 +#include <time.h>
29453 +#include <sys/ipc.h>
29454 +#include <sys/shm.h>
29455 +#include <sys/sem.h>
29456 +#include <sys/msg.h>
29457 +#include <signal.h>
29458 +#include <sys/socket.h>
29459 +#include <netdb.h>
29460 +#ifdef HAVE_NETINET_TCP_H
29461 +#include <netinet/tcp.h>
29462 +#endif
29463 +#include <dirent.h>
29464 +#include <arpa/inet.h>
29465 +
29466 +#ifdef HAVE_CRYPT_H
29467 +#include <crypt.h>
29468 +#endif
29469 +
29470 +#ifdef MULTIBYTE
29471 +#include "mb/pg_wchar.h"
29472 +#endif
29473 +
29474 +#include "libpq-fe.h"
29475 +#include "libpq-int.h"
29476 +#include "fe-auth.h"
29477 +#include "access/xact.h"
29478 +#include "replicate_com.h"
29479 +#include "pgreplicate.h"
29480 +
29481 +static int RLog_Recv_Sock = -1;
29482 +/*--------------------------------------
29483 + * PROTOTYPE DECLARATION
29484 + *--------------------------------------
29485 + */
29486 +static int set_query_log(ReplicateHeader * header, char * query);
29487 +static QueryLogType * get_query_log_by_header(ReplicateHeader * header);
29488 +static QueryLogType * get_query_log(ReplicateHeader * header);
29489 +static void delete_query_log(ReplicateHeader * header);
29490 +static int set_commit_log(ReplicateHeader * header);
29491 +static CommitLogInf * get_commit_log(ReplicateHeader * header);
29492 +static void delete_commit_log(ReplicateHeader * header);
29493 +static bool was_committed_transaction(ReplicateHeader * header);
29494 +static int create_recv_rlog_socket(void);
29495 +static int do_rlog(int fd);
29496 +static int recv_message(int sock,char * buf, int len);
29497 +static int send_message(int sock, char * msg, int len);
29498 +static void exit_rlog(int sig);
29499 +static int reconfirm_commit(ReplicateHeader * header);
29500 +#if 0
29501 +static int PGRget_sync_data(ReplicateHeader *header);
29502 +static int PGRdelete_sync_data(ReplicateHeader *header);
29503 +#endif /* #if 0 */
29504 +
29505 +/* PGRwrite_rlog: update the replicate log according to header->cmdSts; STATUS_ERROR only for a NULL header, STATUS_OK otherwise (helper results are not checked). */
29506 +int
29507 +PGRwrite_rlog(ReplicateHeader * header, char * query)
29508 +{
29509 +       char * func = "PGRwrite_rlog()";
29510 +
29511 +       if (header == NULL)
29512 +       {
29513 +               show_error("%s:header is null",func);
29514 +               return STATUS_ERROR;
29515 +       }
29516 +       switch (header->cmdSts)
29517 +       {
29518 +               case CMD_STS_QUERY:     /* store a copy of the query */
29519 +#ifdef PRINT_DEBUG
29520 +                       show_debug("%s:set_query_log",func);
29521 +#endif                 
29522 +                       set_query_log(header,query);
29523 +                       break;
29524 +               case CMD_STS_DELETE_QUERY:      /* drop the stored query for this header */
29525 +#ifdef PRINT_DEBUG
29526 +                       show_debug("%s:delete_query_log",func);
29527 +#endif                 
29528 +                       delete_query_log(header);
29529 +                       break;
29530 +               case CMD_STS_TRANSACTION:       /* only commits are recorded; other command types are ignored */
29531 +                       if (header->cmdType == CMD_TYPE_COMMIT)
29532 +                       {
29533 +#ifdef PRINT_DEBUG
29534 +                               show_debug("%s:set_commit_log call",func);
29535 +#endif                 
29536 +                               set_commit_log(header);
29537 +                       }
29538 +                       break;
29539 +               case CMD_STS_DELETE_TRANSACTION:        /* only commit records are removed */
29540 +                       if (header->cmdType == CMD_TYPE_COMMIT)
29541 +                       {
29542 +#ifdef PRINT_DEBUG
29543 +                               show_debug("%s:delete_commit_log call",func);
29544 +#endif                 
29545 +                               delete_commit_log(header);
29546 +                       }
29547 +                       break;
29548 +       default:        /* unknown status: logged, but still answered with STATUS_OK */
29549 +         show_error("%s:unknown status %c",func,header->cmdSts);
29550 +         break;
29551 +       }
29552 +       return STATUS_OK;
29553 +}
29554 +
29555 +/*
29556 + * PGRget_requested_query: return the header of the logged query that has the
29557 + * same request_id, pid, port and from_host as header, or NULL if none has.
29558 + */
29559 +ReplicateHeader *
29560 +PGRget_requested_query(ReplicateHeader * header)
29561 +{
29562 +       QueryLogType * entry;
29563 +       ReplicateHeader * logged;
29564 +       for (entry = Query_Log_Top; entry != (QueryLogType *)NULL; entry = (QueryLogType *)(entry->next))
29565 +       {
29566 +               logged = entry->header;
29567 +               if ((logged->request_id != header->request_id) ||
29568 +                       (logged->pid != header->pid) ||
29569 +                       (logged->port != header->port) ||
29570 +                       (strncmp(logged->from_host,header->from_host,sizeof(header->from_host)) != 0))
29571 +               {
29572 +                       continue;
29573 +               }
29574 +               return logged;
29575 +       }
29576 +       return (ReplicateHeader *)NULL;
29577 +}
29578 +
29579 +/*
29580 + * set_query_log: append a private copy of (header, query) to the list
29581 + * delimited by Query_Log_Top / Query_Log_End.
29582 + *
29583 + * The node is linked in only after every allocation has succeeded, so a
29584 + * failure leaves the list untouched and frees what was allocated.
29585 + * Returns STATUS_OK, or STATUS_ERROR for bad arguments or malloc failure.
29586 + */
29587 +static int
29588 +set_query_log(ReplicateHeader * header, char * query)
29589 +{
29590 +       char * func = "set_query_log()";
29591 +       int size = 0;
29592 +       QueryLogType * query_log = NULL;
29593 +
29594 +       if (header != (ReplicateHeader *)NULL)
29595 +               size = ntohl(header->query_size);       /* number of query bytes to copy */
29596 +       if ((header == NULL) || (size < 0) || ((query == NULL) && (size > 0)))
29597 +       {
29598 +               show_error("%s:invalid header or query",func);
29599 +               return STATUS_ERROR;
29600 +       }
29601 +       /* build the complete node before touching the list */
29602 +       query_log = (QueryLogType *)malloc(sizeof(QueryLogType));
29603 +       if (query_log != (QueryLogType *)NULL)
29604 +       {
29605 +               query_log->header = (ReplicateHeader *)malloc(sizeof(ReplicateHeader));
29606 +               query_log->query = (char *)malloc(size+4);
29607 +       }
29608 +       if ((query_log == NULL) || (query_log->header == NULL) || (query_log->query == NULL))
29609 +       {
29610 +               show_error("%s:malloc failed: (%s)",func,strerror(errno));
29611 +               if (query_log != NULL)
29612 +               {
29613 +                       free(query_log->header);        /* free(NULL) is harmless */
29614 +                       free(query_log->query);
29615 +                       free(query_log);
29616 +               }
29617 +               return STATUS_ERROR;
29618 +       }
29619 +       memset(query_log->query,0,size+4);      /* the copy is followed by four zero bytes */
29620 +       memcpy(query_log->header,header,sizeof(ReplicateHeader));
29621 +       query_log->header->rlog = FROM_R_LOG_TYPE ;
29622 +       if (size > 0)
29623 +               memcpy(query_log->query,query,size);
29624 +
29625 +       /* append at the tail; an empty list gets its first node */
29626 +       query_log->next = NULL;
29627 +       query_log->last = (Query_Log_Top == NULL) ? NULL : (char *)Query_Log_End;
29628 +       if (Query_Log_Top == NULL)
29629 +               Query_Log_Top = query_log;
29630 +       else
29631 +               Query_Log_End->next = (char *)query_log;
29632 +       Query_Log_End = query_log;
29633 +       return STATUS_OK;
29634 +}
29635 +
29636 +/*
29637 + * get_query_log_by_header: return the log entry whose header has the same
29638 + * request_id, pid, port and from_host as header, or NULL if none has.
29639 + * The wanted key and every entry inspected are passed to show_debug().
29640 + */
29641 +static QueryLogType *
29642 +get_query_log_by_header(ReplicateHeader * header)
29643 +{
29644 +       QueryLogType * entry;
29645 +       ReplicateHeader * logged;
29646 +
29647 +       /* an empty list returns before anything is traced */
29648 +       if (Query_Log_Top == NULL)
29649 +       {
29650 +               return (QueryLogType *)NULL;
29651 +       }
29652 +       show_debug("get_query_log_by_header:header is %d,%d,%d,%s",
29653 +               header->request_id, header->pid, header->port, header->from_host);
29654 +
29655 +       for (entry = Query_Log_Top; entry != (QueryLogType *)NULL; entry = (QueryLogType *)(entry->next))
29656 +       {
29657 +               logged = entry->header;
29658 +               show_debug("get_query_log_by_header:comparing to %d,%d,%d,%s",
29659 +                       logged->request_id, logged->pid, logged->port, logged->from_host);
29660 +               if ((logged->request_id != header->request_id) ||
29661 +                       (logged->pid != header->pid) ||
29662 +                       (logged->port != header->port) ||
29663 +                       (strncmp(logged->from_host,header->from_host,sizeof(header->from_host)) != 0))
29664 +               {
29665 +                       continue;
29666 +               }
29667 +               return entry;
29668 +       }
29669 +       return (QueryLogType *)NULL;
29670 +}
29671 +
29672 +/*
29673 + * get_query_log: return the first logged query whose header carries the
29674 + * same replicate_id as the given header, or NULL when there is none.
29675 + */
29676 +static QueryLogType *
29677 +get_query_log(ReplicateHeader * header)
29678 +{
29679 +       QueryLogType * entry = Query_Log_Top;
29680 +
29681 +       while (entry != (QueryLogType *)NULL)
29682 +       {
29683 +               show_debug("get_qurey_log: comparing in log is %d,header is %d",entry->header->replicate_id,header->replicate_id);
29684 +               if (entry->header->replicate_id == header->replicate_id)
29685 +               {
29686 +                       break;
29687 +               }
29688 +               entry = (QueryLogType *)(entry->next);
29689 +       }
29690 +       /* entry is NULL here when the list is empty or was exhausted */
29691 +       return entry;
29692 +}
29693 +
29694 +/*
29695 + * delete_query_log: remove from the query log list the entry that
29696 + * get_query_log() finds for this header, and free it together with the
29697 + * header and query copies it points to.  Nothing happens when no entry
29698 + * matches.
29699 + */
29700 +static void
29701 +delete_query_log(ReplicateHeader * header)
29702 +{
29703 +       QueryLogType * victim;
29704 +       QueryLogType * before;
29705 +       QueryLogType * after;
29706 +
29707 +       victim = get_query_log(header);
29708 +       if (victim == NULL)
29709 +       {
29710 +               return ;
29711 +       }
29712 +       before = (QueryLogType *)victim->last;
29713 +       after = (QueryLogType *)victim->next;
29714 +
29715 +       /* detach from the predecessor, or move the list head */
29716 +       if (before == (QueryLogType *)NULL)
29717 +       {
29718 +               Query_Log_Top = after;
29719 +       }
29720 +       else
29721 +       {
29722 +               before->next = (char *)after;
29723 +       }
29724 +       /* detach from the successor, or move the list tail */
29725 +       if (after == (QueryLogType *)NULL)
29726 +       {
29727 +               Query_Log_End = before;
29728 +       }
29729 +       else
29730 +       {
29731 +               after->last = (char *)before;
29732 +       }
29733 +
29734 +       /* release the node and its copies; free() ignores NULL pointers */
29735 +       free(victim->header);
29736 +       free(victim->query);
29737 +       free(victim);
29738 +}
29739 +
29740 +/*
29741 + * Copy *header into the first Commit_Log_Tbl slot not marked DB_TBL_USE
29742 + * and bump commit_log_num.  Returns STATUS_ERROR only when the table is
29743 + * NULL; STATUS_OK is returned even if no free slot was found.
29744 + */
29745 +static int
29746 +set_commit_log(ReplicateHeader * header)
29747 +{
29748 +       CommitLogInf * slot;
29749 +
29750 +       if (Commit_Log_Tbl == NULL)
29751 +       {
29752 +               return STATUS_ERROR;
29753 +       }
29754 +       /* element 0 carries commit_log_num; the scan starts at index 1 */
29755 +       for (slot = Commit_Log_Tbl + 1; slot->inf.useFlag != DB_TBL_END; slot ++)
29756 +       {
29757 +               if (slot->inf.useFlag == DB_TBL_USE)
29758 +                       continue;
29759 +               slot->inf.useFlag = DB_TBL_USE;
29760 +               memcpy(&(slot->header),header,sizeof(ReplicateHeader));
29761 +               Commit_Log_Tbl->inf.commit_log_num ++;
29762 +               break;
29763 +       }
29764 +       return STATUS_OK;
29765 +}
29766 +
29767 +/*
29768 + * Return the slot of Commit_Log_Tbl that is in use (DB_TBL_USE) and whose
29769 + * header has the same replicate_id as *header, or NULL when the table is
29770 + * missing or nothing matches.  Only header->replicate_id is read.
29771 + */
29772 +static CommitLogInf *
29773 +get_commit_log(ReplicateHeader * header)
29774 +{
29775 +       CommitLogInf * commit_log = NULL;
29776 +       int cnt = 0;    /* slots in use visited so far */
29777 +
29778 +       if (Commit_Log_Tbl == NULL)
29779 +       {
29780 +               return (CommitLogInf *)NULL;
29781 +       }
29782 +       /* element 0 carries commit_log_num; the scan starts at index 1 */
29783 +       commit_log = Commit_Log_Tbl + 1;
29784 +       while ( commit_log->inf.useFlag != DB_TBL_END )
29785 +       {
29786 +               if (commit_log->inf.useFlag == DB_TBL_USE)
29787 +               {
29788 +                       cnt ++;
29789 +                       /* commit_log->header is a member embedded in the
29790 +                        * slot: its address is never NULL, so no NULL test
29791 +                        * is needed before reading replicate_id */
29792 +                       if (commit_log->header.replicate_id == header->replicate_id)
29793 +                       {
29794 +                               return commit_log;
29795 +                       }
29796 +               }
29797 +               /* every slot in use has been visited: stop scanning */
29798 +               if (cnt >= Commit_Log_Tbl->inf.commit_log_num)
29799 +               {
29800 +                       break;
29801 +               }
29802 +               commit_log ++;
29803 +       }
29804 +       return (CommitLogInf *)NULL;
29805 +}
29806 +
29807 +/* Clear the commit-log slot matching header->replicate_id, if any, and
29808 + * decrement the count of slots in use. */
29809 +static void
29810 +delete_commit_log(ReplicateHeader * header)
29811 +{
29812 +       CommitLogInf * slot = get_commit_log(header);
29813 +
29814 +       if (slot == NULL)
29815 +               return;
29816 +       memset(&(slot->header),0,sizeof(slot->header));
29817 +       slot->inf.useFlag = DB_TBL_INIT;
29818 +       Commit_Log_Tbl->inf.commit_log_num --;
29819 +}
29820 +
29821 +/*
29822 + * Report whether a commit-log slot exists for header->replicate_id.
29823 + */
29824 +static bool
29825 +was_committed_transaction(ReplicateHeader * header)
29826 +{
29827 +       if (get_commit_log(header) == NULL)
29828 +       {
29829 +               return false;
29830 +       }
29831 +       return true;
29832 +}
29833 +/* Forward *header to the rlog socket, read back an int result and report it to sock through PGRreturn_result(). */
29834 +void 
29835 +PGRreconfirm_commit(int sock, ReplicateHeader * header)
29836 +{
29837 +       int result = PGR_NOT_YET_COMMIT;
29838 +
29839 +       if (Replicateion_Log == NULL) 
29840 +       {
29841 +               return ;
29842 +       }
29843 +       /* close any connection that is still open, then connect afresh */
29844 +       if (Replicateion_Log->r_log_sock > 0)
29845 +       {
29846 +               close(Replicateion_Log->r_log_sock );
29847 +               Replicateion_Log->r_log_sock = -1;
29848 +       }
29849 +       Replicateion_Log->r_log_sock = PGRcreate_send_rlog_socket();
29850 +       if (Replicateion_Log->r_log_sock == -1)
29851 +               return;
29852 +       /* header-only request: note that the caller's header is modified */
29853 +       header->query_size = 0;
29854 +       PGRsend_rlog_packet(Replicateion_Log->r_log_sock,header,"");
29855 +       PGRrecv_rlog_result(Replicateion_Log->r_log_sock,&result, sizeof(result));
29856 +       /* NOTE(review): neither return value above is checked */
29857 +       
29858 +       close(Replicateion_Log->r_log_sock );
29859 +       Replicateion_Log->r_log_sock = -1;
29860 +       /* answer format: "<PGR_TRANSACTION_CONFIRM_ANSWER_FUNC_NO>,<result>" */
29861 +       snprintf(PGR_Result,PGR_MESSAGE_BUFSIZE,"%d,%d", PGR_TRANSACTION_CONFIRM_ANSWER_FUNC_NO,result);
29862 +
29863 +       PGRreturn_result(sock, PGR_Result,PGR_NOWAIT_ANSWER);
29864 +}
29865 +
29866 +static int 
29867 +reconfirm_commit(ReplicateHeader * header)
29868 +{
29869 +       char * func = "reconfirm_commit()";
29870 +       int result = PGR_NOT_YET_COMMIT;
29871 +
29872 +       /* check the transaction was committed */
29873 +       if (was_committed_transaction(header) == true)
29874 +       {
29875 +               result = PGR_ALREADY_COMMITTED;
29876 +#ifdef PRINT_DEBUG
29877 +               show_debug("%s:PGR_ALREADY_COMMITTED",func);
29878 +#endif                 
29879 +       }
29880 +       else
29881 +       {
29882 +#ifdef PRINT_DEBUG
29883 +               show_debug("%s:PGR_NOT_YET_COMMIT",func);
29884 +#endif                 
29885 +       }
29886 +       return result;
29887 +}
29888 +/* Record a replication-log entry for query: sent to the lower cascade when Cascade_Inf is in use, otherwise (or on failure) written with PGRwrite_rlog(). */
29889 +void
29890 +PGRset_rlog(ReplicateHeader * header, char * query)
29891 +{
29892 +               char * func = "PGRset_rlog()";
29893 +               int status = STATUS_OK;
29894 +               bool send_flag = false;
29895 +
29896 +               if (PGR_Log_Header == NULL)
29897 +               {
29898 +                               return;
29899 +               }
29900 +               switch (header->cmdSts) /* only queries and COMMITs are logged */
29901 +               {
29902 +                               case CMD_STS_QUERY:
29903 +                                               send_flag = true;
29904 +                                               break;
29905 +                               case CMD_STS_TRANSACTION:
29906 +                                               if (header->cmdType == CMD_TYPE_COMMIT)
29907 +                                               {
29908 +                                                               send_flag = true;
29909 +                                                               PGR_Log_Header->cmdType = header->cmdType;
29910 +                                                               PGR_Log_Header->query_size = htonl(strlen(query)); /* stored in network byte order */
29911 +                                               }
29912 +                                               break;
29913 +               }
29914 +               if (send_flag != true)
29915 +               {
29916 +                               show_error("%s:send_flag is false",func);
29917 +                               return;
29918 +               }
29919 +               PGR_Log_Header->cmdSys = CMD_SYS_LOG; /* the global PGR_Log_Header, not *header, is what gets sent */
29920 +               if (Cascade_Inf->useFlag == DB_TBL_USE)
29921 +               {
29922 +                               /* save log data in remote server */
29923 +                               show_debug("%s:set rlog %s",func,query);
29924 +                               status = PGRsend_lower_cascade(PGR_Log_Header, query);
29925 +                       if (status == STATUS_OK) {
29926 +                                               status=PGRwait_notice_rlog_done();
29927 +                               }
29928 +                               if (status != STATUS_OK) /* send or wait failed: fall back to the local log */
29929 +                               {
29930 +#ifdef PRINT_DEBUG
29931 +                                               show_debug("%s:PGRsend_lower_cascade failed",func);
29932 +#endif                 
29933 +                                               PGRwrite_rlog(PGR_Log_Header, query);
29934 +                               }
29935 +               }
29936 +               else
29937 +               {
29938 +                               /* save log data in local server */
29939 +                               PGRwrite_rlog(PGR_Log_Header, query);
29940 +               }
29941 +}
29942 +/* Issue the delete counterpart of PGRset_rlog(): PGR_Log_Header is re-tagged with a CMD_STS_DELETE_* status and sent to the lower cascade or written with PGRwrite_rlog(). */
29943 +void
29944 +PGRunset_rlog(ReplicateHeader * header, char * query)
29945 +{
29946 +        int status = STATUS_OK;
29947 +       bool send_flag = false;
29948 +
29949 +       if (PGR_Log_Header == NULL)
29950 +       {
29951 +               return;
29952 +       }
29953 +       switch (header->cmdSts)
29954 +       {
29955 +               case CMD_STS_QUERY:
29956 +                       send_flag = true;
29957 +                       PGR_Log_Header->cmdSts = CMD_STS_DELETE_QUERY;
29958 +                       break;
29959 +               case CMD_STS_TRANSACTION:
29960 +                       if (PGR_Log_Header->cmdType == CMD_TYPE_COMMIT) /* NOTE(review): tests PGR_Log_Header->cmdType, whereas PGRset_rlog tests header->cmdType - confirm intended */
29961 +               {
29962 +                               PGR_Log_Header->cmdSts = CMD_STS_DELETE_TRANSACTION;
29963 +                               PGR_Log_Header->query_size = htonl(strlen(query));
29964 +                               send_flag = true;
29965 +                       }
29966 +                       break;
29967 +       }
29968 +       if (send_flag != true) /* nothing to delete for other command states */
29969 +       {
29970 +               return;
29971 +       }
29972 +       PGR_Log_Header->cmdSys = CMD_SYS_LOG;
29973 +       if (Cascade_Inf->useFlag == DB_TBL_USE)
29974 +       {
29975 +               /* send the delete request to the lower cascade server */
29976 +         show_debug("unset rlog %s",query);
29977 +
29978 +               status = PGRsend_lower_cascade(PGR_Log_Header, query);  
29979 +               if (status == STATUS_OK)
29980 +               {
29981 +                               status=PGRwait_notice_rlog_done();
29982 +               }
29983 +               if (status != STATUS_OK) /* send or wait failed: fall back to the local log */
29984 +               {
29985 +#ifdef PRINT_DEBUG
29986 +                       show_debug("PGRsend_lower_cascade recv failed");
29987 +#endif                 
29988 +                       PGRwrite_rlog(PGR_Log_Header, query);
29989 +               }
29990 +       }
29991 +       else
29992 +       {
29993 +               /* no cascade in use: apply the request to the local log */
29994 +                PGRwrite_rlog(PGR_Log_Header, query);
29995 +       }
29996 +}
29997 +/* Walk the query log from Query_Log_Top and re-send every entry whose header->rlog is FROM_R_LOG_TYPE through replicate_packet_send_internal(); always returns STATUS_OK. */
29998 +int
29999 +PGRresend_rlog_to_db(void)
30000 +{
30001 +  char *func="PGRresend_rlog_to_db";
30002 +       QueryLogType * query_log = NULL;
30003 +       QueryLogType * next = NULL;
30004 +       int status = STATUS_OK;
30005 +       int dest = 0;
30006 +
30007 +         show_debug("%s:enter.",func);
30008 +
30009 +       query_log = Query_Log_Top;
30010 +
30011 +       while (query_log != NULL)
30012 +       {
30013 +               /* entries of any other rlog type are left in place and stepped over */
30014 +
30015 +         show_debug("%s:processing qlog,query=%s",func,query_log->query);
30016 +               if (query_log->header->rlog != FROM_R_LOG_TYPE )
30017 +               {
30018 +                       query_log = (QueryLogType *)query_log->next;
30019 +                       continue;
30020 +               }
30021 +               status = replicate_packet_send_internal(query_log->header,query_log->query, dest,RECOVERY_INIT,false);
30022 +               show_debug("%s:status=%d",func,status);
30023 +               /* STATUS_SKIP_REPLICATE keeps the entry; any other status frees it */
30024 +               if (status == STATUS_SKIP_REPLICATE )
30025 +               {
30026 +                       Query_Log_Top = query_log;
30027 +                       query_log = (QueryLogType *)query_log->next;
30028 +                       }
30029 +               else             
30030 +               {
30031 +                       if (query_log->header != NULL)
30032 +                         {
30033 +                           free(query_log->header );
30034 +                         }
30035 +                       if (query_log->query != NULL)
30036 +                         {
30037 +                           free(query_log->query );
30038 +                         }
30039 +                       next = (QueryLogType *)query_log->next;
30040 +                       free(query_log);
30041 +                       query_log = next;
30042 +                       Query_Log_Top = query_log; /* NOTE(review): the head moves past entries stepped over earlier and the neighbours' links are not updated - verify */
30043 +               }
30044 +               if (query_log != NULL)
30045 +               {
30046 +                       Query_Log_End = (QueryLogType *)query_log->next; /* NOTE(review): the tail is set to the successor of the current entry, not to the last entry - confirm intended */
30047 +               }
30048 +               else
30049 +               {
30050 +                       Query_Log_End = (QueryLogType *)NULL;
30051 +               }
30052 +       }
30053 +
30054 +         show_debug("%s:exit.",func);
30055 +
30056 +       return STATUS_OK;
30057 +}
30058 +/* Fork the rlog process: the parent gets fork()'s return value back, the child listens on the rlog socket and handles each accepted connection with do_rlog(). */
30059 +pid_t
30060 +PGR_RLog_Main(void)
30061 +{
30062 +       char * func = "PGR_RLog_Main()";
30063 +       int afd = -1;
30064 +       int rtn;
30065 +       struct sockaddr addr;
30066 +       socklen_t addrlen;
30067 +       pid_t pid = 0;
30068 +       pid_t pgid = 0;
30069 +
30070 +       extern int fork_wait_time;
30071 +
30072 +       if (Replicateion_Log == NULL)
30073 +       {
30074 +               show_error("%s:Replicateion_Log is NULL",func);
30075 +               return -1;
30076 +       }
30077 +       pgid = getpgid(0); /* process group of the caller, re-applied in the child below */
30078 +       if ((pid = fork()) != 0 ) /* parent, or fork() failure (pid == -1) */
30079 +       {
30080 +               return pid;
30081 +       }
30082 +       PGRsignal(SIGTERM,exit_rlog);
30083 +       PGRsignal(SIGINT,exit_rlog);
30084 +       PGRsignal(SIGQUIT,exit_rlog);
30085 +       PGRsignal(SIGPIPE,SIG_IGN);
30086 +
30087 +       if (PGRinit_transaction_table() != STATUS_OK)
30088 +       {
30089 +               show_error("RLog process transaction table memory allocate failed");
30090 +               return -1; /* NOTE(review): this runs in the forked child, which returns to the caller instead of exiting - verify */
30091 +       }
30092 +
30093 +       setpgid(0,pgid);        
30094 +       RLog_Recv_Sock = create_recv_rlog_socket();
30095 +       if(RLog_Recv_Sock == -1) 
30096 +       {
30097 +               show_error("rlog socket creation failure.quit all process.");
30098 +               kill(pgreplicate_pid, SIGINT);
30099 +               exit_rlog(0);
30100 +       }
30101 +
30102 +       if (fork_wait_time > 0) {
30103 +#ifdef PRINT_DEBUG
30104 +               show_debug("rlog process: wait fork(): pid = %d", getpid());
30105 +#endif                 
30106 +               sleep(fork_wait_time);
30107 +       }
30108 +
30109 +       for (;;)
30110 +       {
30111 +               fd_set    rmask;
30112 +               struct timeval timeout;
30113 +
30114 +               timeout.tv_sec = PGR_Replication_Timeout;
30115 +               timeout.tv_usec = 0;
30116 +
30117 +               Idle_Flag = IDLE_MODE ;
30118 +               if (Exit_Request)
30119 +               {
30120 +                       exit_rlog(0);
30121 +               }
30122 +               /*
30123 +                * Wait for something to happen.
30124 +                */
30125 +               FD_ZERO(&rmask);
30126 +               FD_SET(RLog_Recv_Sock,&rmask);
30127 +               rtn = select(RLog_Recv_Sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
30128 +               if (rtn < 0) /* any other errno also just falls through to the next iteration */
30129 +               {
30130 +                       if (errno == EINTR || errno == EAGAIN)
30131 +                               continue;
30132 +               }
30133 +               else if (rtn && FD_ISSET(RLog_Recv_Sock, &rmask))
30134 +               {
30135 +                       Idle_Flag = BUSY_MODE ;
30136 +                       addrlen = sizeof(addr);
30137 +                       afd = accept(RLog_Recv_Sock, &addr, &addrlen);
30138 +                       if (afd < 0)
30139 +                       {
30140 +                               continue;
30141 +                       }
30142 +                       else
30143 +                       {
30144 +                               do_rlog(afd); /* one request per connection, handled inline */
30145 +                               close(afd);
30146 +                       }
30147 +               }
30148 +       }
30149 +       exit(0);
30150 +}
30151 +
30152 +/*
30153 + * Create the listening UNIX domain socket of the rlog process (path
30154 + * "<PGR_Write_Path>/.s.PGRLOG.<RLog_Port_Number>").  Returns the socket,
30155 + * or -1 on failure; the socket is closed on failure so it cannot leak.
30156 + */
30157 +static int 
30158 +create_recv_rlog_socket(void)
30159 +{
30160 +       char * func = "create_recv_rlog_socket()";
30161 +       struct sockaddr_un addr;
30162 +       int fd;
30163 +
30164 +       /* set unix domain socket path */
30165 +       fd = socket(AF_UNIX, SOCK_STREAM, 0);
30166 +       if (fd == -1)
30167 +       {
30168 +               show_error("%s:Failed to create UNIX domain socket. reason: %s",func,  strerror(errno));
30169 +               return -1;
30170 +       }
30171 +       memset((char *) &addr, 0, sizeof(addr));
30172 +       ((struct sockaddr *)&addr)->sa_family = AF_UNIX;
30173 +       snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/.s.PGRLOG.%d",
30174 +                       PGR_Write_Path, Replicateion_Log->RLog_Port_Number);
30175 +fprintf(stderr,"addr.sun_path[%s]\n",addr.sun_path);
30176 +       if (Replicateion_Log->RLog_Sock_Path == NULL)
30177 +       {
30178 +               Replicateion_Log->RLog_Sock_Path = strdup(addr.sun_path);
30179 +fprintf(stderr,"Replicateion_Log->RLog_Sock_Path[%s]\n",Replicateion_Log->RLog_Sock_Path);
30180 +       }
30181 +       if (bind(fd, (struct sockaddr *)&addr, sizeof(struct sockaddr_un)) == -1)
30182 +       {
30183 +               show_error("%s: bind() failed. reason: %s", func, strerror(errno));
30184 +               close(fd);      /* after the report: close() may change errno */
30185 +               return -1;
30186 +       }
30187 +       if (chmod(addr.sun_path, 0770) == -1)
30188 +       {
30189 +               show_error("%s: chmod() failed. reason: %s", func, strerror(errno));
30190 +               close(fd);
30191 +               return -1;
30192 +       }
30193 +       if (listen(fd, 1000000) < 0)
30194 +       {
30195 +               show_error("%s: listen() failed. reason: %s", func, strerror(errno));
30196 +               close(fd);
30197 +               return -1;
30198 +       }
30199 +       return fd;
30200 +}
30201 +
30202 +/* Connect to the rlog UNIX domain socket
30203 + * "<PGR_Write_Path>/.s.PGRLOG.<RLog_Port_Number>"; returns the connected
30204 + * descriptor, or -1 when socket() or connect() fails. */
30205 +int 
30206 +PGRcreate_send_rlog_socket(void)
30207 +{
30208 +       char * func = "PGRcreate_send_rlog_socket()";
30209 +       struct sockaddr_un addr;
30210 +       int fd;
30211 +
30212 +       fd = socket(AF_UNIX, SOCK_STREAM, 0);
30213 +       if (fd == -1)
30214 +       {
30215 +               show_error("%s:Failed to create UNIX domain socket. reason: %s",func,  strerror(errno));
30216 +               return -1;
30217 +       }
30218 +       /* set unix domain socket path */
30219 +       memset((char *) &addr, 0, sizeof(addr));
30220 +       ((struct sockaddr *)&addr)->sa_family = AF_UNIX;
30221 +       snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/.s.PGRLOG.%d",
30222 +                       PGR_Write_Path, Replicateion_Log->RLog_Port_Number);
30223 +       if (Replicateion_Log->RLog_Sock_Path == NULL)
30224 +       {
30225 +               Replicateion_Log->RLog_Sock_Path = strdup(addr.sun_path);
30226 +       }
30227 +       if (connect(fd, (struct sockaddr *)&addr, sizeof(struct sockaddr_un)) < 0)
30228 +       {
30229 +               close(fd);
30230 +               return -1;
30231 +       }
30232 +       return fd;
30233 +}
30234 +/* Serve one rlog request on fd: read a packet with PGRread_packet(), dispatch on header.cmdSys and send the reply back on fd. */
30235 +static int
30236 +do_rlog(int fd)
30237 +{
30238 +       char * func = "do_rlog()";
30239 +       QueryLogType * query_log = NULL;
30240 +       ReplicateHeader  header;
30241 +       char * query = NULL;
30242 +       int status = STATUS_OK;
30243 +
30244 +       memset(&header,0,sizeof(header));
30245 +       query = PGRread_packet(fd, &header); /* NOTE(review): query is never freed in this function - confirm who owns the buffer */
30246 +       show_debug("%s:got result:cmdSys='%c'",func,header.cmdSys);
30247 +       if (header.cmdSys == 0) /* header was zeroed above, so 0 means cmdSys was not filled in */
30248 +       {
30249 +               return STATUS_ERROR;
30250 +       }
30251 +       switch (header.cmdSys)
30252 +       {
30253 +               case CMD_SYS_REPLICATE:
30254 +                       if (header.cmdSts != CMD_STS_DELETE_QUERY)
30255 +                       {
30256 +                               query_log = get_query_log_by_header(&header);
30257 +                               if (query_log != (QueryLogType*)NULL)
30258 +                               {
30259 +                                       memcpy(&header,query_log->header,sizeof(ReplicateHeader));
30260 +                               }
30261 +                               send_message(fd,(char *)&header,sizeof(ReplicateHeader)); /* reply is the stored header if found, else the request header */
30262 +                               header.cmdSts = CMD_STS_DELETE_QUERY;
30263 +                               PGRwrite_rlog(&header, NULL);
30264 +                       }
30265 +                       else
30266 +                       {
30267 +                               status = PGRwrite_rlog((ReplicateHeader*)&header,(char *)NULL);
30268 +                               send_message(fd,(char *)&status,sizeof(status));
30269 +                       }
30270 +                       break;
30271 +               case CMD_SYS_LOG:
30272 +                       status = PGRwrite_rlog((ReplicateHeader*)&header, query);
30273 +                       send_message(fd,(char *)&status,sizeof(status));
30274 +                       break;
30275 +               case  CMD_SYS_CALL:
30276 +                       if (header.cmdSts == CMD_STS_TRANSACTION_ABORT)
30277 +                       {
30278 +#ifdef PRINT_DEBUG
30279 +                               show_debug("%s: CMD_STS_TRANSACTION_ABORT",func);
30280 +#endif
30281 +                               status = reconfirm_commit(&header);
30282 +                       }
30283 +                       else if (header.cmdSts == CMD_STS_QUERY_SUSPEND)
30284 +                       {
30285 +#ifdef PRINT_DEBUG
30286 +                               show_debug("%s: CMD_STS_QUERY_SUSPEND",func);
30287 +#endif                        
30288 +                               //                      status = PGRresend_rlog_to_db();
30289 +                       }
30290 +#ifdef PRINT_DEBUG
30291 +                               show_debug("%s: SYS_CALL process done",func);
30292 +#endif
30293 +                       send_message(fd,(char *)&status,sizeof(status));
30294 +                       break;
30295 +       }
30296 +       show_debug("%s:process result done:cmdSys='%c'",func,header.cmdSys);
30297 +       return STATUS_OK; /* the status of the individual operations is only sent to the peer, not returned */
30298 +}
30299 +
30300 +#if 0
30301 +/*
30302 + * (compiled out by the surrounding #if 0)
30303 + * Send *header over a fresh rlog connection and read a ReplicateHeader
30304 + * back.  When the reply has a non-zero cmdSts it is copied over *header
30305 + * and STATUS_OK is returned; otherwise STATUS_ERROR is returned.
30306 + */
30307 +static int
30308 +PGRget_sync_data(ReplicateHeader *header)
30309 +{
30310 +       ReplicateHeader reply;
30311 +       int rc = STATUS_ERROR;
30312 +
30313 +       if (header == NULL)
30314 +       {
30315 +               return STATUS_ERROR;
30316 +       }
30317 +       /* drop any connection that is still open before reconnecting */
30318 +       if (Replicateion_Log->r_log_sock > 0)
30319 +       {
30320 +               close(Replicateion_Log->r_log_sock );
30321 +               Replicateion_Log->r_log_sock = -1;
30322 +       }
30323 +       Replicateion_Log->r_log_sock = PGRcreate_send_rlog_socket();
30324 +       if (Replicateion_Log->r_log_sock == -1)
30325 +               return STATUS_ERROR;
30326 +       memset(&reply,0,sizeof(ReplicateHeader));
30327 +       send_message( Replicateion_Log->r_log_sock, (char *)header,sizeof(ReplicateHeader));
30328 +       recv_message( Replicateion_Log->r_log_sock, (char *)&reply,sizeof(ReplicateHeader));
30329 +       close(Replicateion_Log->r_log_sock );
30330 +       Replicateion_Log->r_log_sock = -1;
30331 +       if (reply.cmdSts != 0)
30332 +       {
30333 +               memcpy(header,&reply, sizeof(ReplicateHeader));
30334 +               rc = STATUS_OK;
30335 +       }
30336 +       return rc;
30337 +}
30338 +
30339 +/*
30340 + * (inside #if 0) Send *header, with cmdSts temporarily replaced by
30341 + * CMD_STS_DELETE_QUERY, over a fresh rlog connection; return the reply.
30342 + */
30343 +static int
30344 +PGRdelete_sync_data(ReplicateHeader *header)
30345 +{
30346 +       int status = STATUS_ERROR;      /* returned if no reply is received */
30347 +       char cmdSts;
30348 +
30349 +       if (header == NULL)     /* checked before header is dereferenced */
30350 +       {
30351 +               return STATUS_ERROR;
30352 +       }
30353 +       if (Replicateion_Log->r_log_sock > 0)
30354 +       {
30355 +               close(Replicateion_Log->r_log_sock );
30356 +               Replicateion_Log->r_log_sock = -1;
30357 +       }
30358 +       Replicateion_Log->r_log_sock = PGRcreate_send_rlog_socket();
30359 +       if (Replicateion_Log->r_log_sock == -1)
30360 +               return STATUS_ERROR;
30361 +       /* patch cmdSts only for the send, so *header is restored on every path */
30362 +       cmdSts = header->cmdSts;
30363 +       header->cmdSts = CMD_STS_DELETE_QUERY;
30364 +       send_message( Replicateion_Log->r_log_sock, (char *)header,sizeof(ReplicateHeader));
30365 +       header->cmdSts = cmdSts;
30366 +       recv_message( Replicateion_Log->r_log_sock, (char *)&status,sizeof(status));
30367 +       close(Replicateion_Log->r_log_sock );
30368 +       Replicateion_Log->r_log_sock = -1;
30369 +       return status;
30370 +}
30371 +#endif /* #if 0 */
30372 +
30373 +/*
30374 + * Send *header followed by query_string as one message on sock.  The
30375 + * query length comes from header->query_size (network byte order) and
30376 + * counts as 0 when query_string is NULL.  Returns STATUS_ERROR on bad
30377 + * arguments or allocation failure, else the send_message() result.
30378 + */
30379 +int
30380 +PGRsend_rlog_packet(int sock,ReplicateHeader * header, const char * query_string)
30381 +{
30382 +       const int hdr_len = sizeof(ReplicateHeader);
30383 +       int body_len = 0;
30384 +       int result = 0;
30385 +       char * packet = NULL;
30386 +
30387 +       /* check parameter */
30388 +       if ((sock <= 0) || (header == NULL))
30389 +       {
30390 +               return STATUS_ERROR;
30391 +       }
30392 +       if (query_string != NULL)
30393 +               body_len = ntohl(header->query_size);
30394 +       /* 4 spare, zero-filled bytes are allocated behind the payload */
30395 +       packet = (char *)malloc(hdr_len + body_len + 4);
30396 +       if (packet == (char *)NULL)
30397 +       {
30398 +               return STATUS_ERROR;
30399 +       }
30400 +       memset(packet,0,hdr_len + body_len + 4);
30401 +       memcpy(packet,header,hdr_len);
30402 +       if (body_len > 0)
30403 +               memcpy(packet + hdr_len,query_string,body_len+1);
30404 +       /* only header and query are transmitted, not the spare bytes */
30405 +       result = send_message(sock,packet,hdr_len + body_len);
30406 +       free(packet);
30407 +       return result;
30408 +}
30409 +
30410 +/*
30411 + * Wait until sock is readable, then read exactly size bytes into result
30412 + * with recv_message() and return its value.  Returns -1 on bad arguments
30413 + * or when select() fails with an errno other than EINTR/EAGAIN.
30414 + */
30415 +int
30416 +PGRrecv_rlog_result(int sock,void * result, int size)
30417 +{
30418 +       char *func = "PGRrecv_rlog_result";
30419 +       fd_set      rmask;
30420 +       struct timeval timeout;
30421 +       int rtn;
30422 +
30423 +       if ((result == (void *)NULL) || (size <= 0))
30424 +       {
30425 +               return -1;
30426 +       }
30427 +       for (;;)        /* a select() timeout (rtn == 0) simply waits again */
30428 +       {
30429 +               timeout.tv_sec = PGR_Replication_Timeout;
30430 +               timeout.tv_usec = 0;
30431 +               FD_ZERO(&rmask);
30432 +               FD_SET(sock,&rmask);
30433 +               rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
30434 +               if (rtn < 0)
30435 +               {
30436 +                       /* "&&", not "||": EINTR and EAGAIN must be retried */
30437 +                       if (errno != EINTR && errno != EAGAIN)
30438 +                       {
30439 +                               show_error("%s: select() failed (%s)",func,strerror(errno));
30440 +                               return -1;
30441 +                       }
30442 +               }
30443 +               else if (rtn && FD_ISSET(sock, &rmask))
30444 +               {
30445 +                       return (recv_message(sock, (char*)result, size));
30446 +               }
30447 +       }
30448 +       return -1;
30449 +}
30450 +
30451 +
30452 +/*
30453 + * Read exactly len bytes from sock into buf, retrying recv() after
30454 + * EINTR/EAGAIN.  Returns len on success, -1 on a recv() error or when
30455 + * recv() reports end of stream before len bytes have arrived.
30456 + */
30457 +static int
30458 +recv_message(int sock,char * buf, int len)
30459 +{
30460 +       char *func = "recv_message";
30461 +       int got = 0;            /* bytes stored in buf so far */
30462 +       int n;
30463 +
30464 +       for (;;)
30465 +       {
30466 +               n = recv(sock,buf + got,len - got, 0);
30467 +               /* recv() returning 0 is reported as an unexpected EOF */
30468 +               if (n == 0)
30469 +               {
30470 +                       show_error("%s:unexpected EOF", func);
30471 +                       return -1;
30472 +               }
30473 +               if (n < 0)
30474 +               {
30475 +                       /* interrupted or would block: try again */
30476 +                       if (errno == EINTR || errno == EAGAIN)
30477 +                               continue;
30478 +                       show_error("%s:recv failed: %d(%s)",func, errno, strerror(errno));
30479 +                       return -1;
30480 +               }
30481 +               /* partial read: keep going until the buffer is full */
30482 +               got += n;
30483 +               if (got == len)
30484 +               {
30485 +                       return got;
30486 +               }
30487 +       }
30488 +       return -1;
30489 +}
30490 +/* Send header and query to the rlog socket over a fresh connection and return the int status read back from it. */
30491 +int
30492 +PGRsend_rlog_to_local(ReplicateHeader * header,char * query)
30493 +{
30494 +       int status = STATUS_OK;
30495 +
30496 +       if (Replicateion_Log == NULL) 
30497 +       {
30498 +               return STATUS_ERROR;
30499 +       }
30500 +       /* close any connection that is still open, then connect afresh */
30501 +       if (Replicateion_Log->r_log_sock > 0)
30502 +       {
30503 +               close(Replicateion_Log->r_log_sock );
30504 +               Replicateion_Log->r_log_sock = -1;
30505 +       }
30506 +       
30507 +       Replicateion_Log->r_log_sock = PGRcreate_send_rlog_socket();
30508 +       if (Replicateion_Log->r_log_sock == -1)
30509 +               return STATUS_ERROR;
30510 +       
30511 +       show_debug("send_to_local %s",query);
30512 +       status = PGRsend_rlog_packet(Replicateion_Log->r_log_sock,header,query);
30513 +       show_debug("send_to_local result is %d,errno=%d(%s)",status,errno ,strerror(errno));
30514 +       /* on a successful send, status is overwritten by the reply read from the socket */
30515 +       if (status != STATUS_ERROR)
30516 +       {
30517 +               PGRrecv_rlog_result(Replicateion_Log->r_log_sock,&status, sizeof(status)); /* NOTE(review): return value unchecked; if the read fails status keeps the send result */
30518 +       }
30519 +       
30520 +       close(Replicateion_Log->r_log_sock );
30521 +       Replicateion_Log->r_log_sock = -1;
30522 +       
30523 +       return status;
30524 +}
30525 +/* Send a copy of *header tagged CMD_SYS_REPLICATE to the rlog socket and overwrite *header with the ReplicateHeader read back; returns STATUS_OK or STATUS_ERROR. */
30526 +int
30527 +PGRget_rlog_header(ReplicateHeader * header)
30528 +{
30529 +       int status = STATUS_OK;
30530 +       ReplicateHeader rlog_header;
30531 +
30532 +       if ((Replicateion_Log == NULL) || 
30533 +               (header == NULL))
30534 +       {
30535 +               return STATUS_ERROR;
30536 +       }
30537 +       /* close any connection that is still open, then connect afresh */
30538 +       if (Replicateion_Log->r_log_sock > 0)
30539 +       {
30540 +               close(Replicateion_Log->r_log_sock );
30541 +               Replicateion_Log->r_log_sock = -1;
30542 +       }
30543 +       Replicateion_Log->r_log_sock = PGRcreate_send_rlog_socket();
30544 +       if (Replicateion_Log->r_log_sock == -1)
30545 +               return STATUS_ERROR;
30546 +       /* work on a local copy so *header stays untouched unless a reply arrives */
30547 +       memcpy(&rlog_header,header,sizeof(ReplicateHeader));
30548 +       rlog_header.cmdSys = CMD_SYS_REPLICATE;
30549 +       rlog_header.query_size = 0;
30550 +       status =PGRsend_rlog_packet(Replicateion_Log->r_log_sock,&rlog_header,"");
30551 +       if (status != STATUS_ERROR)
30552 +       {
30553 +               status = PGRrecv_rlog_result(Replicateion_Log->r_log_sock,&rlog_header, sizeof(ReplicateHeader));
30554 +               if (status > 0) /* a positive value is the byte count of a complete reply */
30555 +               {
30556 +                       memcpy(header,&rlog_header,sizeof(ReplicateHeader));
30557 +                       status = STATUS_OK;
30558 +               }
30559 +               else
30560 +               {
30561 +                       status = STATUS_ERROR;
30562 +               }
30563 +       }
30564 +       
30565 +       close(Replicateion_Log->r_log_sock );
30566 +       Replicateion_Log->r_log_sock = -1;
30567 +               
30568 +       return status;
30569 +}
30570 +
30571 +static int
30572 +send_message(int sock, char * msg, int len)
30573 +{
30574 +       char * func = "send_message()";
30575 +       fd_set    wmask;
30576 +       struct timeval timeout;
30577 +       int rtn = 0;
30578 +       char * send_ptr = NULL;
30579 +       int send_size= 0;
30580 +       int buf_size = 0;
30581 +       int s = 0;
30582 +       int flag = 0;
30583 +       
30584 +       if ((msg == NULL) || (len <= 0) || (sock <= 0))
30585 +       {
30586 +               return STATUS_ERROR;
30587 +       }
30588 +       send_ptr = msg;
30589 +       buf_size = len;
30590 +
30591 +       /*
30592 +        * Wait for something to happen.
30593 +        */
30594 +#ifdef MSG_DONTWAIT
30595 +       flag |= MSG_DONTWAIT;
30596 +#endif
30597 +#ifdef MSG_NOSIGNAL
30598 +       flag |= MSG_NOSIGNAL;
30599 +#endif
30600 +
30601 +       for (;;)
30602 +       {
30603 +               timeout.tv_sec = PGR_Replication_Timeout;
30604 +               timeout.tv_usec = 0;
30605 +
30606 +               FD_ZERO(&wmask);
30607 +               FD_SET(sock,&wmask);
30608 +               rtn = select(sock+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
30609 +         
30610 +               if (rtn < 0 )
30611 +               {
30612 +                       if (errno == EAGAIN || errno == EINTR)
30613 +                               continue;
30614 +
30615 +                       show_error("%s:send-select error: %d(%s)",func,errno,strerror(errno));
30616 +                       return STATUS_ERROR;
30617 +               }
30618 +               else if (rtn & FD_ISSET(sock, &wmask))
30619 +               {
30620 +                       s = send(sock,send_ptr + send_size,buf_size - send_size ,flag); 
30621 +                       if (s < 0)
30622 +                       {
30623 +                               if (errno == EINTR || errno == EAGAIN)
30624 +                                       continue;
30625 +                               else
30626 +                               {
30627 +                                       show_error("%s:send error: %d(%s)",func,errno,strerror(errno));
30628 +                                       memset(send_ptr, 0, len);
30629 +                                       return STATUS_ERROR;
30630 +                               }
30631 +                       }
30632 +                       else if (s == 0)
30633 +                       {
30634 +                               show_error("%s:unexpected EOF");
30635 +                               memset(send_ptr, 0, len);
30636 +                               return STATUS_ERROR;
30637 +                       }
30638 +                       else /* s > 0 */
30639 +                       {
30640 +                               send_size += s;
30641 +                               if (send_size == buf_size)
30642 +                               {
30643 +                                       return STATUS_OK;
30644 +                               }
30645 +                       }
30646 +               }
30647 +       }
30648 +       show_error("%s:send-select unknown error: %d(%s)",
30649 +                          func,errno,strerror(errno));
30650 +       return STATUS_ERROR;
30651 +}
30652 +
30653 +static void
30654 +exit_rlog(int sig)     /* shutdown routine taking a signal number (presumably installed as a signal handler - confirm at the registration site) */
30655 +{
30656 +       sigset_t mask;
30657 +
30658 +       Exit_Request = true;    /* recorded even when the exit itself is deferred below */
30659 +       if (sig == SIGTERM)
30660 +       {
30661 +               if (Idle_Flag == BUSY_MODE)
30662 +               {
30663 +                       return; /* SIGTERM while busy: only the request flag is set */
30664 +               }
30665 +       }
30666 +       /* block further termination signals while cleaning up */
30667 +       sigemptyset(&mask);
30668 +       sigaddset(&mask, SIGINT);
30669 +       sigaddset(&mask, SIGTERM);
30670 +       sigaddset(&mask, SIGQUIT);
30671 +       sigprocmask(SIG_BLOCK, &mask, NULL);
30672 +
30673 +       if (RLog_Recv_Sock >= 0)
30674 +       {
30675 +               close(RLog_Recv_Sock);
30676 +               RLog_Recv_Sock = -1;
30677 +       }
30678 +       if (Replicateion_Log != NULL && Replicateion_Log->RLog_Sock_Path != NULL)       /* Replicateion_Log may be NULL, as PGRget_rlog_header also checks */
30679 +       {
30680 +               unlink(Replicateion_Log->RLog_Sock_Path);       /* remove the socket file from the filesystem */
30681 +               free(Replicateion_Log->RLog_Sock_Path);
30682 +       }
30683 +       exit(0);
30684 +}
30685 +#endif /* USE_REPLICATION */
30686 diff -aruN postgresql-8.2.4/src/pgcluster/tool/Makefile pgcluster-1.7.0rc7/src/pgcluster/tool/Makefile
30687 --- postgresql-8.2.4/src/pgcluster/tool/Makefile        1970-01-01 01:00:00.000000000 +0100
30688 +++ pgcluster-1.7.0rc7/src/pgcluster/tool/Makefile      2007-02-18 22:52:17.000000000 +0100
30689 @@ -0,0 +1,32 @@
30690 +#-------------------------------------------------------------------------
30691 +#
30692 +# Makefile for src/pgcluster/tool (builds the pgcbench benchmark program)
30693 +#
30694 +#-------------------------------------------------------------------------
30695 +
30696 +subdir = src/pgcluster/tool
30697 +top_builddir = ../../..
30698 +include $(top_builddir)/src/Makefile.global
30699 +
30700 +OBJS= pgcbench.o
30701 +
30702 +# EXTRA_OBJS is left unset, so $(EXTRA_OBJS) in the link line expands to nothing:
30703 +# EXTRA_OBJS = $(top_builddir)/src/backend/libpq/replicate_com.o
30704 +override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) -DBINDIR=\"$(bindir)\"
30705 +
30706 +all: pgcbench
30707 +# Link against libpq; the output carries $(X) so it matches the name used by install/uninstall/clean.
30708 +pgcbench: $(OBJS) $(libpq_builddir)/libpq.a
30709 +       $(CC) $(CFLAGS) $(OBJS) $(EXTRA_OBJS) $(libpq) $(LDFLAGS) $(LIBS) -o $@$(X)
30710 +
30711 +install: all installdirs
30712 +       $(INSTALL_PROGRAM) pgcbench$(X) $(DESTDIR)$(bindir)/pgcbench$(X)
30713 +
30714 +installdirs:
30715 +       $(mkinstalldirs) $(DESTDIR)$(bindir)
30716 +
30717 +uninstall:
30718 +       rm -f $(addprefix $(DESTDIR)$(bindir)/, pgcbench$(X))
30719 +
30720 +clean distclean maintainer-clean:
30721 +       rm -f pgcbench$(X) $(OBJS)
30722 diff -aruN postgresql-8.2.4/src/pgcluster/tool/README.jp pgcluster-1.7.0rc7/src/pgcluster/tool/README.jp
30723 --- postgresql-8.2.4/src/pgcluster/tool/README.jp       1970-01-01 01:00:00.000000000 +0100
30724 +++ pgcluster-1.7.0rc7/src/pgcluster/tool/README.jp     2007-02-18 22:52:17.000000000 +0100
30725 @@ -0,0 +1,296 @@
30726 +$Id$
30727 +
30728 +\e$B"#\e(B pgcbench \e$B$H$O\e(B
30729 +
30730 +pgcbench \e$B$O\e(B PGCluster \e$B$N%Y%s%A%^!<%/%F%9%H$r9T$&%W%m%0%i%`$G$9!#$b$A$m\e(B
30731 +\e$B$s!"\e(BPGCluster \e$B$@$1$G$O$J$/\e(B PostgreSQL \e$B$N%Y%s%A%^!<%/$r<B;\$9$k$3$H$b$G\e(B
30732 +\e$B$-$^$9!#\e(B
30733 +
30734 +pgcbench \e$B$O\e(B SELECT\e$B!"\e(BUPDATE\e$B!"\e(BINSERT \e$B$r4^$`%H%i%s%6%/%7%g%s$r<B9T$7!"A4\e(B
30735 +\e$BBN$N<B9T;~4V$H<B:]$K40N;$7$?$G$"$m$&%H%i%s%6%/%7%g%s$N?t$+$i\e(B 1 \e$BIC4V$K\e(B
30736 +\e$B<B9T$G$-$?%H%i%s%6%/%7%g%s?t\e(B (tps) \e$B$rI=<($7$^$9!#=hM}$NBP>]$H$J$k%F!<\e(B
30737 +\e$B%V%k$O%G%U%)%k%H$G$O\e(B 10 \e$BK|9T$N%G!<%?$r4^$_$^$9!#\e(B
30738 +
30739 +\e$B<B:]$KI=<($O0J2<$N$h$&$K$J$j$^$9!#\e(B
30740 +
30741 +   number of clients: 4
30742 +   number of transactions actually processed: 100
30743 +   run time (sec) = 4.416423 
30744 +   tps = 22.642759 (including connections establishing)
30745 +
30746 +\e$B"(\e(B \e$BCm0U\e(B
30747 +
30748 +   pgcbench \e$B$G$O!"%H%i%s%6%/%7%g%s$,<B:]$K40N;$7$?$+$I$&$+$K4X78$J$/!"\e(B
30749 +   \e$B:G=i$K;XDj$5$l$?%H%i%s%6%/%7%g%s$N?t$r$b$H$K\e(B tps \e$B$r5a$a$F$$$k$?$a!"\e(B
30750 +   \e$B%Y%s%A%^!<%/$,ESCf$G=*N;$7$F$7$^$C$?>l9g!"I=<($5$l$k\e(B tps \e$B$,?.MQ$G$-\e(B
30751 +   \e$B$J$$2DG=@-$,$"$k$3$H$KCm0U$7$F$/$@$5$$!#\e(B
30752 +
30753 +\e$B$J$*!"\e(Bpgcbench \e$B$O\e(B pgbench \e$B$H$$$&\e(B PostgreSQL \e$BMQ$K=q$+$l$?%Y%s%A%^!<%/%F\e(B
30754 +\e$B%9%H$r9T$J$&%W%m%0%i%`$r$b$H$K:n@.$5$l$^$7$?!#\e(B
30755 +
30756 +
30757 +\e$B"#\e(B pgbench \e$B$H$N0c$$\e(B
30758 +
30759 +\e$B!&\e(B \e$B%^%k%A%f!<%64D6-$N<B8=J}K!\e(B
30760 +
30761 +   pgbench \e$B$,\e(B libpq \e$B$NHsF14|=hM}5!G=$r;H$C$?%7%s%0%k%W%m%;%9$K$h$C$F%7\e(B
30762 +   \e$B%_%e%l!<%H$7$F$$$k$N$KBP$7$F!"\e(Bpgcbench \e$B$G$O\e(B fork \e$B$r;H$C$?%^%k%A%W%m\e(B
30763 +   \e$B%;%9$K$h$C$F<B8=$7$F$$$^$9!#$3$l$O!"%7%s%0%k%W%m%;%9$G$O%m%C%/$,H/\e(B
30764 +   \e$B@8$9$k$H!"\e(BPGCluster \e$B$,;_$^$C$F$7$^$&$3$H$rHr$1$k$?$a$G$9!#\e(B
30765 +
30766 +\e$B!&\e(B \e$B%*%W%7%g%s$NJQ99\e(B
30767 +
30768 +   pgcbench \e$B$K$O!"\e(BPGCluster \e$B$N%Y%s%A%^!<%/%F%9%H$r9T$J$&$N$KJXMx$J!"%H\e(B
30769 +   \e$B%i%s%6%/%7%g%s$NFbMF$r;XDj$9$k%*%W%7%g%s$,!"$$$/$D$+DI2C$5$l$F$$$^\e(B
30770 +   \e$B$9!%\e(B
30771 +
30772 +
30773 +\e$B"#\e(B pgcbench \e$B$N%$%s%9%H!<%k\e(B
30774 +
30775 +1. PGCluster \e$B$r\e(B configure\e$B!"\e(Bmake \e$B$7$^$9!#\e(B
30776 +
30777 +   pgcbench \e$B$N%$%s%9%H!<%k$@$1$,L\E*$G$"$l$P!"\e(BPGCluster \e$B$N$9$Y$F$r%3%s\e(B
30778 +   \e$B%Q%$%k$9$kI,MW$O$"$j$^$;$s!#\e(BPGCluster \e$B%=!<%9$N%H%C%W%G%#%l%/%H%j$G\e(B 
30779 +   configure \e$B$r$7$?8e!"\e(Bsrc/interface/libpq \e$B$G\e(B make all \e$B$r<B9T$9$l$P=`\e(B
30780 +   \e$BHw40N;$G$9!#\e(B
30781 +
30782 +2. \e$B$3$N%G%#%l%/%H%j\e(B (src/pgcluster/tool) \e$B$G\e(B make \e$B$r<B9T$7$^$9!#\e(B
30783 +
30784 +   \e$B$=$&$9$k$H!"\e(Bpgcbench \e$B$H$$$&<B9T%W%m%0%i%`$,$G$-$^$9!#$=$N$^$^<B9T$7\e(B
30785 +   \e$B$F$b9=$$$^$;$s$7!"\e(Bmake install \e$B$r<B9T$7$F\e(B PGCluster \e$B$NI8=`<B9T%W%m\e(B
30786 +   \e$B%0%i%`%G%#%l%/%H%j\e(B (\e$B%G%U%)%k%H$G$O\e(B /usr/local/pgsql/bin) \e$B$K%$%s%9%H!<\e(B
30787 +   \e$B%k$9$k$3$H$b$G$-$^$9!#\e(B
30788 +
30789 +
30790 +\e$B"#\e(B pgcbench \e$B$N;H$$J}\e(B
30791 +
30792 +   pgcbench [\e$B%*%W%7%g%s\e(B] [\e$B%G!<%?%Y!<%9L>\e(B]
30793 +
30794 +\e$B%G!<%?%Y!<%9L>$r>JN,$9$k$H!"%m%0%$%sL>$HF1$8%G!<%?%Y!<%9L>$r;XDj$7$?$b\e(B
30795 +\e$B$N$H8+$J$7$^$9!#$J$*!"\e(Bpgcbench \e$B$G%G%U%)%k%H$N%Y%s%A%^!<%/$r<B;\$9$k$?\e(B
30796 +\e$B$a$K$O!"8e=R$N\e(B -i \e$B%*%W%7%g%s$r;HMQ$7$F%G!<%?%Y!<%9$r$"$i$+$8$a=i4|2=$7\e(B
30797 +\e$B$F$*$/I,MW$,$"$j$^$9!#\e(B
30798 +
30799 +pgcbench \e$B$K$O$$$m$$$m$J%*%W%7%g%s$,$"$j$^$9!#\e(B
30800 +
30801 +-h \e$B%[%9%HL>\e(B
30802 +
30803 +   PostgreSQL\e$B$N\e(B \e$B%G!<%?%Y!<%9%G!<%b%s\e(B postmaster \e$B$NF0:n$7$F$$$k%[%9%HL>\e(B
30804 +   \e$B$r;XDj$7$^$9!#>JN,$9$k$H4D6-JQ?t\e(B PGHOST \e$B$K@_Dj$7$?%[%9%HL>$,;XDj$5\e(B
30805 +   \e$B$l$^$9!#\e(BPGHOST \e$B$b;XDj$5$l$F$$$J$$$H<+%[%9%H$K\e(B Unix \e$B%I%a%$%s%=%1%C%H\e(B
30806 +   \e$B$G@\B3$7$^$9!#\e(B
30807 +
30808 +-p \e$B%]!<%HHV9f\e(B
30809 +
30810 +   postmaster \e$B$N;HMQ$9$k%]!<%HHV9f$r;XDj$7$^$9!#>JN,$9$k$H4D6-JQ?t\e(B
30811 +   PGPORT \e$B$K@_Dj$7$?%]!<%HHV9f$,;XDj$5$l$^$9!#\e(BPGPORT \e$B$b;XDj$5$l$F$$$J\e(B
30812 +   \e$B$$$H\e(B 5432 \e$B$,;XDj$5$l$?$b$N$H8+$J$7$^$9!#\e(B
30813 +
30814 +-c \e$B%/%i%$%"%s%H?t\e(B
30815 +
30816 +   \e$BF1;~<B9T%/%i%$%"%s%H?t$r;XDj$7$^$9!#>JN,;~$O\e(B 1 \e$B$H$J$j$^$9!#\e(Bpgcbench 
30817 +   \e$B$OF1;~<B9T%/%i%$%"%s%H$4$H$K%U%!%$%k%G%#%9%/%j%W%?$r;HMQ$9$k$N$G!"\e(B
30818 +   \e$B;HMQ2DG=%U%!%$%k%G%#%9%/%j%W%??t$r1[$($k%/%i%$%"%s%H?t$O;XDj$G$-$^\e(B
30819 +   \e$B$;$s!#;HMQ2DG=%U%!%$%k%G%#%9%/%j%W%??t$O\e(B limit \e$B$d\e(B ulimit \e$B%3%^%s%I$G\e(B
30820 +   \e$B3NG'$9$k$3$H$,$G$-$^$9!#\e(B
30821 +
30822 +-t \e$B%H%i%s%6%/%7%g%s?t\e(B
30823 +
30824 +   \e$B%H%i%s%6%/%7%g%s?t$r;XDj$7$^$9!#3F%/%i%$%"%s%H$,<B9T$9$k%H%i%s%6%/\e(B
30825 +   \e$B%7%g%s?t$O$3$l$r%/%i%$%"%s%H?t$G3d$C$??t$H$J$j$^$9!#>JN,;~$O\e(B 10 \e$B$H\e(B
30826 +   \e$B$J$j$^$9!#\e(B
30827 +
30828 +-s \e$B%9%1!<%j%s%0%U%!%/%?!<\e(B
30829 +
30830 +   -i \e$B%*%W%7%g%s$H$H$b$K;HMQ$7$^$9!#%9%1!<%j%s%0%U%!%/%?!<$O\e(B 1 \e$B0J>e$N\e(B
30831 +   \e$B@0?t$G$9!#%9%1!<%j%s%0%U%!%/%?!<$rJQ$($k$3$H$K$h$j!"%F%9%H$NBP>]$H\e(B
30832 +   \e$B$J$k%F!<%V%k$NBg$-$5$,\e(B 10 \e$BK|\e(B \e$B!_\e(B \e$B%9%1!<%j%s%0%U%!%/%?!<$K$J$j$^$9!#\e(B
30833 +   \e$B%G%U%)%k%H$N%9%1!<%j%s%0%U%!%/%?!<$O\e(B 1 \e$B$G$9!#\e(B
30834 +
30835 +-u \e$B%m%0%$%sL>\e(B
30836 +
30837 +   DB \e$B%f!<%6$N%m%0%$%sL>$r;XDj$7$^$9!#>JN,$9$k$H4D6-JQ?t\e(B PGUSER \e$B$K@_Dj\e(B
30838 +   \e$B$7$?%m%0%$%sL>$,;XDj$5$l$^$9!#\e(B
30839 +
30840 +-P \e$B%Q%9%o!<%I\e(B
30841 +
30842 +   \e$B%Q%9%o!<%I$r;XDj$7$^$9!#$J$*!"$3$N%*%W%7%g%s$r;H$&$H!"%Q%9%o!<%I$r\e(B
30843 +   ps \e$B%3%^%s%I$G8+$i$l$k$J$I!"%;%-%e%j%F%#%[!<%k$K$J$k2DG=@-$,$"$k$N$G!"\e(B
30844 +   \e$B%F%9%HMQ$K$N$_$*;H$$2<$5$$!#\e(B
30845 +
30846 +-n
30847 +
30848 +   \e$B$3$N%*%W%7%g%s$r;XDj$9$k$H!"%Y%s%A%^!<%/3+;OA0$K\e(B VACUUM \e$B$H\e(B history
30849 +   \e$B%F!<%V%k$N%/%j%"$r<B9T$7$^$;$s!#\e(B
30850 +
30851 +-v
30852 +
30853 +   \e$B$3$N%*%W%7%g%s$r;XDj$9$k$H!"%Y%s%A%^!<%/3+;OA0$K\e(B VACUUM \e$B$H\e(B history
30854 +   \e$B%F!<%V%k$N%/%j%"$r<B9T$7$^$9!#\e(B-v \e$B$H\e(B -n \e$B$r>JN,$9$k$H!":G>.8B$N\e(B
30855 +   VACUUM \e$B$J$I$r<B9T$7$^$9!#$9$J$o$A!"\e(Bhistory \e$B%F!<%V%k$N%/%j%"$H!"\e(B
30856 +   branches\e$B!"\e(Btellers\e$B!"\e(Bhistory \e$B%F!<%V%k$N\e(B VACUUM \e$B$r<B9T$7$^$9!#$3$l$O!"\e(B
30857 +   VACUUM \e$B$N;~4V$r:G>.8B$K$7$J$,$i!"%Q%U%)!<%^%s%9$K1F6A$9$k%4%_A]=|$r\e(B
30858 +   \e$B8z2LE*$K<B9T$7$^$9!#DL>o$O\e(B -v \e$B$H\e(B -n \e$B$r>JN,$9$k$3$H$r?d>)$7$^$9!#\e(B
30859 +
30860 +-I
30861 +
30862 +   \e$BA^F~$N$_$N%H%i%s%6%/%7%g%s$r<B9T$7$^$9!#A^F~%9%T!<%I$rB,Dj$7$?$$$H\e(B
30863 +   \e$B$-$K;H$$$^$9!#\e(B
30864 +
30865 +-U
30866 +
30867 +   \e$B99?7$N$_$N%H%i%s%6%/%7%g%s$r<B9T$7$^$9!#99?7%9%T!<%I$rB,Dj$7$?$$$H\e(B
30868 +   \e$B$-$K;H$$$^$9!#\e(B
30869 +
30870 +-S
30871 +
30872 +   \e$B8!:w$N$_$N%H%i%s%6%/%7%g%s$r<B9T$7$^$9!#8!:w%9%T!<%I$rB,Dj$7$?$$$H\e(B
30873 +   \e$B$-$K;H$$$^$9!#\e(B
30874 +
30875 +-f \e$B%U%!%$%kL>\e(B
30876 +
30877 +   \e$B%H%i%s%6%/%7%g%s$NFbMF$,5-=R$5$l$?%U%!%$%kL>$r;XDj$7$^$9!#$3$N%*%W\e(B
30878 +   \e$B%7%g%s$r;XDj$9$k$H!"%U%!%$%k$K5-=R$5$l$?FbMF$N%H%i%s%6%/%7%g%s$r<B\e(B
30879 +   \e$B9T$7$^$9!#%Y%s%A%^!<%/$G;HMQ$9$k%F!<%V%k$O$"$i$+$8$a=i4|2=$7$F$*$/\e(B
30880 +   \e$BI,MW$,$"$j$^$9!#F~NO%U%!%$%k$N%U%)!<%^%C%H$O8e=R$7$^$9!#\e(B
30881 +
30882 +-T
30883 +
30884 +   BEGIN \e$B$H\e(B END \e$B$G0O$^$l$?%H%i%s%6%/%7%g%s%V%m%C%/Fb$G%H%i%s%6%/%7%g%s\e(B
30885 +   \e$B$r<B9T$7$^$9!#\e(B
30886 +
30887 +-C
30888 +
30889 +   \e$B$3$N%*%W%7%g%s$r;XDj$9$k$H!":G=i$K3NN)$7$?%3%M%/%7%g%s$r;H$$2s$9$N\e(B
30890 +   \e$B$G$O$J$/!"3F%H%i%s%6%/%7%g%s$4$H$K\e(B DB \e$B$X$N@\B3$r9T$$$^$9!#%3%M%/%7%g\e(B
30891 +   \e$B%s$N%*!<%P!<$X%C%I$rB,Dj$9$k$N$KM-8z$G$9!#\e(B
30892 +
30893 +-l
30894 +
30895 +   \e$B8D!9$N%H%i%s%6%/%7%g%s$N<B9T;~4V$r5-O?$7$^$9!#5-O?@h$O%+%l%s%H%G%#\e(B
30896 +   \e$B%l%/%H%j0J2<$N\e(B pgbench_log.xxx \e$B$H$$$&%U%!%$%k$G$9!#%U%!%$%k$N%U%)!<\e(B
30897 +   \e$B%^%C%H$O0J2<$N$h$&$K$J$j$^$9!#;~4V$O%^%$%/%mICC10L$G$9!#\e(B
30898 +
30899 +      <\e$B%/%i%$%"%s%H\e(B ID> <\e$B%H%i%s%6%/%7%g%sHV9f\e(B> <\e$B;~4V\e(B>
30900 +
30901 +-d
30902 +
30903 +   \e$B%G%P%C%0%*%W%7%g%s!#MM!9$J>pJs$,I=<($5$l$^$9!#\e(B
30904 +
30905 +
30906 +\e$B"#\e(B \e$B%G!<%?%Y!<%9$N=i4|2=\e(B
30907 +
30908 +pgcbench \e$B$G%G%U%)%k%H$N%Y%s%A%^!<%/%F%9%H$r<B;\$9$k$?$a$K$O!"$"$i$+$8\e(B
30909 +\e$B$a%G!<%?%Y!<%9$r=i4|2=$7!"%F%9%H%G!<%?$r:n$kI,MW$,$"$j$^$9!#\e(B
30910 +
30911 +   pgcbench -i [\e$B%G!<%?%Y!<%9L>\e(B]
30912 +
30913 +\e$B$3$l$K$h$j0J2<$N%F!<%V%k$,:n@.$5$l$^$9\e(B (\e$B%9%1!<%j%s%0%U%!%/%?!<$,\e(B 1 \e$B$N\e(B
30914 +\e$B>l9g\e(B)\e$B!#\e(B
30915 +
30916 +    \e$B%F!<%V%kL>\e(B |  \e$B9T?t\e(B
30917 +   ------------+--------
30918 +    branches   |      1
30919 +    tellers    |     10
30920 +    accounts   | 100000
30921 +    history    |      0
30922 +
30923 +\e$B"(\e(B \e$BCm0U\e(B
30924 +
30925 +   \e$BF1$8L>A0$N%F!<%V%k$,$"$k$H:o=|$5$l$F$7$^$&$N$G$4Cm0U2<$5$$!#\e(B
30926 +
30927 +\e$B%9%1!<%j%s%0%U%!%/%?!<$r\e(B 10\e$B!"\e(B100\e$B!"\e(B1000 \e$B$J$I$KJQ99$9$k$H!">e5-9T?t$O$=\e(B
30928 +\e$B$l$K1~$8$F\e(B 10 \e$BG\!"\e(B100 \e$BG\!"\e(B1000 \e$BG\$K$J$j$^$9!#Nc$($P!"%9%1!<%j%s%0%U%!\e(B
30929 +\e$B%/%?!<$r\e(B 100 \e$B$H$9$k$H0J2<$N$h$&$K$J$j$^$9!#\e(B
30930 +
30931 +    \e$B%F!<%V%kL>\e(B |   \e$B9T?t\e(B
30932 +   ------------+----------
30933 +    branches   |      100
30934 +    tellers    |     1000
30935 +    accounts   | 10000000
30936 +    history    |        0
30937 +
30938 +
30939 +\e$B"#\e(B \e$BF~NO%U%!%$%k$N%U%)!<%^%C%H\e(B
30940 +
30941 +pgcbench \e$B$G$O!"\e(B-f \e$B%*%W%7%g%s$r;XDj$7$F%H%i%s%6%/%7%g%s$K4^$^$l$k\e(B SQL 
30942 +\e$B%3%^%s%I$NFbMF$r5-=R$7$?%U%!%$%k$rFI$_9~$`$3$H$,$G$-$^$9!#F~NO%U%!%$%k\e(B
30943 +\e$B$K$O\e(B 1 \e$B9T$K$D$-\e(B 1 \e$B$D$N%3%^%s%I$r5-=R$7$^$9!#6u9T$OL5;k$5$l!"Fs=E%O%$%U\e(B
30944 +\e$B%s$G;O$^$k9T$O%3%a%s%H$r0UL#$7$^$9!#\e(B
30945 +
30946 +\e$B%3%^%s%I$K$O!"\e(BSQL \e$B%3%^%s%I$K2C$(!"%P%C%/%9%i%C%7%e$G;O$^$k%a%?%3%^%s%I\e(B
30947 +\e$B$r5-=R$9$k$3$H$,$G$-$^$9!#%a%?%3%^%s%I$O\e(B pgcbench \e$B<+?H$K$h$C$F<B9T$5$l\e(B
30948 +\e$B$^$9!#%a%?%3%^%s%I$N7A<0$O%P%C%/%9%i%C%7%e!"$=$ND>8e$K%3%^%s%I$NF0;l!"\e(B
30949 +\e$B$=$N<!$K0z?t$,B3$-$^$9!#F0;l%3%^%s%I$H0z?t!"$^$?$=$l$>$l$N0z?t$O6uGrJ8\e(B
30950 +\e$B;z$K$h$C$F6hJL$5$l$^$9!#\e(B
30951 +
30952 +\e$B:#$N$H$3$m!"0J2<$N%a%?%3%^%s%I$,Dj5A$5$l$F$$$^$9!#\e(B
30953 +
30954 +\setrandom name min max
30955 +
30956 +   \e$B:G>.CM\e(B min \e$B$H:GBgCM\e(B max \e$B$N4V$NCM$r<h$kMp?t$r!"\e(Bname \e$BJQ?t$K@_Dj$7$^$9!#\e(B
30957 +
30958 +\e$BJQ?t$KMp?t$r@_Dj$9$k$K$O!"\e(B\setrandom \e$B%a%?%3%^%s%I$r;HMQ$7$F0J2<$N$h$&\e(B
30959 +\e$B$K5-=R$7$^$9!#\e(B
30960 +
30961 +   \setrandom aid 1 100000
30962 +
30963 +\e$B$3$l$O!"JQ?t\e(B aid \e$B$K\e(B 1 \e$B$+$i\e(B 100000 \e$B$N4V$NMp?t$r@_Dj$7$^$9!#$^$?!"JQ?t$N\e(B
30964 +\e$BCM$r\e(B SQL \e$B%3%^%s%I$KKd$a9~$`$K$O!"0J2<$N$h$&$K$=$NL>A0$NA0$K%3%m%s$rIU\e(B
30965 +\e$B$1$^$9!#\e(B
30966 +
30967 +  SELECT abalance FROM accounts WHERE aid = :aid
30968 +
30969 +\e$BNc$($P!"\e(BTPC-B \e$B$K;w$?%Y%s%A%^!<%/$r9T$&$K$O!"0J2<$N$h$&$K%H%i%s%6%/%7%g\e(B
30970 +\e$B%s$NFbMF$r%U%!%$%k$K5-=R$7!"\e(B-f \e$B%*%W%7%g%s$K$h$C$F$=$N%U%!%$%k$r;XDj$7\e(B
30971 +\e$B$F\e(B pgcbench \e$B$r<B9T$7$^$9!#\e(B
30972 +
30973 +   \setrandom aid 1 100000
30974 +   \setrandom bid 1 1
30975 +   \setrandom tid 1 10
30976 +   \setrandom delta 1 1000
30977 +   BEGIN
30978 +   UPDATE accounts SET abalance = abalance + :delta WHERE aid = :aid
30979 +   SELECT abalance FROM accounts WHERE aid = :aid
30980 +   UPDATE tellers SET tbalance = tbalance + :delta WHERE tid = :tid
30981 +   UPDATE branches SET bbalance = bbalance + :delta WHERE bid = :bid
30982 +   INSERT INTO history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, 'now')
30983 +
30984 +\e$B$J$*!"$3$NNc$G$O!"\e(B-i \e$B%*%W%7%g%s$r;H$C$F=i4|2=$7$?%G!<%?%Y!<%9\e(B (\e$B%9%1!<\e(B
30985 +\e$B%j%s%0%U%!%/%?!<$,\e(B 1 \e$B$N>l9g\e(B) \e$B$KBP$7$F%Y%s%A%^!<%/$r9T$&$3$H$r2>Dj$7$F\e(B
30986 +\e$B$$$^$9!#\e(B
30987 +
30988 +
30989 +\e$B"#\e(B \e$B%H%i%s%6%/%7%g%s$NDj5A\e(B
30990 +
30991 +pgcbench \e$B$N%G%U%)%k%H$N%Y%s%A%^!<%/$G$O!"0J2<$N\e(B SQL \e$B%3%^%s%I$rA4It40N;\e(B
30992 +\e$B$7$F\e(B 1 \e$B%H%i%s%6%/%7%g%s$H?t$($F$$$^$9!#\e(B
30993 +
30994 +1. SELECT abalance FROM accounts WHERE aid = :aid
30995 +
30996 +   :aid \e$B$O\e(B 1 \e$B$+$i%9%1!<%j%s%0%U%!%/%?!<\e(B \e$B!_\e(B 10 \e$BK|$^$G$NCM$r<h$kMp?t$G$9!#\e(B
30997 +   \e$B$3$3$G$O\e(B 1 \e$B7o$@$18!:w$5$l$^$9!#0J2<!"Mp?t$NCM$O$=$l$>$l$3$N%H%i%s%6\e(B
30998 +   \e$B%/%7%g%s$NCf$G$OF1$8CM$r;H$$$^$9!#\e(B
30999 +
31000 +2. UPDATE accounts SET abalance = abalance + :delta WHERE aid = :aid
31001 +
31002 +   :delta \e$B$O\e(B 1 \e$B$+$i\e(B 1000 \e$B$^$G$NCM$r<h$kMp?t$G$9!#\e(B
31003 +
31004 +3. SELECT abalance FROM accounts WHERE aid = :aid
31005 +
31006 +4. INSERT INTO history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, 'now')
31007 +
31008 +   :tid \e$B$O\e(B 1 \e$B$+$i%9%1!<%j%s%0%U%!%/%?!<\e(B \e$B!_\e(B 10 \e$B$^$G$NCM$r<h$kMp?t!"\e(B:bid
31009 +   \e$B$O\e(B 1 \e$B$+$i%9%1!<%j%s%0%U%!%/%?!<$^$G$NCM$r<h$kMp?t$G$9!#\e(B
31010 +
31011 +5. INSERT INTO history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, 'now')
31012 +
31013 +6. INSERT INTO history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, 'now')
31014 +
31015 +7. SELECT abalance FROM accounts WHERE aid = :aid
31016 +
31017 +-T \e$B%*%W%7%g%s$r;XDj$7$F%H%i%s%6%/%7%g%s%V%m%C%/Fb$G%H%i%s%6%/%7%g%s$r\e(B
31018 +\e$B<B9T$9$k>l9g!"\e(B1 \e$B$r\e(B BEGIN\e$B$K!"\e(B7 \e$B$r\e(B END \e$B$KCV$-49$($?\e(B SQL \e$B%3%^%s%I$,<B9T$5\e(B
31019 +\e$B$l$^$9!#$^$?!"%H%i%s%6%/%7%g%s$H$7$F<B9T$5$l$k\e(B SQL \e$B%3%^%s%I$O!"\e(B-I \e$B%*%W\e(B
31020 +\e$B%7%g%s\e(B (\e$BA^F~$N$_\e(B) \e$B$G$"$l$P\e(B 4\e$B!"\e(B-U (\e$B99?7$N$_\e(B) \e$B$G$"$l$P\e(B 2\e$B!"\e(B-S (\e$B8!:w$N$_\e(B)
31021 +\e$B$G$"$l$P\e(B 1 \e$B$H$J$j$^$9!#\e(B
31022 diff -aruN postgresql-8.2.4/src/pgcluster/tool/pgcbench.c pgcluster-1.7.0rc7/src/pgcluster/tool/pgcbench.c
31023 --- postgresql-8.2.4/src/pgcluster/tool/pgcbench.c      1970-01-01 01:00:00.000000000 +0100
31024 +++ pgcluster-1.7.0rc7/src/pgcluster/tool/pgcbench.c    2007-02-18 22:52:17.000000000 +0100
31025 @@ -0,0 +1,1625 @@
31026 +/*
31027 + * pgcbench: a simple benchmark program for PGCluster
31028 + * This program was written based on pgbench by Tatsuo Ishii.
31029 + *
31030 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
31031 + * Portions Copyright (c) 2000-2006, Tatsuo Ishii
31032 + *
31033 + * Permission to use, copy, modify, and distribute this software and
31034 + * its documentation for any purpose and without fee is hereby
31035 + * granted, provided that the above copyright notice appear in all
31036 + * copies and that both that copyright notice and this permission
31037 + * notice appear in supporting documentation, and that the name of the
31038 + * author not be used in advertising or publicity pertaining to
31039 + * distribution of the software without specific, written prior
31040 + * permission. The author makes no representations about the
31041 + * suitability of this software for any purpose.  It is provided "as
31042 + * is" without express or implied warranty.
31043 + */
31044 +#include "postgres_fe.h"
31045 +
31046 +#include "libpq-fe.h"
31047 +
31048 +#include <errno.h>
31049 +
31050 +#ifdef WIN32
31051 +#include "win32.h"
31052 +#else
31053 +#include <sys/time.h>
31054 +#include <unistd.h>
31055 +
31056 +#ifdef HAVE_GETOPT_H
31057 +#include <getopt.h>
31058 +#endif
31059 +
31060 +#ifdef HAVE_SYS_SELECT_H
31061 +#include <sys/select.h>
31062 +#endif
31063 +
31064 +/* for getrlimit */
31065 +#include <sys/resource.h>
31066 +#endif   /* ! WIN32 */
31067 +
31068 +#include <sys/types.h>
31069 +#include <sys/wait.h>
31070 +
31071 +#include <ctype.h>
31072 +#include <search.h>
31073 +
31074 +extern char *optarg;
31075 +extern int     optind;
31076 +
31077 +#ifdef WIN32
31078 +#undef select
31079 +#endif
31080 +
31081 +
31082 +/********************************************************************
31083 + * some configurable parameters */
31084 +
31085 +#define MAXCLIENTS 4096                        /* max number of clients allowed */
31086 +
31087 +int                    nclients = 1;           /* default number of simulated clients */
31088 +int                    nxacts = 10;            /* default number of transactions per
31089 +                                                                * clients */
31090 +
31091 +/*
31092 + * scaling factor. for example, tps = 10 will make 1000000 tuples of
31093 + * accounts table.
31094 + */
31095 +int                    tps = 1;
31096 +
31097 +/*
31098 + * end of configurable parameters
31099 + *********************************************************************/
31100 +
31101 +#define nbranches      1
31102 +#define ntellers       10
31103 +#define naccounts      100000
31104 +
31105 +#define SELECT_ONLY    (1)
31106 +#define        INSERT_ONLY     (2)
31107 +#define        UPDATE_ONLY     (3)
31108 +#define        WITH_TRANSACTION        (4)
31109 +#define TPC_B_LIKE     (5)
31110 +#define CUSTOM_QUERY   (6)
31111 +
31112 +#define SQL_COMMAND            1
31113 +#define META_COMMAND   2
31114 +
31115 +FILE      *LOGFILE = NULL;
31116 +
31117 +bool           use_log = false;                        /* log transaction latencies to a file */
31118 +
31119 +int                    remains;                        /* number of remaining clients */
31120 +
31121 +int                    is_connect;                     /* establish connection  for each
31122 +                                                                * transaction */
31123 +
31124 +char      *pghost = "";
31125 +char      *pgport = NULL;
31126 +char      *pgoptions = NULL;
31127 +char      *pgtty = NULL;
31128 +char      *login = NULL;
31129 +char      *pwd = NULL;
31130 +char      *dbName;
31131 +
31132 +typedef struct
31133 +{
31134 +       char       *name;
31135 +       char       *value;
31136 +}      Variable;
31137 +
31138 +typedef struct
31139 +{
31140 +       PGconn     *con;                        /* connection handle to DB */
31141 +       int                     id;                             /* client No. */
31142 +       int                     state;                  /* state No. */
31143 +       int                     cnt;                    /* xacts count */
31144 +       int                     ecnt;                   /* error count */
31145 +       int         maxAct;     /* NOTE(review): presumably this client's transaction quota - confirm against the code that sets it */
31146 +       int                     listen;                 /* non-zero while a sent query's response is
31147 +                                                                * awaited (doMix() then acts as receiver) */
31148 +       int                     aid;                    /* account id for this transaction */
31149 +       int                     bid;                    /* branch id for this transaction */
31150 +       int                     tid;                    /* teller id for this transaction */
31151 +       int                     delta;  /* presumably the :delta amount of the current transaction - verify */
31152 +       int                     abalance;       /* presumably the account balance read back - verify */
31153 +       void       *variables;  /* root of the tsearch() tree of Variable entries (see getVariable/putVariable) */
31154 +       struct timeval txn_begin;       /* used for measuring latencies */
31155 +}      CState;
31156 +
31157 +typedef struct
31158 +{
31159 +       int                     type;
31160 +       int                     argc;
31161 +       char      **argv;
31162 +}      Command;
31163 +
31164 +Command          **commands = NULL;
31165 +
31166 +static void
31167 +usage(void)    /* print the command-line synopsis for benchmark mode and for initialize mode (-i) to stderr */
31168 +{
31169 +       fprintf(stderr, "usage: pgcbench [-h hostname][-p port][-c nclients][-t ntransactions][-s scaling_factor][-I(insert only)][-U(update only)][-S(select only)][-f filename][-u login][-P password][-d(debug)][dbname]\n");
31170 +       fprintf(stderr, "(initialize mode): pgcbench -i [-h hostname][-p port][-s scaling_factor][-u login][-P password][-d(debug)][dbname]\n");
31171 +}
31172 +
31173 +/* random number generator: returns a pseudo-random integer in the inclusive range [min, max] */
31174 +static int
31175 +getrand(int min, int max )
31176 +{
31177 +       /* scale by the range width (max - min + 1), not by max; identical to the old result when min == 1 */
31178 +       return (min + (int) ((max - min + 1) * 1.0 * rand() / (RAND_MAX + 1.0)));
31179 +}
31180 +
31181 +/* set up a connection to the backend; returns the open connection, or NULL when connecting failed */
31182 +static PGconn *
31183 +doConnect(void)
31184 +{
31185 +       PGconn     *con;
31186 +       PGresult   *res;
31187 +
31188 +       con = PQsetdbLogin(pghost, pgport, pgoptions, pgtty, dbName,
31189 +                                          login, pwd);
31190 +       if (con == NULL)
31191 +       {
31192 +               fprintf(stderr, "Connection to database '%s' failed.\n", dbName);
31193 +               fprintf(stderr, "Memory allocatin problem?\n");
31194 +               return (NULL);
31195 +       }
31196 +
31197 +       if (PQstatus(con) == CONNECTION_BAD)
31198 +       {
31199 +               fprintf(stderr, "Connection to database '%s' failed.\n", dbName);
31200 +
31201 +               if (PQerrorMessage(con))
31202 +                       fprintf(stderr, "%s", PQerrorMessage(con));
31203 +               else
31204 +                       fprintf(stderr, "No explanation from the backend\n");
31205 +               PQfinish(con);  /* a failed PGconn must still be released, otherwise it leaks */
31206 +               return (NULL);
31207 +       }
31208 +
31209 +       res = PQexec(con, "SET search_path = public");
31210 +       if (PQresultStatus(res) != PGRES_COMMAND_OK)
31211 +       {
31212 +               fprintf(stderr, "%s", PQerrorMessage(con));
31213 +               exit(1);        /* failure of the setup statement is fatal for the whole program */
31214 +       }
31215 +       PQclear(res);
31216 +
31217 +       return (con);
31218 +}
31219 +
31220 +/* throw away response from backend: fetch and clear results on state->con until PQgetResult() returns NULL */
31221 +static void
31222 +discard_response(CState * state)
31223 +{
31224 +       PGresult   *res;
31225 +
31226 +       do
31227 +       {
31228 +               res = PQgetResult(state->con);
31229 +               if (res)
31230 +                       PQclear(res);
31231 +       } while (res);  /* res is only compared with NULL here, never dereferenced after PQclear() */
31232 +}
31233 +
31234 +/* check to see if the SQL result was good: 0 if res is NULL or has status `good', else -1 with st->con closed */
31235 +static int
31236 +check(CState * st, PGresult *res, int good)
31237 +{
31238 +       if (res && PQresultStatus(res) != good)
31239 +       {
31240 +               fprintf(stderr, "aborted in state %d: %s",  st->state, PQerrorMessage(st->con));
31241 +               PQfinish(st->con);      /* the client's connection is dropped; res itself is not cleared here */
31242 +               st->con = NULL;
31243 +               return (-1);
31244 +       }
31245 +       return (0);                                     /* OK */
31246 +}
31247 +
31248 +static int
31249 +compareVariables(const void *v1, const void *v2)       /* tsearch()/tfind() comparator: orders Variable entries by name */
31250 +{
31251 +       return strcmp(((Variable *)v1)->name, ((Variable *)v2)->name);
31252 +}
31253 +
31254 +static char *
31255 +getVariable(CState * st, char *name)   /* look up name in st->variables; returns the stored value string or NULL if absent */
31256 +{
31257 +       Variable                key = { name }, *var;   /* key carries only the name; value is left NULL */
31258 +
31259 +       var = tfind(&key, &st->variables, compareVariables);
31260 +       if (var != NULL)
31261 +               return (*(Variable **)var)->value;      /* tfind() yields a pointer to the tree's stored item pointer */
31262 +       else
31263 +               return NULL;
31264 +}
31265 +
31266 +static int
31267 +putVariable(CState * st, char *name, char *value)      /* set name=value in st->variables (copies both strings); true on success, false on failure */
31268 +{
31269 +       Variable                key = { name }, *var;
31270 +
31271 +       var = tfind(&key, &st->variables, compareVariables);
31272 +       if (var == NULL)        /* not present yet: build a new entry and insert it */
31273 +       {
31274 +               if ((var = malloc(sizeof(Variable))) == NULL)
31275 +                       return false;
31276 +
31277 +               var->name = NULL;       /* pre-cleared so the failure path can free() both unconditionally */
31278 +               var->value = NULL;
31279 +
31280 +               if ((var->name = strdup(name)) == NULL
31281 +                       || (var->value = strdup(value)) == NULL
31282 +                       || tsearch(var, &st->variables, compareVariables) == NULL)
31283 +               {
31284 +                       free(var->name);
31285 +                       free(var->value);
31286 +                       free(var);
31287 +                       return false;
31288 +               }
31289 +       }
31290 +       else    /* already present: var points at the tree's item pointer, replace only the value */
31291 +       {
31292 +               free((*(Variable **)var)->value);
31293 +               if (((*(Variable **)var)->value = strdup(value)) == NULL
31294 +                       return false;   /* the old value is already freed; the entry is left with a NULL value */
31295 +       }
31296 +
31297 +       return true;
31298 +}
31299 +
31300 +static char *
31301 +assignVariables(CState * st, char *sql)        /* substitute :name by its variable value; sql must be malloc'ed; NULL (sql freed) on out-of-memory */
31302 +{
31303 +       int                     i, j;   /* j: index of the ':'; i: index just past the name */
31304 +       char       *p, *name, *val;
31305 +       void       *tmp;
31306 +
31307 +       i = 0;
31308 +       while ((p = strchr(&sql[i], ':')) != NULL)
31309 +       {
31310 +               i = j = p - sql;
31311 +               do
31312 +                       i++;
31313 +               while (isalnum((unsigned char) sql[i]) != 0 || sql[i] == '_');  /* cast: isalnum() on a negative char is undefined */
31314 +               if (i == j + 1)
31315 +                       continue;       /* a lone ':' with no name after it */
31316 +
31317 +               name = malloc(i - j);
31318 +               if (name == NULL)
31319 +               {       free(sql); return NULL; }       /* release sql here too, as the realloc() failure paths below do */
31320 +               memcpy(name, &sql[j + 1], i - (j + 1));
31321 +               name[i - (j + 1)] = '\0';
31322 +               val = getVariable(st, name);
31323 +               free(name);
31324 +               if (val == NULL)
31325 +                       continue;       /* unknown variable: leave the text untouched */
31326 +
31327 +               if (strlen(val) > i - j)        /* value longer than ":name": grow the buffer first */
31328 +               {
31329 +                       tmp = realloc(sql, strlen(sql) - (i - j) + strlen(val) + 1);
31330 +                       if (tmp == NULL)
31331 +                       {
31332 +                               free(sql);
31333 +                               return NULL;
31334 +                       }
31335 +                       sql = tmp;
31336 +               }
31337 +
31338 +               if (strlen(val) != i - j)       /* shift the tail (including its NUL) to make the value fit exactly */
31339 +                       memmove(&sql[j + strlen(val)], &sql[i], strlen(&sql[i]) + 1);
31340 +
31341 +               strncpy(&sql[j], val, strlen(val));     /* copies the value without its terminator */
31342 +
31343 +               if (strlen(val) < i - j)        /* value shorter than ":name": shrink the buffer afterwards */
31344 +               {
31345 +                       tmp = realloc(sql, strlen(sql) + 1);
31346 +                       if (tmp == NULL)
31347 +                       {
31348 +                               free(sql);
31349 +                               return NULL;
31350 +                       }
31351 +                       sql = tmp;
31352 +               }
31353 +
31354 +               i = j + strlen(val);    /* resume scanning after the inserted value */
31355 +       }
31356 +
31357 +       return sql;
31358 +}
31359 +/* Drive client st one step through the seven-step (states 0-6) mixed transaction:
31360 + * when st->listen is set, collect and check the pending reply first; then send the statement for st->state. */
31361 +static void
31362 +doMix(CState * st, int debug, int ttype)
31363 +{
31364 +       char            sql[256];       /* statement text built by the second switch below */
31365 +       PGresult   *res;
31366 +
31367 +       if (st->listen)
31368 +       {                                                       /* are we receiver? */
31369 +               if (debug)
31370 +                       fprintf(stderr, "client receiving\n");
31371 +               if (!PQconsumeInput(st->con))
31372 +               {                                               /* there's something wrong */
31373 +                       fprintf(stderr, "Client aborted in state %d. Probably the backend died while processing.\n", st->state);
31374 +                       PQfinish(st->con);
31375 +                       st->con = NULL;
31376 +                       return;
31377 +               }
31378 +               if (PQisBusy(st->con))
31379 +                       return;                         /* don't have the whole result yet */
31380 +
31381 +               switch (st->state)
31382 +               {
31383 +                       case 0:                         /* response to "begin" */
31384 +                               res = PQgetResult(st->con);
31385 +                               if (ttype == WITH_TRANSACTION)
31386 +                               {
31387 +                                       if (check(st, res, PGRES_COMMAND_OK))   /* a nonzero result from check() ends this step */
31388 +                                               return;
31389 +                               }
31390 +                               else
31391 +                               {
31392 +                                       if (check(st, res, PGRES_TUPLES_OK))    /* without a transaction state 0 sent a select */
31393 +                                               return;
31394 +                               }
31395 +                               PQclear(res);
31396 +                               discard_response(st);
31397 +                               break;
31398 +                       case 1:                         /* response to "update accounts..." */
31399 +                               res = PQgetResult(st->con);
31400 +                               if (check(st, res, PGRES_COMMAND_OK))
31401 +                                       return;
31402 +                               PQclear(res);
31403 +                               discard_response(st);
31404 +                               break;
31405 +                       case 2:                         /* response to "select abalance ..." */
31406 +                               res = PQgetResult(st->con);
31407 +                               if (check(st, res, PGRES_TUPLES_OK))
31408 +                                       return;
31409 +                               PQclear(res);
31410 +                               discard_response(st);
31411 +                               break;
31412 +                       case 3:                         /* response to "update tellers ..." */
31413 +                               res = PQgetResult(st->con);
31414 +                               if (check(st, res, PGRES_COMMAND_OK))
31415 +                                       return;
31416 +                               PQclear(res);
31417 +                               discard_response(st);
31418 +                               break;
31419 +                       case 4:                         /* response to "update branches ..." */
31420 +                               res = PQgetResult(st->con);
31421 +                               if (check(st, res, PGRES_COMMAND_OK))
31422 +                                       return;
31423 +                               PQclear(res);
31424 +                               discard_response(st);
31425 +                               break;
31426 +                       case 5:                         /* response to "insert into history ..." */
31427 +                               res = PQgetResult(st->con);
31428 +                               if (check(st, res, PGRES_COMMAND_OK))
31429 +                                       return;
31430 +                               PQclear(res);
31431 +                               discard_response(st);
31432 +                               break;
31433 +                       case 6:                         /* response to "end" */
31434 +
31435 +                               /*
31436 +                                * transaction finished: record the time it took in the
31437 +                                * log
31438 +                                */
31439 +                               if (use_log)
31440 +                               {
31441 +                                       double          diff;   /* elapsed time in microseconds */
31442 +                                       struct timeval now;
31443 +
31444 +                                       gettimeofday(&now, NULL);
31445 +                                       diff = (int) (now.tv_sec - st->txn_begin.tv_sec) * 1000000.0 +
31446 +                                               (int) (now.tv_usec - st->txn_begin.tv_usec);
31447 +
31448 +                                       fprintf(LOGFILE, "%d %d %.0f\n", st->id, st->cnt, diff);
31449 +                               }
31450 +
31451 +                               res = PQgetResult(st->con);
31452 +                               if (ttype == WITH_TRANSACTION)
31453 +                               {
31454 +                                       if (check(st, res, PGRES_COMMAND_OK))
31455 +                                               return;
31456 +                               }
31457 +                               else
31458 +                               {
31459 +                                       if (check(st, res, PGRES_TUPLES_OK))
31460 +                                               return;
31461 +                               }
31462 +                               PQclear(res);
31463 +                               discard_response(st);
31464 +
31465 +                               if (is_connect)         /* drop the connection after every transaction; it is reopened below */
31466 +                               {
31467 +                                       PQfinish(st->con);
31468 +                                       st->con = NULL;
31469 +                               }
31470 +                               if (++st->cnt >= st->maxAct)    /* this client has run all of its transactions */
31471 +                               {
31472 +                                       remains--;                      /* I've done */
31473 +                                       if (st->con != NULL)
31474 +                                       {
31475 +                                               PQfinish(st->con);
31476 +                                               st->con = NULL;
31477 +                                       }
31478 +                                       return;
31479 +                               }
31480 +                               break;
31481 +               }
31482 +
31483 +               /* increment state counter */
31484 +               st->state++;
31485 +               if (st->state > 6)
31486 +               {
31487 +                       st->state = 0;          /* wrap around to the first statement */
31488 +                       remains--;                      /* I've done */
31489 +               }
31490 +       }
31491 +
31492 +       if (st->con == NULL)
31493 +       {
31494 +               if ((st->con = doConnect()) == NULL)
31495 +               {
31496 +                       fprintf(stderr, "Client aborted in establishing connection.\n");
31497 +                       remains--;                      /* I've aborted */
31498 +                       PQfinish(st->con);              /* st->con is NULL at this point */
31499 +                       st->con = NULL;
31500 +                       return;
31501 +               }
31502 +       }
31503 +
31504 +       switch (st->state)
31505 +       {
31506 +               case 0:                 /* about to start */
31507 +                       if (ttype == WITH_TRANSACTION)
31508 +                       {
31509 +                               strcpy(sql, "begin");
31510 +                       }
31511 +                       else
31512 +                       {
31513 +                               st->aid = getrand(1, naccounts * tps);
31514 +                               snprintf(sql, 256, "select abalance from accounts where aid = %d", st->aid);
31515 +                       }
31516 +                       st->aid = getrand(1, naccounts * tps);  /* drawn again: replaces the aid used in the select above */
31517 +                       st->bid = getrand(1, nbranches * tps);
31518 +                       st->tid = getrand(1, ntellers * tps);
31519 +                       st->delta = getrand(1, 1000);
31520 +                       if (use_log)
31521 +                               gettimeofday(&(st->txn_begin), NULL);   /* start of the interval logged in state 6 */
31522 +                       break;
31523 +               case 1:
31524 +                       snprintf(sql, 256, "update accounts set abalance = abalance + %d where aid = %d\n", st->delta, st->aid);
31525 +                       break;
31526 +               case 2:
31527 +                       snprintf(sql, 256, "select abalance from accounts where aid = %d", st->aid);
31528 +                       break;
31529 +               case 3:
31530 +                       if (ttype == 0)
31531 +                       {
31532 +                               snprintf(sql, 256, "update tellers set tbalance = tbalance + %d where tid = %d\n",
31533 +                                                st->delta, st->tid);
31534 +                               break;
31535 +                       }                       /* any other ttype falls through to case 4 */
31536 +               case 4:
31537 +                       if (ttype == 0)
31538 +                       {
31539 +                               snprintf(sql, 256, "update branches set bbalance = bbalance + %d where bid = %d", st->delta, st->bid);
31540 +                               break;
31541 +                       }                       /* any other ttype falls through to the insert in case 5 */
31542 +               case 5:
31543 +                       snprintf(sql, 256, "insert into history(tid,bid,aid,delta,mtime) values(%d,%d,%d,%d,'now')",
31544 +                                        st->tid, st->bid, st->aid, st->delta);
31545 +                       break;
31546 +               case 6:
31547 +                       if (ttype == WITH_TRANSACTION)
31548 +                       {
31549 +                               strcpy(sql, "end");
31550 +                       }
31551 +                       else
31552 +                       {
31553 +                               st->aid = getrand(1, naccounts * tps);
31554 +                               snprintf(sql, 256, "select abalance from accounts where aid = %d", st->aid);
31555 +                       }
31556 +                       break;
31557 +       }
31558 +
31559 +       if (debug)
31560 +               fprintf(stderr, "client sending %s\n", sql);
31561 +
31562 +       if (PQsendQuery(st->con, sql) == 0)
31563 +       {
31564 +               if (debug)
31565 +                       fprintf(stderr, "PQsendQuery(%s)failed\n", sql);
31566 +               st->ecnt++;                     /* count the failed send; listen is left unchanged */
31567 +       }
31568 +       else
31569 +       {
31570 +               st->listen++;                   /* flags that should be listened */
31571 +       }
31572 +}
31573 +/* Drive client st through a one-statement transaction: a select, update or insert chosen by ttype
31574 + * (SELECT_ONLY, UPDATE_ONLY or INSERT_ONLY). Collects the pending reply when st->listen is set, then sends the next statement. */
31575 +static void
31576 +doOne(CState * st, int debug, int ttype )
31577 +{
31578 +       char            sql[256];       /* statement text built in the switch near the end */
31579 +       PGresult   *res;
31580 +
31581 +       if (st->listen)
31582 +       {                                                       /* are we receiver? */
31583 +               if (debug)
31584 +                       fprintf(stderr, "client receiving\n");
31585 +               if (!PQconsumeInput(st->con))
31586 +               {                                               /* there's something wrong */
31587 +                       fprintf(stderr, "Client aborted in state %d. Probably the backend died while processing.\n", st->state);
31588 +                       remains--;                      /* I've aborted */
31589 +                       PQfinish(st->con);
31590 +                       st->con = NULL;
31591 +                       return;
31592 +               }
31593 +               if (PQisBusy(st->con))
31594 +                       return;                         /* don't have the whole result yet */
31595 +
31596 +               switch (st->state)
31597 +               {
31598 +                       case 0:                         /* response to "select abalance ..." */
31599 +                               res = PQgetResult(st->con);
31600 +                               if (ttype == SELECT_ONLY)
31601 +                               {
31602 +                                       if (check(st, res, PGRES_TUPLES_OK))    /* a nonzero result from check() ends this step */
31603 +                                               return;
31604 +                               }
31605 +                               else
31606 +                               {
31607 +                                       if (check(st, res, PGRES_COMMAND_OK))   /* update and insert return no tuples */
31608 +                                               return;
31609 +                               }
31610 +                               PQclear(res);
31611 +                               discard_response(st);
31612 +
31613 +                               if (is_connect)         /* drop the connection after every transaction; it is reopened below */
31614 +                               {
31615 +                                       PQfinish(st->con);
31616 +                                       st->con = NULL;
31617 +                               }
31618 +
31619 +                               if (++st->cnt >= st->maxAct)    /* this client has run all of its transactions */
31620 +                               {
31621 +                                       remains--;                      /* I've done */
31622 +                                       if (st->con != NULL)
31623 +                                       {
31624 +                                               PQfinish(st->con);
31625 +                                               st->con = NULL;
31626 +                                       }
31627 +                                       return;
31628 +                               }
31629 +                               break;
31630 +               }
31631 +
31632 +               /* increment state counter */
31633 +               st->state++;
31634 +               if (st->state > 0)      /* there is only state 0, so this wraps every time */
31635 +               {
31636 +                       st->state = 0;
31637 +                       remains--;      /* I've done */
31638 +               }
31639 +       }
31640 +
31641 +       if (st->con == NULL)
31642 +       {
31643 +               if ((st->con = doConnect()) == NULL)
31644 +               {
31645 +                       fprintf(stderr, "Client aborted in establishing connection.\n");
31646 +                       PQfinish(st->con);              /* st->con is NULL at this point */
31647 +                       st->con = NULL;
31648 +                       return;
31649 +               }
31650 +       }
31651 +
31652 +       switch (st->state)
31653 +       {
31654 +               case 0:
31655 +                       st->aid = getrand(1, naccounts * tps);
31656 +                       st->bid = getrand(1, nbranches * tps);
31657 +                       st->tid = getrand(1, ntellers * tps);
31658 +                       st->delta = getrand(1, 1000);
31659 +                       if ( ttype == SELECT_ONLY)
31660 +                       {
31661 +                               snprintf(sql, 256, "select abalance from accounts where aid = %d", st->aid);
31662 +                       }
31663 +                       if ( ttype == UPDATE_ONLY)
31664 +                       {
31665 +                               snprintf(sql, 256, "update accounts set abalance = abalance + %d where aid = %d\n", st->delta, st->aid);
31666 +                       }
31667 +                       if ( ttype == INSERT_ONLY)
31668 +                       {
31669 +                               snprintf(sql, 256, "insert into history(tid,bid,aid,delta,mtime) values(%d,%d,%d,%d,'now')",
31670 +                                                st->tid, st->bid, st->aid, st->delta);
31671 +                       }
31672 +                       break;                  /* NOTE(review): sql is left unset if ttype is none of the three values above */
31673 +       }
31674 +
31675 +       if (debug)
31676 +               fprintf(stderr, "client sending %s\n", sql);
31677 +
31678 +       if (PQsendQuery(st->con, sql) == 0)
31679 +       {
31680 +               if (debug)
31681 +                       fprintf(stderr, "PQsendQuery(%s)failed\n", sql);
31682 +               st->ecnt++;                     /* count the failed send; listen is left unchanged */
31683 +       }
31684 +       else
31685 +       {
31686 +               st->listen++;                   /* flags that should be listened */
31687 +       }
31688 +}
31689 +/* Run one step of the script in commands[] for client st: finish the command at st->state when st->listen is set, then start the next one (SQL is sent after ":name" substitution; the "setrandom" meta command is executed locally). */
31690 +static void
31691 +doCustom(CState * st, int debug, int ttype )
31692 +{
31693 +       PGresult   *res;
31694 +
31695 +       if (st->listen)
31696 +       {                                                       /* are we receiver? */
31697 +               if (commands[st->state]->type == SQL_COMMAND)   /* only SQL commands have a reply to wait for */
31698 +               {
31699 +                       if (debug)
31700 +                               fprintf(stderr, "client receiving\n");
31701 +                       if (!PQconsumeInput(st->con))
31702 +                       {                                               /* there's something wrong */
31703 +                               fprintf(stderr, "Client aborted in state %d. Probably the backend died while processing.\n", st->state);
31704 +                               PQfinish(st->con);
31705 +                               st->con = NULL;
31706 +                               return;
31707 +                       }
31708 +                       if (PQisBusy(st->con))
31709 +                               return;                         /* don't have the whole result yet */
31710 +               }
31711 +
31712 +               /*
31713 +                * transaction finished: record the time it took in the
31714 +                * log
31715 +                */
31716 +               if (use_log && commands[st->state + 1] == NULL)
31717 +               {
31718 +                       double          diff;   /* elapsed time in microseconds */
31719 +                       struct timeval now;
31720 +
31721 +                       gettimeofday(&now, NULL);
31722 +                       diff = (int) (now.tv_sec - st->txn_begin.tv_sec) * 1000000.0 +
31723 +                               (int) (now.tv_usec - st->txn_begin.tv_usec);
31724 +
31725 +                       fprintf(LOGFILE, "%d %d %.0f\n", st->id, st->cnt, diff);
31726 +               }
31727 +
31728 +               if (commands[st->state]->type == SQL_COMMAND)
31729 +               {
31730 +                       res = PQgetResult(st->con);
31731 +                       if (strncasecmp(commands[st->state]->argv[0], "select", 6) != 0)        /* statements not starting with "select" return no tuples */
31732 +                       {
31733 +                               if (check(st, res, PGRES_COMMAND_OK))
31734 +                                       return;
31735 +                       }
31736 +                       else
31737 +                       {
31738 +                               if (check(st, res, PGRES_TUPLES_OK))
31739 +                                       return;
31740 +                       }
31741 +                       PQclear(res);
31742 +                       discard_response(st);
31743 +               }
31744 +
31745 +               if (commands[st->state + 1] == NULL)    /* that was the last command of the script */
31746 +               {
31747 +                       if (is_connect)
31748 +                       {
31749 +                               PQfinish(st->con);
31750 +                               st->con = NULL;
31751 +                       }
31752 +                       if (++st->cnt >= st->maxAct)
31753 +                       {
31754 +                               remains--;                      /* I've done */
31755 +                               if (st->con != NULL)
31756 +                               {
31757 +                                       PQfinish(st->con);
31758 +                                       st->con = NULL;
31759 +                               }
31760 +                               return;
31761 +                       }
31762 +               }
31763 +
31764 +               /* increment state counter */
31765 +               st->state++;
31766 +               if (commands[st->state] == NULL)
31767 +               {
31768 +                       st->state = 0;          /* wrap around to the first command */
31769 +                       remains--;                      /* I've done */
31770 +               }
31771 +       }
31772 +
31773 +       if (st->con == NULL)
31774 +       {
31775 +               if ((st->con = doConnect()) == NULL)
31776 +               {
31777 +                       fprintf(stderr, "Client aborted in establishing connection.\n");
31778 +                       remains--;                      /* I've aborted */
31779 +                       PQfinish(st->con);              /* st->con is NULL at this point */
31780 +                       st->con = NULL;
31781 +                       return;
31782 +               }
31783 +       }
31784 +
31785 +       if (use_log && st->state == 0)
31786 +               gettimeofday(&(st->txn_begin), NULL);   /* start of the interval logged after the last command */
31787 +
31788 +       if (commands[st->state]->type == SQL_COMMAND)
31789 +       {
31790 +               char       *sql;
31791 +
31792 +               if ((sql = strdup(commands[st->state]->argv[0])) == NULL        /* work on a private copy of the script text */
31793 +                       || (sql = assignVariables(st, sql)) == NULL)
31794 +               {
31795 +                       fprintf(stderr, "out of memory\n");
31796 +                       st->ecnt++;
31797 +                       return;
31798 +               }
31799 +
31800 +               if (debug)
31801 +                       fprintf(stderr, "client sending %s\n", sql);
31802 +
31803 +               if (PQsendQuery(st->con, sql) == 0)
31804 +               {
31805 +                       if (debug)
31806 +                               fprintf(stderr, "PQsendQuery(%s)failed\n", sql);
31807 +                       st->ecnt++;
31808 +               }
31809 +               else
31810 +               {
31811 +                       st->listen++;                   /* flags that should be listened */
31812 +               }
31813 +
31814 +               free(sql);
31815 +       }
31816 +       else if (commands[st->state]->type == META_COMMAND)
31817 +       {
31818 +               int                     argc = commands[st->state]->argc, i;
31819 +               char      **argv = commands[st->state]->argv;
31820 +
31821 +               if (debug)
31822 +               {
31823 +                       fprintf(stderr, "client executing \\%s", argv[0]);
31824 +                       for (i = 1; i < argc; i++)
31825 +                               fprintf(stderr, " %s", argv[i]);
31826 +                       fprintf(stderr, "\n");
31827 +               }
31828 +
31829 +               if (strcasecmp(argv[0], "setrandom") == 0)
31830 +               {
31831 +                       /*
31832 +                        * Format the drawn number into a fixed buffer that is large
31833 +                        * enough for any int.  A buffer sized from strlen(argv[3])
31834 +                        * is too small whenever the value prints longer than the
31835 +                        * text of the upper bound, e.g. with a negative lower bound.
31836 +                        * NOTE(review): argv[1..3] are used unchecked; presumably the
31837 +                        * script parser guarantees argc >= 4 -- confirm.
31838 +                        */
31839 +                       char            val[32];
31840 +
31841 +                       snprintf(val, sizeof(val), "%d", getrand(atoi(argv[2]), atoi(argv[3])));
31842 +
31843 +                       if (putVariable(st, argv[1], val) == false)
31844 +                       {
31845 +                               fprintf(stderr, "%s: out of memory\n", argv[0]);
31846 +                               st->ecnt++;
31847 +                               return;
31848 +                       }
31849 +
31850 +                       /* nothing was sent to the server; setting listen lets the next call move on to the following command */
31851 +                       st->listen++;
31852 +               }
31853 +       }
31854 +}
31855 +
31856 +/* Close the connection held by the given client state, if it has one. */
31857 +static void
31858 +disconnect_all(CState * state)
31859 +{
31860 +       if (state->con == NULL) return;         /* no connection open */
31861 +       PQfinish(state->con);
31862 +}
31863 +/* Create the four benchmark tables and fill them: branches and tellers by INSERT inside one transaction, accounts by COPY in
31864 + * batches of 10000 rows; then add the primary keys and run "vacuum analyze". Any failure except a failed "drop" exits with status 1. */
31865 +static void
31866 +init(void)
31867 +{
31868 +       PGconn     *con;
31869 +       PGresult   *res;
31870 +       static char *DDLs[] = {
31871 +               "drop table branches",
31872 +               "create table branches(bid int not null,bbalance int,filler char(88))",
31873 +               "drop table tellers",
31874 +               "create table tellers(tid int not null,bid int,tbalance int,filler char(84))",
31875 +               "drop table accounts",
31876 +               "create table accounts(aid int not null,bid int,abalance int,filler char(84))",
31877 +               "drop table history",
31878 +       "create table history(tid int,bid int,aid int,delta int,mtime timestamp,filler char(22))"};
31879 +       static char *DDLAFTERs[] = {    /* executed only after the data has been loaded */
31880 +               "alter table branches add primary key (bid)",
31881 +               "alter table tellers add primary key (tid)",
31882 +       "alter table accounts add primary key (aid)"};
31883 +
31884 +
31885 +       char            sql[256];       /* scratch buffer for generated statements and COPY rows */
31886 +
31887 +       int                     i;
31888 +
31889 +       if ((con = doConnect()) == NULL)
31890 +               exit(1);
31891 +
31892 +       for (i = 0; i < (sizeof(DDLs) / sizeof(char *)); i++)
31893 +       {
31894 +               res = PQexec(con, DDLs[i]);
31895 +               if (strncmp(DDLs[i], "drop", 4) && PQresultStatus(res) != PGRES_COMMAND_OK)     /* a failing "drop" is tolerated */
31896 +               {
31897 +                       fprintf(stderr, "%s", PQerrorMessage(con));
31898 +                       exit(1);
31899 +               }
31900 +               PQclear(res);
31901 +       }
31902 +
31903 +       res = PQexec(con, "begin");
31904 +       if (PQresultStatus(res) != PGRES_COMMAND_OK)
31905 +       {
31906 +               fprintf(stderr, "%s", PQerrorMessage(con));
31907 +               exit(1);
31908 +       }
31909 +       PQclear(res);
31910 +
31911 +       for (i = 0; i < nbranches * tps; i++)
31912 +       {
31913 +               snprintf(sql, 256, "insert into branches(bid,bbalance) values(%d,0)", i + 1);
31914 +               res = PQexec(con, sql);
31915 +               if (PQresultStatus(res) != PGRES_COMMAND_OK)
31916 +               {
31917 +                       fprintf(stderr, "%s", PQerrorMessage(con));
31918 +                       exit(1);
31919 +               }
31920 +               PQclear(res);
31921 +       }
31922 +
31923 +       for (i = 0; i < ntellers * tps; i++)
31924 +       {
31925 +               snprintf(sql, 256, "insert into tellers(tid,bid,tbalance) values (%d,%d,0)"
31926 +                                ,i + 1, i / ntellers + 1);     /* every run of ntellers consecutive tellers shares one bid */
31927 +               res = PQexec(con, sql);
31928 +               if (PQresultStatus(res) != PGRES_COMMAND_OK)
31929 +               {
31930 +                       fprintf(stderr, "%s", PQerrorMessage(con));
31931 +                       exit(1);
31932 +               }
31933 +               PQclear(res);
31934 +       }
31935 +
31936 +       res = PQexec(con, "end");
31937 +       if (PQresultStatus(res) != PGRES_COMMAND_OK)
31938 +       {
31939 +               fprintf(stderr, "%s", PQerrorMessage(con));
31940 +               exit(1);
31941 +       }
31942 +       PQclear(res);
31943 +
31944 +       /*
31945 +        * occupy accounts table with some data
31946 +        */
31947 +       fprintf(stderr, "creating tables...\n");
31948 +       for (i = 0; i < naccounts * tps; i++)
31949 +       {
31950 +               int                     j = i + 1;      /* 1-based row number, used as aid */
31951 +
31952 +               if (j % 10000 == 1)     /* first row of a batch: start a new COPY */
31953 +               {
31954 +                       res = PQexec(con, "copy accounts from stdin");
31955 +                       if (PQresultStatus(res) != PGRES_COPY_IN)
31956 +                       {
31957 +                               fprintf(stderr, "%s", PQerrorMessage(con));
31958 +                               exit(1);
31959 +                       }
31960 +                       PQclear(res);
31961 +               }
31962 +
31963 +               snprintf(sql, 256, "%d\t%d\t%d\t\n", j, i / naccounts + 1, 0);  /* one tab-separated COPY row */
31964 +               if (PQputline(con, sql))
31965 +               {
31966 +                       fprintf(stderr, "PQputline failed\n");
31967 +                       exit(1);
31968 +               }
31969 +
31970 +               if (j % 10000 == 0)
31971 +               {
31972 +                       /*
31973 +                        * every 10000 tuples, we commit the copy command. this should
31974 +                        * avoid generating too much WAL logs
31975 +                        */
31976 +                       fprintf(stderr, "%d tuples done.\n", j);
31977 +                       if (PQputline(con, "\\.\n"))    /* end-of-data marker for COPY */
31978 +                       {
31979 +                               fprintf(stderr, "very last PQputline failed\n");
31980 +                               exit(1);
31981 +                       }
31982 +
31983 +                       if (PQendcopy(con))
31984 +                       {
31985 +                               fprintf(stderr, "PQendcopy failed\n");
31986 +                               exit(1);
31987 +                       }
31988 +
31989 +#ifdef NOT_USED
31990 +
31991 +                       /*
31992 +                        * do a checkpoint to purge the old WAL logs
31993 +                        */
31994 +                       res = PQexec(con, "checkpoint");
31995 +                       if (PQresultStatus(res) != PGRES_COMMAND_OK)
31996 +                       {
31997 +                               fprintf(stderr, "%s", PQerrorMessage(con));
31998 +                               exit(1);
31999 +                       }
32000 +                       PQclear(res);
32001 +#endif   /* NOT_USED */
32002 +               }               /* NOTE(review): a final partial batch is never ended here; relies on naccounts * tps being a multiple of 10000 -- confirm */
32003 +       }
32004 +       fprintf(stderr, "set primary key...\n");
32005 +       for (i = 0; i < (sizeof(DDLAFTERs) / sizeof(char *)); i++)
32006 +       {
32007 +               res = PQexec(con, DDLAFTERs[i]);
32008 +               if (PQresultStatus(res) != PGRES_COMMAND_OK)
32009 +               {
32010 +                       fprintf(stderr, "%s", PQerrorMessage(con));
32011 +                       exit(1);
32012 +               }
32013 +               PQclear(res);
32014 +       }
32015 +
32016 +       /* vacuum */
32017 +       fprintf(stderr, "vacuum...");
32018 +       res = PQexec(con, "vacuum analyze");
32019 +       if (PQresultStatus(res) != PGRES_COMMAND_OK)
32020 +       {
32021 +               fprintf(stderr, "%s", PQerrorMessage(con));
32022 +               exit(1);
32023 +       }
32024 +       PQclear(res);
32025 +       fprintf(stderr, "done.\n");
32026 +
32027 +       PQfinish(con);
32028 +}
32029 +
32030 +static int
32031 +process_file(char *filename)
32032 +{
32033 +       const char      delim[] = " \f\n\r\t\v";
32034 +       /* Parse script "filename" ("-" means stdin) into the file-level, NULL-terminated commands array. */
32035 +       FILE       *fd;
32036 +       int                     lineno, i, j;
32037 +       char            buf[BUFSIZ], *p, *tok;
32038 +       void       *tmp;
32039 +       /* Returns true on success; on a parse or memory error frees what was built so far and returns false. */
32040 +       if (strcmp(filename, "-") == 0)
32041 +               fd = stdin;
32042 +       else if ((fd = fopen(filename, "r")) == NULL)
32043 +       {
32044 +               fprintf(stderr, "%s: %s\n", strerror(errno), filename);
32045 +               return false;
32046 +       }
32047 +
32048 +       fprintf(stderr, "processing file...\n");
32049 +       /* One command per input line; blank lines and "--" comment lines are skipped but still counted. */
32050 +       lineno = 1;
32051 +       i = 0;
32052 +       while (fgets(buf, sizeof(buf), fd) != NULL)
32053 +       {
32054 +               if ((p = strchr(buf, '\n')) != NULL)
32055 +                       *p = '\0';
32056 +               p = buf;
32057 +               while (isspace((unsigned char) *p))     /* cast: isspace() is undefined for negative char values */
32058 +                       p++;
32059 +               if (*p == '\0' || strncmp(p, "--", 2) == 0)
32060 +               {
32061 +                       lineno++;
32062 +                       continue;
32063 +               }
32064 +               /* Make room for one more Command pointer, then allocate the Command itself. */
32065 +               if ((tmp = realloc(commands, sizeof(Command *) * (i + 1))) == NULL)
32066 +               {
32067 +                       i--;                    /* slot i does not exist; cleanup starts at i - 1 */
32068 +                       goto error;
32069 +               }
32070 +               commands = tmp;
32071 +
32072 +               if ((commands[i] = malloc(sizeof(Command))) == NULL)
32073 +                       goto error;
32074 +
32075 +               commands[i]->argv = NULL;
32076 +               commands[i]->argc = 0;
32077 +
32078 +               if (*p == '\\')
32079 +               {
32080 +                       commands[i]->type = META_COMMAND;
32081 +                       /* Split the rest of the line into whitespace-separated words; argv[0] is the command name. */
32082 +                       j = 0;
32083 +                       tok = strtok(++p, delim);
32084 +                       while (tok != NULL)
32085 +                       {
32086 +                               tmp = realloc(commands[i]->argv, sizeof(char *) * (j + 1));
32087 +                               if (tmp == NULL)
32088 +                                       goto error;
32089 +                               commands[i]->argv = tmp;
32090 +
32091 +                               if ((commands[i]->argv[j] = strdup(tok)) == NULL)
32092 +                                       goto error;
32093 +
32094 +                               commands[i]->argc++;
32095 +
32096 +                               j++;
32097 +                               tok = strtok(NULL, delim);
32098 +                       }
32099 +                       /* A bare backslash leaves argc == 0 and argv == NULL, so check argc before reading argv[0]. */
32100 +                       if (commands[i]->argc > 0 && strcasecmp(commands[i]->argv[0], "setrandom") == 0)
32101 +                       {
32102 +                               int                     min, max;
32103 +
32104 +                               if (commands[i]->argc < 4)
32105 +                               {
32106 +                                       fprintf(stderr, "%s: %d: \\%s: missing argument\n", filename, lineno, commands[i]->argv[0]);
32107 +                                       goto error;
32108 +                               }
32109 +
32110 +                               for (j = 4; j < commands[i]->argc; j++)
32111 +                                       fprintf(stderr, "%s: %d: \\%s: extra argument \"%s\" ignored\n", filename, lineno, commands[i]->argv[0], commands[i]->argv[j]);
32112 +
32113 +                               if ((min = atoi(commands[i]->argv[2])) < 0)
32114 +                               {
32115 +                                       fprintf(stderr, "%s: %d: \\%s: invalid minimum number %s\n", filename, lineno, commands[i]->argv[0], commands[i]->argv[2]);
32116 +                                       goto error;
32117 +                               }
32118 +
32119 +                               if ((max = atoi(commands[i]->argv[3])) < min || max > RAND_MAX)
32120 +                               {
32121 +                                       fprintf(stderr, "%s: %d: \\%s: invalid maximum number %s\n", filename, lineno, commands[i]->argv[0], commands[i]->argv[3]);
32122 +                                       goto error;
32123 +                               }
32124 +                       }
32125 +                       else
32126 +                       {
32127 +                               fprintf(stderr, "%s: %d: invalid command \\%s\n", filename, lineno, commands[i]->argc > 0 ? commands[i]->argv[0] : "");
32128 +                               goto error;
32129 +                       }
32130 +               }
32131 +               else
32132 +               {
32133 +                       commands[i]->type = SQL_COMMAND;
32134 +                       /* An SQL line is stored verbatim as the single argument argv[0]. */
32135 +                       if ((commands[i]->argv = malloc(sizeof(char *))) == NULL)
32136 +                               goto error;
32137 +
32138 +                       if ((commands[i]->argv[0] = strdup(p)) == NULL)
32139 +                               goto error;
32140 +
32141 +                       commands[i]->argc++;
32142 +               }
32143 +
32144 +               i++;
32145 +               lineno++;
32146 +       }
32147 +       /* Append the NULL terminator; fd stays open until success so the error path closes it exactly once. */
32148 +       if ((tmp = realloc(commands, sizeof(Command *) * (i + 1))) == NULL)
32149 +       {
32150 +               i--;                    /* slot i was never allocated; cleanup must start at i - 1 */
32151 +               goto error;
32152 +       }
32153 +       commands = tmp;
32154 +       commands[i] = NULL;
32155 +       fclose(fd);
32156 +       return true;
32157 +error:
32158 +       if (errno == ENOMEM)
32159 +               fprintf(stderr, "%s: %d: out of memory\n", filename, lineno);
32160 +
32161 +       fclose(fd);
32162 +
32163 +       if (commands == NULL)
32164 +               return false;
32165 +       /* Release commands[i] down to commands[0]; a NULL entry means its allocation failed. */
32166 +       while (i >= 0)
32167 +       {
32168 +               if (commands[i] != NULL)
32169 +               {
32170 +                       for (j = 0; j < commands[i]->argc; j++)
32171 +                               free(commands[i]->argv[j]);
32172 +
32173 +                       free(commands[i]->argv);
32174 +                       free(commands[i]);
32175 +               }
32176 +
32177 +               i--;
32178 +       }
32179 +       free(commands);
32180 +       commands = NULL;                /* do not leave the file-level pointer dangling */
32181 +       return false;
32182 +}
32183 +
32184 +/* print out results: transaction type, scaling factor, client count, transaction count, run time and tps, all on stdout */
32185 +static void
32186 +printResults(
32187 +                        int ttype, int normal_xacts,
32188 +                        struct timeval * tv1, struct timeval * tv2,
32189 +                        struct timeval * tv3)
32190 +{
32191 +       double          t1,
32192 +                               t2;
32193 +       char       *s;
32194 +       /* t1: elapsed time from *tv1 to *tv3, converted from microseconds to seconds */
32195 +       t1 = (tv3->tv_sec - tv1->tv_sec) * 1000000.0 + (tv3->tv_usec - tv1->tv_usec);
32196 +       t1 = t1 / 1000000.0 ;
32197 +       /* t2: normal_xacts per second over the same *tv1..*tv3 interval (tv2 is not read in this function) */
32198 +       t2 = (tv3->tv_sec - tv1->tv_sec) * 1000000.0 + (tv3->tv_usec - tv1->tv_usec);
32199 +       t2 = normal_xacts * 1000000.0 / t2;
32200 +       /* NOTE: these macros are defined mid-function but stay visible to every function that follows in the file */
32201 +#define SELECT_ONLY    (1)
32202 +#define        INSERT_ONLY     (2)
32203 +#define        UPDATE_ONLY     (3)
32204 +#define        WITH_TRANSACTION        (4)
32205 +       switch (ttype)
32206 +       {
32207 +               case 0:         /* presumably TPC_B_LIKE -- TODO confirm that macro is 0 */
32208 +                       s = "TPC-B (sort of)";
32209 +                       break;
32210 +               case SELECT_ONLY :
32211 +                       s = "SELECT only";
32212 +                       break;
32213 +               case INSERT_ONLY :
32214 +                       s = "INSERT only";
32215 +                       break;
32216 +               case UPDATE_ONLY :
32217 +                       s = "UPDATE only";
32218 +                       break;
32219 +               case CUSTOM_QUERY :
32220 +                       s = "Custom query";
32221 +                       break;
32222 +               default:        /* every other value, WITH_TRANSACTION included */
32223 +                       s = "Mix query";
32224 +                       break;
32225 +       }
32226 +
32227 +       /* tps and nclients are not parameters; they come from file-level variables */
32228 +       printf("transaction type: %s\n", s);
32229 +       printf("scaling factor: %d\n", tps);
32230 +       printf("number of clients: %d\n", nclients);
32231 +       printf("number of transactions actually processed: %d\n", normal_xacts );
32232 +       printf("run time (sec) = %f \n", t1);
32233 +       printf("tps = %f (including connections establishing)\n", t2);
32234 +}
32235 +
32236 +static int
32237 +doChild(int clientId, int min, int max, int debug, int ttype)
32238 +{
32239 +       CState state;           /* status of clients */
32240 +       /* Body of one benchmark client: connect, start the first query, then loop on select() until done. Returns 1. */
32241 +       struct timeval tv1;                     /* start up time */
32242 +       fd_set          input_mask;
32243 +       int                     nsocks = 0;             /* return from select(2) */
32244 +       int                     sock = 0;
32245 +       /* Seed rand() from the current microseconds plus the client id. */
32246 +       gettimeofday(&tv1, NULL);
32247 +       srand((unsigned int) tv1.tv_usec + clientId );
32248 +
32249 +       memset((char *)&state,0,sizeof(CState));
32250 +       /* make connections to the database */
32251 +       state.id = clientId;
32252 +       if ((state.con = doConnect()) == NULL)
32253 +               exit(1);
32254 +
32255 +       state.maxAct = max - min + 1;   /* size of this client's [min, max] slice */
32256 +       /* send start up queries in async manner */
32257 +       switch (ttype)
32258 +       {
32259 +               case WITH_TRANSACTION :
32260 +               case TPC_B_LIKE :
32261 +                       doMix(&state, debug, ttype);
32262 +                       break;
32263 +               case CUSTOM_QUERY :
32264 +                       doCustom(&state, debug, ttype);
32265 +                       break;
32266 +               default :
32267 +                       doOne(&state, debug, ttype);
32268 +                       break;
32269 +       }
32270 +       /* Loop ends when remains < min or the connection is gone; remains is presumably decremented by the do* helpers -- TODO confirm. */
32271 +       remains = max;
32272 +       for (;;)
32273 +       {
32274 +               if (remains < min || !state.con)
32275 +               {
32276 +                       break;
32277 +               }
32278 +
32279 +               FD_ZERO(&input_mask);
32280 +               /* Meta commands of a custom script need no backend reply, so the select() wait is skipped for them. */
32281 +               if (ttype != CUSTOM_QUERY || commands[state.state]->type != META_COMMAND)
32282 +               {
32283 +                       if (state.con == NULL)
32284 +                       {
32285 +                               if ((state.con = doConnect()) == NULL)
32286 +                               {
32287 +                                       exit(1);
32288 +                               }
32289 +                       }
32290 +                       sock = PQsocket(state.con);
32291 +
32292 +                       if (sock < 0)
32293 +                       {
32294 +                               fprintf(stderr, "Client %d: PQsocket failed\n", clientId);
32295 +                               disconnect_all(&state);
32296 +                               exit(1);
32297 +                       }
32298 +                       FD_SET(sock, &input_mask);
32299 +
32300 +                       if ((nsocks = select(sock + 1, &input_mask, (fd_set *) NULL,
32301 +                                                         (fd_set *) NULL, (struct timeval *) NULL)) < 0)
32302 +                       {
32303 +                               if (errno == EINTR)
32304 +                                       continue;
32305 +                               /* must be something wrong */
32306 +                               fprintf(stderr, "select failed: %s\n", strerror(errno));        /* report before disconnect_all() can overwrite errno */
32307 +                               disconnect_all(&state);
32308 +                               exit(1);
32309 +                       }
32310 +                       else if (nsocks == 0)
32311 +                       {                                               /* timeout */
32312 +                               fprintf(stderr, "select timeout\n");
32313 +                               fprintf(stderr, "client %d:state %d cnt %d ecnt %d listen %d\n",
32314 +                                               clientId, state.state, state.cnt, state.ecnt, state.listen);
32315 +                               exit(0);
32316 +                       }
32317 +               }
32318 +
32319 +               /* ok, backend returns reply */
32320 +               if (state.con && (FD_ISSET(PQsocket(state.con), &input_mask)
32321 +                                                 || (ttype == CUSTOM_QUERY
32322 +                                                         && commands[state.state]->type == META_COMMAND)))
32323 +               {
32324 +                       switch (ttype)
32325 +                       {
32326 +                               case WITH_TRANSACTION :
32327 +                               case TPC_B_LIKE :
32328 +                                       doMix(&state, debug, ttype);
32329 +                                       break;
32330 +                               case CUSTOM_QUERY :
32331 +                                       doCustom(&state, debug, ttype);
32332 +                                       break;
32333 +                               default :
32334 +                                       doOne(&state, debug, ttype);
32335 +                                       break;
32336 +                       }
32337 +               }
32338 +       }
32339 +       disconnect_all(&state);
32340 +       return 1;
32341 +}
32342 +
32343 +static int
32344 +doClient(int debug, int ttype)
32345 +{
32346 +       pid_t pid;
32347 +       int i;
32348 +       int min,max;
32349 +       int base,mo;
32350 +       /* Fork nclients children, hand each a contiguous slice [min, max] of the nxacts transactions, then wait for all of them. Returns 1. */
32351 +       base = nxacts / nclients;
32352 +       mo = nxacts % nclients;
32353 +       min = max = 0;
32354 +       for ( i = 0 ; i < nclients ; i ++)
32355 +       {
32356 +               min = max + 1;
32357 +               max += base + (i < mo ? 1 : 0); /* the first mo clients take one extra transaction each */
32358 +               pid = fork();
32359 +               if (pid < 0)
32360 +               {                                               /* report the failure instead of silently running fewer clients */
32361 +                       fprintf(stderr, "fork failed: %s\n", strerror(errno));
32362 +                       break;
32363 +               }
32364 +               if (pid == 0)
32365 +               {
32366 +                       doChild(i, min, max, debug, ttype);
32367 +                       exit(0);
32368 +               }
32369 +       }
32370 +       while ( wait(NULL) > 0)         /* reap every child that was started */
32371 +               ;
32372 +       return 1;
32373 +}
32374 +
32375 +int
32376 +main(int argc, char **argv)
32377 +{
32378 +       int                     c;
32379 +       int                     is_init_mode = 0;               /* initialize mode? */
32380 +       int                     is_no_vacuum = 0;               /* no vacuum at all before
32381 +                                                                                * testing? */
32382 +       int                     is_full_vacuum = 0;             /* do full vacuum before testing? */
32383 +       int                     debug = 0;              /* debug flag */
32384 +       int                     ttype = TPC_B_LIKE;             /* transaction type */
32385 +       char       *filename = NULL;
32386 +
32387 +       struct timeval tv1;                     /* start up time */
32388 +       struct timeval tv2;                     /* after establishing all connections to
32389 +                                                                * the backend */
32390 +       struct timeval tv3;                     /* end time */
32391 +
32392 +#if !(defined(__CYGWIN__) || defined(__MINGW32__))
32393 +       struct rlimit rlim;
32394 +#endif
32395 +
32396 +       PGconn     *con;
32397 +       PGresult   *res;
32398 +       char       *env;
32399 +
32400 +       if ((env = getenv("PGHOST")) != NULL && *env != '\0')
32401 +               pghost = env;
32402 +       if ((env = getenv("PGPORT")) != NULL && *env != '\0')
32403 +               pgport = env;
32404 +       else if ((env = getenv("PGUSER")) != NULL && *env != '\0')
32405 +               login = env;
32406 +
32407 +       while ((c = getopt(argc, argv, "ih:nvp:dc:t:s:u:P:CNSlTUIf:")) != -1)
32408 +       {
32409 +               switch (c)
32410 +               {
32411 +                       case 'i':
32412 +                               is_init_mode++;
32413 +                               break;
32414 +                       case 'h':
32415 +                               pghost = optarg;
32416 +                               break;
32417 +                       case 'n':
32418 +                               is_no_vacuum++;
32419 +                               break;
32420 +                       case 'v':
32421 +                               is_full_vacuum++;
32422 +                               break;
32423 +                       case 'p':
32424 +                               pgport = optarg;
32425 +                               break;
32426 +                       case 'd':
32427 +                               debug++;
32428 +                               break;
32429 +                       case 'S':
32430 +                               ttype = SELECT_ONLY;
32431 +                               break;
32432 +                       case 'I':
32433 +                               ttype = INSERT_ONLY;
32434 +                               break;
32435 +                       case 'U':
32436 +                               ttype = UPDATE_ONLY;
32437 +                               break;
32438 +                       case 'T':
32439 +                               ttype = WITH_TRANSACTION;
32440 +                               break;
32441 +                       case 'c':
32442 +                               nclients = atoi(optarg);
32443 +                               if (nclients <= 0 || nclients > MAXCLIENTS)
32444 +                               {
32445 +                                       fprintf(stderr, "invalid number of clients: %d\n", nclients);
32446 +                                       exit(1);
32447 +                               }
32448 +#if !(defined(__CYGWIN__) || defined(__MINGW32__))
32449 +#ifdef RLIMIT_NOFILE                   /* most platform uses RLIMIT_NOFILE */
32450 +                               if (getrlimit(RLIMIT_NOFILE, &rlim) == -1)
32451 +                               {
32452 +#else                                                  /* but BSD doesn't ... */
32453 +                               if (getrlimit(RLIMIT_OFILE, &rlim) == -1)
32454 +                               {
32455 +#endif   /* HAVE_RLIMIT_NOFILE */
32456 +                                       fprintf(stderr, "getrlimit failed. reason: %s\n", strerror(errno));
32457 +                                       exit(1);
32458 +                               }
32459 +                               if (rlim.rlim_cur <= (nclients + 2))
32460 +                               {
32461 +                                       fprintf(stderr, "You need at least %d open files resource but you are only allowed to use %ld.\n", nclients + 2, (long) rlim.rlim_cur);
32462 +                                       fprintf(stderr, "Use limit/ulimt to increase the limit before using pgbench.\n");
32463 +                                       exit(1);
32464 +                               }
32465 +#endif   /* #if !(defined(__CYGWIN__) || defined(__MINGW32__)) */
32466 +                               break;
32467 +                       case 'C':
32468 +                               is_connect = 1;
32469 +                               break;
32470 +                       case 's':
32471 +                               tps = atoi(optarg);
32472 +                               if (tps <= 0)
32473 +                               {
32474 +                                       fprintf(stderr, "invalid scaling factor: %d\n", tps);
32475 +                                       exit(1);
32476 +                               }
32477 +                               break;
32478 +                       case 't':
32479 +                               nxacts = atoi(optarg);
32480 +                               if (nxacts <= 0)
32481 +                               {
32482 +                                       fprintf(stderr, "invalid number of transactions: %d\n", nxacts);
32483 +                                       exit(1);
32484 +                               }
32485 +                               break;
32486 +                       case 'u':
32487 +                               login = optarg;
32488 +                               break;
32489 +                       case 'P':
32490 +                               pwd = optarg;
32491 +                               break;
32492 +                       case 'l':
32493 +                               use_log = true;
32494 +                               break;
32495 +                       case 'f':
32496 +                               ttype = CUSTOM_QUERY;
32497 +                               filename = optarg;
32498 +                               break;
32499 +                       default:
32500 +                               usage();
32501 +                               exit(1);
32502 +                               break;
32503 +               }
32504 +       }
32505 +
32506 +       if (argc > optind)
32507 +               dbName = argv[optind];
32508 +       else
32509 +       {
32510 +               if ((env = getenv("PGDATABASE")) != NULL && *env != '\0')
32511 +                       dbName = env;
32512 +               else if (login != NULL && *login != '\0')
32513 +                       dbName = login;
32514 +               else
32515 +                       dbName = "";
32516 +       }
32517 +
32518 +       if (is_init_mode)
32519 +       {
32520 +               init();
32521 +               exit(0);
32522 +       }
32523 +
32524 +       if (use_log)
32525 +       {
32526 +               char            logpath[64];
32527 +
32528 +               snprintf(logpath, 64, "pgbench_log.%d", getpid());
32529 +               LOGFILE = fopen(logpath, "w");
32530 +
32531 +               if (LOGFILE == NULL)
32532 +               {
32533 +                       fprintf(stderr, "Couldn't open logfile \"%s\": %s", logpath, strerror(errno));
32534 +                       exit(1);
32535 +               }
32536 +       }
32537 +
32538 +       if (debug)
32539 +       {
32540 +               printf("pghost: %s pgport: %s nclients: %d nxacts: %d dbName: %s\n",
32541 +                          pghost, pgport, nclients, nxacts, dbName);
32542 +       }
32543 +
32544 +       /* opening connection... */
32545 +       con = doConnect();
32546 +       if (con == NULL)
32547 +               exit(1);
32548 +
32549 +       if (PQstatus(con) == CONNECTION_BAD)
32550 +       {
32551 +               fprintf(stderr, "Connection to database '%s' failed.\n", dbName);
32552 +               fprintf(stderr, "%s", PQerrorMessage(con));
32553 +               exit(1);
32554 +       }
32555 +
32556 +       if (ttype == CUSTOM_QUERY)
32557 +       {
32558 +               PQfinish(con);
32559 +               if (process_file(filename) == false)
32560 +                       exit(1);
32561 +       }
32562 +       else
32563 +       {
32564 +               /*
32565 +                * get the scaling factor that should be same as count(*) from
32566 +                * branches...
32567 +                */
32568 +               res = PQexec(con, "select count(*) from branches");
32569 +               if (PQresultStatus(res) != PGRES_TUPLES_OK)
32570 +               {
32571 +                       fprintf(stderr, "%s", PQerrorMessage(con));
32572 +                       exit(1);
32573 +               }
32574 +               tps = atoi(PQgetvalue(res, 0, 0));
32575 +               if (tps < 0)
32576 +               {
32577 +                       fprintf(stderr, "count(*) from branches invalid (%d)\n", tps);
32578 +                       exit(1);
32579 +               }
32580 +               PQclear(res);
32581 +
32582 +               if (!is_no_vacuum)
32583 +               {
32584 +                       fprintf(stderr, "starting vacuum...");
32585 +                       res = PQexec(con, "vacuum branches");
32586 +                       if (PQresultStatus(res) != PGRES_COMMAND_OK)
32587 +                       {
32588 +                               fprintf(stderr, "%s", PQerrorMessage(con));
32589 +                               exit(1);
32590 +                       }
32591 +                       PQclear(res);
32592 +
32593 +                       res = PQexec(con, "vacuum tellers");
32594 +                       if (PQresultStatus(res) != PGRES_COMMAND_OK)
32595 +                       {
32596 +                               fprintf(stderr, "%s", PQerrorMessage(con));
32597 +                               exit(1);
32598 +                       }
32599 +                       PQclear(res);
32600 +
32601 +                       res = PQexec(con, "delete from history");
32602 +                       if (PQresultStatus(res) != PGRES_COMMAND_OK)
32603 +                       {
32604 +                               fprintf(stderr, "%s", PQerrorMessage(con));
32605 +                               exit(1);
32606 +                       }
32607 +                       PQclear(res);
32608 +                       res = PQexec(con, "vacuum history");
32609 +                       if (PQresultStatus(res) != PGRES_COMMAND_OK)
32610 +                       {
32611 +                               fprintf(stderr, "%s", PQerrorMessage(con));
32612 +                               exit(1);
32613 +                       }
32614 +                       PQclear(res);
32615 +
32616 +                       fprintf(stderr, "end.\n");
32617 +
32618 +                       if (is_full_vacuum)
32619 +                       {
32620 +                               fprintf(stderr, "starting full vacuum...");
32621 +                               res = PQexec(con, "vacuum analyze accounts");
32622 +                               if (PQresultStatus(res) != PGRES_COMMAND_OK)
32623 +                               {
32624 +                                       fprintf(stderr, "%s", PQerrorMessage(con));
32625 +                                       exit(1);
32626 +                               }
32627 +                               PQclear(res);
32628 +                               fprintf(stderr, "end.\n");
32629 +                       }
32630 +               }
32631 +               PQfinish(con);
32632 +       }
32633 +       
32634 +       /* set random seed */
32635 +       gettimeofday(&tv1, NULL);
32636 +       srand((unsigned int) tv1.tv_usec);
32637 +       /* get start up time */
32638 +       gettimeofday(&tv1, NULL);
32639 +       /* time after connections set up */
32640 +       gettimeofday(&tv2, NULL);
32641 +
32642 +       doClient(debug, ttype);
32643 +
32644 +       /* get end time */
32645 +       gettimeofday(&tv3, NULL);
32646 +       printResults(ttype, nxacts, &tv1, &tv2, &tv3);
32647 +       if (LOGFILE)
32648 +               fclose(LOGFILE);
32649 +       return 1;
32650 +}
32651 diff -aruN postgresql-8.2.4/src/pgcluster/tool/pgcbench.sh pgcluster-1.7.0rc7/src/pgcluster/tool/pgcbench.sh
32652 --- postgresql-8.2.4/src/pgcluster/tool/pgcbench.sh     1970-01-01 01:00:00.000000000 +0100
32653 +++ pgcluster-1.7.0rc7/src/pgcluster/tool/pgcbench.sh   2007-02-18 22:52:17.000000000 +0100
32654 @@ -0,0 +1,30 @@
32655 +#! /bin/bash
32656 +
32657 +set -e
32658 +
32659 +while getopts ih:nvp:dc:t:s:u:P:CNSlTUIf: opt; do
32660 +    case $opt in
32661 +    f)
32662 +       filename=$OPTARG
32663 +       ;;
32664 +    *)
32665 +       opts=(${opts[@]} -$opt $OPTARG)
32666 +       ;;
32667 +    esac
32668 +done
32669 +shift $(($OPTIND - 1))
32670 +dbname=$1
32671 +
32672 +tps=$(psql -At -c "SELECT count(*) FROM branches" $dbname)
32673 +
32674 +vacuumdb -t branches $dbname
32675 +vacuumdb -t tellers $dbname
32676 +psql -c "DELETE FROM history" $dbname
32677 +vacuumdb -t history $dbname
32678 +
32679 +if [ -z $filename ]; then
32680 +    pgcbench ${opts[@]} $@
32681 +else
32682 +    perl -pe "BEGIN { \$tps = $tps } s/\`([^\`]+)\`/eval \$1/eg" $filename \
32683 +       | pgcbench ${opts[@]} -f - $@
32684 +fi
32685 diff -aruN postgresql-8.2.4/src/pgcluster/tool/tpc-b_like.sql pgcluster-1.7.0rc7/src/pgcluster/tool/tpc-b_like.sql
32686 --- postgresql-8.2.4/src/pgcluster/tool/tpc-b_like.sql  1970-01-01 01:00:00.000000000 +0100
32687 +++ pgcluster-1.7.0rc7/src/pgcluster/tool/tpc-b_like.sql        2007-02-18 22:52:17.000000000 +0100
32688 @@ -0,0 +1,11 @@
32689 +\setrandom aid 1 `100000 * $tps`
32690 +\setrandom bid 1 `1 * $tps`
32691 +\setrandom tid 1 `10 * $tps`
32692 +\setrandom delta 1 1000
32693 +BEGIN
32694 +UPDATE accounts SET abalance = abalance + :delta WHERE aid = :aid
32695 +SELECT abalance FROM accounts WHERE aid = :aid
32696 +UPDATE tellers SET tbalance = tbalance + :delta WHERE tid = :tid
32697 +UPDATE branches SET bbalance = bbalance + :delta WHERE bid = :bid
32698 +INSERT INTO history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, current_timestamp)
32699 +END
This page took 2.595927 seconds and 3 git commands to generate.