1 diff -aruN postgresql-8.2.4/GNUmakefile.in pgcluster-1.7.0rc7/GNUmakefile.in
2 --- postgresql-8.2.4/GNUmakefile.in 2006-08-18 21:58:05.000000000 +0200
3 +++ pgcluster-1.7.0rc7/GNUmakefile.in 2007-02-18 22:52:16.000000000 +0100
6 ##########################################################################
8 -distdir := postgresql-$(VERSION)
9 +distdir := pgcluster-$(PGCLUSTER_VERSION)
11 -garbage := =* "#"* ."#"* *~* *.orig *.rej core postgresql-*
12 +garbage := =* "#"* ."#"* *~* *.orig *.rej core pgcluster-*
14 dist: $(distdir).tar.gz
15 ifeq ($(split-dist), yes)
16 -dist: postgresql-base-$(VERSION).tar.gz postgresql-docs-$(VERSION).tar.gz postgresql-opt-$(VERSION).tar.gz postgresql-test-$(VERSION).tar.gz
17 +dist: pgcluster-base-$(PGCLUSTER_VERSION).tar.gz pgcluster-docs-$(PGCLUSTER_VERSION).tar.gz pgcluster-opt-$(PGCLUSTER_VERSION).tar.gz pgcluster-test-$(PGCLUSTER_VERSION).tar.gz
22 src/tools src/tutorial \
23 $(addprefix src/pl/, plperl plpython tcl)
25 -docs_files := doc/postgres.tar.gz doc/src doc/TODO.detail
26 +docs_files := doc/pgcluster.tar.gz doc/src doc/TODO.detail
28 -postgresql-base-$(VERSION).tar: distdir
29 +pgcluster-base-$(PGCLUSTER_VERSION).tar: distdir
30 $(TAR) -c $(addprefix --exclude $(distdir)/, $(docs_files) $(opt_files) src/test) \
33 -postgresql-docs-$(VERSION).tar: distdir
34 +pgcluster-docs-$(PGCLUSTER_VERSION).tar: distdir
35 $(TAR) cf $@ $(addprefix $(distdir)/, $(docs_files))
37 -postgresql-opt-$(VERSION).tar: distdir
38 +pgcluster-opt-$(PGCLUSTER_VERSION).tar: distdir
39 $(TAR) cf $@ $(addprefix $(distdir)/, $(opt_files))
41 -postgresql-test-$(VERSION).tar: distdir
42 +pgcluster-test-$(PGCLUSTER_VERSION).tar: distdir
43 $(TAR) cf $@ $(distdir)/src/test
46 diff -aruN postgresql-8.2.4/INSTALL_PGCLUSTER pgcluster-1.7.0rc7/INSTALL_PGCLUSTER
47 --- postgresql-8.2.4/INSTALL_PGCLUSTER 1970-01-01 01:00:00.000000000 +0100
48 +++ pgcluster-1.7.0rc7/INSTALL_PGCLUSTER 2007-02-19 00:59:13.000000000 +0100
50 +PGCluster Installation Instructions
52 +=============================================================
54 +=============================================================
56 +1-1. Install Cluster DB Server, Replication Server & Load Balancer
57 +----------------------------------------------------------------
63 +# chown -R postgres /usr/local/pgsql
64 +----------------------------------------------------------------
66 +=============================================================
68 +=============================================================
71 +# mkdir /usr/local/pgsql/data
72 +# chown postgres /usr/local/pgsql/data
74 +$ /usr/local/pgsql/bin/initdb -D /usr/local/pgsql/data
77 +=============================================================
79 +=============================================================
80 +(EX.System Composition)
83 + ((Load Balance Server))
84 + ( hostname: lb.pgcluster.org)
85 + ( receive port:5432 )
86 + ( recovery port:6001 )
88 +----------+-------------+------------+----------
90 + (( Cluster DB 1 )) (( Cluster DB 2 ))
91 + ( hostname:c1.pgcluster.org) ( hostname:c2.pgcluster.org)
92 + ( receive port: 5432 ) ( receive port:5432 )
93 + ( recovery port:7001 ) ( recovery port:7002 )
95 +----------+-------------+------------+----------
97 + ((Replication Server))
98 + ( hostname:pgr.pgcluster.org)
99 + ( receive port:8001 )
100 + ( recovery port:8101 )
103 +3-1. Load Balance Server
105 +The setup file of load balance server is copied from the sample file and edited.
106 +(the sample file is installed in '/usr/local/pgsql/share' by default)
107 +----------------------------------------------------------------
108 +$cd /usr/local/pgsql/share
109 +$cp pglb.conf.sample pglb.conf
110 +----------------------------------------------------------------
112 +For the system composition example above,
113 +an example pglb.conf file is as follows:
115 +#============================================================
116 +# Load Balance Server configuration file
117 +#-------------------------------------------------------------
119 +#-------------------------------------------------------------
120 +# This file controls:
121 +# o which hosts are db cluster servers
122 +# o which port is used to connect to each db cluster server
123 +# o how many connections are allowed on each DB server
124 +#============================================================
125 +#-------------------------------------------------------------
126 +# set cluster DB server information
127 +# o Host_Name : hostname
128 +# o Port : Connection for postmaster
129 +# o Max_Connect : Maximum number of connections to postmaster
130 +#-------------------------------------------------------------
131 +<Cluster_Server_Info>
132 + <Host_Name> c1.pgcluster.org </Host_Name>
133 + <Port> 5432 </Port>
134 + <Max_Connect> 32 </Max_Connect>
135 +</Cluster_Server_Info>
136 +<Cluster_Server_Info>
137 + <Host_Name> c2.pgcluster.org </Host_Name>
138 + <Port> 5432 </Port>
139 + <Max_Connect> 32 </Max_Connect>
140 +</Cluster_Server_Info>
141 +#-------------------------------------------------------------
142 +# set Load Balance server information
143 +# o Host_Name : The host name of this load balance server.
144 +# -- please write a host name by FQDN or IP address.
145 +# o Backend_Socket_Dir : Unix domain socket path for the backend
146 +# o Receive_Port : Connection from client
147 +# o Recovery_Port : Connection for recovery process
148 +# o Max_Cluster_Num : Maximum number of cluster DB servers
149 +# o Use_Connection_Pooling : Use connection pool [yes/no]
150 +# o LifeCheck_Timeout : Timeout of the lifecheck response
151 +# o LifeCheck_Interval : Interval time of the lifecheck
154 +# 10min -- 10 minutes
156 +#-------------------------------------------------------------
157 +<Host_Name> lb.pgcluster.org </Host_Name>
158 +<Backend_Socket_Dir> /tmp </Backend_Socket_Dir>
159 +<Receive_Port> 5432 </Receive_Port>
160 +<Recovery_Port> 6001 </Recovery_Port>
161 +<Max_Cluster_Num> 128 </Max_Cluster_Num>
162 +<Use_Connection_Pooling> no </Use_Connection_Pooling>
163 +<LifeCheck_Timeout> 3s </LifeCheck_Timeout>
164 +<LifeCheck_Interval> 15s </LifeCheck_Interval>
165 +#-------------------------------------------------------------
166 +# Log file settings
168 +# o File_Name : Log file name with full path
169 +# o File_Size : Maximum size of each log files
170 +# Please specify in a number and unit(K or M)
174 +# o Rotate : Rotation times
175 +# If specified 0, old versions are removed.
176 +#-------------------------------------------------------------
178 + <File_Name> /tmp/pglb.log </File_Name>
179 + <File_Size> 1M </File_Size>
180 + <Rotate> 3 </Rotate>
183 +3-2. Cluster DB Server
185 +The Cluster DB server requires editing two configuration files
186 +('pg_hba.conf' and 'cluster.conf').
187 +These files are created under the $PG_DATA directory after 'initdb'.
190 +Permission to connect to the DB via IP connections is needed for this system.
193 +For the system composition example above,
194 +an example cluster.conf file is as follows:
196 +#============================================================
197 +# Cluster DB Server configuration file
198 +#-------------------------------------------------------------
199 +# file: cluster.conf
200 +#-------------------------------------------------------------
201 +# This file controls:
202 +# o which hosts & port are replication server
203 +# o which port use for replication request to replication server
204 +# o which command use for recovery function
206 +#============================================================
207 +#-------------------------------------------------------------
208 +# set cluster DB server information
209 +# o Host_Name : hostname
210 +# o Port : Connection port for postmaster
211 +# o Recovery_Port : Connection for recovery process
212 +#-------------------------------------------------------------
213 +<Replicate_Server_Info>
214 + <Host_Name> pgr.pgcluster.org </Host_Name>
215 + <Port> 8001 </Port>
216 + <Recovery_Port> 8101 </Recovery_Port>
217 +</Replicate_Server_Info>
218 +#-------------------------------------------------------------
219 +# set Cluster DB Server information
220 +# o Host_Name : Host name which connect with replication server
221 +# o Recovery_Port : Connection port for recovery
222 +# o Rsync_Path : Path of rsync command
223 +# o Rsync_Option : File transfer option for rsync
224 +# o Rsync_Compress : Use compression option for rsync
225 +# [yes/no]. default : yes
226 +# o Pg_Dump_Path : path of pg_dump
227 +# o When_Stand_Alone : When all replication servers have failed,
228 +# you can set one of two kinds of permission,
229 +# "read_only" or "read_write".
230 +# o Replication_Timeout : Timeout of each replication request
231 +# o LifeCheck_Timeout : Timeout of the lifecheck response
232 +# o LifeCheck_Interval : Interval time of the lifecheck
235 +# 10min -- 10 minutes
237 +#-------------------------------------------------------------
238 +<Host_Name> c1.pgcluster.org </Host_Name>
239 +<Recovery_Port> 7001 </Recovery_Port>
240 +<Rsync_Path> /usr/bin/rsync </Rsync_Path>
241 +<Rsync_Option> ssh -1 </Rsync_Option>
242 +<Rsync_Compress> yes </Rsync_Compress>
243 +<Pg_Dump_Path> /usr/local/pgsql/bin/pg_dump </Pg_Dump_Path>
244 +<When_Stand_Alone> read_only </When_Stand_Alone>
245 +<Replication_Timeout> 1min </Replication_Timeout>
246 +<LifeCheck_Timeout> 3s </LifeCheck_Timeout>
247 +<LifeCheck_Interval> 11s </LifeCheck_Interval>
248 +#-------------------------------------------------------------
249 +# set partial replication control information
250 +# set DB name and Table name to stop replication
251 +# o DB_Name : DB name
252 +# o Table_Name : Table name
253 +#-------------------------------------------------------------
254 +#<Not_Replicate_Info>
255 +# <DB_Name> test_db </DB_Name>
256 +# <Table_Name> log_table </Table_Name>
257 +#</Not_Replicate_Info>
259 +3-3. Replication Server
261 +The setup file of replication server is copied from the sample file and edited.
262 +(the sample file is installed in '/usr/local/pgsql/share' by default)
263 +----------------------------------------------------------------
264 +$cd /usr/local/pgsql/share
265 +$cp pgreplicate.conf.sample pgreplicate.conf
266 +----------------------------------------------------------------
267 +For the system composition example above,
268 +an example pgreplicate.conf file is as follows:
270 +#============================================================
271 +# PGReplicate configuration file
272 +#-------------------------------------------------------------
273 +# file: pgreplicate.conf
274 +#-------------------------------------------------------------
275 +# This file controls:
276 +# o which hosts & port are cluster server
277 +# o which port use for replication request from cluster server
278 +#============================================================
279 +#-------------------------------------------------------------
280 +# set cluster DB server information
281 +# o Host_Name : hostname
282 +# o Port : Connection port for postmaster
283 +# o Recovery_Port : Connection port for recovery
284 +#-------------------------------------------------------------
285 +<Cluster_Server_Info>
286 + <Host_Name> c1.pgcluster.org </Host_Name>
287 + <Port> 5432 </Port>
288 + <Recovery_Port> 7001 </Recovery_Port>
289 +</Cluster_Server_Info>
290 +<Cluster_Server_Info>
291 + <Host_Name> c2.pgcluster.org </Host_Name>
292 + <Port> 5432 </Port>
293 + <Recovery_Port> 7002 </Recovery_Port>
294 +</Cluster_Server_Info>
295 +#-------------------------------------------------------------
296 +# set Load Balance server information
297 +# o Host_Name : hostname
298 +# o Recovery_Port : Connection port for recovery
299 +#-------------------------------------------------------------
300 +<LoadBalance_Server_Info>
301 + <Host_Name> lb.pgcluster.org </Host_Name>
302 + <Recovery_Port> 6001 </Recovery_Port>
303 +</LoadBalance_Server_Info>
304 +#------------------------------------------------------------
305 +# A setup of the cascade connection between replication servers.
306 +# When you do not use RLOG recovery, you can skip this setup
308 +# o Host_Name : The host name of the upper replication server.
309 +# Please write a host name by FQDN or IP address.
310 +# o Port : The connection port with postmaster.
311 +# o Recovery_Port : The connection port at the time of
312 +# a recovery sequence.
313 +#------------------------------------------------------------
314 +#<Replicate_Server_Info>
315 +# <Host_Name> upper_replicate.pgcluster.org </Host_Name>
316 +# <Port> 8002 </Port>
317 +# <Recovery_Port> 8102 </Recovery_Port>
318 +#</Replicate_Server_Info>
320 +#-------------------------------------------------------------
321 +# A setup of a replication server
323 +# o Host_Name : The host name of this replication server.
324 +# Please write a host name by FQDN or IP address.
325 +# o Replication_Port : Connection port for replication
326 +# o Recovery_Port : Connection port for recovery
327 +# o RLOG_Port : Connection port for replication log
328 +# o Response_Mode : Timing which returns a response
329 +# - normal -- return result of DB which received the query
330 +# - reliable -- return result after waiting for response of
332 +# o Use_Replication_Log : Use replication log
333 +# [yes/no]. default : no
334 +# o Replication_Timeout : Timeout of each replication response
335 +# o LifeCheck_Timeout : Timeout of the lifecheck response
336 +# o LifeCheck_Interval : Interval time of the lifecheck
339 +# 10min -- 10 minutes
341 +#-------------------------------------------------------------
342 +<Host_Name> pgr.pgcluster.org </Host_Name>
343 +<Replication_Port> 8001 </Replication_Port>
344 +<Recovery_Port> 8101 </Recovery_Port>
345 +<RLOG_Port> 8301 </RLOG_Port>
346 +<Response_Mode> normal </Response_Mode>
347 +<Use_Replication_Log> no </Use_Replication_Log>
348 +<Replication_Timeout> 1min </Replication_Timeout>
349 +<LifeCheck_Timeout> 3s </LifeCheck_Timeout>
350 +<LifeCheck_Interval> 15s </LifeCheck_Interval>
351 +#-------------------------------------------------------------
352 +# Log file settings
354 +# o File_Name : Log file name with full path
355 +# o File_Size : maximum size of each log files
356 +# Please specify in a number and unit(K or M)
360 +# o Rotate : Rotation times
361 +# If specified 0, old versions are removed.
362 +#-------------------------------------------------------------
364 + <File_Name> /tmp/pgreplicate.log </File_Name>
365 + <File_Size> 1M </File_Size>
366 + <Rotate> 3 </Rotate>
369 +=============================================================
371 +=============================================================
373 +4-1. replication server
375 +A. Start replication server
376 +----------------------------------------------------------------
377 +$ /usr/local/pgsql/bin/pgreplicate -D /usr/local/pgsql/share
378 +----------------------------------------------------------------
380 +B. Stop replication server
381 +----------------------------------------------------------------
382 +$ /usr/local/pgsql/bin/pgreplicate -D /usr/local/pgsql/share stop
383 +----------------------------------------------------------------
385 +usage: pgreplicate [-D path_of_config_file] [-W path_of_work_files]
386 +[-w wait time before fork process][-U login user][-l][-n][-v][-h][stop]
387 + -l: print error logs in the log file.
388 + -n: don't run in daemon mode.
389 + -v: debug mode. need '-n' flag
390 + -h: print this help
391 + stop: stop pgreplicate
392 +(config file default path: ./pgreplicate.conf)
394 +4-2. cluster DB server
395 +$PG_HOME = /usr/local/pgsql
396 +$PG_DATA = /usr/local/pgsql/data
398 +A. Start cluster DB server
399 +----------------------------------------------------------------
400 +$ /usr/local/pgsql/bin/pg_ctl start -D /usr/local/pgsql/data
401 +----------------------------------------------------------------
403 +B. Stop cluster DB server
404 +----------------------------------------------------------------
405 +$ /usr/local/pgsql/bin/pg_ctl stop -D /usr/local/pgsql/data
406 +----------------------------------------------------------------
408 +C-1. RE start (recovery) cluster DB server with backup
409 +----------------------------------------------------------------
410 +$ /usr/local/pgsql/bin/pg_ctl start -D /usr/local/pgsql/data -o "-R"
411 +----------------------------------------------------------------
413 +C-2. RE start (recovery) cluster DB server without backup
414 +----------------------------------------------------------------
415 +$ /usr/local/pgsql/bin/pg_ctl start -D /usr/local/pgsql/data -o "-r"
416 +----------------------------------------------------------------
418 +D. Upgrade cluster DB server with pg_dump
419 +----------------------------------------------------------------
420 +$ /usr/local/pgsql/bin/pg_ctl start -D /usr/local/pgsql/data -o "-U"
421 +----------------------------------------------------------------
423 +4-3. load balance server
425 +A. Start load balance server
426 +----------------------------------------------------------------
427 +$ /usr/local/pgsql/bin/pglb -D /usr/local/pgsql/share
428 +----------------------------------------------------------------
430 +B. Stop load balance server
431 +----------------------------------------------------------------
432 +$ /usr/local/pgsql/bin/pglb -D /usr/local/pgsql/share stop
433 +----------------------------------------------------------------
435 +usage: pglb [-D path_of_config_file] [-W path_of_work_files] [-n][-v][-h][stop]
436 + -l: print error logs in the log file.
437 + -n: don't run in daemon mode.
438 + -v: debug mode. need '-n' flag
439 + -h: print this help
441 + (config file default path: ./pglb.conf)
442 diff -aruN postgresql-8.2.4/README_PGCLUSTER pgcluster-1.7.0rc7/README_PGCLUSTER
443 --- postgresql-8.2.4/README_PGCLUSTER 1970-01-01 01:00:00.000000000 +0100
444 +++ pgcluster-1.7.0rc7/README_PGCLUSTER 2007-02-19 01:00:40.000000000 +0100
446 +PGCluster: Multi-Master Synchronous Replication System for PostgreSQL
447 +===========================================================
449 +PGCluster is a multi-master and synchronous replication system that supports load balancing of PostgreSQL.
452 + $INSTALL_DIR/GNUmakefile.in
453 + $INSTALL_DIR/INSTALL_PGCLUSTER
454 + $INSTALL_DIR/README_PGCLUSTER
455 + $INSTALL_DIR/configure
456 + $INSTALL_DIR/configure.in
457 + $INSTALL_DIR/pgcluster.sh.tmpl
458 + $INSTALL_DIR/src/Makefile
459 + $INSTALL_DIR/src/Makefile.global.in
460 + $INSTALL_DIR/src/backend/Makefile
461 + $INSTALL_DIR/src/backend/access/transam/clog.c
462 + $INSTALL_DIR/src/backend/access/transam/xact.c
463 + $INSTALL_DIR/src/backend/catalog/catalog.c
464 + $INSTALL_DIR/src/backend/commands/analyze.c
465 + $INSTALL_DIR/src/backend/commands/copy.c
466 + $INSTALL_DIR/src/backend/commands/sequence.c
467 + $INSTALL_DIR/src/backend/executor/functions.c
468 + $INSTALL_DIR/src/backend/libpq/Makefile
469 + $INSTALL_DIR/src/backend/libpq/be-fsstubs.c
470 + $INSTALL_DIR/src/backend/libpq/cluster.conf.sample
471 + $INSTALL_DIR/src/backend/libpq/recovery.c
472 + $INSTALL_DIR/src/backend/libpq/lifecheck.c
473 + $INSTALL_DIR/src/backend/libpq/replicate.c
474 + $INSTALL_DIR/src/backend/libpq/replicate_com.c
475 + $INSTALL_DIR/src/backend/main/main.c
476 + $INSTALL_DIR/src/backend/parser/gram.y
477 + $INSTALL_DIR/src/backend/parser/keywords.c
478 + $INSTALL_DIR/src/backend/parser/parse_clause.c
479 + $INSTALL_DIR/src/backend/parser/parse_relation.c
480 + $INSTALL_DIR/src/backend/postmaster/postmaster.c
481 + $INSTALL_DIR/src/backend/storage/large_object/inv_api.c
482 + $INSTALL_DIR/src/backend/storage/lmgr/deadlock.c
483 + $INSTALL_DIR/src/backend/storage/lmgr/lmgr.c
484 + $INSTALL_DIR/src/backend/storage/lmgr/lock.c
485 + $INSTALL_DIR/src/backend/storage/lmgr/proc.c
486 + $INSTALL_DIR/src/backend/tcop/postgres.c
487 + $INSTALL_DIR/src/backend/tcop/pquery.c
488 + $INSTALL_DIR/src/backend/tcop/utility.c
489 + $INSTALL_DIR/src/backend/utils/adt/float.c
490 + $INSTALL_DIR/src/backend/utils/adt/nabstime.c
491 + $INSTALL_DIR/src/backend/utils/adt/ri_triggers.c
492 + $INSTALL_DIR/src/backend/utils/adt/timestamp.c
493 + $INSTALL_DIR/src/backend/utils/error/assert.c
494 + $INSTALL_DIR/src/backend/utils/error/elog.c
495 + $INSTALL_DIR/src/backend/utils/fmgr/fmgr.c
496 + $INSTALL_DIR/src/backend/utils/mb/mbutils.c
497 + $INSTALL_DIR/src/backend/utils/misc/guc.c
498 + $INSTALL_DIR/src/backend/utils/misc/postgresql.conf.sample
499 + $INSTALL_DIR/src/bin/initdb/initdb.c
500 + $INSTALL_DIR/src/bin/pg_dump/pg_dump.c
501 + $INSTALL_DIR/src/bin/pg_dump/pg_dumpall.c
502 + $INSTALL_DIR/src/include/pg_config.h.in
503 + $INSTALL_DIR/src/include/replicate.h
504 + $INSTALL_DIR/src/include/replicate_com.h
505 + $INSTALL_DIR/src/include/storage/lmgr.h
506 + $INSTALL_DIR/src/include/storage/proc.h
507 + $INSTALL_DIR/src/interfaces/libpq/Makefile
508 + $INSTALL_DIR/src/makefiles/Makefile.aix
509 + $INSTALL_DIR/src/makefiles/Makefile.freebsd
510 + $INSTALL_DIR/src/makefiles/Makefile.hpux
511 + $INSTALL_DIR/src/makefiles/Makefile.linux
512 + $INSTALL_DIR/src/makefiles/Makefile.netbsd
513 + $INSTALL_DIR/src/makefiles/Makefile.openbsd
514 + $INSTALL_DIR/src/makefiles/Makefile.solaris
515 + $INSTALL_DIR/src/makefiles/Makefile.sunos4
517 + $INSTALL_DIR/src/pgcluster/Makefile
518 + $INSTALL_DIR/src/pgcluster/libpgc/Makefile
519 + $INSTALL_DIR/src/pgcluster/libpgc/libpgc.h
520 + $INSTALL_DIR/src/pgcluster/libpgc/sem.c
521 + $INSTALL_DIR/src/pgcluster/libpgc/show.c
522 + $INSTALL_DIR/src/pgcluster/libpgc/signal.c
523 + $INSTALL_DIR/src/pgcluster/pglb/AUTHORS
524 + $INSTALL_DIR/src/pgcluster/pglb/COPYING
525 + $INSTALL_DIR/src/pgcluster/pglb/Makefile
526 + $INSTALL_DIR/src/pgcluster/pglb/child.c
527 + $INSTALL_DIR/src/pgcluster/pglb/cluster_table.c
528 + $INSTALL_DIR/src/pgcluster/pglb/lifecheck.c
529 + $INSTALL_DIR/src/pgcluster/pglb/load_balance.c
530 + $INSTALL_DIR/src/pgcluster/pglb/main.c
531 + $INSTALL_DIR/src/pgcluster/pglb/pglb.conf.sample
532 + $INSTALL_DIR/src/pgcluster/pglb/pglb.h
533 + $INSTALL_DIR/src/pgcluster/pglb/pool_auth.c
534 + $INSTALL_DIR/src/pgcluster/pglb/pool_connection_pool.c
535 + $INSTALL_DIR/src/pgcluster/pglb/pool_params.c
536 + $INSTALL_DIR/src/pgcluster/pglb/pool_process_query.c
537 + $INSTALL_DIR/src/pgcluster/pglb/pool_stream.c
538 + $INSTALL_DIR/src/pgcluster/pglb/recovery.c
539 + $INSTALL_DIR/src/pgcluster/pglb/socket.c
540 + $INSTALL_DIR/src/pgcluster/pgrp/AUTHORS
541 + $INSTALL_DIR/src/pgcluster/pgrp/COPYING
542 + $INSTALL_DIR/src/pgcluster/pgrp/Makefile
543 + $INSTALL_DIR/src/pgcluster/pgrp/cascade.c
544 + $INSTALL_DIR/src/pgcluster/pgrp/conf.c
545 + $INSTALL_DIR/src/pgcluster/pgrp/lifecheck.c
546 + $INSTALL_DIR/src/pgcluster/pgrp/main.c
547 + $INSTALL_DIR/src/pgcluster/pgrp/pgreplicate.conf.sample
548 + $INSTALL_DIR/src/pgcluster/pgrp/pgreplicate.h
549 + $INSTALL_DIR/src/pgcluster/pgrp/pqformat.c
550 + $INSTALL_DIR/src/pgcluster/pgrp/recovery.c
551 + $INSTALL_DIR/src/pgcluster/pgrp/replicate.c
552 + $INSTALL_DIR/src/pgcluster/pgrp/rlog.c
553 + $INSTALL_DIR/src/pgcluster/tool/Makefile
554 + $INSTALL_DIR/src/pgcluster/tool/README.jp
555 + $INSTALL_DIR/src/pgcluster/tool/pgcbench.c
556 + $INSTALL_DIR/src/pgcluster/tool/pgcbench.sh
557 + $INSTALL_DIR/src/pgcluster/tool/tpc-b_like.sql
559 +The latest version of this software may be obtained at
560 +http://pgfoundry.org/projects/pgcluster/
562 +For more information, see the project web site at
563 +http://pgcluster.projects.postgresql.org/
564 diff -aruN postgresql-8.2.4/configure pgcluster-1.7.0rc7/configure
565 --- postgresql-8.2.4/configure 2007-02-07 04:48:58.000000000 +0100
566 +++ pgcluster-1.7.0rc7/configure 2007-03-01 16:27:35.000000000 +0100
568 PACKAGE_STRING='PostgreSQL 8.2.4'
569 PACKAGE_BUGREPORT='pgsql-bugs@postgresql.org'
571 +PGCLUSTER_VERSION='1.7.0rc7'
573 ac_unique_file="src/backend/access/common/heaptuple.c"
574 ac_default_prefix=/usr/local/pgsql
575 # Factoring default headers for most tests.
580 -ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS configure_args build build_cpu build_vendor build_os host host_cpu host_vendor host_os PORTNAME docdir enable_nls WANTED_LANGUAGES default_port enable_shared enable_rpath enable_debug DTRACE DTRACEFLAGS enable_dtrace CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP GCC TAS autodepend INCLUDES enable_thread_safety with_tcl with_perl with_python with_krb5 krb_srvtab with_pam with_ldap with_bonjour with_openssl with_zlib EGREP ELF_SYS LDFLAGS_SL AWK FLEX FLEXFLAGS LN_S LD with_gnu_ld ld_R_works RANLIB ac_ct_RANLIB TAR STRIP ac_ct_STRIP STRIP_STATIC_LIB STRIP_SHARED_LIB YACC YFLAGS PERL perl_archlibexp perl_privlibexp perl_useshrplib perl_embed_ldflags PYTHON python_version python_configdir python_includespec python_libdir python_libspec python_additional_libs HAVE_IPV6 LIBOBJS acx_pthread_config PTHREAD_CC PTHREAD_LIBS PTHREAD_CFLAGS LDAP_LIBS_FE LDAP_LIBS_BE HAVE_POSIX_SIGNALS MSGFMT MSGMERGE XGETTEXT localedir TCLSH TCL_CONFIG_SH TCL_INCLUDE_SPEC TCL_LIB_FILE TCL_LIBS TCL_LIB_SPEC TCL_SHARED_BUILD TCL_SHLIB_LD_LIBS NSGMLS JADE have_docbook DOCBOOKSTYLE COLLATEINDEX SGMLSPL vpath_build LTLIBOBJS'
581 +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS configure_args build build_cpu build_vendor build_os host host_cpu host_vendor host_os PORTNAME docdir enable_nls WANTED_LANGUAGES default_port enable_shared enable_rpath enable_debug DTRACE DTRACEFLAGS enable_dtrace CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP GCC TAS autodepend INCLUDES enable_thread_safety with_tcl with_perl with_python with_krb5 krb_srvtab with_pam with_ldap with_bonjour with_openssl with_zlib EGREP ELF_SYS LDFLAGS_SL AWK FLEX FLEXFLAGS LN_S LD with_gnu_ld ld_R_works RANLIB ac_ct_RANLIB TAR STRIP ac_ct_STRIP STRIP_STATIC_LIB STRIP_SHARED_LIB YACC YFLAGS PERL perl_archlibexp perl_privlibexp perl_useshrplib perl_embed_ldflags PYTHON python_version python_configdir python_includespec python_libdir python_libspec python_additional_libs HAVE_IPV6 LIBOBJS acx_pthread_config PTHREAD_CC PTHREAD_LIBS PTHREAD_CFLAGS LDAP_LIBS_FE LDAP_LIBS_BE HAVE_POSIX_SIGNALS MSGFMT MSGMERGE XGETTEXT localedir TCLSH TCL_CONFIG_SH TCL_INCLUDE_SPEC TCL_LIB_FILE TCL_LIBS TCL_LIB_SPEC TCL_SHARED_BUILD TCL_SHLIB_LD_LIBS NSGMLS JADE have_docbook DOCBOOKSTYLE COLLATEINDEX SGMLSPL vpath_build LTLIBOBJS PGCLUSTER_VERSION'
584 # Initialize some variables set by options.
585 @@ -1241,6 +1243,10 @@
586 #define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
589 +cat >>confdefs.h <<_ACEOF
590 +#define PGCLUSTER_VERSION "$PGCLUSTER_VERSION"
594 # Let the site file select an alternate cache file if it wants to.
595 # Prefer explicitly selected file to automatically selected ones.
596 @@ -23555,6 +23561,7 @@
597 s,@host_os@,$host_os,;t t
598 s,@PORTNAME@,$PORTNAME,;t t
599 s,@docdir@,$docdir,;t t
600 +s,@PGCLUSTER_VERSION@,$PGCLUSTER_VERSION,;t t
601 s,@enable_nls@,$enable_nls,;t t
602 s,@WANTED_LANGUAGES@,$WANTED_LANGUAGES,;t t
603 s,@default_port@,$default_port,;t t
604 diff -aruN postgresql-8.2.4/configure.in pgcluster-1.7.0rc7/configure.in
605 --- postgresql-8.2.4/configure.in 2007-02-07 04:48:58.000000000 +0100
606 +++ pgcluster-1.7.0rc7/configure.in 2007-02-18 22:52:16.000000000 +0100
608 AC_SUBST(configure_args, [$ac_configure_args])
610 AC_DEFINE_UNQUOTED(PG_VERSION, "$PACKAGE_VERSION", [PostgreSQL version as a string])
611 +AC_DEFINE_UNQUOTED(PGCLUSTER_VERSION, "$PGCLUSTER_VERSION", [PGCluster version])
615 diff -aruN postgresql-8.2.4/pgcluster.sh.tmpl pgcluster-1.7.0rc7/pgcluster.sh.tmpl
616 --- postgresql-8.2.4/pgcluster.sh.tmpl 1970-01-01 01:00:00.000000000 +0100
617 +++ pgcluster-1.7.0rc7/pgcluster.sh.tmpl 2007-02-18 22:52:16.000000000 +0100
621 +# $FreeBSD: ports/databases/pgcluster/files/pgcluster.sh.tmpl,v 1.1 2004/01/26 09:02:45 kuriyama Exp $
623 +# PROVIDE: pgcluster
625 +# BEFORE: pgreplicate
628 +# Add the following line to /etc/rc.conf to enable pgcluster:
630 +# pgcluster_enable="YES"
632 +# pgcluster_data="/home/pgsql/data"
633 +# pgcluster_flags="-w -s"
636 +pgcluster_enable="NO"
637 +pgcluster_data="%%PREFIX%%/pgsql/data"
638 +pgcluster_flags="-w -s"
642 +load_rc_config pgcluster
645 +command=%%PREFIX%%/bin/pg_ctl
646 +pgcluster_user=pgsql
647 +extra_commands="initdb recover"
648 +initdb_cmd="pgcluster_initdb"
649 +recover_cmd="pgcluster_recover"
650 +start_cmd="pgcluster_start"
651 +stop_cmd="pgcluster_stop"
653 +pgcluster_flags="${pgcluster_flags} -D ${pgcluster_data}"
654 +pidfile="${pgcluster_data}/postmaster.pid"
658 + su -m ${pgcluster_user} -c "exec ${command} start ${pgcluster_flags} -o '-i'"
662 + su -m ${pgcluster_user} -c "exec ${command} stop ${pgcluster_flags} -m i"
666 + su -m ${pgcluster_user} -c "exec ${command} start ${pgcluster_flags} -o '-i -R'"
670 + su -m ${pgcluster_user} -c "exec %%PREFIX%%/bin/initdb -D ${pgcluster_data}"
673 +load_rc_config $name
675 diff -aruN postgresql-8.2.4/src/Makefile pgcluster-1.7.0rc7/src/Makefile
676 --- postgresql-8.2.4/src/Makefile 2006-06-23 01:50:35.000000000 +0200
677 +++ pgcluster-1.7.0rc7/src/Makefile 2007-02-18 22:52:16.000000000 +0100
679 all install installdirs uninstall dep depend distprep:
681 $(MAKE) -C timezone $@
682 + $(MAKE) -C interfaces $@
683 $(MAKE) -C backend $@
684 $(MAKE) -C backend/utils/mb/conversion_procs $@
685 $(MAKE) -C include $@
686 - $(MAKE) -C interfaces $@
689 $(MAKE) -C makefiles $@
690 $(MAKE) -C test/regress $@
691 + $(MAKE) -C pgcluster $@
693 install: install-local
696 rm -f $(addprefix '$(DESTDIR)$(pgxsdir)/$(subdir)'/, Makefile.global Makefile.port Makefile.shlib nls-global.mk)
699 + $(MAKE) -C pgcluster $@
701 $(MAKE) -C timezone $@
702 $(MAKE) -C backend $@
704 $(MAKE) -C test/thread $@
706 distclean maintainer-clean:
707 + -$(MAKE) -C pgcluster $@
709 -$(MAKE) -C timezone $@
710 -$(MAKE) -C backend $@
711 diff -aruN postgresql-8.2.4/src/Makefile.global.in pgcluster-1.7.0rc7/src/Makefile.global.in
712 --- postgresql-8.2.4/src/Makefile.global.in 2006-10-08 19:15:33.000000000 +0200
713 +++ pgcluster-1.7.0rc7/src/Makefile.global.in 2007-02-18 22:52:16.000000000 +0100
715 # PostgreSQL version number
716 VERSION = @PACKAGE_VERSION@
718 +# PGCluster version number
719 +PGCLUSTER_VERSION = @PGCLUSTER_VERSION@
721 # Support for VPATH builds
722 vpath_build = @vpath_build@
723 abs_top_srcdir = @abs_top_srcdir@
728 +CFLAGS += -DUSE_REPLICATION -DPRINT_DEBUG
732 diff -aruN postgresql-8.2.4/src/backend/Makefile pgcluster-1.7.0rc7/src/backend/Makefile
733 --- postgresql-8.2.4/src/backend/Makefile 2006-10-08 19:15:33.000000000 +0200
734 +++ pgcluster-1.7.0rc7/src/backend/Makefile 2007-02-18 22:52:16.000000000 +0100
736 ifneq ($(PORTNAME), win32)
737 ifneq ($(PORTNAME), aix)
740 +postgres: $(OBJS) $(libpq_srcdir)/libpq.a
741 $(CC) $(CFLAGS) $(LDFLAGS) $(export_dynamic) $^ $(LIBS) -o $@
745 $(INSTALL_DATA) $(srcdir)/libpq/pg_ident.conf.sample '$(DESTDIR)$(datadir)/pg_ident.conf.sample'
746 $(INSTALL_DATA) $(srcdir)/utils/misc/postgresql.conf.sample '$(DESTDIR)$(datadir)/postgresql.conf.sample'
747 $(INSTALL_DATA) $(srcdir)/access/transam/recovery.conf.sample '$(DESTDIR)$(datadir)/recovery.conf.sample'
748 + $(INSTALL_DATA) $(srcdir)/libpq/cluster.conf.sample $(DESTDIR)$(datadir)/cluster.conf.sample
750 install-bin: postgres $(POSTGRES_IMP) installdirs
751 $(INSTALL_PROGRAM) postgres$(X) '$(DESTDIR)$(bindir)/postgres$(X)'
753 $(MAKE) -C catalog uninstall-data
754 rm -f '$(DESTDIR)$(datadir)/pg_hba.conf.sample' \
755 '$(DESTDIR)$(datadir)/pg_ident.conf.sample' \
756 - '$(DESTDIR)$(datadir)/postgresql.conf.sample' \
757 - '$(DESTDIR)$(datadir)/recovery.conf.sample'
758 + '$(DESTDIR)$(datadir)/postgresql.conf.sample' \
759 + '$(DESTDIR)$(datadir)/recovery.conf.sample' \
760 + '$(DESTDIR)$(datadir)/cluster.conf.sample'
763 ##########################################################################
764 diff -aruN postgresql-8.2.4/src/backend/access/transam/clog.c pgcluster-1.7.0rc7/src/backend/access/transam/clog.c
765 --- postgresql-8.2.4/src/backend/access/transam/clog.c 2006-11-05 23:42:07.000000000 +0100
766 +++ pgcluster-1.7.0rc7/src/backend/access/transam/clog.c 2007-02-18 22:52:16.000000000 +0100
768 #define TransactionIdToByte(xid) (TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE)
769 #define TransactionIdToBIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_BYTE)
771 +#ifdef USE_REPLICATION
772 +#include "replicate.h"
773 +#endif /* USE_REPLICATION */
776 * Link to shared-memory data structures for CLOG control
779 /* Check to see if there's any files that could be removed */
780 if (!SlruScanDirectory(ClogCtl, cutoffPage, false))
781 +#ifdef USE_REPLICATION
783 + /* Perform a forced CHECKPOINT */
784 + /* CreateCheckPoint(false, true); */
785 + RequestCheckpoint(true, false);
786 +#endif /* USE_REPLICATION */
787 return; /* nothing to remove */
788 +#ifdef USE_REPLICATION
790 +#endif /* USE_REPLICATION */
792 /* Write XLOG record and flush XLOG to disk */
793 WriteTruncateXlogRec(cutoffPage);
794 diff -aruN postgresql-8.2.4/src/backend/access/transam/xact.c pgcluster-1.7.0rc7/src/backend/access/transam/xact.c
795 --- postgresql-8.2.4/src/backend/access/transam/xact.c 2006-11-23 02:14:59.000000000 +0100
796 +++ pgcluster-1.7.0rc7/src/backend/access/transam/xact.c 2007-02-18 22:52:16.000000000 +0100
798 #include "utils/relcache.h"
799 #include "utils/guc.h"
801 +#ifdef USE_REPLICATION
802 +#include "replicate.h"
803 +#endif /* USE_REPLICATION */
806 * User-tweakable parameters
807 @@ -4335,3 +4338,11 @@
809 appendStringInfo(buf, "UNKNOWN");
812 +#ifdef USE_REPLICATION
814 +PGR_Reload_Start_Time(void)
816 + xactStartTimestamp = GetCurrentTimestamp();
818 +#endif /* USE_REPLICATION */
819 diff -aruN postgresql-8.2.4/src/backend/catalog/catalog.c pgcluster-1.7.0rc7/src/backend/catalog/catalog.c
820 --- postgresql-8.2.4/src/backend/catalog/catalog.c 2006-10-04 02:29:50.000000000 +0200
821 +++ pgcluster-1.7.0rc7/src/backend/catalog/catalog.c 2007-02-18 22:52:16.000000000 +0100
823 #include "utils/fmgroids.h"
824 #include "utils/relcache.h"
826 +#ifdef USE_REPLICATION
827 +#include "replicate.h"
828 +#endif /* USE_REPLICATION */
830 #define OIDCHARS 10 /* max chars printed by %u */
834 GetNewOidWithIndex(Relation relation, Relation indexrel)
842 /* Generate new OIDs until we find one not in the table */
845 +#ifdef USE_REPLICATION
846 + if (PGR_Is_Sync_OID == true)
848 + newOid = PGRGetNewObjectId(newOid);
852 + newOid = GetNewObjectId();
855 newOid = GetNewObjectId();
857 +#endif /* USE_REPLICATION */
860 BTEqualStrategyNumber, F_OIDEQ,
863 return rnode.relNode;
866 diff -aruN postgresql-8.2.4/src/backend/commands/analyze.c pgcluster-1.7.0rc7/src/backend/commands/analyze.c
867 --- postgresql-8.2.4/src/backend/commands/analyze.c 2006-11-05 23:42:08.000000000 +0100
868 +++ pgcluster-1.7.0rc7/src/backend/commands/analyze.c 2007-02-18 22:52:16.000000000 +0100
870 #include "utils/syscache.h"
871 #include "utils/tuplesort.h"
873 +#ifdef USE_REPLICATION
874 +#include "replicate.h"
875 +#endif /* USE_REPLICATION */
877 /* Data structure for Algorithm S from Knuth 3.4.2 */
883 +#ifdef USE_REPLICATION
884 + return ((double) PGR_Random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
886 return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
887 +#endif /* USE_REPLICATION */
891 diff -aruN postgresql-8.2.4/src/backend/commands/copy.c pgcluster-1.7.0rc7/src/backend/commands/copy.c
892 --- postgresql-8.2.4/src/backend/commands/copy.c 2006-10-06 19:13:58.000000000 +0200
893 +++ pgcluster-1.7.0rc7/src/backend/commands/copy.c 2007-02-18 22:52:16.000000000 +0100
895 #include "utils/lsyscache.h"
896 #include "utils/memutils.h"
898 +#ifdef USE_REPLICATION
899 +#include "replicate.h"
900 +#endif /* USE_REPLICATION */
902 #define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
903 #define OCTVALUE(c) ((c) - '0')
905 CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
908 +#ifdef USE_REPLICATION
909 + char * ptr = (char *)databuf;
912 switch (cstate->copy_dest)
918 +#ifdef USE_REPLICATION
919 + PGR_Set_Copy_Data(PGRCopyData,ptr,bytesread,0);
920 +#endif /* USE_REPLICATION */
924 @@ -2093,6 +2102,13 @@
928 +#ifdef USE_REPLICATION
931 + PGR_Set_Copy_Data(PGRCopyData,(char *)NULL,0,1);
933 +#endif /* USE_REPLICATION */
936 error_context_stack = errcontext.previous;
938 @@ -2201,6 +2217,11 @@
942 +#ifdef USE_REPLICATION
944 + PGR_Set_Copy_Data(PGRCopyData,cstate->line_buf.data,cstate->line_buf.len,0);
948 /* Done reading the line. Convert it to server encoding. */
949 if (cstate->need_transcoding)
950 diff -aruN postgresql-8.2.4/src/backend/commands/prepare.c pgcluster-1.7.0rc7/src/backend/commands/prepare.c
951 --- postgresql-8.2.4/src/backend/commands/prepare.c 2006-10-04 02:29:51.000000000 +0200
952 +++ pgcluster-1.7.0rc7/src/backend/commands/prepare.c 2007-02-18 22:52:16.000000000 +0100
954 #include "utils/builtins.h"
955 #include "utils/memutils.h"
957 +#ifdef USE_REPLICATION
958 +#include "replicate.h"
959 +#endif /* USE_REPLICATION */
962 * The hash table in which prepared queries are stored. This is
964 result = construct_array(tmp_ary, len, REGTYPEOID, 4, true, 'i');
965 return PointerGetDatum(result);
969 +#ifdef USE_REPLICATION
971 +PGR_is_select_prepared_statement(PrepareStmt *stmt)
973 + PreparedStatement *entry;
974 + if ((stmt == NULL) || (stmt->name == NULL))
978 + entry = FetchPreparedStatement(stmt->name, true);
983 + if (!strcmp(entry->commandTag,"SELECT"))
989 +#endif /* USE_REPLICATION */
991 diff -aruN postgresql-8.2.4/src/backend/commands/sequence.c pgcluster-1.7.0rc7/src/backend/commands/sequence.c
992 --- postgresql-8.2.4/src/backend/commands/sequence.c 2006-10-06 19:13:58.000000000 +0200
993 +++ pgcluster-1.7.0rc7/src/backend/commands/sequence.c 2007-02-18 22:52:16.000000000 +0100
995 #include "utils/resowner.h"
996 #include "utils/syscache.h"
998 +#ifdef USE_REPLICATION
999 +#include "replicate.h"
1000 +#endif /* USE_REPLICATION */
1003 * We don't want to log each fetching of a value from a sequence,
1008 +#ifdef USE_REPLICATION
1009 + Xlog_Check_Replicate(CMD_UTILITY);
1010 +#endif /* USE_REPLICATION */
1011 sequence = makeRangeVarFromNameList(textToQualifiedNameList(seqin));
1012 relid = RangeVarGetRelid(sequence, false);
1014 @@ -622,6 +628,10 @@
1018 +#ifdef USE_REPLICATION
1019 + Xlog_Check_Replicate(CMD_UTILITY);
1020 +#endif /* USE_REPLICATION */
1022 /* open and AccessShareLock sequence */
1023 init_sequence(relid, &elm, &seqrel);
1025 diff -aruN postgresql-8.2.4/src/backend/executor/functions.c pgcluster-1.7.0rc7/src/backend/executor/functions.c
1026 --- postgresql-8.2.4/src/backend/executor/functions.c 2007-02-02 01:03:17.000000000 +0100
1027 +++ pgcluster-1.7.0rc7/src/backend/executor/functions.c 2007-02-18 22:52:16.000000000 +0100
1029 #include "utils/syscache.h"
1030 #include "utils/typcache.h"
1032 +#ifdef USE_REPLICATION
1033 +#include "replicate.h"
1034 +#endif /* USE_REPLICATION */
1037 * We have an execution_state record for each query in a function. Each
1038 @@ -454,6 +457,13 @@
1040 MemoryContext oldcontext;
1042 +#ifdef USE_REPLICATION
1043 + if ((es != NULL) && (es->qd != NULL))
1045 + Xlog_Check_Replicate(es->qd->operation);
1047 +#endif /* USE_REPLICATION */
1049 if (es->status == F_EXEC_START)
1050 postquel_start(es, fcache);
1052 diff -aruN postgresql-8.2.4/src/backend/libpq/Makefile pgcluster-1.7.0rc7/src/backend/libpq/Makefile
1053 --- postgresql-8.2.4/src/backend/libpq/Makefile 2003-11-29 20:51:49.000000000 +0100
1054 +++ pgcluster-1.7.0rc7/src/backend/libpq/Makefile 2007-02-18 22:52:16.000000000 +0100
1056 # be-fsstubs is here for historical reasons, probably belongs elsewhere
1058 OBJS = be-fsstubs.o be-secure.o auth.o crypt.o hba.o ip.o md5.o pqcomm.o \
1059 - pqformat.o pqsignal.o
1060 + pqformat.o pqsignal.o \
1061 + replicate.o replicate_com.o recovery.o lifecheck.o
1065 diff -aruN postgresql-8.2.4/src/backend/libpq/auth.c pgcluster-1.7.0rc7/src/backend/libpq/auth.c
1066 --- postgresql-8.2.4/src/backend/libpq/auth.c 2006-11-06 02:27:52.000000000 +0100
1067 +++ pgcluster-1.7.0rc7/src/backend/libpq/auth.c 2007-02-18 22:52:16.000000000 +0100
1069 #include "libpq/pqformat.h"
1070 #include "storage/ipc.h"
1072 +#ifdef USE_REPLICATION
1073 +#include "replicate.h"
1074 +#endif /* USE_REPLICATION */
1076 static void sendAuthRequest(Port *port, AuthRequest areq);
1077 static void auth_failed(Port *port, int status);
1078 @@ -888,6 +891,12 @@
1082 +#ifdef USE_REPLICATION
1083 + if (PGR_password == NULL)
1087 +#endif /* USE_REPLICATION */
1088 if (PG_PROTOCOL_MAJOR(port->proto) >= 3)
1090 /* Expect 'p' message type */
1091 @@ -939,6 +948,19 @@
1093 (errmsg("received password packet")));
1095 +#ifdef USE_REPLICATION
1096 + if (strncmp(buf.data,"md5",3) == 0)
1098 + char * ptr = NULL;
1099 + ptr = strchr(buf.data,'(');
1102 + PGR_get_md5salt(PGR_password->md5Salt,ptr);
1106 + strncpy(PGR_password->password,buf.data, PASSWORD_MAX_LENGTH );
1107 +#endif /* USE_REPLICATION */
1109 * Return the received string. Note we do not attempt to do any
1110 * character-set conversion on it; since we don't yet know the client's
1111 diff -aruN postgresql-8.2.4/src/backend/libpq/be-fsstubs.c pgcluster-1.7.0rc7/src/backend/libpq/be-fsstubs.c
1112 --- postgresql-8.2.4/src/backend/libpq/be-fsstubs.c 2006-09-07 17:37:25.000000000 +0200
1113 +++ pgcluster-1.7.0rc7/src/backend/libpq/be-fsstubs.c 2007-02-18 22:52:16.000000000 +0100
1115 #include "storage/large_object.h"
1116 #include "utils/memutils.h"
1118 +#ifdef USE_REPLICATION
1119 +#include "replicate.h"
1120 +#endif /* USE_REPLICATION */
1123 #define BUFSIZE 8192
1125 LargeObjectDesc *lobjDesc;
1128 +#ifdef USE_REPLICATION
1129 + if ((PGR_Stand_Alone != NULL) &&
1130 + (PGR_lo_open(lobjId,mode) != STATUS_OK))
1132 + if ((mode & INV_WRITE) &&
1133 + (PGR_Is_Stand_Alone() == true) &&
1134 + (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
1136 + elog(WARNING, "This query is not permitted when all replication servers fell down ");
1137 + PG_RETURN_INT32(-1);
1140 +#endif /* USE_REPLICATION */
1142 elog(DEBUG4, "lo_open(%u,%d)", lobjId, mode);
1145 errmsg("invalid large-object descriptor: %d", fd)));
1146 PG_RETURN_INT32(-1);
1148 +#ifdef USE_REPLICATION
1152 elog(DEBUG4, "lo_close(%d)", fd);
1154 @@ -183,6 +202,18 @@
1155 errmsg("large object descriptor %d was not opened for writing",
1158 +#ifdef USE_REPLICATION
1159 + if ((PGR_Stand_Alone != NULL) &&
1160 + (PGR_lo_write(fd, buf, len) != STATUS_OK))
1162 + if ((PGR_Is_Stand_Alone() == true) &&
1163 + (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
1165 + elog(WARNING, "This query is not permitted when all replication servers fell down ");
1170 status = inv_write(cookies[fd], buf, len);
1173 @@ -205,6 +236,10 @@
1174 PG_RETURN_INT32(-1);
1177 +#ifdef USE_REPLICATION
1178 + PGR_lo_lseek(fd, offset, whence);
1179 +#endif /* USE_REPLICATION */
1181 status = inv_seek(cookies[fd], offset, whence);
1183 PG_RETURN_INT32(status);
1184 @@ -221,6 +256,18 @@
1188 +#ifdef USE_REPLICATION
1189 + if ((PGR_Stand_Alone != NULL) &&
1190 + (PGR_lo_create(InvalidOid) != STATUS_OK))
1192 + if ((PGR_Is_Stand_Alone() == true) &&
1193 + (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
1195 + elog(WARNING, "This query is not permitted when all replication servers fell down ");
1196 + PG_RETURN_INT32(-1);
1199 +#endif /* USE_REPLICATION */
1200 lobjId = inv_create(InvalidOid);
1202 PG_RETURN_OID(lobjId);
1203 @@ -231,6 +278,18 @@
1205 Oid lobjId = PG_GETARG_OID(0);
1207 +#ifdef USE_REPLICATION
1208 + if ((PGR_Stand_Alone != NULL) &&
1209 + (PGR_lo_create(lobjId) != STATUS_OK))
1211 + if ((PGR_Is_Stand_Alone() == true) &&
1212 + (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
1214 + elog(WARNING, "This query is not permitted when all replication servers fell down ");
1215 + PG_RETURN_INT32(-1);
1218 +#endif /* USE_REPLICATION */
1220 * We don't actually need to store into fscxt, but create it anyway to
1221 * ensure that AtEOXact_LargeObject knows there is state to clean up
1222 @@ -263,6 +322,18 @@
1224 Oid lobjId = PG_GETARG_OID(0);
1226 +#ifdef USE_REPLICATION
1227 + if ((PGR_Stand_Alone != NULL) &&
1228 + (PGR_lo_unlink(lobjId) != STATUS_OK))
1230 + if ((PGR_Is_Stand_Alone() == true) &&
1231 + (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
1233 + elog(WARNING, "This query is not permitted when all replication servers fell down ");
1237 +#endif /* USE_REPLICATION */
1239 * If there are any open LO FDs referencing that ID, close 'em.
1241 @@ -360,6 +431,19 @@
1242 nbytes = MAXPGPATH - 1;
1243 memcpy(fnamebuf, VARDATA(filename), nbytes);
1244 fnamebuf[nbytes] = '\0';
1246 +#ifdef USE_REPLICATION
1247 + if ((PGR_Stand_Alone != NULL) &&
1248 + (PGR_lo_import((char*)fnamebuf) != STATUS_OK))
1250 + if ((PGR_Is_Stand_Alone() == true) &&
1251 + (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
1253 + elog(WARNING, "This query is not permitted when all replication servers fell down ");
1258 fd = PathNameOpenFile(fnamebuf, O_RDONLY | PG_BINARY, 0666);
1263 lobjOid = inv_create(InvalidOid);
1267 * read in from the filesystem and write to the inversion object
1269 diff -aruN postgresql-8.2.4/src/backend/libpq/cluster.conf.sample pgcluster-1.7.0rc7/src/backend/libpq/cluster.conf.sample
1270 --- postgresql-8.2.4/src/backend/libpq/cluster.conf.sample 1970-01-01 01:00:00.000000000 +0100
1271 +++ pgcluster-1.7.0rc7/src/backend/libpq/cluster.conf.sample 2007-02-18 22:52:16.000000000 +0100
1273 +#============================================================
1274 +# Cluster DB Server configuration file
1275 +#------------------------------------------------------------
1276 +# file: cluster.conf
1277 +#------------------------------------------------------------
1278 +# This file controls:
1279 +# o which hosts & ports are the replication servers
1280 +# o which port to use for replication requests to the replication server
1281 +# o which commands to use for the recovery function
1282 +#============================================================
1283 +#------------------------------------------------------------
1284 +# set Replication Server information
1285 +# o Host_Name : hostname
1286 +# o Port : Connection port for postmaster
1287 +# o Recovery_Port : Connection port for recovery process
1288 +#------------------------------------------------------------
1289 +<Replicate_Server_Info>
1290 + <Host_Name> replicate1.pgcluster.org </Host_Name>
1291 + <Port> 8001 </Port>
1292 + <Recovery_Port> 8101 </Recovery_Port>
1293 +</Replicate_Server_Info>
1294 +#<Replicate_Server_Info>
1295 +# <Host_Name> replicate2.pgcluster.org </Host_Name>
1296 +# <Port> 8002 </Port>
1297 +# <Recovery_Port> 8102 </Recovery_Port>
1298 +#</Replicate_Server_Info>
1299 +#<Replicate_Server_Info>
1300 +# <Host_Name> replicate3.pgcluster.org </Host_Name>
1301 +# <Port> 8003 </Port>
1302 +# <Recovery_Port> 8103 </Recovery_Port>
1303 +#</Replicate_Server_Info>
1304 +#-------------------------------------------------------------
1305 +# set Cluster DB Server information
1306 +# o Host_Name : Host name which connect with replication server
1307 +# o Recovery_Port : Connection port for recovery
1308 +# o Rsync_Path : Path of rsync command
1309 +# o Rsync_Option : File transfer option for rsync
1310 +# o Rsync_Compress : Use compression option for rsync
1311 +# [yes/no]. default : yes
1312 +# o Pg_Dump_Path : Path of pg_dump
1313 +# o When_Stand_Alone : When all replication servers are down,
1314 +# you can set one of two kinds of permission,
1315 +# "read_only" or "read_write".
1316 +# o Replication_Timeout : Timeout of each replication request
1317 +# o LifeCheck_Timeout : Timeout of the lifecheck response
1318 +# o LifeCheck_Interval : Interval time of the lifecheck
1320 +# 10s -- 10 seconds
1321 +# 10min -- 10 minutes
1323 +#-------------------------------------------------------------
1324 +<Host_Name> cluster1.pgcluster.org </Host_Name>
1325 +<Recovery_Port> 7001 </Recovery_Port>
1326 +<Rsync_Path> /usr/bin/rsync </Rsync_Path>
1327 +<Rsync_Option> ssh -1 </Rsync_Option>
1328 +<Rsync_Compress> yes </Rsync_Compress>
1329 +<Pg_Dump_Path> /usr/local/pgsql/bin/pg_dump </Pg_Dump_Path>
1330 +<When_Stand_Alone> read_only </When_Stand_Alone>
1331 +<Replication_Timeout> 1 min </Replication_Timeout>
1332 +<LifeCheck_Timeout> 3s </LifeCheck_Timeout>
1333 +<LifeCheck_Interval> 11s </LifeCheck_Interval>
1334 +#-------------------------------------------------------------
1335 +# set partitional replicate control information
1336 +# set DB name and Table name to exclude from replication
1337 +# o DB_Name : DB name
1338 +# o Table_Name : Table name
1339 +#-------------------------------------------------------------
1340 +#<Not_Replicate_Info>
1341 +# <DB_Name> test_db </DB_Name>
1342 +# <Table_Name> log_table </Table_Name>
1343 +#</Not_Replicate_Info>
1344 diff -aruN postgresql-8.2.4/src/backend/libpq/crypt.c pgcluster-1.7.0rc7/src/backend/libpq/crypt.c
1345 --- postgresql-8.2.4/src/backend/libpq/crypt.c 2006-07-14 16:52:19.000000000 +0200
1346 +++ pgcluster-1.7.0rc7/src/backend/libpq/crypt.c 2007-02-18 22:52:16.000000000 +0100
1348 #include "libpq/crypt.h"
1349 #include "libpq/md5.h"
1351 +#ifdef USE_REPLICATION
1352 +#include "replicate.h"
1353 +#endif /* USE_REPLICATION */
1356 md5_crypt_verify(const Port *port, const char *role, char *client_pass)
1358 if (isMD5(shadow_pass))
1360 /* stored password already encrypted, only do salt */
1361 - if (!pg_md5_encrypt(shadow_pass + strlen("md5"),
1362 - (char *) port->md5Salt,
1363 +#ifdef USE_REPLICATION
1364 + if ((PGR_password != NULL) &&
1365 + ((PGR_password->md5Salt[0] |
1366 + PGR_password->md5Salt[1] |
1367 + PGR_password->md5Salt[2] |
1368 + PGR_password->md5Salt[3]) != 0 ))
1370 + if (!pg_md5_encrypt(shadow_pass + strlen("md5"),
1371 + (char *) PGR_password->md5Salt,
1372 sizeof(port->md5Salt), crypt_pwd))
1375 + return STATUS_ERROR;
1381 - return STATUS_ERROR;
1382 +#endif /* USE_REPLICATION */
1383 + if (!pg_md5_encrypt(shadow_pass + strlen("md5"),
1384 + (char *) port->md5Salt,
1385 + sizeof(port->md5Salt), crypt_pwd))
1388 + return STATUS_ERROR;
1390 +#ifdef USE_REPLICATION
1392 +#endif /* USE_REPLICATION */
1396 @@ -134,6 +158,16 @@
1398 if (strcmp(crypt_client_pass, crypt_pwd) == 0)
1400 +#ifdef USE_REPLICATION
1402 + if (*(PGR_password->password) != '\0')
1404 + memset(PGR_password->password,0,PASSWORD_MAX_LENGTH);
1405 + memset(PGR_password->md5Salt,0,sizeof(PGR_password->md5Salt));
1406 + memset(PGR_password->cryptSalt,0,sizeof(PGR_password->cryptSalt));
1409 +#endif /* USE_REPLICATION */
1411 * Password OK, now check to be sure we are not past valuntil
1413 diff -aruN postgresql-8.2.4/src/backend/libpq/lifecheck.c pgcluster-1.7.0rc7/src/backend/libpq/lifecheck.c
1414 --- postgresql-8.2.4/src/backend/libpq/lifecheck.c 1970-01-01 01:00:00.000000000 +0100
1415 +++ pgcluster-1.7.0rc7/src/backend/libpq/lifecheck.c 2007-03-01 16:27:15.000000000 +0100
1417 +/*--------------------------------------------------------------------
1422 + * This file contains the functions that the backend source calls
1423 + * to perform the lifecheck.
1424 + * The low-level I/O functions called by these functions are
1425 + * contained in 'replicate_com.c'.
1427 + *--------------------------------------------------------------------
1430 +#ifdef USE_REPLICATION
1432 +#include "postgres.h"
1435 +#include <unistd.h>
1436 +#include <signal.h>
1437 +#include <sys/wait.h>
1441 +#include <sys/time.h>
1442 +#include <sys/types.h>
1443 +#include <sys/stat.h>
1444 +#include <sys/socket.h>
1445 +#include <sys/ipc.h>
1446 +#include <sys/shm.h>
1448 +#include <netinet/in.h>
1452 +#include <sys/param.h>
1453 +#include <sys/select.h>
1454 +#include <netinet/tcp.h>
1455 +#include <arpa/inet.h>
1456 +#include <sys/file.h>
1457 +#include <dirent.h>
1459 +#include "libpq/pqsignal.h"
1460 +#include "utils/guc.h"
1461 +#include "miscadmin.h"
1462 +#include "nodes/nodes.h"
1463 +#include "nodes/parsenodes.h"
1464 +#include "access/xact.h"
1465 +#include "access/xlog.h"
1466 +#include "tcop/tcopprot.h"
1467 +#include "postmaster/postmaster.h"
1469 +#include "replicate.h"
1474 +#ifdef HAVE_NETINET_TCP_H
1475 +#include <netinet/tcp.h>
1477 +#include <arpa/inet.h>
1480 +#ifndef HAVE_STRDUP
1481 +#include "strdup.h"
1483 +#ifdef HAVE_CRYPT_H
1488 +#include "mb/pg_wchar.h"
1491 +static void set_replication_server_status(int status);
1492 +static int send_lifecheck(int sock);
1493 +static int recv_lifecheck(int sock);
1494 +static void set_timeout(SIGNAL_ARGS);
1495 +static void exit_lifecheck(SIGNAL_ARGS);
1497 +ReplicateServerInfo * PGR_Replicator_4_Lifecheck = NULL;
1500 +PGR_Lifecheck_Main(void)
1502 + int status = STATUS_OK;
1506 + if ((pid = fork()) != 0 )
1511 + pqsignal(SIGHUP, exit_lifecheck);
1512 + pqsignal(SIGTERM, exit_lifecheck);
1513 + pqsignal(SIGINT, exit_lifecheck);
1514 + pqsignal(SIGQUIT, exit_lifecheck);
1515 + pqsignal(SIGALRM, set_timeout);
1516 + PG_SETMASK(&UnBlockSig);
1521 + PGR_Replicator_4_Lifecheck = PGR_check_replicate_server_info();
1522 + if (PGR_Replicator_4_Lifecheck == NULL)
1525 + sleep(PGR_Lifecheck_Interval);
1528 + /* get replication server information */
1529 + PGR_Replicator_4_Lifecheck = PGR_get_replicate_server_info();
1530 + if (PGR_Replicator_4_Lifecheck == NULL)
1532 + if (Debug_pretty_print)
1534 + elog(DEBUG1,"not found replication server");
1536 + return STATUS_ERROR;
1538 + sock = PGR_get_replicate_server_socket( PGR_Replicator_4_Lifecheck , PGR_QUERY_SOCKET );
1541 + set_replication_server_status(DATA_ERR);
1542 + if (Debug_pretty_print)
1543 + elog(DEBUG1,"get_replicate_server_socket failed");
1547 + /* set alarm as lifecheck timeout */
1548 + alarm(PGR_Lifecheck_Timeout * 2);
1550 + /* send lifecheck to replication server */
1551 + status = send_lifecheck(sock);
1552 + if (status != STATUS_OK)
1554 + set_replication_server_status(DATA_ERR);
1557 + if (Debug_pretty_print)
1558 + elog(DEBUG1,"send life check failed");
1562 + /* receive lifecheck response */
1563 + status = recv_lifecheck(sock);
1564 + if (status != STATUS_OK)
1566 + set_replication_server_status(DATA_ERR);
1569 + if (Debug_pretty_print)
1570 + elog(DEBUG1,"receive life check failed");
1576 + set_replication_server_status(DATA_USE);
1578 + /* wait next lifecheck as interval */
1579 + sleep(PGR_Lifecheck_Interval);
1584 +set_replication_server_status(int status)
1586 + if (status == DATA_ERR)
1588 + PGR_Replicator_4_Lifecheck->retry_count ++;
1589 + if (PGR_Replicator_4_Lifecheck->retry_count > MAX_RETRY_TIMES)
1591 + PGR_Set_Replication_Server_Status(PGR_Replicator_4_Lifecheck, status);
1596 + PGR_Replicator_4_Lifecheck->retry_count = 0;
1597 + PGR_Set_Replication_Server_Status(PGR_Replicator_4_Lifecheck, status);
1602 +send_lifecheck(int sock)
1604 + ReplicateHeader header;
1606 + struct timeval timeout;
1607 + int send_size = 0;
1609 + char * send_ptr = (char *)&header;
1613 + timeout.tv_sec = PGR_Lifecheck_Timeout;
1614 + timeout.tv_usec = 0;
1616 + memset(&header,0,sizeof(ReplicateHeader));
1617 + header.cmdSys = CMD_SYS_LIFECHECK;
1618 + header.cmdSts = CMD_STS_CLUSTER;
1619 + buf_size = sizeof(ReplicateHeader);
1624 + FD_SET(sock,&wmask);
1625 + rtn = select(sock+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
1628 + if (errno == EINTR)
1634 + elog(DEBUG1, "send_lifecheck():select() failed");
1635 + return STATUS_ERROR;
1638 + else if (rtn && FD_ISSET(sock, &wmask))
1640 + s = send(sock,send_ptr + send_size,buf_size - send_size ,0);
1642 + if (errno == EINTR)
1646 + if (errno == EAGAIN)
1650 + elog(DEBUG1, "send_replicate_packet():send error");
1652 + /* EPIPE || ENCONNREFUSED || ENSOCK || EHOSTUNREACH */
1653 + return STATUS_ERROR;
1654 + } else if (s == 0) {
1655 + elog(DEBUG1, "send_lifecheck():unexpected EOF");
1656 + return STATUS_ERROR;
1657 + } else /*if (s > 0)*/ {
1659 + if (send_size == buf_size)
1669 +recv_lifecheck(int sock)
1671 + int status = STATUS_OK;
1672 + char result[PGR_MESSAGE_BUFSIZE];
1674 + memset(result,0,PGR_MESSAGE_BUFSIZE);
1675 + status = PGR_recv_replicate_result(sock,result, PGR_Lifecheck_Timeout);
1676 + return ((status >= 0) ?STATUS_OK:STATUS_ERROR);
1680 +set_timeout(SIGNAL_ARGS)
1682 + if (PGR_Replicator_4_Lifecheck != NULL)
1684 + set_replication_server_status(DATA_ERR);
1685 + if (Debug_pretty_print)
1686 + elog(DEBUG1,"time out is occured in life check");
1691 +exit_lifecheck(SIGNAL_ARGS)
1693 + fprintf(stderr,"lifecheck stopped\n");
1697 +#endif /* USE_REPLICATION */
1698 diff -aruN postgresql-8.2.4/src/backend/libpq/recovery.c pgcluster-1.7.0rc7/src/backend/libpq/recovery.c
1699 --- postgresql-8.2.4/src/backend/libpq/recovery.c 1970-01-01 01:00:00.000000000 +0100
1700 +++ pgcluster-1.7.0rc7/src/backend/libpq/recovery.c 2007-02-18 22:52:16.000000000 +0100
1702 +/*--------------------------------------------------------------------
1707 + * This file contains the functions that the backend source calls
1708 + * to perform the recovery.
1709 + * The low-level I/O functions called by these functions are
1710 + * contained in 'replicate_com.c'.
1712 + *--------------------------------------------------------------------
1715 +/*--------------------------------------
1716 + * INTERFACE ROUTINES
1719 + * PGR_recovery_finish_send
1721 + * PGR_Master_Main(void);
1722 + * recovery module:
1723 + * PGR_Recovery_Main
1724 + *-------------------------------------
1726 +#ifdef USE_REPLICATION
1728 +#include "postgres.h"
1731 +#include <unistd.h>
1732 +#include <signal.h>
1733 +#include <sys/wait.h>
1737 +#include <sys/time.h>
1738 +#include <sys/types.h>
1739 +#include <sys/stat.h>
1740 +#include <sys/socket.h>
1741 +#include <sys/ipc.h>
1742 +#include <sys/shm.h>
1744 +#include <netinet/in.h>
1748 +#include <sys/param.h>
1749 +#include <sys/select.h>
1750 +#include <netinet/tcp.h>
1751 +#include <arpa/inet.h>
1752 +#include <sys/file.h>
1753 +#include <dirent.h>
1755 +#include "libpq/pqsignal.h"
1756 +#include "utils/guc.h"
1757 +#include "miscadmin.h"
1758 +#include "nodes/nodes.h"
1759 +#include "nodes/parsenodes.h"
1760 +#include "access/xact.h"
1761 +#include "access/xlog.h"
1762 +#include "tcop/tcopprot.h"
1763 +#include "postmaster/postmaster.h"
1765 +#include "../interfaces/libpq/libpq-fe.h"
1766 +#include "../interfaces/libpq/libpq-int.h"
1767 +#include "../interfaces/libpq/fe-auth.h"
1769 +#include "replicate.h"
1774 +#ifdef HAVE_NETINET_TCP_H
1775 +#include <netinet/tcp.h>
1777 +#include <arpa/inet.h>
1780 +#ifndef HAVE_STRDUP
1781 +#include "strdup.h"
1783 +#ifdef HAVE_CRYPT_H
1788 +#include "mb/pg_wchar.h"
1791 +#define RECOVERY_LOOP_END (0)
1792 +#define RECOVERY_LOOP_CONTINUE (1)
1793 +#define RECOVERY_LOOP_FAIL (2)
1794 +char Local_Host_Name[HOSTNAME_MAX_LENGTH];
1795 +int PGR_Recovery_Mode = 0;
1797 +static int read_packet(int sock,RecoveryPacket * packet);
1798 +static int send_recovery_packet(int sock, RecoveryPacket * packet);
1799 +static int send_packet(int * sock, RecoveryPacket * packet );
1800 +static void master_loop(int fd);
1801 +static int start_recovery_send(int * sock, ReplicateServerInfo * host);
1802 +static int stop_recovery_send(int * sock, ReplicateServerInfo * host);
1803 +static int rsync_pg_data(char * src , char * dest);
1804 +static int remove_dir(char * dir_name);
1805 +static int clear_bkup_dir(char * dir_name);
1806 +static int bkup_dir(char * dir_name);
1807 +static int restore_dir(char * dir_name);
1808 +static int rsync_global_dir(char * src, char * dest, int stage);
1809 +static int first_recovery(char * src, char * dest, char * dir);
1810 +static int second_recovery(char * src, char * dest, char * dir);
1811 +static int recovery_rsync(char * src , char * dest, int stage);
1812 +static int recovery_loop(int fd, int mode);
1813 +static void show_recovery_packet(RecoveryPacket * packet);
1814 +static int direct_send_packet(int packet_no);
1815 +static void set_recovery_packet(RecoveryPacket * packet, int packet_no);
1816 +static int cold_recovery(char * src, RecoveryPacket *packet, bool need_sync_table_space, int stage);
1817 +static int hot_recovery(RecoveryPacket *packet, int stage);
1818 +static int restore_from_dumpall( char * hostName, uint16_t portNum, char * userName);
1819 +static int restore_from_dump( char * hostName, uint16_t portNum, char * userName, char * dbName);
1820 +static int restore_from_each_dump( char * hostName, uint16_t portNum, char * userName);
1821 +static PGresult * get_dbName(char * hostName, uint16_t portNum, char * userName);
1823 +static int sync_table_space(char * hostName, uint16_t portNum, char * userName, int stage);
1824 +static PGresult * get_table_space_location(char * hostName, uint16_t portNum, char * userName);
1825 +static int rsync_table_space(char * hostName, char * location, int stage);
1827 +int PGR_recovery_error_send(void);
1828 +int PGR_recovery_finish_send(void);
1829 +int PGR_recovery_queue_data_req(void);
1830 +int PGR_Master_Main(void);
1831 +int PGR_Recovery_Main(int mode);
1834 +read_packet(int sock,RecoveryPacket * packet)
1838 + int read_size = 0;
1839 + int packet_size = 0;
1841 + read_ptr = (char*)packet;
1842 + packet_size = sizeof(RecoveryPacket);
1845 + r = recv(sock,read_ptr + read_size ,packet_size, MSG_WAITALL);
1847 + if (errno == EINTR || errno == EAGAIN) {
1850 + elog(DEBUG1, "read_packet():recv failed");
1853 + } else if (r == 0) {
1854 + elog(DEBUG1, "read_packet():unexpected EOF");
1856 + } else /*if (r > 0)*/ {
1858 + if (read_size == packet_size) {
1859 + show_recovery_packet(packet);
1868 +send_recovery_packet(int sock, RecoveryPacket * packet)
1876 + struct timeval timeout;
1878 + timeout.tv_sec = RECOVERY_TIMEOUT;
1879 + timeout.tv_usec = 0;
1882 + * Wait for something to happen.
1889 + timeout.tv_sec = RECOVERY_TIMEOUT;
1890 + timeout.tv_usec = 0;
1893 + FD_SET(sock,&wmask);
1894 + rtn = select(sock+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
1898 + if (errno == EINTR || errno == EAGAIN)
1908 + else if (rtn && FD_ISSET(sock, &wmask))
1910 + send_ptr = (char *)packet;
1911 + buf_size = sizeof(RecoveryPacket);
1913 + s = send(sock,send_ptr + send_size,buf_size - send_size ,0);
1915 + if (errno == EINTR || errno == EAGAIN) {
1918 + elog(DEBUG1, "send_recovery_packet():send error");
1920 + /* EPIPE || ENCONNREFUSED || ENSOCK || EHOSTUNREACH */
1921 + return STATUS_ERROR;
1922 + } else if (s == 0) {
1923 + elog(DEBUG1, "send_recovery_packet():unexpected EOF");
1924 + return STATUS_ERROR;
1925 + } else /*if (s > 0)*/ {
1927 + if (send_size == buf_size)
1935 + return STATUS_ERROR;
1939 +send_packet(int * sock, RecoveryPacket * packet )
1942 + ReplicateServerInfo * host = NULL;
1944 + host = PGR_get_replicate_server_info();
1945 + if (host == (ReplicateServerInfo*)NULL)
1947 + return STATUS_ERROR;
1950 + while (send_recovery_packet(*sock,packet) != STATUS_OK)
1952 + if (count < MAX_RETRY_TIMES )
1959 + PGR_Set_Replication_Server_Status(host,DATA_ERR);
1960 + host = PGR_get_replicate_server_info();
1961 + if (host == (ReplicateServerInfo*)NULL)
1963 + return STATUS_ERROR;
1965 + PGR_Set_Replication_Server_Status(host,DATA_USE);
1966 + PGR_Create_Socket_Connect(sock, host->hostName , host->recoveryPortNumber);
1972 +master_loop(int fd)
1976 + int status = STATUS_OK;
1977 + RecoveryPacket packet;
1979 + bool loop_end = false;
1982 + while ((status = PGR_Create_Acception(fd,&sock,"",RecoveryPortNumber)) != STATUS_OK)
1984 + PGR_Close_Sock(&sock);
1986 + if ( count > MAX_RETRY_TIMES)
1996 + struct timeval timeout;
1998 + timeout.tv_sec = RECOVERY_TIMEOUT;
1999 + timeout.tv_usec = 0;
2002 + * Wait for something to happen.
2005 + FD_SET(sock,&rmask);
2006 + memset(&packet,0,sizeof(RecoveryPacket));
2007 + rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
2008 + if (rtn && FD_ISSET(sock, &rmask))
2010 + r_size = read_packet(sock,&packet);
2015 + else if (r_size < 0)
2025 + switch (ntohs(packet.packet_no))
2027 + case RECOVERY_PGDATA_REQ :
2029 + * PGDATA information request
2032 + * get master server information
2034 + memset(&packet,0,sizeof(packet));
2035 + set_recovery_packet(&packet, RECOVERY_PGDATA_ANS) ;
2036 + status = send_packet(&sock,&packet);
2037 + PGR_Set_Cluster_Status(STATUS_RECOVERY);
2039 + case RECOVERY_FSYNC_REQ :
2041 + * get master server information
2043 + memset(&packet,0,sizeof(packet));
2044 + set_recovery_packet(&packet, RECOVERY_FSYNC_ANS );
2045 + status = send_packet(&sock,&packet);
2046 + PGR_Set_Cluster_Status(STATUS_RECOVERY);
2049 + case RECOVERY_ERROR_TARGET_ONLY:
2050 + memset(&packet,0,sizeof(packet));
2051 + set_recovery_packet(&packet, RECOVERY_ERROR_ANS );
2052 + status = send_packet(&sock,&packet);
2053 + PGR_Set_Cluster_Status(STATUS_REPLICATED);
2055 + case RECOVERY_ERROR_CONNECTION:
2056 + memset(&packet,0,sizeof(packet));
2057 + set_recovery_packet(&packet, RECOVERY_ERROR_ANS );
2058 + status = send_packet(&sock,&packet);
2059 + PGR_Set_Cluster_Status(STATUS_REPLICATED);
2061 + * kill broken cluster db.
2062 + * FIXME: misuse of MyProcPid here. It must be the postmaster's pid.
2063 + * However, due to a bug MyProcPid isn't initialized properly, so MyProcPid == postmaster's pid.
2064 + * To fix this, define a variable that holds the postmaster's pid.
2066 + kill(MyProcPid,SIGQUIT);
2069 + case RECOVERY_ERROR_ANS:
2070 + /* TODO: recovery failed. close this postmaster */
2073 + case RECOVERY_FINISH:
2074 + PGR_Set_Cluster_Status(STATUS_REPLICATED);
2086 + PGR_Close_Sock(&sock);
2090 +PGR_Master_Main(void)
2097 + if ((pid = fork()) != 0 )
2102 + memset(Local_Host_Name,0,sizeof(Local_Host_Name));
2103 + gethostname(Local_Host_Name,sizeof(Local_Host_Name));
2104 + pqsignal(SIGHUP, authdie);
2105 + pqsignal(SIGTERM, authdie);
2106 + pqsignal(SIGINT, authdie);
2107 + pqsignal(SIGQUIT, authdie);
2108 + pqsignal(SIGALRM, authdie);
2109 + PG_SETMASK(&UnBlockSig);
2111 + status = STATUS_ERROR;
2112 + status = PGR_Create_Socket_Bind(&fd, "", RecoveryPortNumber);
2114 + if (status != STATUS_OK)
2121 + struct timeval timeout;
2123 + timeout.tv_sec = 60;
2124 + timeout.tv_usec = 0;
2127 + * Wait for something to happen.
2130 + FD_SET(fd,&rmask);
2131 + rtn = select(fd+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
2132 + if (rtn && FD_ISSET(fd, &rmask))
2141 +start_recovery_send(int * sock, ReplicateServerInfo * host)
2144 + RecoveryPacket packet;
2145 + status = PGR_Create_Socket_Connect(sock, host->hostName, host->recoveryPortNumber);
2146 + if (status != STATUS_OK)
2148 + if (Debug_pretty_print)
2150 + elog(DEBUG1,"connection error to replication server");
2152 + return STATUS_ERROR;
2155 + memset(&packet,0,sizeof(packet));
2156 + set_recovery_packet(&packet, RECOVERY_PREPARE_REQ );
2157 + status = send_packet(sock,&packet);
2163 +stop_recovery_send(int * sock, ReplicateServerInfo * host)
2166 + RecoveryPacket packet;
2168 + memset(&packet,0,sizeof(packet));
2169 + set_recovery_packet(&packet, RECOVERY_ERROR_ANS );
2170 + status = send_packet(sock,&packet);
2175 +direct_send_packet(int packet_no)
2180 + ReplicateServerInfo * host;
2181 + RecoveryPacket packet;
2183 + host = PGR_get_replicate_server_info();
2186 + return STATUS_ERROR;
2188 + status = PGR_Create_Socket_Connect(&fd, host->hostName, host->recoveryPortNumber);
2189 + if (status != STATUS_OK)
2191 + PGR_Set_Replication_Server_Status(host,DATA_ERR);
2192 + return STATUS_ERROR;
2195 + memset(&packet,0,sizeof(packet));
2196 + set_recovery_packet(&packet, packet_no );
2197 + status = send_packet(&fd,&packet);
2205 +PGR_recovery_error_send(void)
2207 + return direct_send_packet(RECOVERY_ERROR_ANS);
2211 +PGR_recovery_finish_send(void)
2213 + return direct_send_packet(RECOVERY_FINISH);
2217 +PGR_recovery_queue_data_req(void)
2219 + int status = STATUS_OK;
2221 + int rtn = STATUS_OK;
2223 + ReplicateServerInfo * host = NULL;
2224 + RecoveryPacket packet;
2226 + host = PGR_get_replicate_server_info();
2229 + return STATUS_ERROR;
2231 + status = PGR_Create_Socket_Connect(&fd, host->hostName, host->recoveryPortNumber);
2232 + if (status != STATUS_OK)
2234 + PGR_Set_Replication_Server_Status(host,DATA_ERR);
2235 + PGR_Set_Cluster_Status(STATUS_REPLICATED);
2237 + return STATUS_ERROR;
2240 + memset(&packet,0,sizeof(packet));
2241 + PGRset_recovery_packet_no(&packet, RECOVERY_QUEUE_DATA_REQ );
2242 + status = send_packet(&fd,&packet);
2243 + if (status != STATUS_OK)
2245 + status = stop_recovery_send(&fd,host);
2246 + PGR_Set_Cluster_Status(STATUS_REPLICATED);
2248 + return STATUS_ERROR;
2250 + memset(&packet,0,sizeof(RecoveryPacket));
2251 + r_size = read_packet(fd,&packet);
2254 + rtn = STATUS_ERROR;
2256 + switch (ntohs(packet.packet_no))
2258 + case RECOVERY_QUEUE_DATA_ANS:
2262 + rtn = STATUS_ERROR;
2265 + PGR_Set_Cluster_Status(STATUS_REPLICATED);
2271 +rsync_pg_data(char * src, char * dest)
2277 + args[i++] = "rsync";
2280 + if (RsyncCompress)
2282 + args[i++] = "--delete";
2284 + args[i++] = RsyncOption;
2292 + status = execv(RsyncPath,args);
2299 + result = wait(&status);
2302 + if (errno == EINTR)
2304 + return STATUS_ERROR;
2307 + if (WIFEXITED(status) == 0 || WEXITSTATUS(status) != 0)
2308 + return STATUS_ERROR;
2317 +remove_dir(char * dir_name)
2320 + struct dirent *dirp = NULL;
2324 + if ((dp = opendir(dir_name)) == NULL)
2326 + return STATUS_ERROR;
2328 + while ((dirp = readdir(dp)) != NULL)
2330 + if ((!strcmp(dirp->d_name,".")) ||
2331 + (!strcmp(dirp->d_name,"..")))
2335 + sprintf(fname,"%s/%s",dir_name,dirp->d_name);
2336 + status = remove(fname);
2339 + remove_dir(fname);
2343 + if (remove(dir_name) < 0)
2345 + return STATUS_ERROR;
2351 +clear_bkup_dir(char * dir_name)
2353 + char bkp_dir[256];
2354 + pid_t pid = getpid();
2356 + sprintf(bkp_dir,"%s_%d",dir_name,pid);
2357 + return (remove_dir(bkp_dir));
2361 +bkup_dir(char * dir_name)
2364 + char org_dir[256];
2365 + char bkp_dir[256];
2366 + pid_t pid = getpid();
2368 + sprintf(org_dir,"%s",dir_name);
2369 + sprintf(bkp_dir,"%s_%d",dir_name,pid);
2370 + status = rename(org_dir,bkp_dir);
2373 + return STATUS_ERROR;
2379 +restore_dir(char * dir_name)
2382 + char org_dir[256];
2383 + char bkp_dir[256];
2384 + pid_t pid = getpid();
2386 + sprintf(org_dir,"%s",dir_name);
2387 + sprintf(bkp_dir,"%s_%d",dir_name,pid);
2388 + status = rename(bkp_dir,org_dir);
2391 + remove_dir(org_dir);
2392 + status = rename(bkp_dir,org_dir);
2395 + return STATUS_ERROR;
2402 +rsync_global_dir(char * src, char * dest, int stage)
2405 + char control_file[256];
2406 + char org_dir[256];
2407 + char src_dir[256];
2408 + struct stat fstat;
2411 + sprintf(org_dir,"%s/global",dest);
2412 + sprintf(control_file,"%s/global/pg_control",dest);
2413 + if ((stage == PGR_1ST_RECOVERY) && (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP))
2415 + if (bkup_dir(org_dir) != STATUS_OK)
2417 + return STATUS_ERROR;
2420 + sprintf(src_dir,"%s/global",src);
2421 + status = rsync_pg_data(src_dir, dest);
2422 + if (status != STATUS_OK )
2424 + restore_dir(org_dir);
2425 + return STATUS_ERROR;
2427 + /* check pg_control file */
2429 + while (stat(control_file, &fstat) < 0)
2431 + if (cnt > MAX_RETRY_TIMES )
2433 + restore_dir(org_dir);
2434 + return STATUS_ERROR;
2439 + if ((stage == PGR_2ND_RECOVERY) && (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP))
2441 + clear_bkup_dir(org_dir);
2447 +first_recovery(char * src, char * dest, char * dir)
2449 + int status = STATUS_OK;
2450 + char src_dir[256];
2451 + char dest_dir[256];
2453 + memset(src_dir,0,sizeof(src_dir));
2454 + memset(dest_dir,0,sizeof(dest_dir));
2455 + sprintf(src_dir,"%s/%s",src,dir);
2456 + sprintf(dest_dir,"%s/%s",dest,dir);
2457 + if (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP)
2459 + status = bkup_dir(dest_dir);
2462 + return STATUS_ERROR;
2465 + status = rsync_pg_data(src_dir, dest);
2466 + if (status != STATUS_OK )
2468 + restore_dir(dest_dir);
2469 + return STATUS_ERROR;
2475 +second_recovery(char * src, char * dest, char * dir)
2477 + int status = STATUS_OK;
2478 + char src_dir[256];
2479 + char dest_dir[256];
2481 + memset(src_dir,0,sizeof(src_dir));
2482 + memset(dest_dir,0,sizeof(dest_dir));
2483 + sprintf(src_dir,"%s/%s",src,dir);
2484 + sprintf(dest_dir,"%s/%s",dest,dir);
2486 + status = rsync_pg_data(src_dir, dest);
2487 + if (status != STATUS_OK )
2489 + restore_dir(dest_dir);
2490 + return STATUS_ERROR;
2492 + if (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP)
2494 + clear_bkup_dir(dest_dir);
2501 +recovery_rsync(char * src , char * dest, int stage)
2503 + if ((src== NULL) || ( dest == NULL))
2505 + return STATUS_ERROR;
2508 + /* recovery step of "global" directory */
2509 + fprintf(stderr,"%s recovery step of [global] directory...",
2510 + ((stage == 1)?"1st":"2nd"));
2511 + if (rsync_global_dir(src, dest, stage) != STATUS_OK)
2513 + fprintf(stderr,"NG\n");
2514 + return STATUS_ERROR;
2516 + fprintf(stderr,"OK\n");
2518 + if (stage == PGR_1ST_RECOVERY)
2520 + /* 1st recovery step of "base" directory */
2521 + fprintf(stderr,"1st recovery step of [base] directory...");
2522 + if (first_recovery(src,dest,"base") != STATUS_OK)
2524 + fprintf(stderr,"NG\n");
2525 + return STATUS_ERROR;
2527 + fprintf(stderr,"OK\n");
2529 + fprintf(stderr,"1st recovery step of [pg_clog] directory...");
2530 + /* 1st recovery step of "pg_clog" directory */
2531 + if (first_recovery(src,dest,"pg_clog") != STATUS_OK)
2533 + fprintf(stderr,"NG\n");
2534 + return STATUS_ERROR;
2536 + fprintf(stderr,"OK\n");
2538 + /* 1st recovery step of "pg_xlog" directory */
2539 + fprintf(stderr,"1st recovery step of [pg_xlog] directory...");
2540 + if (first_recovery(src,dest,"pg_xlog") != STATUS_OK)
2542 + fprintf(stderr,"NG\n");
2543 + return STATUS_ERROR;
2545 + fprintf(stderr,"OK\n");
2549 + /* 2nd recovery step of "base" directory */
2550 + fprintf(stderr,"2nd recovery step of [base] directory...");
2551 + if (second_recovery(src,dest,"base") != STATUS_OK)
2553 + fprintf(stderr,"NG\n");
2554 + return STATUS_ERROR;
2556 + fprintf(stderr,"OK\n");
2558 + /* 2nd recovery step of "pg_clog" directory */
2559 + fprintf(stderr,"2nd recovery step of [pg_clog] directory...");
2560 + if (second_recovery(src,dest,"pg_clog") != STATUS_OK)
2562 + fprintf(stderr,"NG\n");
2563 + return STATUS_ERROR;
2565 + fprintf(stderr,"OK\n");
2567 + /* 2nd recovery step of "pg_xlog" directory */
2568 + fprintf(stderr,"2nd recovery step of [pg_xlog] directory...");
2569 + if (second_recovery(src,dest,"pg_xlog") != STATUS_OK)
2571 + fprintf(stderr,"NG\n");
2572 + return STATUS_ERROR;
2574 + fprintf(stderr,"OK\n");
2581 +recovery_loop(int fd, int mode)
2584 + int status = STATUS_OK;
2585 + RecoveryPacket packet;
2587 + int rtn = RECOVERY_LOOP_END;
2589 + bool need_sync_table_space = false;
2591 + memset(&packet,0,sizeof(RecoveryPacket));
2592 + r_size = read_packet(fd,&packet);
2595 + rtn = RECOVERY_LOOP_FAIL;
2597 + switch (ntohs(packet.packet_no))
2599 + case RECOVERY_PREPARE_ANS :
2601 + * get master information
2604 + * sync master data before recovery
2606 + if (Debug_pretty_print)
2608 + elog(DEBUG1,"local host : %s master:%s",Local_Host_Name,packet.hostName);
2610 + if (!strncmp(Local_Host_Name,packet.hostName,strlen(Local_Host_Name)))
2612 + strcpy(src,packet.pg_data);
2613 + need_sync_table_space = false;
2617 + sprintf(src,"%s:%s",packet.hostName,packet.pg_data);
2618 + need_sync_table_space = true;
2620 + if (PGR_Recovery_Mode == PGR_COLD_RECOVERY)
2622 + rtn = cold_recovery(src,&packet,need_sync_table_space,PGR_1ST_RECOVERY);
2626 + rtn = hot_recovery(&packet,PGR_1ST_RECOVERY);
2628 + if (rtn != STATUS_OK)
2630 + rtn = RECOVERY_LOOP_FAIL;
2635 + * send recovery start request
2637 + PGRset_recovery_packet_no(&packet, RECOVERY_START_REQ );
2638 + status = send_packet(&fd,&packet);
2639 + if (status != STATUS_OK)
2641 + fprintf(stderr,"RECOVERY_START_REQ send error\n");
2642 + rtn = RECOVERY_LOOP_FAIL;
2645 + rtn = RECOVERY_LOOP_CONTINUE;
2647 + case RECOVERY_START_ANS :
2649 + * sync master data for recovery
2651 + if (!strncmp(Local_Host_Name,packet.hostName,strlen(Local_Host_Name)))
2653 + strcpy(src,packet.pg_data);
2654 + need_sync_table_space = false;
2658 + sprintf(src,"%s:%s",packet.hostName,packet.pg_data);
2659 + need_sync_table_space = true;
2661 + if (PGR_Recovery_Mode == PGR_COLD_RECOVERY)
2663 + rtn = cold_recovery(src,&packet,need_sync_table_space,PGR_2ND_RECOVERY);
2667 + rtn = hot_recovery(&packet,PGR_2ND_RECOVERY);
2670 + if (rtn == STATUS_OK)
2672 + fprintf(stderr,"2nd recovery successed\n");
2673 + if (mode == PGR_HOT_RECOVERY)
2675 + rtn = RECOVERY_LOOP_CONTINUE;
2677 + * send recovery queued data request
2679 + PGRset_recovery_packet_no(&packet, RECOVERY_QUEUE_DATA_REQ );
2680 + status = send_packet(&fd,&packet);
2681 + if (status != STATUS_OK)
2683 + fprintf(stderr,"RECOVERY_QUEUE_DATA_REQ send error\n");
2684 + rtn = RECOVERY_LOOP_FAIL;
2690 + rtn = RECOVERY_LOOP_END;
2695 + fprintf(stderr,"2nd hot recovery failed\n");
2696 + rtn = RECOVERY_LOOP_FAIL;
2699 + case RECOVERY_QUEUE_DATA_ANS:
2700 + rtn = RECOVERY_LOOP_END;
2702 + case RECOVERY_ERROR_OCCUPIED:
2703 + fprintf(stderr,"already in use for another recovery\n");
2704 + rtn = RECOVERY_LOOP_FAIL;
2706 + case RECOVERY_ERROR_CONNECTION:
2707 + fprintf(stderr,"connection failed\n");
2708 + rtn = RECOVERY_LOOP_FAIL;
2711 + fprintf(stderr,"unknown packet received\n");
2712 + rtn = RECOVERY_LOOP_FAIL;
2720 +PGR_Recovery_Main(int mode)
2725 + ReplicateServerInfo * host;
2727 + memset(Local_Host_Name,0,sizeof(Local_Host_Name));
2728 + gethostname(Local_Host_Name,sizeof(Local_Host_Name));
2729 + PGR_Recovery_Mode = mode;
2731 + status = STATUS_ERROR;
2733 +Retry_Start_Recovery:
2734 + host = PGR_get_replicate_server_info();
2737 + if (Debug_pretty_print)
2739 + elog(DEBUG1,"not found replication server");
2741 + PGR_Set_Cluster_Status(STATUS_REPLICATED);
2742 + return STATUS_ERROR;
2745 + PGR_Set_Cluster_Status(STATUS_RECOVERY);
2746 + status = start_recovery_send(&fd,host);
2747 + if (status != STATUS_OK)
2749 + PGR_Set_Replication_Server_Status(host,DATA_ERR);
2751 + if (Debug_pretty_print)
2753 + elog(DEBUG1,"start recovery packet send error");
2755 + goto Retry_Start_Recovery;
2761 + struct timeval timeout;
2763 + timeout.tv_sec = RECOVERY_TIMEOUT;
2764 + timeout.tv_usec = 0;
2767 + * Wait for something to happen.
2770 + FD_SET(fd,&rmask);
2771 + rtn = select(fd+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
2772 + if (rtn && FD_ISSET(fd, &rmask))
2774 + status = recovery_loop(fd, mode);
2775 + if (status == RECOVERY_LOOP_CONTINUE)
2779 + else if (status == RECOVERY_LOOP_END)
2784 + else if (status == RECOVERY_LOOP_FAIL)
2786 + status = stop_recovery_send(&fd,host);
2787 + PGR_Set_Cluster_Status(STATUS_REPLICATED);
2788 + if (status != STATUS_OK)
2791 + return STATUS_ERROR;
2794 + return STATUS_ERROR;
2799 + PGR_Set_Cluster_Status(STATUS_REPLICATED);
2800 + return STATUS_ERROR;
2804 + PGR_Set_Cluster_Status(STATUS_REPLICATED);
2809 +show_recovery_packet(RecoveryPacket * packet)
2812 + if (Debug_pretty_print)
2814 + elog(DEBUG1,"no = %d",ntohs(packet->packet_no));
2815 + elog(DEBUG1,"max_connect = %d",ntohs(packet->max_connect));
2816 + elog(DEBUG1,"port = %d",ntohs(packet->port));
2817 + elog(DEBUG1,"recoveryPort = %d",ntohs(packet->recoveryPort));
2818 + if (packet->hostName != NULL)
2819 + elog(DEBUG1,"hostName = %s",packet->hostName);
2820 + if (packet->pg_data != NULL)
2821 + elog(DEBUG1,"pg_data = %s",packet->pg_data);
2826 +set_recovery_packet(RecoveryPacket * packet, int packet_no)
2828 + struct passwd * pw = NULL;
2830 + if (packet == NULL)
2834 + PGRset_recovery_packet_no(packet, packet_no );
2835 + packet->max_connect = htons(MaxBackends);
2836 + packet->port = htons(PostPortNumber);
2837 + packet->recoveryPort = htons(RecoveryPortNumber);
2838 + gethostname(packet->hostName,sizeof(packet->hostName));
2839 + memcpy(packet->pg_data,DataDir,sizeof(packet->pg_data));
2840 + memset(packet->userName,0,sizeof(packet->userName));
2841 + if ((pw = getpwuid(geteuid())) != NULL)
2843 + strncpy(packet->userName,pw->pw_name,sizeof(packet->userName));
2847 + cuserid(packet->userName);
2852 +sync_table_space(char * hostName, uint16_t portNum, char * userName, int stage)
2854 + PGresult * res = (PGresult *)NULL;
2857 + char * location = NULL;
2858 + int rtn = STATUS_OK;
2860 + res = get_table_space_location(hostName, portNum, userName);
2861 + if (res == (PGresult *)NULL)
2863 + return STATUS_ERROR;
2865 + row_num = PQntuples(res);
2866 + for ( i = 0 ; i < row_num ; i ++)
2868 + location = PQgetvalue(res,i,0);
2869 + if (strlen(location) > 0 )
2871 + fprintf(stderr,"sync tablespace[%s]...",location);
2872 + rtn = rsync_table_space(hostName, location, stage);
2873 + fprintf(stderr,"%s\n", (rtn == STATUS_OK)?"OK":"NG");
2876 + if (res != (PGresult *)NULL)
2885 +get_table_space_location(char * hostName, uint16_t portNum, char * userName)
2887 + PGresult * res = (PGresult *)NULL;
2889 + PGconn * conn = (PGconn *)NULL;
2891 + char *database = "template1";
2892 + char * query = "select spclocation from pg_tablespace where spcname not like 'pg_%'";
2894 + if ( (hostName == NULL) ||
2896 + (userName == NULL))
2898 + return (PGresult *)NULL;
2900 + snprintf(port,sizeof(port),"%d", portNum);
2902 + /* create connection to master */
2903 + conn = PQsetdbLogin(hostName, port, NULL, NULL, database, userName, NULL);
2906 + return (PGresult *)NULL;
2908 + /* check to see that the backend Connection was successfully made */
2910 + while (PQstatus(conn) == CONNECTION_BAD)
2916 + if (cnt > MAX_RETRY_TIMES )
2918 + return (PGresult *)NULL;
2920 + conn = PQsetdbLogin(hostName, port, NULL, NULL, database, userName, NULL);
2923 + res = PQexec(conn , query);
2924 + if ((res == NULL) ||
2925 + (PQresultStatus(res) != PGRES_TUPLES_OK))
2928 + res = (PGresult *)NULL;
2939 +rsync_table_space(char * hostName, char * location, int stage)
2941 + int status = STATUS_OK;
2942 + char src_dir[256];
2943 + char dest_dir[256];
2944 + struct stat fstat;
2947 + sprintf(src_dir,"%s:%s",hostName,location);
2948 + strncpy(dest_dir,location,sizeof(dest_dir));
2950 + if ((stage == PGR_1ST_RECOVERY) && (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP))
2952 + status = bkup_dir(location);
2954 + status = rsync_pg_data(src_dir, dest_dir);
2955 + if (status != STATUS_OK )
2957 + restore_dir(location);
2958 + return STATUS_ERROR;
2960 + /* check file status */
2962 + while (stat(location,&fstat) < 0)
2964 + if (cnt > MAX_RETRY_TIMES )
2966 + restore_dir(location);
2967 + return STATUS_ERROR;
2972 + if ((stage == PGR_2ND_RECOVERY) && (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP))
2974 + clear_bkup_dir(location);
2980 +cold_recovery(char * src, RecoveryPacket *packet, bool need_sync_table_space, int stage)
2982 + int status = STATUS_OK;
2984 + status = recovery_rsync(src,DataDir,stage);
2985 + if (status != STATUS_OK)
2987 + if (Debug_pretty_print)
2989 + elog(DEBUG1,"%s rsync error",
2990 + ((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
2992 + return STATUS_ERROR;
2994 + if (need_sync_table_space == true)
2996 + status = sync_table_space(packet->hostName, ntohs(packet->port), packet->userName, stage);
2997 + fprintf(stderr,"%s sync_table_space ",
2998 + ((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
2999 + if (status != STATUS_OK)
3001 + if (Debug_pretty_print)
3003 + elog(DEBUG1,"%s sync table space error",
3004 + ((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
3006 + fprintf(stderr,"NG\n");
3007 + return STATUS_ERROR;
3009 + fprintf(stderr,"OK\n");
3015 +hot_recovery(RecoveryPacket *packet, int stage)
3017 + int status = STATUS_OK;
3019 + fprintf(stderr,"%s restore from pg_dump ",
3020 + ((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
3021 + if (stage == PGR_1ST_RECOVERY)
3023 + status = restore_from_dumpall(packet->hostName, ntohs(packet->port), packet->userName );
3027 + status = restore_from_each_dump(packet->hostName, ntohs(packet->port), packet->userName );
3029 + if (status != STATUS_OK)
3031 + if (Debug_pretty_print)
3033 + elog(DEBUG1,"%s sync table space error",
3034 + ((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
3036 + fprintf(stderr,"->NG\n");
3037 + return STATUS_ERROR;
3039 + fprintf(stderr,"->OK\n");
3044 +restore_from_dumpall( char * hostName, uint16_t portNum, char * userName)
3047 + char exec_command[512];
3049 + char pg_dumpall[256];
3053 + /* set pg_dumpall path */
3054 + memset(pg_dumpall, 0, sizeof(pg_dumpall));
3055 + strncpy(pg_dumpall, PgDumpPath, sizeof(pg_dumpall));
3056 + p = strrchr(pg_dumpall,'/');
3059 + return STATUS_ERROR;
3062 + strcpy(p,"pg_dumpall");
3064 + /* set psql path */
3066 + memset(psql, 0, sizeof(psql));
3067 + strncpy(psql, PgDumpPath, sizeof(psql));
3068 + p = strrchr(psql,'/');
3071 + return STATUS_ERROR;
3078 + snprintf(exec_command,sizeof(exec_command),"%s -i -o -c -h %s -p %d -U %s | %s -p %d template1",
3086 + fprintf(stderr,"1st exec:[%s]\n",exec_command);
3091 + system(exec_command);
3099 + result = wait(&status);
3102 + if (errno == EINTR)
3104 + return STATUS_ERROR;
3107 + if (WIFEXITED(status) == 0 || WEXITSTATUS(status) != 0)
3108 + return STATUS_ERROR;
3117 +restore_from_dump( char * hostName, uint16_t portNum, char * userName, char * dbName)
3120 + char exec_command[512];
3122 + char pg_restore[256];
3125 + /* set pg_restore path */
3127 + memset(pg_restore, 0, sizeof(pg_restore));
3128 + strncpy(pg_restore, PgDumpPath, sizeof(pg_restore));
3129 + p = strrchr(pg_restore,'/');
3132 + return STATUS_ERROR;
3135 + strcpy(p,"pg_restore");
3137 + snprintf(exec_command,sizeof(exec_command),"%s -i -Fc -o -b -h %s -p %d -U %s %s | %s -i -c -p %d -d %s",
3148 + fprintf(stderr,"2nd exec:[%s]\n",exec_command);
3152 + system(exec_command);
3160 + result = wait(&status);
3163 + if (errno == EINTR)
3165 + return STATUS_ERROR;
3168 + if (WIFEXITED(status) == 0 || WEXITSTATUS(status) != 0)
3169 + return STATUS_ERROR;
3178 +restore_from_each_dump( char * hostName, uint16_t portNum, char * userName)
3180 + PGresult * res = (PGresult *)NULL;
3183 + char * dbName = NULL;
3184 + int rtn = STATUS_OK;
3186 + res = get_dbName(hostName, portNum, userName);
3187 + if (res == (PGresult *)NULL)
3189 + return STATUS_ERROR;
3191 + row_num = PQntuples(res);
3192 + for ( i = 0 ; i < row_num ; i ++)
3194 + dbName = PQgetvalue(res,i,0);
3195 + if (strlen(dbName) > 0 )
3197 + if ((strcmp("template0",dbName)) &&
3198 + (strcmp("template1",dbName)))
3200 + rtn = restore_from_dump(hostName, portNum, userName, dbName);
3201 + fprintf(stderr,".");
3205 + if (res != (PGresult *)NULL)
3214 +get_dbName(char * hostName, uint16_t portNum, char * userName)
3216 + PGresult * res = (PGresult *)NULL;
3218 + PGconn * conn = (PGconn *)NULL;
3220 + char *database = "template1";
3221 + char * query = "SELECT datname FROM pg_database";
3223 + if ( (hostName == NULL) ||
3225 + (userName == NULL))
3227 + return (PGresult *)NULL;
3229 + snprintf(port,sizeof(port),"%d", portNum);
3231 + /* create connection to master */
3232 + conn = PQsetdbLogin(hostName, port, NULL, NULL, database, userName, NULL);
3235 + return (PGresult *)NULL;
3237 + /* check to see that the backend Connection was successfully made */
3239 + while (PQstatus(conn) == CONNECTION_BAD)
3245 + if (cnt > MAX_RETRY_TIMES )
3247 + return (PGresult *)NULL;
3249 + conn = PQsetdbLogin(hostName, port, NULL, NULL, database, userName, NULL);
3252 + res = PQexec(conn , query);
3253 + if ((res == NULL) ||
3254 + (PQresultStatus(res) != PGRES_TUPLES_OK))
3257 + res = (PGresult *)NULL;
3267 +#endif /* USE_REPLICATION */
3268 diff -aruN postgresql-8.2.4/src/backend/libpq/replicate.c pgcluster-1.7.0rc7/src/backend/libpq/replicate.c
3269 --- postgresql-8.2.4/src/backend/libpq/replicate.c 1970-01-01 01:00:00.000000000 +0100
3270 +++ pgcluster-1.7.0rc7/src/backend/libpq/replicate.c 2007-02-18 22:52:16.000000000 +0100
3272 +/*--------------------------------------------------------------------
3277 + * This file contains the functions that the backend source calls
3278 + * to perform replication.
3279 + * The low-level I/O functions called by these functions are
3280 + * contained in 'replicate_com.c'.
3282 + *--------------------------------------------------------------------
3285 +/*--------------------------------------
3286 + * INTERFACE ROUTINES
3289 + * PGR_Init_Replicate_Server_Data
3290 + * PGR_Set_Replicate_Server_Socket
3293 + * PGR_Send_Replicate_Command
3295 + * PGR_get_replicate_server_info
3296 + * status distinction:
3297 + * PGR_Is_Replicated_Command
3298 + * Xlog_Check_Replicate
3299 + * replication main:
3301 + *-------------------------------------
3303 +#ifdef USE_REPLICATION
3305 +#include "postgres.h"
3308 +#include <strings.h>
3309 +#include <signal.h>
3313 +#include <unistd.h>
3316 +#include <sys/time.h>
3317 +#include <sys/types.h>
3318 +#include <sys/stat.h>
3319 +#include <sys/socket.h>
3320 +#include <sys/ipc.h>
3321 +#include <sys/shm.h>
3323 +#include <netinet/in.h>
3324 +#ifdef HAVE_NETINET_TCP_H
3325 +#include <netinet/tcp.h>
3327 +#include <arpa/inet.h>
3328 +#include <sys/file.h>
3331 +#include "access/transam.h"
3332 +#include "bootstrap/bootstrap.h"
3333 +#include "libpq/libpq.h"
3334 +#include "libpq/pqformat.h"
3335 +#include "miscadmin.h"
3336 +#include "commands/prepare.h"
3337 +#include "nodes/nodes.h"
3338 +#include "nodes/print.h"
3339 +#include "utils/guc.h"
3340 +#include "parser/parser.h"
3341 +#include "access/xact.h"
3342 +#include "storage/proc.h"
3343 +#include "tcop/tcopprot.h"
3344 +#include "tcop/utility.h"
3345 +#include "postmaster/postmaster.h"
3346 +#include "replicate.h"
3348 +/* the source of this value is 'access/transam/varsup.c' */
3349 +#define VAR_OID_PREFETCH (8192)
3351 +PGR_ReplicationLog_Info ReplicationLog_Info;
3352 +bool pgr_skip_in_prepared_query = false;
3354 +/*--------------------------------------
3355 + * PROTOTYPE DECLARATION
3356 + *--------------------------------------
3358 +static int set_command_args(char argv[PGR_CMD_ARG_NUM][256],char *str);
3359 +static bool is_same_replication_server(ReplicateServerInfo * sp1, ReplicateServerInfo * sp2 );
3360 +static ReplicateServerInfo * search_new_replication_server ( ReplicateServerInfo * sp , int socket_type );
3362 +static int close_replicate_server_socket ( ReplicateServerInfo * sp , int socket_type );
3363 +static int recv_message(int sock,char * buf,int flag);
3364 +static int send_replicate_packet(int sock,ReplicateHeader * header, char * query_string);
3365 +static bool is_copy_from(char * query);
3366 +static int get_words( char words[MAX_WORDS][MAX_WORD_LETTERS] ,char * string,int length,int upper);
3367 +static int get_table_name(char * table_name, char * query, int position );
3368 +static bool is_not_replication_query(char * query_string, int query_len, char cmdType);
3369 +static int Comp_Not_Replicate(PGR_Not_Replicate_Type * nrp1,PGR_Not_Replicate_Type* nrp2);
3370 +static bool is_serial_control_query(char cmdType,char * query);
3371 +static bool is_select_into_query(char cmdType,char * query);
3372 +static int send_response_to_replication_server(const char * notice);
3373 +static bool do_not_replication_command(const char * commandTag);
3374 +static bool is_create_temp_table(char * query);
3375 +static int add_replication_server(char * hostname,char * port, char * recovery_port);
3376 +static int change_replication_server(char * hostname,char * port, char * recovery_port);
3377 +static int get_new_replication_socket( ReplicateServerInfo * base, ReplicateServerInfo * sp, int socket_type);
3378 +static char * get_hostName(char * str);
3379 +static void set_response_mode(char * mode);
3380 +static void PGR_Set_Current_Replication_Query_ID(char *id);
3381 +#ifdef CONTROL_LOCK_CONFLICT
3382 +static int wait_lock_answer(void);
3383 +static int read_trigger(char * result, int buf_size);
3384 +#endif /* CONTROL_LOCK_CONFLICT */
3385 +static int check_conf_data(void);
3387 +static unsigned int get_next_request_id(void);
3388 +static bool is_this_query_replicated(char * id);
3389 +static int set_replication_id(char * id);
3390 +static int return_current_oid(void);
3391 +static int sync_oid(char * oid);
3392 +static bool is_concerned_with_prepared_select(char cmdType, char * query_string);
3393 +static int skip_non_blank(char * ptr, int max);
3394 +static int skip_blank(char * ptr, int max);
3395 +static int parse_message(char * query_string);
3396 +static bool is_prepared_as_select(char * query_string);
3397 +static bool is_statement_as_select(char * query_string);
3399 +extern ssize_t secure_read(Port *, void *, size_t);
3400 +/*--------------------------------------------------------------------
3402 + * PGR_Init_Replicate_Server_Data()
3404 + * Read Configuration file and create ReplicateServerData table
3409 + * NG: STATUS_ERROR
3410 + *--------------------------------------------------------------------
3413 +PGR_Init_Replicate_Server_Data(void)
3415 + int table_size,str_size;
3416 + ReplicateServerInfo *sp;
3417 + PGR_Not_Replicate_Type * nrp;
3418 + ConfDataType * conf;
3421 + char HostName[HOSTNAME_MAX_LENGTH];
3423 + memset (HostName,0,sizeof(HostName));
3424 + if (ConfData_Top == (ConfDataType *)NULL)
3426 + return STATUS_ERROR;
3429 + /* allocate replication server information table */
3430 + table_size = sizeof(ReplicateServerInfo) * MAX_SERVER_NUM;
3431 + ReplicateServerShmid = shmget(IPC_PRIVATE,table_size,IPC_CREAT | IPC_EXCL | 0600);
3432 + if (ReplicateServerShmid < 0)
3434 + return STATUS_ERROR;
3436 + ReplicateServerData = (ReplicateServerInfo *)shmat(ReplicateServerShmid,0,0);
3437 + if (ReplicateServerData == (ReplicateServerInfo *)-1)
3439 + return STATUS_ERROR;
3441 + memset(ReplicateServerData,0,table_size);
3442 + sp = ReplicateServerData;
3444 + /* allocate cluster db information table */
3445 + ClusterDBShmid = shmget(IPC_PRIVATE,sizeof(ClusterDBInfo),IPC_CREAT | IPC_EXCL | 0600);
3446 + if (ClusterDBShmid < 0)
3448 + return STATUS_ERROR;
3450 + ClusterDBData = (ClusterDBInfo *)shmat(ClusterDBShmid,0,0);
3451 + if (ClusterDBData == (ClusterDBInfo *)-1)
3453 + return STATUS_ERROR;
3455 + memset(ClusterDBData,0,sizeof(ClusterDBInfo));
3456 + PGR_Set_Cluster_Status(STATUS_REPLICATED);
3458 + /* allocate partial replicate table */
3459 + table_size = sizeof(PGR_Not_Replicate_Type) * MAX_SERVER_NUM;
3460 + PGR_Not_Replicate = malloc(table_size);
3461 + if (PGR_Not_Replicate == (PGR_Not_Replicate_Type*)NULL)
3463 + return STATUS_ERROR;
3465 + memset(PGR_Not_Replicate, 0, table_size);
3466 + nrp = PGR_Not_Replicate;
3468 + conf = ConfData_Top;
3469 + while ((conf != (ConfDataType *)NULL) && (cnt < MAX_SERVER_NUM))
3471 + /* set replication server table */
3472 + if (!strcmp(conf->table,REPLICATION_SERVER_INFO_TAG))
3474 + rec_no = conf->rec_no;
3476 + if (!strcmp(conf->key,HOST_NAME_TAG))
3478 + strncpy((sp + rec_no)->hostName,conf->value,sizeof(sp->hostName));
3479 + conf = (ConfDataType *)conf->next;
3482 + if (!strcmp(conf->key,PORT_TAG))
3484 + (sp + rec_no)->portNumber = atoi(conf->value);
3485 + (sp + rec_no)->sock = -1;
3486 + if ((sp + rec_no)->useFlag != DATA_USE)
3488 + PGR_Set_Replication_Server_Status((sp+rec_no), DATA_INIT);
3490 + memset((sp + rec_no + 1)->hostName,0,sizeof(sp->hostName));
3491 + (sp + rec_no + 1)->useFlag = DATA_END;
3492 + conf = (ConfDataType *)conf->next;
3495 + if (!strcmp(conf->key,RECOVERY_PORT_TAG))
3497 + (sp + rec_no)->recoveryPortNumber = atoi(conf->value);
3498 + if ((sp + rec_no)->useFlag != DATA_USE)
3500 + PGR_Set_Replication_Server_Status((sp+rec_no), DATA_INIT);
3502 + memset((sp + rec_no + 1)->hostName,0,sizeof(sp->hostName));
3503 + (sp + rec_no + 1)->useFlag = DATA_END;
3504 + conf = (ConfDataType *)conf->next;
3508 + /* set partial replication table */
3509 + if (!strcmp(conf->table,NOT_REPLICATE_INFO_TAG))
3511 + rec_no = conf->rec_no;
3513 + if (PGR_Not_Replicate_Rec_Num < rec_no +1)
3515 + PGR_Not_Replicate_Rec_Num = rec_no +1;
3517 + if (!strcmp(conf->key,DB_NAME_TAG))
3519 + strncpy((nrp + rec_no)->db_name,conf->value,sizeof(nrp->db_name));
3520 + conf = (ConfDataType *)conf->next;
3523 + if (!strcmp(conf->key,TABLE_NAME_TAG))
3525 + strncpy((nrp + rec_no)->table_name,conf->value,sizeof(nrp->table_name));
3526 + conf = (ConfDataType *)conf->next;
3530 + if (!strcmp(conf->key,HOST_NAME_TAG))
3532 + str_size = sizeof(HostName) ;
3533 + memset(HostName,0,str_size);
3534 + strncpy(HostName,conf->value,str_size-1);
3536 + else if (!strcmp(conf->key,RECOVERY_PORT_TAG))
3538 + RecoveryPortNumber = atoi(conf->value);
3540 + else if (!strcmp(conf->key,RSYNC_PATH_TAG))
3542 + str_size = strlen(conf->value) ;
3543 + RsyncPath = malloc(str_size + 1);
3544 + if (RsyncPath == NULL)
3546 + return STATUS_ERROR;
3548 + memset(RsyncPath,0,str_size + 1);
3549 + strncpy(RsyncPath,conf->value,str_size);
3551 + else if (!strcmp(conf->key,RSYNC_OPTION_TAG))
3553 + str_size = strlen(conf->value) ;
3554 + RsyncOption = malloc(str_size + 1);
3555 + if (RsyncOption == NULL)
3557 + return STATUS_ERROR;
3559 + memset(RsyncOption,0,str_size + 1);
3560 + strncpy(RsyncOption,conf->value,str_size);
3562 + else if (!strcmp(conf->key,RSYNC_COMPRESS_TAG))
3564 + if (!strcmp(conf->value, "yes"))
3565 + RsyncCompress = true;
3566 + else if (!strcmp(conf->value, "no"))
3567 + RsyncCompress = false;
3569 + else if (!strcmp(conf->key,PG_DUMP_PATH_TAG))
3571 + str_size = strlen(conf->value) ;
3572 + PgDumpPath = malloc(str_size + 1);
3573 + if (PgDumpPath == NULL)
3575 + return STATUS_ERROR;
3577 + memset(PgDumpPath,0,str_size + 1);
3578 + strncpy(PgDumpPath,conf->value,str_size);
3580 + else if (!strcmp(conf->key,STAND_ALONE_TAG))
3582 + PGR_Stand_Alone = (PGR_Stand_Alone_Type*)malloc(sizeof(PGR_Stand_Alone_Type));
3583 + if (PGR_Stand_Alone == (PGR_Stand_Alone_Type *)NULL)
3585 + return STATUS_ERROR;
3587 + PGR_Stand_Alone->is_stand_alone = false;
3588 + if (!strcmp(conf->value,READ_WRITE_IF_STAND_ALONE))
3590 + PGR_Stand_Alone->permit = PERMIT_READ_WRITE;
3594 + PGR_Stand_Alone->permit = PERMIT_READ_ONLY;
3597 + else if (!strcmp(conf->key,TIMEOUT_TAG))
3599 + /* get replication timeout */
3600 + PGR_Replication_Timeout = PGRget_time_value(conf->value);
3601 + if ((PGR_Replication_Timeout < 1) || (PGR_Replication_Timeout > 3600))
3603 + fprintf(stderr,"%s is out of range. It should be between 1sec-1hr.\n",TIMEOUT_TAG);
3604 + return STATUS_ERROR;
3607 + else if (!strcmp(conf->key,LIFECHECK_TIMEOUT_TAG))
3609 + /* get lifecheck timeout */
3610 + PGR_Lifecheck_Timeout = PGRget_time_value(conf->value);
3611 + if ((PGR_Lifecheck_Timeout < 1) || (PGR_Lifecheck_Timeout > 3600))
3613 + fprintf(stderr,"%s is out of range. It should be between 1sec-1hr.\n",LIFECHECK_TIMEOUT_TAG);
3614 + return STATUS_ERROR;
3617 + else if (!strcmp(conf->key,LIFECHECK_INTERVAL_TAG))
3619 + /* get lifecheck interval */
3620 + PGR_Lifecheck_Interval = PGRget_time_value(conf->value);
3621 + if ((PGR_Lifecheck_Interval < 1) || (PGR_Lifecheck_Interval > 3600))
3623 + fprintf(stderr,"%s is out of range. It should between 1sec-1hr.\n",LIFECHECK_INTERVAL_TAG);
3624 + return STATUS_ERROR;
3627 + conf = (ConfDataType *)conf->next;
3629 + TransactionSock = -1;
3630 + ReplicateCurrentTime = (ReplicateNow *)malloc(sizeof(ReplicateNow));
3631 + if (ReplicateCurrentTime == (ReplicateNow *)NULL)
3633 + return STATUS_ERROR;
3635 + memset(ReplicateCurrentTime,0,sizeof(ReplicateNow));
3637 + PGRCopyData = (CopyData *)malloc(sizeof(CopyData));
3638 + if (PGRCopyData == (CopyData *)NULL)
3640 + return STATUS_ERROR;
3642 + memset(PGRCopyData,0,sizeof(CopyData));
3644 + if (PGR_Not_Replicate_Rec_Num == 0)
3646 + free(PGR_Not_Replicate);
3647 + PGR_Not_Replicate = NULL;
3651 + qsort((char *)PGR_Not_Replicate,PGR_Not_Replicate_Rec_Num,sizeof(PGR_Not_Replicate_Type), (int (*)(const void*,const void*))Comp_Not_Replicate);
3654 + PGRSelfHostName = malloc(HOSTNAME_MAX_LENGTH);
3655 + if (PGRSelfHostName == NULL)
3657 + return STATUS_ERROR;
3659 + memset(PGRSelfHostName,0,HOSTNAME_MAX_LENGTH);
3661 + PGR_password = malloc(sizeof(PGR_Password_Info));
3662 + if (PGR_password == NULL)
3664 + return STATUS_ERROR;
3666 + memset(PGR_password,0,sizeof(PGR_Password_Info));
3667 + PGR_password->password = malloc(PASSWORD_MAX_LENGTH);
3668 + if (PGR_password->password == NULL)
3670 + return STATUS_ERROR;
3672 + memset(PGR_password->password,0,PASSWORD_MAX_LENGTH);
3674 + if (HostName[0] == 0)
3676 + if (gethostname(HostName,HOSTNAME_MAX_LENGTH) < 0)
3678 + return STATUS_ERROR;
3681 + ip=PGRget_ip_by_name(HostName);
3683 + sprintf(PGRSelfHostName,
3686 + (ip >> 8) & 0xff ,
3687 + (ip >> 16) & 0xff ,
3688 + (ip >> 24) & 0xff );
3689 + if (RsyncPath == NULL)
3691 + RsyncPath = strdup(DEFAULT_RSYNC);
3693 + if (PgDumpPath == NULL)
3695 + PgDumpPath = strdup(DEFAULT_PG_DUMP);
3698 + return (check_conf_data());
3702 +check_conf_data(void) /* Validate the loaded cluster configuration; every failed check below prints its reason to stderr and returns STATUS_ERROR. */
3705 + ReplicateServerInfo *sp;
3706 + sp = ReplicateServerData;
3707 + while ((sp + i)->useFlag != DATA_END) /* visit each replication server entry up to the DATA_END sentinel */
3709 + if (*((sp + i)->hostName) == 0)
3711 + fprintf(stderr,"Hostname of replication server is not valid.\n");
3712 + return STATUS_ERROR;
3714 + if ((sp + i)->portNumber < 1024) /* NOTE(review): 1024 itself passes this check although the message says "larger than 1024" */
3716 + fprintf(stderr,"Replication Port of replication server is not valid. It's required larger than 1024.\n");
3717 + return STATUS_ERROR;
3719 + if ((sp + i)->recoveryPortNumber < 1024)
3721 + fprintf(stderr,"RecoveryPort of replication server is not valid. It's required larger than 1024.\n");
3722 + return STATUS_ERROR;
3724 + if ((sp + i)->portNumber == (sp + i)->recoveryPortNumber) /* the two ports of one server must differ */
3726 + fprintf(stderr,"Replication Port and RecoveryPort is conflicted.\n");
3727 + return STATUS_ERROR;
3731 + if (RecoveryPortNumber < 1024) /* recovery port of this cluster DB node itself */
3733 + fprintf(stderr,"RecoveryPort of Cluster DB is not valid. It's required larger than 1024.\n");
3734 + return STATUS_ERROR;
3736 + if (PGR_Stand_Alone == NULL) /* only allocated when the stand-alone tag was present in the configuration */
3738 + fprintf(stderr,"Stand Alone Mode is not specified.\n");
3739 + return STATUS_ERROR;
3741 + if (RsyncOption == NULL)
3743 + fprintf(stderr,"Option of rsync command is not specified.\n");
3744 + return STATUS_ERROR;
3746 + if (strlen(PGRSelfHostName) <= 0)
3748 + fprintf(stderr,"Hostname of Cluster DB is not valid.\n");
3749 + return STATUS_ERROR;
3751 + if (PGR_Lifecheck_Timeout > PGR_Lifecheck_Interval) /* a timeout equal to the interval is accepted */
3753 + fprintf(stderr,"The lifecheck timeout(%d) should be shorter than interval(%d).\n",PGR_Lifecheck_Timeout,PGR_Lifecheck_Interval);
3754 + return STATUS_ERROR;
3759 +/*--------------------------------------------------------------------
3761 + * PGR_Set_Replicate_Server_Socket()
3763 + * Create new socket and set ReplicateServerData table
3768 + * NG: STATUS_ERROR
3769 + *--------------------------------------------------------------------
3772 +PGR_Set_Replicate_Server_Socket(void) /* Walk ReplicateServerData and try to open a connection for each entry, storing the descriptor in that entry's sock field. */
3774 + ReplicateServerInfo * sp;
3775 + if (ReplicateServerData == NULL) /* server table not available */
3777 + return STATUS_ERROR;
3779 + sp = ReplicateServerData;
3780 + while (sp->useFlag != DATA_END){ /* DATA_END marks the sentinel entry after the last server */
3782 + PGR_Create_Socket_Connect(&(sp->sock),sp->hostName,sp->portNumber); /* NOTE(review): the call's return value is discarded on this line */
3788 +/*--------------------------------------------------------------------
3790 + * PGR_get_replicate_server_socket()
3792 + * search or create a socket to connect with the replication server
3794 + * ReplicateServerInfo * sp: replication server data (I)
3795 + * int socket_type: socket type (I)
3796 + * -PGR_TRANSACTION_SOCKET:
3797 + * -PGR_QUERY_SOCKET:
3801 + *--------------------------------------------------------------------
3804 +PGR_get_replicate_server_socket ( ReplicateServerInfo * sp , int socket_type ) /* Return a connected socket to a replication server, reusing TransactionSock when it is already open. NOTE(review): socket_type is not consulted in the visible lines. */
3806 + ReplicateServerInfo * tmp;
3808 + if (tmp == (ReplicateServerInfo *) NULL)
3812 + if (tmp->hostName[0] == '\0') /* entry has no host name configured */
3817 + if (TransactionSock != -1) /* an open connection already exists: reuse it */
3819 + return TransactionSock;
3822 + while(PGR_Create_Socket_Connect(&TransactionSock,tmp->hostName,tmp->portNumber) != STATUS_OK) /* keep trying other servers until one accepts the connection */
3824 + close(TransactionSock);
3825 + TransactionSock = -1;
3826 + PGR_Set_Replication_Server_Status(tmp, DATA_ERR); /* mark the server that refused the connection */
3828 + tmp = PGR_get_replicate_server_info(); /* ask for the next candidate server */
3829 + if (tmp == (ReplicateServerInfo *)NULL) /* no candidate left */
3833 + PGR_Set_Replication_Server_Status(tmp, DATA_USE); /* the server that accepted becomes the one in use */
3836 + return TransactionSock;
3839 +/*--------------------------------------------------------------------
3841 + * close_replicate_server_socket()
3843 + * close the socket connected with the replication server
3845 + * ReplicateServerInfo * sp: replication server data (I)
3846 + * int socket_type: socket type (I)
3847 + * -PGR_TRANSACTION_SOCKET:
3848 + * -PGR_QUERY_SOCKET:
3851 + * NG: STATUS_ERROR
3852 + *--------------------------------------------------------------------
3855 +close_replicate_server_socket ( ReplicateServerInfo * sp , int socket_type ) /* Close the connection(s) held for replication server sp and reset its status to DATA_INIT. */
3857 + if (sp == (ReplicateServerInfo *)NULL )
3859 + return STATUS_ERROR;
3861 + if (sp->hostName[0] == '\0') /* entry has no host name: nothing to close */
3863 + return STATUS_ERROR;
3865 + if (TransactionSock != -1) /* the shared transaction socket is closed regardless of socket_type */
3867 + PGR_Close_Sock(&(TransactionSock));
3868 + TransactionSock = -1;
3870 + switch (socket_type)
3872 + case PGR_TRANSACTION_SOCKET:
3873 + if (TransactionSock != -1) /* NOTE(review): TransactionSock was already reset to -1 above, so this test looks redundant - confirm */
3875 + PGR_Close_Sock(&(TransactionSock));
3877 + TransactionSock = -1;
3880 + case PGR_QUERY_SOCKET:
3881 + if (sp->sock != -1) /* per-server socket stored in the table entry */
3883 + PGR_Close_Sock(&(sp->sock));
3888 + PGR_Set_Replication_Server_Status(sp, DATA_INIT);
3893 +is_same_replication_server(ReplicateServerInfo * sp1, ReplicateServerInfo * sp2 ) /* Compare two server entries by host name, port and recovery port. */
3895 + if ((sp1 == NULL) || (sp2 == NULL)) /* a missing entry is handled before any field is read */
3899 + if ((!strcmp(sp1->hostName,sp2->hostName)) && /* all three fields must match */
3900 + (sp1->portNumber == sp2->portNumber) &&
3901 + (sp1->recoveryPortNumber == sp2->recoveryPortNumber))
3908 +static ReplicateServerInfo *
3909 +search_new_replication_server ( ReplicateServerInfo * sp , int socket_type ) /* After a failure on sp, look through ReplicateServerData for another server and notify it with a PGR_CHANGE_REPLICATION_SERVER_FUNC_NO system call. */
3911 + ReplicateHeader dummy_header;
3912 + ReplicateServerInfo * rs_tbl;
3913 + char command[256];
3917 + if ((ReplicateServerData == NULL) || ( sp == NULL))
3922 + close_replicate_server_socket ( sp , socket_type); /* drop the connection to the server being replaced */
3924 + while (is_same_replication_server(sp,rs_tbl) != true) /* presumably loops until the scan comes back to the starting entry - TODO confirm against the elided lines */
3926 + if (sp->useFlag == DATA_END) /* reached the sentinel: wrap around to the first entry */
3928 + sp = ReplicateServerData;
3930 + sock = PGR_get_replicate_server_socket( sp , socket_type);
3933 + if (is_same_replication_server(sp,rs_tbl) == true)
3943 + memset(&dummy_header, 0, sizeof(ReplicateHeader));
3944 + memset(command,0,sizeof(command));
3945 + snprintf(command,sizeof(command)-1,"SELECT %s(%d,%s,%d,%d)", /* buffer was zeroed above and at most sizeof-1 bytes are written, so it stays NUL-terminated */
3946 + PGR_SYSTEM_COMMAND_FUNC,
3947 + PGR_CHANGE_REPLICATION_SERVER_FUNC_NO,
3950 + sp->recoveryPortNumber);
3951 + dummy_header.cmdSys = CMD_SYS_CALL;
3952 + dummy_header.cmdSts = CMD_STS_NOTICE;
3953 + dummy_header.query_size = htonl(strlen(command)); /* size travels in network byte order */
3954 + if (send_replicate_packet(sock,&dummy_header,command) != STATUS_OK)
3957 + close_replicate_server_socket ( sp , socket_type); /* notification failed: give up on this server */
3958 + PGR_Set_Replication_Server_Status(sp, DATA_ERR);
3962 + PGR_Set_Replication_Server_Status(sp, DATA_USE);
3965 + if (cnt > MAX_RETRY_TIMES )
3979 +get_table_name(char * table_name, char * query, int position ) /* Scan query word by word; presumably copies the position-th word into table_name - the copy itself is in lines not visible here. */
3987 + if ((table_name == NULL) || (query == NULL) || (position < 1)) /* position is 1-based */
3989 + return STATUS_ERROR;
3991 + length = strlen(query);
3995 + for (i = 0 ; i < length ; i ++)
3997 + while(isspace(*p)) /* skip whitespace between words */
4002 + while((*p != '\0') && (! isspace(*p))) /* consume one word */
4004 + if ((*p == ';') || (*p == '(')) /* these characters are tested separately inside a word */
4006 + if (wc == position)
4014 + if (wc == position)
4025 +is_not_replication_query(char * query_string, int query_len, char cmdType) /* Look up (current database, table named in the query) in the PGR_Not_Replicate table. */
4027 + PGR_Not_Replicate_Type key;
4028 + PGR_Not_Replicate_Type * ptr = NULL;
4030 + if (PGR_Not_Replicate_Rec_Num <= 0) /* no exclusion entries configured */
4032 + if (query_string == NULL)
4034 + memset(&key,0,sizeof(PGR_Not_Replicate_Type));
4035 + strncpy(key.db_name ,(char *)(MyProcPort->database_name),sizeof(key.db_name)-1); /* key was zeroed, so the name stays NUL-terminated */
4038 + case CMD_TYPE_INSERT:
4039 + get_table_name(key.table_name,query_string,3); /* table name is taken from the 3rd word of the statement */
4041 + case CMD_TYPE_UPDATE:
4042 + get_table_name(key.table_name,query_string,2); /* 2nd word */
4044 + case CMD_TYPE_DELETE:
4045 + get_table_name(key.table_name,query_string,3); /* 3rd word */
4047 + case CMD_TYPE_COPY:
4048 + get_table_name(key.table_name,query_string,2); /* 2nd word */
4053 + ptr = (PGR_Not_Replicate_Type*)bsearch((void*)&key,(void*)PGR_Not_Replicate,PGR_Not_Replicate_Rec_Num,sizeof(PGR_Not_Replicate_Type), (int (*)(const void*,const void*))Comp_Not_Replicate); /* relies on the table having been sorted with the same comparator */
4062 +/*--------------------------------------------------------------------
4064 + * PGR_Send_Replicate_Command()
4066 + * send a query to the replication server and return its response
4068 + * char * query_string: query strings (I)
4074 + *--------------------------------------------------------------------
4077 +PGR_Send_Replicate_Command(char * query_string, int query_len, char cmdSts ,char cmdType) /* Send one query to the replication server in two phases (CMD_SYS_PREREPLICATE, then CMD_SYS_REPLICATE) and act on the reply. */
4081 + ReplicateHeader header;
4082 + char * serverName = NULL;
4084 + char * result = NULL;
4085 + ReplicateServerInfo * sp = NULL;
4086 + ReplicateServerInfo * base = NULL;
4087 + int socket_type = 0;
4088 + char argv[ PGR_CMD_ARG_NUM ][256];
4091 + int check_flag =0;
4092 + bool in_transaction = false;
4096 + * check query string
4098 + if ((query_string == NULL) ||
4103 + /* check not replication query */
4104 + if (is_not_replication_query(query_string, query_len, cmdType) == true)
4106 + PGR_Copy_Data_Need_Replicate = false;
4110 + if ((cmdSts == CMD_STS_TRANSACTION ) || /* these three statuses use the transaction socket, everything else the query socket */
4111 + (cmdSts == CMD_STS_SET_SESSION_AUTHORIZATION ) ||
4112 + (cmdSts == CMD_STS_TEMP_TABLE ))
4114 + socket_type = PGR_TRANSACTION_SOCKET ;
4118 + socket_type = PGR_QUERY_SOCKET ;
4121 + if(cmdSts==CMD_STS_TRANSACTION /* every transaction-status command except BEGIN and ROLLBACK counts as being inside a transaction */
4122 + && (cmdType!=CMD_TYPE_BEGIN && cmdType!=CMD_TYPE_ROLLBACK))
4124 + in_transaction = true;
4127 + sp = PGR_get_replicate_server_info();
4130 + if (Debug_pretty_print)
4131 + elog(DEBUG1,"PGR_get_replicate_server_info get error");
4134 + sock = PGR_get_replicate_server_socket( sp , socket_type);
4137 + if (Debug_pretty_print)
4138 + elog(DEBUG1,"PGR_get_replicate_server_socket fail");
4141 + result = malloc(PGR_MESSAGE_BUFSIZE + 4); /* buffer that receives the server's reply */
4142 + if (result == NULL)
4147 + serverName = sp->hostName;
4148 + portNumber = (int)sp->portNumber;
4149 + memset(&header,0,sizeof(ReplicateHeader));
4151 + header.cmdSts = cmdSts;
4152 + header.cmdType = cmdType;
4153 + header.port = htons(PostPortNumber);
4154 + header.pid = htons(getpid()); /* NOTE(review): htons() carries only 16 bits, so larger process ids are truncated - confirm this is acceptable */
4155 + header.query_size = htonl(query_len);
4156 + strncpy(header.dbName ,(char *)(MyProcPort->database_name),sizeof(header.dbName)-1); /* header was zeroed, so sizeof-1 keeps these NUL-terminated */
4157 + strncpy(header.userName , (char *)(MyProcPort->user_name),sizeof(header.userName)-1);
4158 + strncpy(header.password , PGR_password->password, PASSWORD_MAX_LENGTH ); /* NOTE(review): full-length strncpy leaves no terminator if the source fills the buffer - confirm field size */
4159 + memcpy(header.md5Salt ,MyProcPort->md5Salt, sizeof(header.md5Salt));
4160 + memcpy(header.cryptSalt ,MyProcPort->cryptSalt, sizeof(header.cryptSalt));
4161 + header.request_id = htonl(get_next_request_id());
4164 + if (PGRSelfHostName != NULL)
4166 + strncpy(header.from_host, PGRSelfHostName, HOSTNAME_MAX_LENGTH); /* NOTE(review): same termination caveat as the password copy - confirm field size */
4170 + PGR_Sock_To_Replication_Server = sock;
4172 +retry_send_prereplicate_packet:
4174 + memset(result,0,PGR_MESSAGE_BUFSIZE + 4);
4176 + header.cmdSys=CMD_SYS_PREREPLICATE; /* phase 1 */
4178 + while (send_replicate_packet(sock,&header,query_string) != STATUS_OK) /* on send failure, retry and eventually switch to another server */
4181 + if (cnt >= MAX_RETRY_TIMES )
4183 + sock = get_new_replication_socket( base, sp, socket_type);
4186 + if (Debug_pretty_print)
4187 + elog(DEBUG1,"all replication servers may be down");
4188 + PGR_Stand_Alone->is_stand_alone = true; /* no server reachable: fall back to stand-alone mode */
4189 + if (cmdSts == CMD_STS_TRANSACTION )
4191 + strcpy(result,PGR_REPLICATION_ABORT_MSG);
4199 + if(in_transaction)
4201 + elog(ERROR,"replicate server down during replicating transaction. aborted.");
4205 + PGR_Sock_To_Replication_Server = sock;
4210 + memset(result,0,PGR_MESSAGE_BUFSIZE); /* clears 4 bytes fewer than the memset at the retry label */
4211 + if (PGR_recv_replicate_result(sock,result,0) < 0) /* timeout argument 0 selects PGR_Replication_Timeout */
4214 + sock = get_new_replication_socket( base, sp, socket_type);
4217 + if (Debug_pretty_print)
4218 + elog(DEBUG1,"all replication servers may be down");
4219 + PGR_Stand_Alone->is_stand_alone = true;
4221 + if (cmdSts == CMD_STS_TRANSACTION )
4223 + strcpy(result,PGR_REPLICATION_ABORT_MSG);
4226 + if(result!=NULL) {
4232 + PGR_Sock_To_Replication_Server = sock;
4233 + /* replication server should be down */
4235 + if(in_transaction)
4237 + elog(ERROR,"replicate server down during replicating transaction. aborted.");
4242 + goto retry_send_prereplicate_packet;
4246 + argc = set_command_args(argv,result); /* split the reply into argv[] */
4247 + func_no=atoi(argv[0]); /* first field of the reply is a function number */
4249 + /* this server is not primary replicate server*/
4251 + goto retry_send_prereplicate_packet;
4253 +retry_send_replicate_packet:
4255 + memset(result,0,PGR_MESSAGE_BUFSIZE + 4);
4257 + header.cmdSys = CMD_SYS_REPLICATE; /* phase 2 */
4258 + while (send_replicate_packet(sock,&header,query_string) != STATUS_OK)
4260 + if (cnt > MAX_RETRY_TIMES ) /* NOTE(review): phase 1 tests cnt >= MAX_RETRY_TIMES, this one uses > - confirm the difference is intended */
4262 + sock = get_new_replication_socket( base, sp, socket_type);
4265 + if (Debug_pretty_print)
4266 + elog(DEBUG1,"all replication servers may be down");
4267 + PGR_Stand_Alone->is_stand_alone = true;
4268 + if (cmdSts == CMD_STS_TRANSACTION )
4270 + strcpy(result,PGR_REPLICATION_ABORT_MSG);
4278 + PGR_Sock_To_Replication_Server = sock;
4279 + header.rlog = CONNECTION_SUSPENDED_TYPE;
4285 + memset(result,0,PGR_MESSAGE_BUFSIZE);
4286 + if (PGR_recv_replicate_result(sock,result,0) < 0)
4288 + /* replication server should be down */
4289 + sock = get_new_replication_socket( base, sp, socket_type);
4292 + if (Debug_pretty_print)
4293 + elog(DEBUG1,"all replication servers may be down");
4294 + PGR_Stand_Alone->is_stand_alone = true;
4296 + if (cmdSts == CMD_STS_TRANSACTION )
4298 + strcpy(result,PGR_REPLICATION_ABORT_MSG);
4301 + if(result!=NULL) {
4307 + PGR_Sock_To_Replication_Server = sock;
4308 + header.rlog = CONNECTION_SUSPENDED_TYPE;
4310 + goto retry_send_replicate_packet;
4313 + argc = set_command_args(argv,result);
4316 + func_no = atoi(argv[0]); /* dispatch on the function number returned by the server */
4317 + if (func_no == PGR_SET_CURRENT_TIME_FUNC_NO)
4319 + if(! in_transaction) /* the current time is only updated outside a transaction */
4320 + PGR_Set_Current_Time(argv[1],argv[2]);
4321 + set_replication_id(argv[3]);
4322 + set_response_mode(argv[4]);
4323 + PGR_Set_Current_Replication_Query_ID(argv[5]);
4325 + else if (func_no == PGR_NOTICE_DEADLOCK_DETECTION_FUNC_NO)
4327 + memset(result,0,PGR_MESSAGE_BUFSIZE);
4328 + strcpy(result,PGR_DEADLOCK_DETECTION_MSG); /* replace the reply with the deadlock marker message */
4330 + else if (func_no == PGR_SET_CURRENT_REPLICATION_QUERY_ID_NO)
4332 + PGR_Set_Current_Replication_Query_ID(argv[1]);
4334 + else if (func_no == PGR_QUERY_CONFIRM_ANSWER_FUNC_NO)
4336 + check_flag = atoi(argv[1]);
4337 + if (check_flag == PGR_ALREADY_COMMITTED ) /* in this case the argument positions are shifted by one (flag occupies argv[1]) */
4339 + if(! in_transaction)
4340 + PGR_Set_Current_Time(argv[2],argv[3]);
4341 + set_replication_id(argv[4]);
4345 + if(! in_transaction)
4346 + PGR_Set_Current_Time(argv[1],argv[2]);
4347 + set_replication_id(argv[3]);
4348 + /* this query is not replicated */
4360 +PGRget_replication_id(void) /* Accessor for the replicate id stored in ReplicationLog_Info. */
4362 + return (ReplicationLog_Info.PGR_Replicate_ID);
4366 +set_replication_id(char * id) /* Parse id as a decimal number and record it as the current replicate id, both in ReplicationLog_Info and in the current replication server entry. */
4369 + uint32_t saved_id;
4372 + return STATUS_ERROR;
4375 + rid=(uint32_t)atol(id);
4379 + needToUpdateReplicateIdOnNextQueryIsDone=true;
4380 + saved_id=ReplicationLog_Info.PGR_Replicate_ID;
4382 + ReplicationLog_Info.PGR_Replicate_ID =rid;
4385 + /*set replicate id in this process */
4388 + if (CurrentReplicateServer == NULL) /* no server selected yet: let the lookup choose one */
4390 + PGR_get_replicate_server_info();
4392 + if (CurrentReplicateServer != NULL)
4394 + /* set replicate id in this system */
4395 + saved_id=CurrentReplicateServer->replicate_id;
4396 + elog(DEBUG1, "replication id set from %u to %u", saved_id, rid); /* ids are unsigned 32-bit values, so print them with %u */
4398 + CurrentReplicateServer->replicate_id = (uint32_t)(atol(id));
4405 +static unsigned int
4406 +get_next_request_id(void) /* Advance the request counter in ReplicationLog_Info and return its new value. */
4408 + if (ReplicationLog_Info.PGR_Request_ID +1 < PGR_MAX_COUNTER) /* still room below PGR_MAX_COUNTER */
4410 + ReplicationLog_Info.PGR_Request_ID ++;
4414 + ReplicationLog_Info.PGR_Request_ID = 0; /* wrap around */
4416 + return ReplicationLog_Info.PGR_Request_ID ;
4421 +is_this_query_replicated(char * id) /* Report whether the given replicate id logically precedes the id currently recorded for the replication server. */
4423 + uint32_t replicate_id = 0;
4424 + uint32_t saved_id = 0;
4426 + ReplicateServerInfo * replicate_server_info = NULL;
4432 + replicate_id = (uint32_t)atol(id);
4433 + elog(DEBUG1, "check for replication id , input=%u", replicate_id);
4435 + if (CurrentReplicateServer == NULL)
4437 + PGR_get_replicate_server_info();
4440 + if (CurrentReplicateServer != NULL) /* prefer the current server, fall back to the last one used */
4442 + replicate_server_info = CurrentReplicateServer;
4444 + else if (LastReplicateServer != NULL)
4446 + replicate_server_info = LastReplicateServer;
4448 + if (replicate_server_info != NULL)
4451 + saved_id=replicate_server_info->replicate_id;
4452 + saved_id = saved_id < ReplicationLog_Info.PGR_Replicate_ID /* presumably keeps the larger of the two ids - the ternary's last operand is not visible here */
4453 + ? ReplicationLog_Info.PGR_Replicate_ID
4456 + elog(DEBUG1, "check for replication id , now=%u", saved_id);
4457 + /* check replicate_id < saved_id logically
4460 + * backend/transam/transam.c#TransactionIdPrecedes
4463 + diff = (int32) (saved_id-replicate_id); /* the unsigned difference is reinterpreted as signed before testing its sign */
4464 + return (diff > 0);
4466 + elog(DEBUG1, "check for replication id check failed. no replication server");
4472 +get_new_replication_socket( ReplicateServerInfo * base, ReplicateServerInfo * sp, int socket_type) /* Give up on server sp, search for a replacement, and obtain a socket to it. */
4476 + if (( base == NULL) ||
4481 + close_replicate_server_socket ( sp , socket_type);
4482 + PGR_Set_Replication_Server_Status(sp, DATA_ERR); /* mark the failed server so it is not picked again */
4483 + sp = search_new_replication_server(base, socket_type);
4486 + if (Debug_pretty_print)
4487 + elog(DEBUG1,"all replication servers may be down");
4488 + PGR_Stand_Alone->is_stand_alone = true; /* no replacement found: switch to stand-alone mode */
4491 + sock = PGR_get_replicate_server_socket( sp , socket_type);
4497 +PGR_recv_replicate_result(int sock,char * result,int user_timeout) /* Wait with select() until sock is readable, then read the reply into result via recv_message(). */
4500 + struct timeval timeout;
4503 + if (result == NULL)
4509 + * Wait for something to happen.
4513 + if (user_timeout == 0) /* 0 means: use the configured PGR_Replication_Timeout */
4514 + timeout.tv_sec = PGR_Replication_Timeout;
4516 + timeout.tv_sec = user_timeout;
4518 + timeout.tv_usec = 0;
4521 + FD_SET(sock,&rmask);
4522 + rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
4525 + if (errno != EINTR) /* an interrupted select() is treated differently from other failures */
4529 + else if ((rtn > 0) && (FD_ISSET(sock, &rmask)))
4531 + return (recv_message(sock, result,0));
4538 +recv_message(int sock,char * buf,int flag) /* Read from sock into buf with recv(), accumulating up to PGR_MESSAGE_BUFSIZE bytes. */
4543 + int read_size = 0;
4549 + r = recv(sock,read_ptr + read_size ,PGR_MESSAGE_BUFSIZE - read_size, flag); /* request only the bytes still missing */
4551 + if (errno == EINTR || errno == EAGAIN) { /* transient conditions are separated from real failures */
4554 + elog(DEBUG1, "recv_message():recv failed");
4557 + } else if (r == 0) { /* recv() returning 0 means the peer closed the connection */
4558 + elog(DEBUG1, "recv_message():unexpected EOF");
4560 + } else /*if (r > 0)*/ {
4562 + if (read_size == PGR_MESSAGE_BUFSIZE) /* a full fixed-size message has been received */
4572 +send_replicate_packet(int sock,ReplicateHeader * header, char * query_string) /* Build one buffer holding the header followed by the query text and send it over sock. */
4575 + char * send_ptr = NULL;
4576 + char * buf = NULL;
4577 + int send_size = 0;
4579 + int header_size = 0;
4582 + struct timeval timeout;
4583 + int query_size = 0;
4585 + /* check parameter */
4586 + if ((sock < 0) || (header == NULL))
4588 + return STATUS_ERROR;
4591 + query_size = ntohl(header->query_size); /* callers store the size in network byte order */
4592 + header_size = sizeof(ReplicateHeader);
4593 + buf_size = header_size + query_size + 4; /* 4 extra bytes follow the query; they stay zero after the memset below */
4594 + buf = malloc(buf_size);
4597 + return STATUS_ERROR;
4599 + memset(buf,0,buf_size);
4601 + memcpy(buf,header,header_size);
4602 + if (query_string != NULL)
4604 + memcpy((char *)(buf+header_size),query_string,query_size+1); /* reads query_size+1 bytes, so query_string must have a readable byte after the query (the visible callers pass NUL-terminated text) */
4609 + * Wait for something to happen.
4614 + timeout.tv_sec = PGR_Replication_Timeout;
4615 + timeout.tv_usec = 0;
4618 + FD_SET(sock,&wmask);
4619 + rtn = select(sock+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout); /* wait until the socket is writable or the timeout expires */
4622 + if (errno == EINTR)
4626 + elog(DEBUG1, "send_replicate_packet():select() failed");
4627 + return STATUS_ERROR; /* NOTE(review): confirm buf is released on the error returns - no free is visible in these lines */
4630 + else if (rtn && FD_ISSET(sock, &wmask))
4634 + s = send(sock,send_ptr + send_size,buf_size - send_size ,0); /* send whatever part of the buffer is still outstanding */
4636 + if (errno == EINTR || errno == EAGAIN)
4640 + elog(DEBUG1, "send_replicate_packet():send error");
4642 + /* EPIPE || ENCONNREFUSED || ENSOCK || EHOSTUNREACH */
4643 + return STATUS_ERROR;
4644 + } else if (s == 0) {
4647 + elog(DEBUG1, "send_replicate_packet():unexpected EOF");
4648 + return STATUS_ERROR;
4649 + } else /*if (s > 0)*/ {
4651 + if (send_size == buf_size) /* whole buffer has gone out */
4665 + return STATUS_ERROR;
4669 +PGR_Is_Replicated_Command(char * query) /* Thin wrapper: the answer is whatever PGR_Is_System_Command() returns for the query. */
4672 + return (PGR_Is_System_Command(query));
4676 +Xlog_Check_Replicate(int operation) /* For utility, INSERT, UPDATE and DELETE operations, hand over to PGR_Replicate_Function_Call(). */
4678 + if (PGR_Get_Cluster_Status() == STATUS_RECOVERY) /* recovery in progress is handled as a separate case */
4681 + /* elog(WARNING, "This query is not permitted while recovery db "); */
4683 + else if ((operation == CMD_UTILITY ) ||
4684 + (operation == CMD_INSERT ) ||
4685 + (operation == CMD_UPDATE ) ||
4686 + (operation == CMD_DELETE ))
4688 + return (PGR_Replicate_Function_Call());
4694 +PGR_Replicate_Function_Call(void) /* Replicate the pending Query_String (if any) and translate the server's reply into a status code. */
4696 + char *result = NULL;
4697 + int status = STATUS_OK;
4699 + if ((PGR_Get_Cluster_Status() == STATUS_RECOVERY) ||
4700 + (PGR_Stand_Alone == NULL))
4704 + if (Query_String != NULL)
4706 + if (PGR_Is_Stand_Alone() == true)
4708 + if (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) /* stand-alone and read-only: the pending query is dropped and an error returned */
4710 + Query_String = NULL;
4711 + return STATUS_ERROR;
4714 + PGR_Need_Notice = true;
4715 + PGR_Check_Lock.check_lock_conflict = true;
4716 + result = PGR_Send_Replicate_Command(Query_String,strlen(Query_String), CMD_STS_QUERY,CMD_TYPE_SELECT);
4717 + if (result != NULL)
4719 + PGR_Reload_Start_Time();
4720 + if (!strncmp(result,PGR_DEADLOCK_DETECTION_MSG,strlen(PGR_DEADLOCK_DETECTION_MSG))) /* reply begins with the deadlock marker */
4722 + status = STATUS_DEADLOCK_DETECT;
4729 + status = STATUS_ERROR;
4731 + Query_String = NULL; /* the pending query is cleared once handled */
4737 +PGR_delete_shm(void) /* Detach and remove both shared memory segments, close the transaction socket and free the heap buffers held in the globals below. */
4740 + if (ReplicateServerData != NULL)
4742 + shmdt(ReplicateServerData);
4743 + ReplicateServerData = NULL;
4744 + shmctl(ReplicateServerShmid,IPC_RMID,(struct shmid_ds *)NULL); /* IPC_RMID marks the segment for destruction */
4746 + if (ClusterDBData != NULL)
4748 + shmdt(ClusterDBData);
4749 + ClusterDBData = NULL;
4750 + shmctl(ClusterDBShmid,IPC_RMID,(struct shmid_ds *)NULL);
4753 + if (TransactionSock != -1)
4755 + close(TransactionSock);
4758 + if (RsyncPath != NULL)
4763 + if (RsyncOption != NULL)
4765 + free(RsyncOption);
4766 + RsyncOption = NULL;
4769 + if (ReplicateCurrentTime != NULL)
4771 + free(ReplicateCurrentTime);
4772 + ReplicateCurrentTime = NULL;
4775 + if (PGRCopyData != NULL)
4777 + free (PGRCopyData);
4778 + PGRCopyData = NULL;
4781 + if (PGR_Stand_Alone != NULL)
4783 + free(PGR_Stand_Alone);
4784 + PGR_Stand_Alone = NULL;
4787 + if (PGR_Not_Replicate != NULL)
4789 + free(PGR_Not_Replicate);
4790 + PGR_Not_Replicate = NULL;
4792 + if (PGRSelfHostName != NULL)
4794 + free(PGRSelfHostName);
4795 + PGRSelfHostName = NULL;
4797 + if (PGR_password != NULL)
4799 + if (PGR_password->password != NULL) /* free the inner buffer before the struct that owns it */
4801 + free(PGR_password->password);
4802 + PGR_password->password = NULL;
4804 + free(PGR_password);
4805 + PGR_password = NULL;
4809 +ReplicateServerInfo *
4810 +PGR_get_replicate_server_info(void) /* Select the replication server to use, update CurrentReplicateServer / LastReplicateServer, and return NULL when none is usable. */
4813 + ReplicateServerInfo * sp;
4815 + if (ReplicateServerData == NULL)
4817 + return (ReplicateServerInfo *)NULL;
4819 + /* check current using replication server */
4820 + sp = PGR_check_replicate_server_info();
4823 + if (CurrentReplicateServer != NULL)
4825 + LastReplicateServer = CurrentReplicateServer;
4826 + CurrentReplicateServer->replicate_id = LastReplicateServer->replicate_id; /* NOTE(review): both pointers are equal after the previous line, so this copies the field onto itself - confirm intent */
4828 + CurrentReplicateServer = sp;
4831 + /* there is no used replication server */
4832 + /* however it may exist still in initial status */
4833 + sp = ReplicateServerData;
4834 + while (sp->useFlag != DATA_END)
4836 + if (sp->useFlag != DATA_ERR ) /* any entry not marked as failed is a candidate */
4838 + if (CurrentReplicateServer != NULL)
4840 + LastReplicateServer = CurrentReplicateServer;
4841 + CurrentReplicateServer->replicate_id = LastReplicateServer-> replicate_id; /* NOTE(review): self-assignment, as above */
4843 + CurrentReplicateServer = sp;
4844 + PGR_Set_Replication_Server_Status(sp, DATA_USE);
4849 + PGR_Stand_Alone->is_stand_alone = true; /* every entry is in error: run stand-alone */
4850 + if (CurrentReplicateServer != NULL)
4852 + LastReplicateServer = CurrentReplicateServer;
4853 + CurrentReplicateServer->replicate_id = LastReplicateServer-> replicate_id; /* NOTE(review): self-assignment, as above */
4855 + CurrentReplicateServer = NULL;
4856 + return (ReplicateServerInfo *)NULL;
4859 +ReplicateServerInfo *
4860 +PGR_check_replicate_server_info(void) /* Scan ReplicateServerData for an entry whose useFlag is DATA_USE. */
4863 + ReplicateServerInfo * sp;
4865 + if (ReplicateServerData == NULL)
4867 + return (ReplicateServerInfo *)NULL;
4869 + sp = ReplicateServerData;
4870 + while (sp->useFlag != DATA_END) /* stop at the sentinel entry */
4872 + if (sp->useFlag == DATA_USE ) /* presumably this entry is returned - the return statement is not visible here */
4882 +PGR_Send_Copy(CopyData * copy,int end ) /* Send the buffered COPY data in copy to the replication server, then clear the buffer. */
4885 + char cmdSts,cmdType;
4887 + char *result = NULL;
4889 + /*int status = 0; */
4893 + return STATUS_ERROR;
4896 + cmdSts = CMD_STS_COPY; /* default status; replaced below when in a transaction or session-authorization mode */
4898 + if (Transaction_Mode > 0)
4900 + cmdSts = CMD_STS_TRANSACTION ;
4902 + if (Session_Authorization_Mode)
4904 + cmdSts = CMD_STS_SET_SESSION_AUTHORIZATION ;
4906 + cmdType = CMD_TYPE_COPY_DATA;
4908 + copy->copy_data[copy->cnt] = '\0'; /* terminate the buffered data so string functions can be used on it */
4911 + memset(term,0,sizeof(term));
4916 + cmdType = CMD_TYPE_COPY_DATA_END; /* presumably chosen when end is set - the condition is not visible here */
4918 + if (copy->cnt > 0)
4920 + copy->copy_data[copy->cnt] = '\0';
4921 + p = strstr(copy->copy_data,term); /* look for the terminator sequence held in term */
4924 + p = &(copy->copy_data[copy->cnt-1]);
4934 + strncpy(p,term,sizeof(term));
4938 + result = PGR_Send_Replicate_Command(copy->copy_data, copy->cnt, cmdSts, cmdType);
4939 + memset(copy,0,sizeof(CopyData)); /* buffer is reset whether or not the send succeeded */
4941 + if (result != NULL)
4943 + PGR_Reload_Start_Time();
4950 + return STATUS_ERROR;
4955 +PGR_Set_Copy_Data(CopyData * copy, char *str, int len,int end) /* Append str to the COPY buffer in copy, flushing it through PGR_Send_Copy() whenever it fills up. */
4959 + int read_index = 0;
4960 + int send_size = 0;
4963 + int rest_buf_size = 0;
4964 + int status = STATUS_OK;
4968 + #define BUFF_OFFSET (8)
4970 + if ((PGR_Copy_Data_Need_Replicate == false) ||
4973 + return (CopyData *)NULL;
4975 + memset(term,0,sizeof(term));
4979 + buf_size = COPYBUFSIZ - BUFF_OFFSET; /* usable capacity: BUFF_OFFSET bytes of the buffer are kept in reserve */
4982 + rest_buf_size = buf_size - copy->cnt;
4983 + while ((rest_len > 0) && (rest_buf_size > 0))
4985 + if (rest_buf_size < rest_len) /* input does not fit: copy only as much as the buffer can take */
4987 + send_size = rest_buf_size;
4988 + rest_len -= send_size;
4992 + send_size = rest_len;
4995 + memcpy(&(copy->copy_data[copy->cnt]) ,str + read_index ,send_size);
4996 + copy->cnt += send_size;
4997 + read_index += send_size;
4998 + rest_buf_size = buf_size - copy->cnt;
4999 + if (strstr(copy->copy_data,term) != NULL) /* terminator sequence found in the buffered data */
5003 + if (rest_buf_size <= 0) /* buffer full */
5005 + ep = strrchr(copy->copy_data,'\n'); /* find the last line break so the buffer can be split there */
5009 + save_len = copy->cnt - strlen(copy->copy_data) -1; /* NOTE(review): only meaningful if the string was cut at ep just before this line - that write is not visible here */
5010 + copy->cnt -= save_len ;
5011 + memset(&save,0,sizeof(CopyData));
5012 + memcpy(save.copy_data,(ep+1),save_len+1); /* keep the part after the last line break for the next round */
5013 + save.cnt = save_len;
5016 + status = PGR_Send_Copy(copy,0);
5017 + memset(copy,0,sizeof(CopyData));
5020 + memcpy(copy,&save,sizeof(CopyData)); /* carry the saved remainder over into the emptied buffer */
5022 + rest_buf_size = buf_size - copy->cnt;
5027 + /* one record is bigger than COPYBUFSIZ */
5028 + /* buffer would be over flow*/
5029 + status = PGR_Send_Copy(copy,0);
5030 + memset(copy,0,sizeof(CopyData));
5031 + rest_buf_size = buf_size - copy->cnt;
5037 + status = PGR_Send_Copy(copy,end);
5038 + memset(copy,0,sizeof(CopyData));
5040 + if (status != STATUS_OK)
5042 + return (CopyData *)NULL;
5048 +PGR_replication(char * query_string, CommandDest dest, Node *parsetree, const char * commandTag) /* Classify the statement by commandTag and decide whether to forward it through PGR_Send_Replicate_Command(); returns one of the STATUS_* codes used below. */
5050 + char *result = NULL;
5051 + char cmdSts = CMD_STS_OTHER;
5052 + char cmdType = CMD_TYPE_OTHER;
5053 + int query_len = 0;
5055 + if ((query_string == NULL) ||
5056 + (commandTag == NULL))
5058 + return STATUS_ERROR;
5061 + Query_String = NULL;
5062 + query_len = strlen(query_string);
5064 + /* save query data for retry */
5065 + PGR_Retry_Query.query_string = query_string; /* stores the caller's pointer, not a copy */
5066 + PGR_Retry_Query.query_len = query_len;
5067 + PGR_Retry_Query.cmdSts = cmdSts; /* still CMD_STS_OTHER here: classification happens further down */
5068 + PGR_Retry_Query.cmdType = cmdType; /* still CMD_TYPE_OTHER here */
5069 + PGR_Retry_Query.useFlag = DATA_USE;
5071 + if (!strcmp(commandTag,"BEGIN")) cmdType = CMD_TYPE_BEGIN ;
5072 + else if (!strcmp(commandTag,"COMMIT")) cmdType = CMD_TYPE_COMMIT ;
5073 + else if (!strcmp(commandTag,"SELECT")) cmdType = CMD_TYPE_SELECT ;
5074 + else if (!strcmp(commandTag,"INSERT")) cmdType = CMD_TYPE_INSERT ;
5075 + else if (!strcmp(commandTag,"UPDATE")) cmdType = CMD_TYPE_UPDATE ;
5076 + else if (!strcmp(commandTag,"DELETE")) cmdType = CMD_TYPE_DELETE ;
5077 + else if (!strcmp(commandTag,"VACUUM")) cmdType = CMD_TYPE_VACUUM ;
5078 + else if (!strcmp(commandTag,"ANALYZE")) cmdType = CMD_TYPE_ANALYZE ;
5079 + else if (!strcmp(commandTag,"REINDEX")) cmdType = CMD_TYPE_REINDEX ;
5080 + else if (!strcmp(commandTag,"ROLLBACK")) cmdType = CMD_TYPE_ROLLBACK ;
5081 + else if (!strcmp(commandTag,"RESET")) cmdType = CMD_TYPE_RESET ;
5082 + else if (!strcmp(commandTag,"START TRANSACTION")) cmdType = CMD_TYPE_BEGIN ; /* same type as BEGIN */
5084 + /* only "replication_server" statement-name is replicated for SHOW. */
5085 + /* see CreateCommandTag() @ backend/tcop/postgres.c */
5087 + else if (!strcmp(commandTag,"COPY"))
5089 + cmdType = CMD_TYPE_COPY ;
5090 + if (is_copy_from(query_string))
5092 + PGR_Copy_Data_Need_Replicate = true;
5096 + PGR_Copy_Data_Need_Replicate = false;
5097 + return STATUS_NOT_REPLICATE;
5100 + else if (!strcmp(commandTag,"SET"))
5102 + cmdType = CMD_TYPE_SET;
5104 + VariableSetStmt *stmt = (VariableSetStmt *)parsetree; /* NOTE(review): parsetree and stmt->name are used without a visible NULL check */
5105 + if (strcmp(stmt->name, "TRANSACTION ISOLATION LEVEL") && /* a SET whose name matches none of the strings below is not replicated */
5106 + strcmp(stmt->name, "datestyle") &&
5107 + strcmp(stmt->name, "autocommit") &&
5108 + strcmp(stmt->name, "client_encoding") &&
5109 + strcmp(stmt->name, "password_encryption") &&
5110 + strcmp(stmt->name, "search_path") &&
5111 + strcmp(stmt->name, "session_authorization") &&
5112 + strcmp(stmt->name, "timezone"))
5114 + return STATUS_NOT_REPLICATE;
5116 + if (strstr(query_string,SYS_QUERY_1) != NULL)
5118 + return STATUS_NOT_REPLICATE;
5121 + else if (!strcmp(commandTag,"CREATE TABLE"))
5123 + if (is_create_temp_table(query_string))
5125 + Create_Temp_Table_Mode = true;
5128 + if (Create_Temp_Table_Mode)
5130 + cmdSts = CMD_STS_TEMP_TABLE ;
5132 + if (Transaction_Mode > 0)
5134 + cmdSts = CMD_STS_TRANSACTION ;
5138 + if ((cmdType == CMD_TYPE_COMMIT ) ||
5139 + (cmdType == CMD_TYPE_ROLLBACK ))
5141 + cmdSts = CMD_STS_TRANSACTION ;
5142 + if (ReplicateCurrentTime != NULL)
5144 + ReplicateCurrentTime->useFlag = DATA_INIT;
5145 + ReplicateCurrentTime->use_seed = 0;
5149 + if (Session_Authorization_Mode)
5151 + cmdSts = CMD_STS_SET_SESSION_AUTHORIZATION ;
5152 + if (cmdType == CMD_TYPE_SESSION_AUTHORIZATION_END) /* NOTE(review): no assignment of this cmdType is visible in the tag chain above -- confirm where it is set */
5154 + Session_Authorization_Mode = false;
5157 + if ((cmdSts == CMD_STS_TRANSACTION ) ||
5158 + (cmdSts == CMD_STS_SET_SESSION_AUTHORIZATION ) ||
5159 + (cmdSts == CMD_STS_TEMP_TABLE ))
5161 + /* skip statements that is_not_replication_query() excludes from replication */
5162 + if (is_not_replication_query(query_string, query_len, cmdType)== true )
5164 + PGR_Copy_Data_Need_Replicate = false;
5165 + return STATUS_NOT_REPLICATE;
5167 + Query_String = NULL;
5168 + if (( do_not_replication_command(commandTag) == true) && /* SELECT is exempted from this early return by the strcmp below */
5169 + (strcmp(commandTag,"SELECT")))
5171 + return STATUS_NOT_REPLICATE;
5174 + if (Debug_pretty_print)
5175 + elog(DEBUG1,"transaction query send :%s",(char *)query_string);
5176 + PGR_Retry_Query.cmdSts = cmdSts; /* refresh the retry record with the classified values */
5177 + PGR_Retry_Query.cmdType = cmdType;
5178 + result = PGR_Send_Replicate_Command(query_string,query_len, cmdSts,cmdType);
5179 + if (result != NULL)
5181 + if (!strncmp(result,PGR_DEADLOCK_DETECTION_MSG,strlen(PGR_DEADLOCK_DETECTION_MSG)))
5184 + PGR_Send_Message_To_Frontend(result);
5188 + return STATUS_DEADLOCK_DETECT;
5190 + else if (!strncmp(result,PGR_REPLICATION_ABORT_MSG,strlen(PGR_REPLICATION_ABORT_MSG)))
5194 + return STATUS_REPLICATION_ABORT;
5198 + return STATUS_CONTINUE;
5202 + return STATUS_ERROR;
5207 + cmdSts = CMD_STS_QUERY ; /* NOTE(review): unlike the branch above, no refresh of PGR_Retry_Query.cmdSts/cmdType is visible on this path */
5208 + if ( do_not_replication_command(commandTag) == false)
5210 + Query_String = NULL;
5211 + /* skip statements that is_not_replication_query() excludes from replication */
5212 + if (is_not_replication_query(query_string, query_len, cmdType)== true )
5214 + PGR_Copy_Data_Need_Replicate = false;
5215 + return STATUS_NOT_REPLICATE;
5217 + result = PGR_Send_Replicate_Command(query_string,query_len,cmdSts,cmdType);
5218 + if (result != NULL)
5220 + if (!strncmp(result,PGR_DEADLOCK_DETECTION_MSG,strlen(PGR_DEADLOCK_DETECTION_MSG)))
5224 + return STATUS_DEADLOCK_DETECT;
5226 + else if (!strncmp(result,PGR_REPLICATION_ABORT_MSG,strlen(PGR_REPLICATION_ABORT_MSG)))
5230 + return STATUS_REPLICATION_ABORT;
5233 + PGR_Send_Message_To_Frontend(result);
5237 + return STATUS_CONTINUE;
5241 + return STATUS_ERROR;
5246 + if (( is_serial_control_query(cmdType,query_string) == true) ||
5247 + ( is_select_into_query(cmdType,query_string) == true))
5249 + Query_String = NULL;
5250 + PGR_Need_Notice = true;
5251 + PGR_Check_Lock.check_lock_conflict = true;
5252 + result = PGR_Send_Replicate_Command(query_string,query_len,cmdSts,cmdType);
5253 + if (result != NULL)
5256 + PGR_Send_Message_To_Frontend(result);
5258 + if (!strncmp(result,PGR_DEADLOCK_DETECTION_MSG,strlen(PGR_DEADLOCK_DETECTION_MSG)))
5261 + return STATUS_DEADLOCK_DETECT;
5265 + return STATUS_CONTINUE;
5269 + return STATUS_ERROR;
5274 + Query_String = query_string; /* keeps the caller's pointer in the global, not a copy */
5275 + /*PGR_Sock_To_Replication_Server = -1;*/
5277 + return STATUS_CONTINUE_SELECT;
5280 + return STATUS_CONTINUE;
5285 +PGR_Is_System_Command(char * query)
5289 + if (query == NULL)
5293 + ptr = strstr(query,PGR_SYSTEM_COMMAND_FUNC);
5296 + ptr = strchr(ptr,'(');
5305 +set_command_args(char argv[ PGR_CMD_ARG_NUM ][256],char *str) /* Scan str one character at a time, copying characters into argv[cnt] and ending an argument at each ',' or ')'. */
5314 + len = strlen(str);
5316 + for ( i = 0 ; i < len ; i++,ptr++)
5318 + if (cnt >= PGR_CMD_ARG_NUM) /* guard on the number of argv slots */
5320 + if (( *ptr == ',') || (*ptr == ')')) /* either delimiter terminates the current argument */
5322 + argv[cnt][j] = '\0';
5327 + argv[cnt][j] = *ptr; /* NOTE(review): no visible check that j stays below 256 -- confirm an overlong argument cannot overrun the slot */
5330 + if (cnt < PGR_CMD_ARG_NUM)
5331 + argv[cnt][j] = '\0'; /* terminate whatever was accumulated when the loop ended */
5338 +add_replication_server(char * hostname,char * port, char * recovery_port) /* Walk ReplicateServerData up to its DATA_END entry looking for a matching host/port/recovery-port entry; when cnt allows, fill the current slot and write a new DATA_END sentinel after it. */
5342 + int recoveryPortNumber;
5343 + ReplicateServerInfo * sp;
5345 + if ((hostname == NULL) ||
5346 + (port == NULL ) ||
5347 + (recovery_port == NULL ))
5349 + return STATUS_ERROR;
5351 + if (ReplicateServerData == NULL)
5353 + return STATUS_ERROR;
5355 + portNumber = atoi(port); /* atoi: non-numeric text silently becomes 0 */
5356 + recoveryPortNumber = atoi(recovery_port);
5358 + sp = ReplicateServerData;
5359 + while (sp->useFlag != DATA_END){
5360 + if((!strncmp(sp->hostName,hostname,sizeof(sp->hostName))) && /* bounded compare here; change_replication_server() uses plain strcmp for the same test */
5361 + (sp->portNumber == portNumber) &&
5362 + (sp->recoveryPortNumber == recoveryPortNumber))
5364 + if (sp->useFlag != DATA_USE)
5366 + PGR_Set_Replication_Server_Status(sp, DATA_INIT);
5373 + if (cnt < MAX_SERVER_NUM)
5375 + strncpy(sp->hostName,hostname,sizeof(sp->hostName)); /* NOTE(review): strncpy does not NUL-terminate when hostname is at least sizeof(sp->hostName) bytes long */
5376 + sp->portNumber = portNumber;
5377 + sp->recoveryPortNumber = recoveryPortNumber;
5378 + PGR_Set_Replication_Server_Status(sp, DATA_INIT);
5379 + memset((sp+1),0,sizeof(ReplicateServerInfo)); /* NOTE(review): writes the slot after sp -- confirm the array has room for MAX_SERVER_NUM entries plus this sentinel */
5380 + (sp + 1)->useFlag = DATA_END;
5384 + return STATUS_ERROR;
5390 +change_replication_server(char * hostname,char * port, char * recovery_port)
5394 + int recoveryPortNumber;
5395 + ReplicateServerInfo * sp;
5397 + if ((hostname == NULL) ||
5398 + (port == NULL ) ||
5399 + (recovery_port == NULL ))
5401 + return STATUS_ERROR;
5403 + if (ReplicateServerData == NULL)
5405 + return STATUS_ERROR;
5407 + portNumber = atoi(port);
5408 + recoveryPortNumber = atoi(recovery_port);
5410 + sp = ReplicateServerData;
5411 + while (sp->useFlag != DATA_END){
5412 + if((!strcmp(sp->hostName,hostname)) &&
5413 + (sp->portNumber == portNumber) &&
5414 + (sp->recoveryPortNumber == recoveryPortNumber))
5416 + PGR_Set_Replication_Server_Status(sp, DATA_USE);
5420 + if (sp->useFlag == DATA_USE)
5422 + PGR_Set_Replication_Server_Status(sp, DATA_INIT);
5432 +PGR_Set_Current_Time(char * sec, char * usec) /* Store the timestamp given as decimal strings in ReplicateCurrentTime, together with its offset from the local clock. */
5435 + struct timeval local_tp;
5436 + struct timezone local_tpz;
5437 + struct timeval tv;
5439 + if ((sec == NULL) ||
5442 + return STATUS_ERROR;
5444 + rtn = gettimeofday(&local_tp, &local_tpz); /* NOTE(review): no check of rtn is visible before local_tp is used */
5445 + tv.tv_sec = atol(sec); /* atol: malformed text silently becomes 0 */
5446 + tv.tv_usec = atol(usec);
5447 + ReplicateCurrentTime->offset_sec = local_tp.tv_sec - tv.tv_sec; /* NOTE(review): no NULL check of ReplicateCurrentTime is visible here; PGR_Call_System_Command() checks it before dispatching */
5448 + ReplicateCurrentTime->offset_usec = local_tp.tv_usec - tv.tv_usec; /* can be negative when the local microseconds are smaller */
5449 + ReplicateCurrentTime->tp.tv_sec = tv.tv_sec;
5450 + ReplicateCurrentTime->tp.tv_usec = tv.tv_usec;
5451 + ReplicateCurrentTime->useFlag = DATA_USE;
5452 + ReplicateCurrentTime->use_seed = 0; /* reset the seed marker along with the new time */
5458 +PGR_Set_Current_Replication_Query_ID(char *id) { /* Parse id as a decimal number and store it in MyProc->replicationId. */
5459 + MyProc->replicationId=atol(id); /* NOTE(review): neither id nor MyProc is visibly checked for NULL; atol yields 0 on malformed text */
5464 +set_response_mode(char * mode)
5466 + int response_mode = 0;
5470 + response_mode = atoi(mode);
5471 + if (response_mode < 0)
5473 + if (CurrentReplicateServer == NULL)
5475 + PGR_get_replicate_server_info();
5476 + if (CurrentReplicateServer == NULL)
5481 + if (CurrentReplicateServer->response_mode != response_mode)
5483 + CurrentReplicateServer->response_mode = response_mode;
5488 +PGR_Call_System_Command(char * command) /* Locate the PGR_SYSTEM_COMMAND_FUNC call text inside command, split its argument list with set_command_args(), and dispatch on the numeric value of the first argument. */
5492 + char argv[ PGR_CMD_ARG_NUM ][256]; /* NOTE(review): no initialisation is visible, so slots set_command_args() does not fill may hold indeterminate bytes */
5495 + char * hostName = NULL;
5497 + if ((command == NULL) || (ReplicateCurrentTime == NULL))
5499 + return STATUS_ERROR;
5501 + ptr = strstr(command,PGR_SYSTEM_COMMAND_FUNC);
5503 + return STATUS_ERROR;
5504 + ptr = strchr(ptr,'(');
5506 + return STATUS_ERROR;
5508 + ptr = strchr(ptr,')');
5510 + return STATUS_ERROR;
5512 + argc = set_command_args(argv,args);
5514 + return STATUS_ERROR;
5515 + func_no = atoi(argv[0]); /* first argument selects the sub-command */
5518 + /* set current system time */
5519 + case PGR_SET_CURRENT_TIME_FUNC_NO:
5520 + if (atol(argv[1]) == 0)
5522 + CreateCheckPoint(false,true);
5527 + if ((atoi(argv[3]) > 0) &&
5528 + (is_this_query_replicated(argv[3]) == true))
5530 + return STATUS_SKIP_QUERY;
5533 + PGR_Set_Current_Time(argv[1],argv[2]);
5534 + set_replication_id(argv[3]);
5535 + set_response_mode(argv[4]);
5536 + PGR_Set_Current_Replication_Query_ID(argv[5]); /* NOTE(review): argv[1..5] are consumed here; confirm argc is validated per sub-command */
5540 + /* add new replication server data */
5541 + case PGR_STARTUP_REPLICATION_SERVER_FUNC_NO:
5542 + hostName = get_hostName(argv[1]);
5543 + add_replication_server(hostName,argv[2],argv[3]); /* return value is not used on this line */
5545 + /* switch to another replication server */
5546 + case PGR_CHANGE_REPLICATION_SERVER_FUNC_NO:
5547 + hostName = get_hostName(argv[1]);
5548 + change_replication_server(hostName,argv[2],argv[3]); /* return value is not used on this line */
5550 + case PGR_SET_CURRENT_REPLICATION_QUERY_ID_NO:
5551 + PGR_Set_Current_Replication_Query_ID(argv[1]);
5553 + case PGR_QUERY_CONFIRM_ANSWER_FUNC_NO:
5554 + if ((atoi(argv[3]) > 0) &&
5555 + (is_this_query_replicated(argv[3]) == true))
5557 + /* skip this query */
5558 + return STATUS_SKIP_QUERY;
5562 + PGR_Set_Current_Time(argv[1],argv[2]);
5563 + set_replication_id(argv[3]);
5566 + /* get current oid */
5567 + case PGR_GET_OID_FUNC_NO:
5568 + return_current_oid();
5570 + /* set current oid */
5571 + case PGR_SET_OID_FUNC_NO:
5572 + sync_oid(argv[1]);
5574 + /* set noticed session abort */
5575 + case PGR_NOTICE_ABORT_FUNC_NO:
5576 + PGR_Noticed_Abort = true;
5583 +PGR_GetTimeOfDay(struct timeval *tp, struct timezone *tpz)
5588 + rtn = gettimeofday(tp, tpz);
5589 + if (ReplicateCurrentTime == NULL)
5593 + if (ReplicateCurrentTime->useFlag == DATA_USE)
5595 + if (ReplicateCurrentTime->use_seed != 0)
5597 + tp->tv_sec -= ReplicateCurrentTime->offset_sec;
5598 + if (tp->tv_usec < ReplicateCurrentTime->offset_usec)
5600 + tp->tv_usec += (1000000 - ReplicateCurrentTime->offset_usec);
5605 + tp->tv_usec -= ReplicateCurrentTime->offset_usec;
5610 + tp->tv_sec = ReplicateCurrentTime->tp.tv_sec;
5611 + tp->tv_usec = ReplicateCurrentTime->tp.tv_usec;
5622 + if (ReplicateCurrentTime != NULL)
5624 + if ( ReplicateCurrentTime->use_seed == 0)
5626 + srand( ReplicateCurrentTime->tp.tv_usec );
5627 + ReplicateCurrentTime->use_seed = 1;
5635 +PGR_scan_terminate( char * str)
5647 + memset(tag,0,sizeof(tag));
5648 + while ( *p != '\0' )
5650 + if ((!strncmp(p,"--",2)) ||
5651 + (!strncmp(p,"//",2)))
5653 + while (( *p != '\n') && (*p != '\0'))
5671 + while (( *p != '\n') && (*p != '\0'))
5673 + if (isalnum(*p) == 0)
5683 + if (i >= sizeof(tag))
5706 + if ((!sflag) && (!dflag) && (!lflag))
5716 +is_copy_from(char * query)
5722 + if (query == NULL)
5725 + for ( i = 0 ; i <= 1 ; i ++)
5727 + /* get 'copy table_name' string */
5728 + while(isspace(*p))
5730 + while ((*p != '\0') && (*p != '(') && (!isspace(*p)))
5733 + while(isspace(*p))
5735 + /* skip table column */
5740 + while (*p != '\0')
5753 + while(isspace(*p))
5756 + /* get 'from' or 'to' */
5758 + memset(buf,0,sizeof(buf));
5759 + while ((*p != '\0') && (!isspace(*p)) && ( i < sizeof(buf)-1))
5761 + buf[i] = (char)toupper(*p);
5765 + if (!strcmp(buf,"FROM"))
5776 +is_create_temp_table(char * query) /* Tokenise query with get_words() (upper-casing requested) and test whether its first three words begin with CREATE, TEMP and TABLE. */
5779 + char buf[MAX_WORDS][MAX_WORD_LETTERS];
5781 + if (query == NULL)
5783 + len = strlen(query);
5784 + wc = get_words(buf,query,len,1); /* get_words() can return STATUS_ERROR on NULL input or failed allocation; it lands in wc */
5787 + if ((!strncmp(buf[0],"CREATE", strlen("CREATE"))) && /* prefix compares: any word merely starting with the keyword matches */
5788 + (!strncmp(buf[1],"TEMP",strlen("TEMP"))) && /* so "TEMPORARY" matches too; NOTE(review): "CREATE LOCAL/GLOBAL TEMP TABLE" would not, as TEMP must be the second word */
5789 + (!strncmp(buf[2],"TABLE",strlen("TABLE"))))
5797 +get_words( char words[MAX_WORDS][MAX_WORD_LETTERS] ,char * string,int length,int upper) /* Split string on whitespace into words[], staging each word in a heap scratch buffer of `length` bytes. */
5801 + char * buf = NULL;
5803 + if (string == NULL)
5804 + return STATUS_ERROR;
5805 + buf = malloc(length); /* NOTE(review): no matching free(buf) is visible in this view -- confirm it is released on every exit */
5807 + return STATUS_ERROR;
5809 + memset(buf,0,length);
5812 + for (i = 0 ; i < length ; i ++)
5814 + if ((*p == '\0') || (wc >= MAX_WORDS)) /* stop at end of text or when words[] is full */
5816 + while (isspace(*p)) /* NOTE(review): passing a plain char to isspace/toupper is undefined for negative values; an (unsigned char) cast is the usual guard */
5822 + while ((*p != '\0') && (! isspace(*p)))
5825 + *(buf+lc) = (char)toupper(*p);
5833 + memset(words[wc],0,MAX_WORD_LETTERS);
5834 + memcpy(words[wc],buf,lc); /* NOTE(review): lc is not visibly capped at MAX_WORD_LETTERS-1 -- confirm a long token cannot overflow words[wc] */
5835 + memset(buf,0,length); /* clear the scratch buffer before the next word */
5844 +Comp_Not_Replicate(PGR_Not_Replicate_Type * nrp1,PGR_Not_Replicate_Type* nrp2)
5848 + if ((nrp1 == NULL) ||
5853 + rtn = strcasecmp(nrp1->table_name,nrp2->table_name);
5856 + rtn = strcasecmp(nrp1->db_name,nrp2->db_name);
5862 +PGR_Is_Stand_Alone(void)
5864 + ReplicateServerInfo * sp = NULL;
5866 + if (PGR_Stand_Alone == NULL)
5868 + if (PGR_Stand_Alone->is_stand_alone == true)
5870 + sp = PGR_get_replicate_server_info();
5880 +PGR_Send_Message_To_Frontend(char * msg)
5882 + StringInfoData msgbuf;
5884 + pq_beginmessage(&msgbuf, 'N');
5886 + if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
5888 + /* New style with separate fields */
5893 + pq_sendbyte(&msgbuf, PG_DIAG_SEVERITY);
5894 + pq_sendstring(&msgbuf, "NOTICE" );
5896 + /* unpack MAKE_SQLSTATE code */
5897 + ssval = ERRCODE_WARNING ;
5898 + for (i = 0; i < 5; i++)
5900 + tbuf[i] = PGUNSIXBIT(ssval);
5905 + pq_sendbyte(&msgbuf, PG_DIAG_SQLSTATE);
5906 + pq_sendstring(&msgbuf, tbuf);
5908 + /* M field is required per protocol, so always send something */
5909 + pq_sendbyte(&msgbuf, PG_DIAG_MESSAGE_PRIMARY);
5911 + pq_sendstring(&msgbuf, msg);
5913 + pq_sendstring(&msgbuf, _("missing error text"));
5915 + pq_sendbyte(&msgbuf, '\0'); /* terminator */
5919 + /* Old style --- gin up a backwards-compatible message */
5920 + StringInfoData buf;
5922 + initStringInfo(&buf);
5924 + appendStringInfo(&buf, "%s: ", "NOTICE");
5927 + appendStringInfoString(&buf, msg);
5929 + appendStringInfoString(&buf, _("missing error text"));
5931 + appendStringInfoChar(&buf, '\n');
5933 + pq_sendstring(&msgbuf, buf.data);
5938 + pq_endmessage(&msgbuf);
5941 + * This flush is normally not necessary, since postgres.c will flush out
5942 + * waiting data when control returns to the main loop. But it seems best
5943 + * to leave it here, so that the client has some clue what happened if the
5944 + * backend dies before getting back to the main loop ... error/notice
5945 + * messages should not be a performance-critical path anyway, so an extra
5946 + * flush won't hurt much ...
5952 +is_serial_control_query(char cmdType,char * query) /* Upper-case a heap copy of query and look for the substrings NEXTVAL or SETVAL. */
5954 + char * buf = NULL;
5959 + if ((cmdType != CMD_TYPE_SELECT ) ||
5966 + len = strlen(query) +1; /* +1 so the terminating NUL is copied as well */
5967 + buf = malloc(len);
5971 + memset(buf,0,len);
5972 + for ( i = 0 ; i < len ; i ++)
5974 + *(buf+i) = toupper(*(query+i)); /* NOTE(review): plain char passed to toupper; negative values are undefined without an (unsigned char) cast */
5976 + if ((strstr(buf,"NEXTVAL") != NULL) || /* substring match over the whole statement, so it also hits identifiers and string literals containing these words */
5977 + (strstr(buf,"SETVAL") != NULL))
5989 +is_select_into_query(char cmdType,char * query) /* Upper-case a heap copy of query and look for the substrings INTO and CREATE. */
5991 + char * buf = NULL;
5996 + if ((cmdType != CMD_TYPE_SELECT ) ||
6003 + len = strlen(query) +1; /* +1 so the terminating NUL is copied as well */
6004 + buf = malloc(len);
6008 + memset(buf,0,len);
6009 + for ( i = 0 ; i < len ; i ++)
6011 + *(buf+i) = toupper(*(query+i)); /* NOTE(review): plain char passed to toupper; negative values are undefined without an (unsigned char) cast */
6013 + if (strstr(buf,"INTO") != NULL) /* substring match over the whole statement, so it also hits identifiers and string literals containing INTO */
6019 + if (strstr(buf,"CREATE") != NULL)
6031 +send_response_to_replication_server(const char * notice)
6033 + ReplicateHeader header;
6036 + if (PGR_Lock_Noticed)
6040 + if ((notice == NULL) ||
6041 + (PGR_Sock_To_Replication_Server < 0))
6043 + return STATUS_ERROR;
6046 + memset(&header,0,sizeof(ReplicateHeader));
6047 + header.cmdSys = CMD_SYS_CALL;
6048 + header.cmdSts = CMD_STS_RESPONSE;
6049 + if (!strcmp(notice,PGR_QUERY_ABORTED_NOTICE_CMD))
6051 + header.cmdType = CMD_TYPE_FRONTEND_CLOSED;
6053 + header.query_size = htonl(strlen(notice));
6054 + status = send_replicate_packet(PGR_Sock_To_Replication_Server,&header,(char *)notice);
6059 +PGR_Notice_Transaction_Query_Done(void)
6061 + send_response_to_replication_server(PGR_QUERY_DONE_NOTICE_CMD);
6065 +PGR_Notice_Transaction_Query_Aborted(void)
6067 + send_response_to_replication_server(PGR_QUERY_ABORTED_NOTICE_CMD);
6071 +PGR_Notice_Conflict(void)
6073 + const char * msg = NULL ;
6074 + int rtn = STATUS_OK;
6076 + msg = PGR_LOCK_CONFLICT_NOTICE_CMD ;
6077 + if (PGR_Check_Lock.deadlock == true)
6079 + msg = PGR_DEADLOCK_DETECT_NOTICE_CMD ;
6081 + if (PGR_Check_Lock.dest == TO_FRONTEND)
6083 + ReadyForQuery(DestRemote);
6084 + EndCommand(msg,DestRemote);
6085 +#ifdef CONTROL_LOCK_CONFLICT
6086 + rtn = wait_lock_answer();
6087 +#endif /* CONTROL_LOCK_CONFLICT */
6091 + send_response_to_replication_server(msg);
6092 +#ifdef CONTROL_LOCK_CONFLICT
6093 + rtn = PGR_Recv_Trigger (PGR_Replication_Timeout);
6094 +#endif /* CONTROL_LOCK_CONFLICT */
6099 +#ifdef CONTROL_LOCK_CONFLICT
6101 +wait_lock_answer(void)
6103 + char result[PGR_MESSAGE_BUFSIZE+4];
6106 + memset(result,0,sizeof(result));
6107 + rtn = read_trigger(result, PGR_MESSAGE_BUFSIZE);
6109 + return STATUS_ERROR;
6114 +read_trigger(char * result, int buf_size)
6120 + if ((result == NULL) || (buf_size <= 0 ))
6125 + pq_getbytes(result,buf_size);
6127 + while ((r = pq_getbytes(&c,1)) == 0)
6129 + if (i < buf_size -1)
6131 + *(result + i) = c;
6144 +#endif /* CONTROL_LOCK_CONFLICT */
6147 +PGR_Recv_Trigger (int user_timeout)
6149 + char result[PGR_MESSAGE_BUFSIZE];
6154 + if (PGR_Lock_Noticed)
6158 + if (PGR_Sock_To_Replication_Server < 0)
6159 + return STATUS_ERROR;
6160 + memset(result,0,sizeof(result));
6161 + rtn = PGR_recv_replicate_result(PGR_Sock_To_Replication_Server,result,user_timeout);
6164 + func_no = atoi(result);
6167 + func_no = STATUS_OK;
6173 + if (user_timeout == 0)
6175 + PGR_Set_Replication_Server_Status(CurrentReplicateServer, DATA_ERR);
6177 + return STATUS_ERROR;
6184 +PGR_Set_Transaction_Mode(int mode,const char * commandTag)
6186 + if (commandTag == NULL)
6190 + if ((!strcmp(commandTag,"BEGIN")) ||
6191 + (!strcmp(commandTag,"START TRANSACTION")) )
6197 + if ((!strncmp(commandTag,"COMMIT",strlen("COMMIT"))) ||
6198 + (!strncmp(commandTag,"ROLLBACK",strlen("ROLLBACK"))))
6207 +do_not_replication_command(const char * commandTag)
6209 + if (commandTag == NULL)
6213 + if ((!strcmp(commandTag,"SELECT")) ||
6214 + (!strcmp(commandTag,"CLOSE CURSOR")) ||
6215 + (!strcmp(commandTag,"MOVE")) ||
6216 + (!strcmp(commandTag,"FETCH")) ||
6217 + (!strcmp(commandTag,"EXPLAIN")))
6228 +PGR_Set_Replication_Server_Status( ReplicateServerInfo * sp, int status)
6234 + if (sp->useFlag != status)
6236 + sp->useFlag = status;
6241 +PGR_Is_Skip_Replication(char * query)
6245 + if ((query == NULL) ||
6246 + (MyProcPort == NULL))
6250 + snprintf(skip_2,sizeof(skip_2),SKIP_QUERY_2,MyProcPort->user_name);
6251 + if ((strncmp(query,SKIP_QUERY_1,strlen(SKIP_QUERY_1)) == 0) ||
6252 + (strncmp(query,skip_2,strlen(skip_2)) == 0))
6256 + if ((strncmp(query,SKIP_QUERY_3,strlen(SKIP_QUERY_3)) == 0) ||
6257 + (strncmp(query,SKIP_QUERY_4,strlen(SKIP_QUERY_4)) == 0))
6265 +PGR_Did_Commit_Transaction(void) /* Send a CMD_TYPE_COMMIT_CONFIRM system packet to the replication server and parse its answer to learn whether the transaction was already committed elsewhere. */
6270 + ReplicateHeader header; /* NOTE(review): no memset of header is visible here, unlike send_response_to_replication_server(); fields not assigned below would go out uninitialised */
6271 + char * serverName = NULL;
6273 + char * result = NULL;
6274 + ReplicateServerInfo * sp = NULL;
6275 + ReplicateServerInfo * base = NULL;
6276 + int socket_type = 0;
6277 + char argv[ PGR_CMD_ARG_NUM ][256];
6281 + if (ReplicateCurrentTime->useFlag != DATA_USE)
6285 + sp = PGR_get_replicate_server_info();
6288 + if (Debug_pretty_print)
6289 + elog(DEBUG1,"PGR_get_replicate_server_info get error");
6292 + sock = PGR_get_replicate_server_socket( sp , PGR_QUERY_SOCKET);
6295 + if (Debug_pretty_print)
6296 + elog(DEBUG1,"PGR_get_replicate_server_socket fail");
6299 + result = malloc(PGR_MESSAGE_BUFSIZE); /* NOTE(review): confirm result is freed on every exit; no free is visible in this view */
6300 + if (result == NULL)
6304 + memset(result,0,PGR_MESSAGE_BUFSIZE);
6306 + serverName = sp->hostName;
6307 + portNumber = (int)sp->portNumber;
6308 + header.cmdSys = CMD_SYS_CALL;
6309 + header.cmdSts = CMD_STS_TRANSACTION_ABORT;
6310 + header.cmdType = CMD_TYPE_COMMIT_CONFIRM;
6311 + header.port = htons(PostPortNumber);
6312 + header.pid = htons(getpid()); /* htons keeps only 16 bits, so pids above 65535 are truncated */
6313 + header.tv.tv_sec = htonl(ReplicateCurrentTime->tp.tv_sec);
6314 + header.tv.tv_usec = htonl(ReplicateCurrentTime->tp.tv_usec);
6315 + header.query_size = htonl(0);
6316 + strncpy(header.dbName ,(char *)(MyProcPort->database_name),sizeof(header.dbName)-1); /* the last byte is never written by strncpy; it is only NUL if header was zeroed beforehand */
6317 + strncpy(header.userName , (char *)(MyProcPort->user_name),sizeof(header.userName)-1);
6318 + strncpy(header.password , PGR_password->password, PASSWORD_MAX_LENGTH ); /* NOTE(review): not NUL-terminated if the source is PASSWORD_MAX_LENGTH bytes or longer */
6319 + memcpy(header.md5Salt ,MyProcPort->md5Salt, sizeof(header.md5Salt));
6320 + memcpy(header.cryptSalt ,MyProcPort->cryptSalt, sizeof(header.cryptSalt));
6321 + if (PGRSelfHostName != NULL)
6323 + strncpy(header.from_host, PGRSelfHostName, HOSTNAME_MAX_LENGTH);
6325 + header.replicate_id = htonl(ReplicationLog_Info.PGR_Replicate_ID);
6326 + header.request_id = 0;
6329 + PGR_Sock_To_Replication_Server = sock;
6332 + while (send_replicate_packet(sock,&header,"") != STATUS_OK) /* retry loop: obtain another socket until the send succeeds or the retry cap is hit */
6334 + if (cnt > MAX_RETRY_TIMES )
6336 + sock = get_new_replication_socket( base, sp, socket_type);
6339 + if (Debug_pretty_print)
6340 + elog(DEBUG1,"all replication servers may be down");
6341 + PGR_Stand_Alone->is_stand_alone = true;
6346 + PGR_Sock_To_Replication_Server = sock;
6352 + if (PGR_recv_replicate_result(sock,result,6) < 0) /* 6 is passed in the argument position PGR_Recv_Trigger() uses for its timeout */
6359 + argc = set_command_args(argv,result);
6362 + func_no = atoi(argv[0]);
6363 + if (func_no == PGR_TRANSACTION_CONFIRM_ANSWER_FUNC_NO)
6365 + /* the transaction was committed on another server */
6366 + if (atoi(argv[1]) == PGR_ALREADY_COMMITTED)
6380 +PGRsend_system_command(char cmdSts, char cmdType) /* Build a CMD_SYS_CALL header carrying the given cmdSts/cmdType and send it, with an empty payload, to the current replication server. */
6382 + ReplicateServerInfo * sp = NULL;
6384 + int socket_type = 0;
6385 + char * result = NULL;
6386 + char * serverName = NULL;
6388 + ReplicateHeader header; /* NOTE(review): no memset of header is visible here; fields not assigned below would go out uninitialised */
6390 + ReplicateServerInfo * base = NULL;
6392 + sp = PGR_get_replicate_server_info();
6395 + if (Debug_pretty_print)
6396 + elog(DEBUG1,"PGR_get_replicate_server_info get error");
6397 + return STATUS_ERROR;
6399 + sock = PGR_get_replicate_server_socket( sp , PGR_QUERY_SOCKET);
6402 + if (Debug_pretty_print)
6403 + elog(DEBUG1,"PGR_get_replicate_server_socket fail");
6404 + return STATUS_ERROR;
6406 + result = malloc(PGR_MESSAGE_BUFSIZE); /* NOTE(review): confirm result is freed on every exit; no free is visible in this view */
6407 + if (result == NULL)
6409 + return STATUS_ERROR;
6411 + memset(result,0,PGR_MESSAGE_BUFSIZE);
6413 + serverName = sp->hostName;
6414 + portNumber = (int)sp->portNumber;
6415 + header.cmdSys = CMD_SYS_CALL;
6416 + header.cmdSts = cmdSts;
6417 + header.cmdType = cmdType;
6418 + header.port = htons(PostPortNumber);
6419 + header.pid = htons(getpid()); /* htons keeps only 16 bits, so pids above 65535 are truncated */
6420 + header.tv.tv_sec = htonl(ReplicateCurrentTime->tp.tv_sec); /* NOTE(review): ReplicateCurrentTime is dereferenced with no visible NULL check */
6421 + header.tv.tv_usec = htonl(ReplicateCurrentTime->tp.tv_usec);
6422 + header.query_size = htonl(0);
6423 + strncpy(header.dbName ,(char *)(MyProcPort->database_name),sizeof(header.dbName)-1); /* the last byte is never written by strncpy; it is only NUL if header was zeroed beforehand */
6424 + strncpy(header.userName , (char *)(MyProcPort->user_name),sizeof(header.userName)-1);
6425 + strncpy(header.password , PGR_password->password, PASSWORD_MAX_LENGTH ); /* NOTE(review): not NUL-terminated if the source is PASSWORD_MAX_LENGTH bytes or longer */
6426 + memcpy(header.md5Salt ,MyProcPort->md5Salt, sizeof(header.md5Salt));
6427 + memcpy(header.cryptSalt ,MyProcPort->cryptSalt, sizeof(header.cryptSalt));
6428 + if (PGRSelfHostName != NULL)
6430 + strncpy(header.from_host, PGRSelfHostName, HOSTNAME_MAX_LENGTH);
6432 + header.replicate_id = htonl(ReplicationLog_Info.PGR_Replicate_ID);
6433 + header.request_id = 0;
6436 + PGR_Sock_To_Replication_Server = sock;
6438 + while (send_replicate_packet(sock,&header,"") != STATUS_OK) /* retry loop: obtain another socket until the send succeeds or the retry cap is hit */
6440 + if (cnt > MAX_RETRY_TIMES )
6442 + sock = get_new_replication_socket( base, sp, socket_type);
6445 + if (Debug_pretty_print)
6446 + elog(DEBUG1,"all replication servers may be down");
6447 + PGR_Stand_Alone->is_stand_alone = true;
6450 + return STATUS_ERROR;
6452 + PGR_Sock_To_Replication_Server = sock;
6463 +get_hostName(char * str)
6465 + char * top = NULL;
6469 + while ( *p != '\0')
6486 +PGR_Remove_Comment(char * str)
6490 + while( *p != '\0')
6492 + while(isspace(*p))
6496 + if ((!memcmp(p,"--",2)) ||
6497 + (!memcmp(p,"//",2)))
6499 + while((*p != '\n') && (*p != '\0'))
6511 +PGR_Force_Replicate_Query(void)
6513 + if (PGR_Retry_Query.useFlag == DATA_USE)
6515 + PGR_Send_Replicate_Command(PGR_Retry_Query.query_string,
6516 + PGR_Retry_Query.query_len,
6517 + PGR_Retry_Query.cmdSts,
6518 + PGR_Retry_Query.cmdType);
6523 +PGR_Notice_DeadLock(void) /* Send a header-only CMD_TYPE_DEADLOCK_DETECT notice on the current replication-server socket. */
6525 + ReplicateHeader header;
6527 + memset(&header,0,sizeof(ReplicateHeader)); /* zero the whole header so unset fields go out as 0 */
6528 + header.cmdSys = CMD_SYS_CALL;
6529 + header.cmdSts = CMD_STS_NOTICE;
6530 + header.cmdType = CMD_TYPE_DEADLOCK_DETECT;
6531 + header.query_size = 0;
6532 + send_replicate_packet(PGR_Sock_To_Replication_Server,&header,(char *)NULL); /* NOTE(review): the socket is not visibly checked for < 0 as send_response_to_replication_server() does, and the send status is ignored */
6536 +PGR_Set_Cluster_Status(int status)
6538 + if (ClusterDBData != NULL)
6540 + if (ClusterDBData->status != status)
6542 + ClusterDBData->status = status;
6548 +PGR_Get_Cluster_Status(void)
6550 + if (ClusterDBData != NULL)
6552 + return (ClusterDBData->status);
6558 +PGR_Check_Replicate_Server_Status(ReplicateServerInfo * sp) /* Open a fresh socket to sp, send a CMD_SYS_PREREPLICATE probe packet and derive a status from the reply. */
6560 + ReplicateHeader header;
6561 + char * result = NULL;
6565 + result = malloc(PGR_MESSAGE_BUFSIZE + 4); /* NOTE(review): confirm result is freed on every exit; no free is visible in this view */
6566 + if (result == NULL)
6568 + if (Debug_pretty_print)
6569 + elog(DEBUG1,"malloc failed in PGR_Check_Replicate_Server_Status()");
6570 + return STATUS_ERROR;
6573 + memset(&header, 0, sizeof(ReplicateHeader));
6574 + memset(result, 0, PGR_MESSAGE_BUFSIZE + 4);
6576 + header.cmdSys = CMD_SYS_PREREPLICATE;
6577 + header.cmdSts = CMD_STS_OTHER;
6578 + header.cmdType = CMD_TYPE_OTHER;
6579 + header.port = htons(PostPortNumber);
6580 + header.pid = htons(getpid()); /* htons keeps only 16 bits, so pids above 65535 are truncated */
6581 + header.query_size = 0;
6582 + strncpy(header.dbName ,(char *)(MyProcPort->database_name),sizeof(header.dbName)-1); /* header was zeroed above, so the final byte stays NUL */
6583 + strncpy(header.userName , (char *)(MyProcPort->user_name),sizeof(header.userName)-1);
6584 + strncpy(header.password , PGR_password->password, PASSWORD_MAX_LENGTH ); /* NOTE(review): not NUL-terminated if the source is PASSWORD_MAX_LENGTH bytes or longer */
6585 + memcpy(header.md5Salt ,MyProcPort->md5Salt, sizeof(header.md5Salt));
6586 + memcpy(header.cryptSalt ,MyProcPort->cryptSalt, sizeof(header.cryptSalt));
6587 + header.request_id = htonl(get_next_request_id());
6589 + if (PGRSelfHostName != NULL) {
6590 + strncpy(header.from_host, PGRSelfHostName, HOSTNAME_MAX_LENGTH);
6593 + /* open a new socket for lifecheck */
6594 + if ((status = PGR_Create_Socket_Connect(&fdP, sp->hostName, sp->portNumber)) == STATUS_ERROR) { /* NOTE(review): sp is dereferenced with no visible NULL check */
6595 + if (Debug_pretty_print) {
6596 + elog(DEBUG1,"create socket failed in PGR_Check_Replicate_Server_Status()");
6599 + /* status = STATUS_OK */
6601 + if ((status = send_replicate_packet(fdP, &header, (char *)NULL)) == STATUS_OK) {
6602 + /* receive result to check for possible deadlock */
6603 + status = (0 >= PGR_recv_replicate_result(fdP, result ,0)) /* NOTE(review): a return <= 0 maps to STATUS_OK here, while PGR_Did_Commit_Transaction() treats a negative return as failure -- confirm this is not inverted */
6604 + ? STATUS_OK : STATUS_ERROR;
6609 + PGR_Close_Sock(&fdP);
6615 +return_current_oid(void) /* Make sure the shared OID counter is initialised and has logged OIDs available, then report nextOid as decimal text to the frontend or the replication server. */
6617 + char msg[PGR_MESSAGE_BUFSIZE];
6619 + LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
6621 + if (ShmemVariableCache->nextOid < ((Oid) FirstBootstrapObjectId)) /* counter is below the bootstrap range: restart it at FirstBootstrapObjectId */
6623 + ShmemVariableCache->nextOid = FirstBootstrapObjectId;
6624 + ShmemVariableCache->oidCount = 0;
6627 + if (ShmemVariableCache->oidCount == 0) /* no logged OIDs left: log another VAR_OID_PREFETCH batch */
6629 + XLogPutNextOid(ShmemVariableCache->nextOid + VAR_OID_PREFETCH);
6630 + ShmemVariableCache->oidCount = VAR_OID_PREFETCH;
6632 + LWLockRelease(OidGenLock);
6634 + memset(msg,0,sizeof(msg));
6635 + snprintf(msg, sizeof(msg), "%u", ShmemVariableCache->nextOid); /* NOTE(review): nextOid is read here after OidGenLock was released, so another backend may already have advanced it */
6636 + if (PGR_Check_Lock.dest == TO_FRONTEND)
6638 + pq_puttextmessage('C',msg);
6643 + send_response_to_replication_server(msg);
6649 +sync_oid(char * oid) /* Parse oid as a decimal OID, advance ShmemVariableCache->nextOid by its distance from the current value (logging more OIDs as needed), then report the resulting nextOid. */
6651 + uint32_t next_oid = 0;
6653 + char msg[PGR_MESSAGE_BUFSIZE];
6655 + LWLockAcquire(OidGenLock, LW_EXCLUSIVE); /* held until the LWLockRelease further down */
6657 + next_oid = strtoul(oid, NULL, 10); /* NOTE(review): oid has no visible NULL check; the unsigned long result is narrowed to uint32_t */
6658 + if (next_oid <= 0) /* next_oid is unsigned, so this is effectively == 0 (unparsable text or a literal 0) */
6659 + return STATUS_ERROR; /* FIXME(review): returns with OidGenLock still held -- no LWLockRelease on this path */
6661 + offset = next_oid - ShmemVariableCache->nextOid ;
6663 + return STATUS_ERROR; /* FIXME(review): also reached after LWLockAcquire with no release visible before it */
6665 + if (next_oid < FirstBootstrapObjectId)
6667 + ShmemVariableCache->nextOid = FirstBootstrapObjectId; /* NOTE(review): offset was computed against the old nextOid before this reset -- confirm that is intended */
6668 + ShmemVariableCache->oidCount = 0;
6671 + /* If we run out of logged for use oids then we must log more */
6672 + while (ShmemVariableCache->oidCount - offset <= 0)
6674 + offset -= (ShmemVariableCache->oidCount) ; /* consume the OIDs that are already logged */
6675 + (ShmemVariableCache->nextOid) += (ShmemVariableCache->oidCount);
6676 + XLogPutNextOid(ShmemVariableCache->nextOid + VAR_OID_PREFETCH);
6677 + ShmemVariableCache->oidCount = VAR_OID_PREFETCH;
6680 + (ShmemVariableCache->nextOid) += offset; /* apply the remainder within the currently logged batch */
6681 + (ShmemVariableCache->oidCount) -= offset;
6683 + LWLockRelease(OidGenLock);
6685 + memset(msg,0,sizeof(msg));
6686 + snprintf(msg, sizeof(msg), "%u", ShmemVariableCache->nextOid); /* NOTE(review): nextOid is read here after OidGenLock was released */
6687 + if (PGR_Check_Lock.dest == TO_FRONTEND)
6689 + pq_puttextmessage('C',msg);
6694 + send_response_to_replication_server(msg);
6700 +PGR_lo_import(char * filename) /* Pack filename into a heap-allocated LOArgs record and forward it as a CMD_TYPE_LO_IMPORT large-object command. */
6702 + char * result = NULL;
6707 + if ((PGR_Is_Replicated_Query == true) ||
6708 + (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION))
6712 + if ((PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6713 + (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6718 + len = strlen(filename); /* NOTE(review): filename has no visible NULL check before strlen */
6719 + buf_size = sizeof(LOArgs) + len;
6720 + lo_args = (LOArgs *)malloc(buf_size + 4); /* NOTE(review): confirm lo_args is freed after the send; no free is visible in this view */
6721 + if (lo_args == (LOArgs *)NULL)
6723 + return STATUS_ERROR;
6725 + memset(lo_args, 0, buf_size + 4);
6726 + lo_args->arg1 = htonl((uint32_t)len); /* filename length, in network byte order */
6727 + memcpy(lo_args->buf, filename, len); /* copies len bytes without the NUL; the buffer was zero-filled above */
6729 + result = PGR_Send_Replicate_Command((char *)lo_args,
6731 + CMD_STS_LARGE_OBJECT,
6732 + CMD_TYPE_LO_IMPORT);
6735 + if (result != NULL)
6741 + return STATUS_ERROR;
6745 +PGR_lo_create(int flags)
6747 + char * result = NULL;
6750 + if ((PGR_Is_Replicated_Query == true) ||
6751 + (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION))
6755 + if ((PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6756 + (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6760 + memset(&lo_args, 0, sizeof(LOArgs));
6761 + lo_args.arg1 = htonl(flags);
6763 + result = PGR_Send_Replicate_Command((char *)&lo_args,
6765 + CMD_STS_LARGE_OBJECT,
6766 + CMD_TYPE_LO_CREATE);
6768 + if (result != NULL)
6774 + return STATUS_ERROR;
6778 +PGR_lo_open(Oid lobjId,int32 mode)
6780 + char * result = NULL;
6783 + if ((PGR_Is_Replicated_Query == true) ||
6784 + (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION))
6788 + if ((PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6789 + (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6793 + memset(&lo_args, 0, sizeof(LOArgs));
6794 + lo_args.arg1 = htonl((uint32_t)lobjId);
6795 + lo_args.arg2 = htonl((uint32_t)mode);
6797 + result = PGR_Send_Replicate_Command((char *)&lo_args,
6799 + CMD_STS_LARGE_OBJECT,
6800 + CMD_TYPE_LO_OPEN);
6802 + if (result != NULL)
6808 + return STATUS_ERROR;
6812 +PGR_lo_close(int32 fd)
6814 + char * result = NULL;
6817 + if ((PGR_Is_Replicated_Query == true) ||
6818 + (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION))
6822 + if ((PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6823 + (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6827 + memset(&lo_args, 0, sizeof(LOArgs));
6828 + lo_args.arg1 = htonl((uint32_t)fd);
6830 + result = PGR_Send_Replicate_Command((char *)&lo_args,
6832 + CMD_STS_LARGE_OBJECT,
6833 + CMD_TYPE_LO_CLOSE);
6835 + if (result != NULL)
6841 + return STATUS_ERROR;
6845 +PGR_lo_write(int fd, char *buf, int len) /* Pack fd, len and a copy of buf into a heap-allocated LOArgs record and forward it as a CMD_TYPE_LO_WRITE large-object command. */
6847 + char * result = NULL;
6848 + LOArgs *lo_args = NULL;
6851 + if ((PGR_Is_Replicated_Query == true) ||
6852 + (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION))
6856 + if ((PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6857 + (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6861 + buf_size = sizeof(LOArgs) + len; /* NOTE(review): len is a signed int with no visible check for negative values before it sizes the buffer and the memcpy */
6862 + lo_args = malloc(buf_size + 4); /* NOTE(review): confirm lo_args is freed after the send; no free is visible in this view */
6863 + if (lo_args == (LOArgs *)NULL)
6865 + return STATUS_ERROR;
6867 + memset(lo_args, 0, buf_size + 4);
6868 + lo_args->arg1 = htonl((uint32_t)fd); /* both arguments are sent in network byte order */
6869 + lo_args->arg2 = htonl((uint32_t)len);
6870 + memcpy(lo_args->buf, buf, len); /* NOTE(review): buf has no visible NULL check */
6871 + result = PGR_Send_Replicate_Command((char *)lo_args,
6873 + CMD_STS_LARGE_OBJECT,
6874 + CMD_TYPE_LO_WRITE);
6877 + if (result != NULL)
6883 + return STATUS_ERROR;
6887 +PGR_lo_lseek(int32 fd, int32 offset, int32 whence)
6889 + char * result = NULL;
6892 + if ((PGR_Is_Replicated_Query == true) ||
6893 + (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION))
6897 + if ((PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6898 + (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6902 + memset(&lo_args, 0, sizeof(LOArgs));
6903 + lo_args.arg1 = htonl((uint32_t)fd);
6904 + lo_args.arg2 = htonl((uint32_t)offset);
6905 + lo_args.arg3 = htonl((uint32_t)whence);
6907 + result = PGR_Send_Replicate_Command((char *)&lo_args,
6909 + CMD_STS_LARGE_OBJECT,
6910 + CMD_TYPE_LO_LSEEK);
6912 + if (result != NULL)
6918 + return STATUS_ERROR;
6922 +PGR_lo_unlink(Oid lobjId)
6924 + char * result = NULL;
6927 + if ((PGR_Is_Replicated_Query == true) ||
6928 + (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION))
6932 + if ((PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6933 + (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6937 + memset(&lo_args, 0, sizeof(LOArgs));
6938 + lo_args.arg1 = htonl((uint32_t)lobjId);
6940 + result = PGR_Send_Replicate_Command((char *)&lo_args,
6942 + CMD_STS_LARGE_OBJECT,
6943 + CMD_TYPE_LO_UNLINK);
6945 + if (result != NULL)
6951 + return STATUS_ERROR;
6955 +PGRGetNewObjectId(Oid last_id)
6961 + newId = (Oid)PGRget_replication_id();
6965 + newId = last_id + 1;
6971 +PGR_Send_Input_Message(char cmdType,StringInfo input_message)
6974 + char * ptr = NULL;
6975 + char * result = NULL;
6977 + if (input_message == NULL)
6979 + return STATUS_ERROR;
6981 + if (PGR_Is_Replicated_Query == true)
6985 + len = input_message->len+1;
6986 + ptr = input_message->data;
6988 + /* check setting of configuration value */
6989 + if ( PGRnotReplicatePreparedSelect == true)
6991 + if (is_concerned_with_prepared_select(cmdType, ptr+1) == true)
6996 + result = PGR_Send_Replicate_Command(ptr,len, CMD_STS_PREPARE,cmdType);
6997 + if (result != NULL)
6999 + PGR_Reload_Start_Time();
7006 + return STATUS_ERROR;
7011 +is_concerned_with_prepared_select(char cmdType, char * query_string)
7013 + if (cmdType == CMD_TYPE_P_PARSE)
7015 + switch (parse_message(query_string))
7017 + case PGR_MESSAGE_SELECT:
7018 + pgr_skip_in_prepared_query = true;
7020 + case PGR_MESSAGE_PREPARE:
7021 + if (is_prepared_as_select(query_string) == true)
7023 + pgr_skip_in_prepared_query = true;
7026 + case PGR_MESSAGE_EXECUTE:
7027 + case PGR_MESSAGE_DEALLOCATE:
7028 + if (is_statement_as_select(query_string) == true)
7030 + pgr_skip_in_prepared_query = true;
7034 + if (pgr_skip_in_prepared_query == true)
7039 + if (pgr_skip_in_prepared_query == true)
7041 + if (cmdType == CMD_TYPE_P_SYNC)
7043 + pgr_skip_in_prepared_query = false;
7051 +skip_non_blank(char * ptr, int max)
7054 + while(!isspace(*(ptr+i)))
7056 + if ((*(ptr+1) == '(') || (*(ptr+1) == ')'))
7068 +skip_blank(char * ptr, int max)
7071 + while(isspace(*(ptr+i)))
7081 +parse_message(char * query_string)
7087 + if (query_string == NULL)
7089 + return PGR_MESSAGE_OTHER;
7091 + len = strlen (query_string);
7094 + return PGR_MESSAGE_OTHER;
7096 + ptr = (char *)query_string;
7099 + rtn = skip_blank(ptr+i, len-i);
7101 + return PGR_MESSAGE_OTHER;
7104 + if (!strncasecmp(ptr+i,"SELECT",strlen("SELECT")))
7106 + return PGR_MESSAGE_SELECT;
7108 + if (!strncasecmp(ptr+i,"PREPARE",strlen("PREPARE")))
7110 + return PGR_MESSAGE_PREPARE;
7112 + if (!strncasecmp(ptr+i,"EXECUTE",strlen("EXECUTE")))
7114 + return PGR_MESSAGE_EXECUTE;
7116 + if (!strncasecmp(ptr+i,"DEALLOCATE",strlen("DEALLOCATE")))
7118 + return PGR_MESSAGE_DEALLOCATE;
7120 + return PGR_MESSAGE_OTHER;
7124 +is_prepared_as_select(char * query_string)
7131 + if (query_string == NULL)
7135 + ptr = (char *)query_string;
7136 + len = strlen (query_string);
7138 + /* skip "PREPARE" word */
7139 + rtn = skip_non_blank(ptr+i, len-i);
7144 + rtn = skip_blank(ptr+i, len-i);
7148 + /* skip plan_name */
7149 + rtn = skip_non_blank(ptr+i, len-i);
7154 + rtn = skip_blank(ptr+i, len-i);
7160 + if (*(ptr+i) == '(')
7166 + if (*(ptr+i) == ')')
7168 + else if (*(ptr+i) == '(')
7175 + rtn = skip_blank(ptr+i, len-i);
7180 + /* skip "AS" word */
7181 + i += strlen("AS");
7185 + rtn = skip_blank(ptr+i, len-i);
7189 + /* check "SELECT" word */
7190 + if (len-i < strlen("SELECT"))
7192 + if (!strncasecmp(ptr+i,"SELECT",strlen("SELECT")))
7201 +is_statement_as_select(char * query_string)
7208 + bool result = false;
7210 + char * name = NULL;
7211 + if (query_string == NULL)
7215 + ptr = (char *)query_string;
7216 + len = strlen (query_string);
7218 + /* skip "EXECUTE" or "DEALLOCATE" word */
7219 + rtn = skip_non_blank(ptr+i, len-i);
7224 + rtn = skip_blank(ptr+i, len-i);
7228 + if ((name = malloc(len)) == NULL)
7230 + memset(name,0,len);
7232 + while(isalnum(*(ptr+i)))
7234 + *(name+j) = *(ptr+i);
7241 + result = PGR_is_select_prepared_statement(&stmt);
7247 +PGR_is_select_prepare_query(void)
7249 + if (debug_query_string == NULL)
7253 + return (is_prepared_as_select((char *)debug_query_string));
7257 +PGR_get_md5salt(char * md5Salt, char * string)
7260 + char * ptr = NULL;
7265 + bool set_flag = false;
7267 + ptr = (char *)md5Salt;
7268 + len = strlen(string);
7269 + for ( i = 0 ; i < len ; i ++)
7271 + if (*(string+i) == ')')
7273 + buf[index++] = '\0';
7274 + *ptr = (char)atoi(buf);
7279 + buf[index++] = *(string+i);
7281 + if (*(string+i) == '(')
7285 + ptr = (char *)(md5Salt + cnt);
7292 +#endif /* USE_REPLICATION */
7293 diff -aruN postgresql-8.2.4/src/backend/libpq/replicate_com.c pgcluster-1.7.0rc7/src/backend/libpq/replicate_com.c
7294 --- postgresql-8.2.4/src/backend/libpq/replicate_com.c 1970-01-01 01:00:00.000000000 +0100
7295 +++ pgcluster-1.7.0rc7/src/backend/libpq/replicate_com.c 2007-02-18 22:52:16.000000000 +0100
7297 +/*--------------------------------------------------------------------
7302 + * This file is composed of the functions that are called from the
7303 + * backend source for the replication.
7304 + * The low-level I/O functions called by these functions are
7305 + * contained in 'replicate_com.c'.
7307 + *--------------------------------------------------------------------
7310 +/*--------------------------------------
7311 + * INTERFACE ROUTINES
7315 + * PGR_Free_Conf_Data
7317 + * PGR_Create_Socket_Connect
7318 + * PGR_Create_Socket_Bind
7319 + * PGR_Create_Acception
7321 + * PGR_Get_Conf_Data
7322 + *-------------------------------------
7324 +#ifdef USE_REPLICATION
7326 +#include "postgres.h"
7328 +#include <signal.h>
7332 +#include <unistd.h>
7335 +#include <sys/types.h>
7336 +#include <sys/stat.h>
7337 +#include <sys/socket.h>
7338 +#include <sys/ipc.h>
7339 +#include <sys/shm.h>
7341 +#include <netinet/in.h>
7342 +#ifdef HAVE_NETINET_TCP_H
7343 +#include <netinet/tcp.h>
7345 +#include <arpa/inet.h>
7346 +#include <sys/file.h>
7349 +#include "libpq/libpq.h"
7350 +#include "miscadmin.h"
7351 +#include "nodes/print.h"
7352 +#include "utils/guc.h"
7353 +#include "parser/parser.h"
7354 +#include "access/xact.h"
7355 +#include "replicate_com.h"
7357 +int PGR_Create_Socket_Connect(int * fdP, char * hostName , unsigned short portNumber);
7358 +void PGR_Close_Sock(int * sock);
7359 +int PGR_Create_Socket_Bind(int * fdP, char * hostName , unsigned short portNumber);
7360 +int PGR_Create_Acception(int fd, int * sockP, char * hostName , unsigned short portNumber);
7361 +int PGR_Free_Conf_Data(void);
7362 +int PGR_Get_Conf_Data(char * dir , char * fname);
7363 +void PGRset_recovery_packet_no(RecoveryPacket * packet, int packet_no);
7364 +unsigned int PGRget_ip_by_name(char * host);
7365 +int PGRget_time_value(char *str);
7367 +static char * get_string(char * buf);
7368 +static bool is_start_tag(char * ptr);
7369 +static bool is_end_tag(char * ptr);
7370 +static void init_conf_data(ConfDataType *conf);
7371 +static int get_key(char * key, char * str);
7372 +static int get_conf_key_value(char * key, char * value , char * str);
7373 +static int add_conf_data(char *table,int rec_no, char *key,char * value);
7374 +static int get_table_data(FILE * fp,char * table, int rec_no);
7375 +static int get_single_data(char * str);
7376 +static int get_conf_file(char * fname);
7378 +/*--------------------------------------------------------------------
7380 + * PGR_Create_Socket_Connect()
7382 + * create new socket
7385 + * char * hostName:
7386 + * unsigned short portNumber:
7389 + * NG: STATUS_ERROR
7390 + *--------------------------------------------------------------------
7393 +PGR_Create_Socket_Connect(int * fdP, char * hostName , unsigned short portNumber)
7398 + struct sockaddr_in addr;
7401 + if ((*hostName == '\0') || (portNumber < 1000))
7404 + return STATUS_ERROR;
7406 + if ((*fdP = socket(AF_INET, SOCK_STREAM, 0)) < 0)
7409 + return STATUS_ERROR;
7411 + if ((setsockopt(*fdP, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one))) == -1)
7413 + PGR_Close_Sock(fdP);
7414 + return STATUS_ERROR;
7416 + if (setsockopt(*fdP, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) < 0)
7418 + PGR_Close_Sock(fdP);
7419 + return STATUS_ERROR;
7422 + addr.sin_family = AF_INET;
7423 + if ((hostName == NULL ) || (hostName[0] == '\0'))
7424 + addr.sin_addr.s_addr = htonl(INADDR_ANY);
7427 + struct hostent *hp;
7429 + hp = gethostbyname(hostName);
7430 + if ((hp == NULL) || (hp->h_addrtype != AF_INET))
7432 + PGR_Close_Sock(fdP);
7433 + return STATUS_ERROR;
7435 + memmove((char *) &(addr.sin_addr), (char *) hp->h_addr, hp->h_length);
7438 + addr.sin_port = htons(portNumber);
7439 + len = sizeof(struct sockaddr_in);
7441 + if ((sock = connect(*fdP,(struct sockaddr*)&addr,len)) < 0)
7443 + PGR_Close_Sock(fdP);
7444 + return STATUS_ERROR;
7451 +PGR_Create_Socket_Bind(int * fdP, char * hostName , unsigned short portNumber)
7456 + struct sockaddr_in addr;
7459 + if ((*fdP = socket(AF_INET, SOCK_STREAM, 0)) < 0)
7461 + return STATUS_ERROR;
7463 + if ((setsockopt(*fdP, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one))) == -1)
7465 + PGR_Close_Sock(fdP);
7466 + return STATUS_ERROR;
7468 + addr.sin_family = AF_INET;
7469 + if ((hostName == NULL ) || (hostName[0] == '\0'))
7470 + addr.sin_addr.s_addr = htonl(INADDR_ANY);
7473 + struct hostent *hp;
7475 + hp = gethostbyname(hostName);
7476 + if ((hp == NULL) || (hp->h_addrtype != AF_INET))
7478 + PGR_Close_Sock(fdP);
7479 + return STATUS_ERROR;
7481 + memmove((char *) &(addr.sin_addr), (char *) hp->h_addr, hp->h_length);
7484 + addr.sin_port = htons(portNumber);
7485 + len = sizeof(struct sockaddr_in);
7487 + err = bind(*fdP, (struct sockaddr *) & addr, len);
7490 + PGR_Close_Sock(fdP);
7491 + return STATUS_ERROR;
7493 + err = listen(*fdP, MAX_SOCKET_QUEUE );
7496 + PGR_Close_Sock(fdP);
7497 + return STATUS_ERROR;
7503 +PGR_Create_Acception(int fd, int * sockP, char * hostName , unsigned short portNumber)
7506 + struct sockaddr addr;
7510 + len = sizeof(struct sockaddr);
7511 + if ((sock = accept(fd, &addr, &len)) < 0)
7514 + return STATUS_ERROR;
7517 + if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) < 0)
7519 + return STATUS_ERROR;
7521 + if (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one)) < 0)
7523 + return STATUS_ERROR;
7531 +PGR_Close_Sock(int * sock)
7533 + close( (int)*sock);
7538 +get_string(char * buf)
7540 + int i,len1,len2,start_flag;
7541 + char *readp, *writep;
7543 + writep = readp = buf;
7545 + while (*(readp +i) != '\0')
7547 + if (!isspace(*(readp+ i)))
7553 + start_flag = len2 = 0;
7554 + while (*readp != '\0')
7556 + if (*readp == '#')
7561 + if (isspace(*readp))
7563 + if ((len2 >= len1) || (!start_flag))
7584 +is_start_tag(char * ptr)
7586 + if ((*ptr == '<') && (*(ptr+1) != '/'))
7594 +is_end_tag(char * ptr)
7596 + if ((*ptr == '<') && (*(ptr+1) == '/'))
7604 +init_conf_data(ConfDataType *conf)
7606 + memset(conf->table,0,sizeof(conf->table));
7607 + memset(conf->key,0,sizeof(conf->key));
7608 + memset(conf->value,0,sizeof(conf->value));
7610 + conf->last = NULL;
7611 + conf->next = NULL;
7615 +get_key(char * key, char * str)
7618 + char * ptr_s,*ptr_e;
7620 + ptr_s = strchr(str,'<');
7621 + if (ptr_s == NULL)
7623 + return STATUS_ERROR;
7625 + if (*(ptr_s+1) == '/')
7629 + ptr_e = strchr(str,'>');
7630 + if (ptr_e == NULL)
7632 + return STATUS_ERROR;
7635 + strcpy(key,ptr_s + offset);
7641 +get_conf_key_value(char * key, char * value , char * str)
7644 + int len1,len2,start_flag;
7645 + char * ptr_s,*ptr_e;
7647 + if(get_key(key,str) == STATUS_ERROR)
7649 + return STATUS_ERROR;
7651 + ptr_e = strchr(str,'>');
7652 + if (ptr_e == NULL)
7654 + return STATUS_ERROR;
7656 + ptr_s = ptr_e + 1;
7659 + while ((*ptr_s != '<') && (*ptr_s != '\0'))
7661 + if (! isspace(*ptr_s))
7667 + ptr_s = ptr_e + 1;
7668 + i = len2 = start_flag = 0;
7669 + while ((*ptr_s != '<') && (*ptr_s != '\0'))
7671 + if (isspace(*ptr_s))
7673 + if ((len2 >= len1) || (!start_flag))
7678 + *(value + i) = *ptr_s;
7683 + *(value + i) = *ptr_s;
7689 + *(value + i) = '\0';
7694 +add_conf_data(char *table,int rec_no, char *key,char * value)
7696 + ConfDataType * conf_data;
7698 + conf_data = (ConfDataType *)malloc(sizeof(ConfDataType));
7699 + if (conf_data == NULL)
7701 + return STATUS_ERROR;
7703 + init_conf_data(conf_data);
7704 + if (table != NULL)
7706 + memcpy(conf_data->table,table,sizeof(conf_data->table));
7710 + memset(conf_data->table,0,sizeof(conf_data->table));
7712 + memcpy(conf_data->key,key,sizeof(conf_data->key));
7713 + memcpy(conf_data->value,value,sizeof(conf_data->value));
7714 + conf_data->rec_no = rec_no;
7715 + if (ConfData_Top == (ConfDataType *)NULL)
7717 + ConfData_Top = conf_data;
7718 + conf_data->last = (char *)NULL;
7720 + if (ConfData_End == (ConfDataType *)NULL)
7722 + conf_data->last = (char *)NULL;
7726 + conf_data->last = (char *)ConfData_End;
7727 + ConfData_End->next = (char *)conf_data;
7729 + ConfData_End = conf_data;
7730 + conf_data->next = (char *)NULL;
7735 +get_table_data(FILE * fp,char * table, int rec_no)
7738 + char key_buf[1024];
7739 + char value_buf[1024];
7743 + while (fgets(buf,sizeof(buf),fp) != NULL)
7746 + * pick up a data string
7748 + ptr = get_string(buf);
7749 + len = strlen(ptr);
7754 + if (is_end_tag(ptr))
7756 + if(get_key(key_buf,ptr) == STATUS_ERROR)
7758 + return STATUS_ERROR;
7760 + if (!strcmp(key_buf,table))
7765 + if (is_start_tag(ptr))
7767 + if(get_conf_key_value(key_buf,value_buf,ptr) == STATUS_ERROR)
7769 + return STATUS_ERROR;
7771 + add_conf_data(table,rec_no,key_buf,value_buf);
7774 + return STATUS_ERROR;
7778 +get_single_data(char * str)
7780 + char key_buf[1024];
7781 + char value_buf[1024];
7782 + if(get_conf_key_value(key_buf,value_buf,str) == STATUS_ERROR)
7784 + return STATUS_ERROR;
7786 + add_conf_data(NULL,0,key_buf,value_buf);
7792 +get_conf_file(char * fname)
7797 + char key_buf[1024];
7798 + char last_key_buf[1024];
7803 + * configuration file open
7805 + if ((fp = fopen(fname,"r")) == NULL)
7807 + return STATUS_ERROR;
7810 + * configuration file read
7812 + memset(last_key_buf,0,sizeof(last_key_buf));
7813 + memset(key_buf,0,sizeof(key_buf));
7814 + while (fgets(buf,sizeof(buf),fp) != NULL)
7817 + * pick up a data string
7819 + ptr = get_string(buf);
7820 + len = strlen(ptr);
7825 + if (is_start_tag(ptr))
7827 + if(get_key(key_buf,ptr) == STATUS_ERROR)
7830 + return STATUS_ERROR;
7832 + if (strstr(ptr,"</") == NULL)
7834 + if (strcmp(last_key_buf,key_buf))
7837 + strcpy(last_key_buf,key_buf);
7839 + get_table_data(fp,key_buf,rec_no);
7844 + get_single_data(ptr);
7853 +PGR_Free_Conf_Data(void)
7855 + ConfDataType * conf, *nextp;
7857 + if (ConfData_Top == (ConfDataType *)NULL)
7859 + return STATUS_ERROR;
7861 + conf = ConfData_Top;
7863 + while (conf != (ConfDataType *)NULL)
7865 + nextp = (ConfDataType*)conf->next;
7869 + ConfData_Top = ConfData_End = (ConfDataType *)NULL;
7874 +PGR_Get_Conf_Data(char * dir , char * fname)
7880 + if ((dir == NULL) || ( fname == NULL))
7882 + return STATUS_ERROR;
7884 + conf_file = malloc(strlen(dir) + strlen(fname) + 2);
7885 + if (conf_file == NULL)
7887 + return STATUS_ERROR;
7889 + sprintf(conf_file,"%s/%s",dir,fname);
7891 + ConfData_Top = ConfData_End = (ConfDataType * )NULL;
7892 + status = get_conf_file(conf_file);
7900 +PGRset_recovery_packet_no(RecoveryPacket * packet, int packet_no)
7902 + if (packet == NULL)
7906 + packet->packet_no = htons(packet_no) ;
7911 +PGRget_ip_by_name(char * host)
7913 + struct hostent *hp = NULL;
7914 + unsigned int ip = 0;
7915 + unsigned char uc = 0;
7918 + if ((host == NULL) || (*host == '\0'))
7922 + hp = gethostbyname( host );
7927 + for (i = 3 ; i>= 0 ; i --)
7929 + uc = (unsigned char)hp->h_addr_list[0][i];
7938 +PGRget_time_value(char *str)
7947 + len = strlen(str);
7949 + for (i = 0; i < len ; i ++,ptr++)
7951 + if ((! isdigit(*ptr)) && (! isspace(*ptr)))
7968 + return (atoi(str) * unit);
7971 +#endif /* USE_REPLICATION */
7972 diff -aruN postgresql-8.2.4/src/backend/main/main.c pgcluster-1.7.0rc7/src/backend/main/main.c
7973 --- postgresql-8.2.4/src/backend/main/main.c 2007-01-04 01:58:01.000000000 +0100
7974 +++ pgcluster-1.7.0rc7/src/backend/main/main.c 2007-02-18 22:52:16.000000000 +0100
7975 @@ -316,6 +316,13 @@
7976 printf(_(" -r FILENAME send stdout and stderr to given file\n"));
7977 printf(_(" -x NUM internal use\n"));
7979 +#ifdef USE_REPLICATION
7980 + printf(_("\nOptions for PGCluster only:\n"));
7981 + printf(_(" -R recovery startup with rsync\n"));
7982 + printf(_(" -u recovery startup with rsync(it is not create backup files.\n"));
7983 + printf(_(" -U recovery startup with pg_dump\n"));
7984 +#endif /* USE_REPLICATION */
7986 printf(_("\nPlease read the documentation for the complete list of run-time\n"
7987 "configuration settings and how to set them on the command line or in\n"
7988 "the configuration file.\n\n"
7989 diff -aruN postgresql-8.2.4/src/backend/parser/gram.y pgcluster-1.7.0rc7/src/backend/parser/gram.y
7990 --- postgresql-8.2.4/src/backend/parser/gram.y 2006-11-05 23:42:09.000000000 +0100
7991 +++ pgcluster-1.7.0rc7/src/backend/parser/gram.y 2007-02-18 22:52:16.000000000 +0100
7992 @@ -412,10 +412,10 @@
7995 READ REAL REASSIGN RECHECK REFERENCES REINDEX RELATIVE_P RELEASE RENAME
7996 - REPEATABLE REPLACE RESET RESTART RESTRICT RETURNING RETURNS REVOKE RIGHT
7997 + REPEATABLE REPLACE REPLICATION RESET RESTART RESTRICT RETURNING RETURNS REVOKE RIGHT
7998 ROLE ROLLBACK ROW ROWS RULE
8000 - SAVEPOINT SCHEMA SCROLL SECOND_P SECURITY SELECT SEQUENCE
8001 + SAVEPOINT SCHEMA SCROLL SECOND_P SECURITY SELECT SEQUENCE SERVER
8002 SERIALIZABLE SESSION SESSION_USER SET SETOF SHARE
8003 SHOW SIMILAR SIMPLE SMALLINT SOME STABLE START STATEMENT
8004 STATISTICS STDIN STDOUT STORAGE STRICT_P SUBSTRING SUPERUSER_P SYMMETRIC
8005 @@ -1224,6 +1224,12 @@
8009 + | SHOW REPLICATION SERVER
8011 + VariableShowStmt *n = makeNode(VariableShowStmt);
8012 + n->name = "replication_server";
8017 VariableShowStmt *n = makeNode(VariableShowStmt);
8018 @@ -8678,6 +8684,7 @@
8026 @@ -8692,6 +8699,7 @@
8034 diff -aruN postgresql-8.2.4/src/backend/parser/keywords.c pgcluster-1.7.0rc7/src/backend/parser/keywords.c
8035 --- postgresql-8.2.4/src/backend/parser/keywords.c 2006-10-07 23:51:02.000000000 +0200
8036 +++ pgcluster-1.7.0rc7/src/backend/parser/keywords.c 2007-02-18 22:52:16.000000000 +0100
8038 {"relative", RELATIVE_P},
8039 {"release", RELEASE},
8041 + {"replication", REPLICATION},
8042 {"repeatable", REPEATABLE},
8043 {"replace", REPLACE},
8045 diff -aruN postgresql-8.2.4/src/backend/parser/parse_clause.c pgcluster-1.7.0rc7/src/backend/parser/parse_clause.c
8046 --- postgresql-8.2.4/src/backend/parser/parse_clause.c 2006-11-28 13:54:41.000000000 +0100
8047 +++ pgcluster-1.7.0rc7/src/backend/parser/parse_clause.c 2007-02-18 22:52:16.000000000 +0100
8049 #include "rewrite/rewriteManip.h"
8050 #include "utils/guc.h"
8052 +#ifdef USE_REPLICATION
8053 +#include "replicate.h"
8054 +#endif /* USE_REPLICATION */
8056 #define ORDER_CLAUSE 0
8057 #define GROUP_CLAUSE 1
8058 @@ -154,7 +157,18 @@
8059 * analyze.c will eventually do the corresponding heap_close(), but *not*
8062 +#ifdef USE_REPLICATION
8063 + if (PGRautoLockTable == true)
8065 + pstate->p_target_relation = heap_openrv(relation, ShareRowExclusiveLock);
8069 + pstate->p_target_relation = heap_openrv(relation, RowExclusiveLock);
8072 pstate->p_target_relation = heap_openrv(relation, RowExclusiveLock);
8073 +#endif /* USE_REPLICATION */
8077 diff -aruN postgresql-8.2.4/src/backend/parser/parse_relation.c pgcluster-1.7.0rc7/src/backend/parser/parse_relation.c
8078 --- postgresql-8.2.4/src/backend/parser/parse_relation.c 2006-10-04 02:29:56.000000000 +0200
8079 +++ pgcluster-1.7.0rc7/src/backend/parser/parse_relation.c 2007-02-18 22:52:16.000000000 +0100
8081 #include "utils/lsyscache.h"
8082 #include "utils/syscache.h"
8084 +#ifdef USE_REPLICATION
8085 +#include "replicate.h"
8086 +#endif /* USE_REPLICATION */
8089 bool add_missing_from;
8090 @@ -636,7 +639,14 @@
8091 * to a rel in a statement, be careful to get the right access level
8092 * depending on whether we're doing SELECT FOR UPDATE/SHARE.
8094 +#ifdef USE_REPLICATION
8095 + if (PGRautoLockTable == true)
8096 + lockmode = isLockedRel(pstate, refname) ? ShareRowExclusiveLock : AccessShareLock;
8098 + lockmode = isLockedRel(pstate, refname) ? RowShareLock : AccessShareLock;
8100 lockmode = isLockedRel(pstate, refname) ? RowShareLock : AccessShareLock;
8101 +#endif /* USE_REPLICATION */
8102 rel = heap_openrv(relation, lockmode);
8103 rte->relid = RelationGetRelid(rel);
8105 diff -aruN postgresql-8.2.4/src/backend/postmaster/postmaster.c pgcluster-1.7.0rc7/src/backend/postmaster/postmaster.c
8106 --- postgresql-8.2.4/src/backend/postmaster/postmaster.c 2007-01-04 01:58:01.000000000 +0100
8107 +++ pgcluster-1.7.0rc7/src/backend/postmaster/postmaster.c 2007-02-18 22:52:16.000000000 +0100
8109 #include "storage/spin.h"
8112 +#ifdef USE_REPLICATION
8113 +#include "replicate.h"
8114 +#endif /* USE_REPLICATION */
8117 * List of active backends (or child processes anyway; we don't actually
8118 @@ -363,6 +366,61 @@
8119 #define EXIT_STATUS_0(st) ((st) == 0)
8120 #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1)
8122 +#ifdef USE_REPLICATION
8123 +char * Query_String = NULL;
8124 +ReplicateServerInfo * ReplicateServerData = NULL;
8125 +ReplicateServerInfo * CurrentReplicateServer = NULL;
8126 +ReplicateServerInfo * LastReplicateServer = NULL;
8127 +int ReplicateServerShmid = -1;
8128 +int TransactionQuery = 0;
8129 +int TransactionSock = -1;
8130 +int Transaction_Mode = 0;
8131 +bool PGR_Noticed_Abort = false;
8132 +bool Session_Authorization_Mode = false;
8133 +bool Create_Temp_Table_Mode = false;
8134 +ConfDataType * ConfData_Top = (ConfDataType *)NULL;
8135 +ConfDataType * ConfData_End = (ConfDataType *)NULL;
8136 +int RecoveryPortNumber = 0;
8137 +char * RsyncPath = NULL;
8138 +char * RsyncOption = NULL;
8139 +char * PgDumpPath = NULL;
8140 +bool RsyncCompress = true;
8141 +ReplicateNow * ReplicateCurrentTime = NULL;
8142 +CopyData * PGRCopyData = NULL;
8143 +bool PGR_Copy_Data_Need_Replicate = false;
8144 +PGR_Stand_Alone_Type * PGR_Stand_Alone = NULL;
8145 +PGR_Not_Replicate_Type * PGR_Not_Replicate = NULL;
8146 +int PGR_Not_Replicate_Rec_Num = 0;
8147 +bool PGR_Is_Replicated_Query = false;
8148 +PGR_Check_Lock_Type PGR_Check_Lock;
8149 +int PGR_Sock_To_Replication_Server = -1;
8150 +bool PGR_Need_Notice = false;
8151 +bool PGR_Lock_Noticed = false;
8152 +bool PGR_Recovery_Option = false;
8153 +int PGR_recovery_mode = 0;
8154 +char * PGRSelfHostName = NULL;
8155 +int PGR_Pending_Sem_Num = 0;
8156 +bool PGR_Reliable_Mode_Wait = true;
8157 +PGR_Retry_Query_Type PGR_Retry_Query;
8158 +int ClusterDBShmid = -1;
8159 +ClusterDBInfo * ClusterDBData = NULL;
8160 +PGR_Password_Info * PGR_password = NULL;
8161 +int PGR_Replication_Timeout = 60;
8162 +int PGR_Lifecheck_Timeout = 3;
8163 +int PGR_Lifecheck_Interval = 11;
8165 +/* initialize in utils/misc/guc.c */
8166 +bool PGRforceLoadBalance = false;
8167 +bool PGRcheckConstraintWithLock = false;
8168 +bool PGRautoLockTable = true;
8169 +bool PGRnotReplicatePreparedSelect = false;
8171 +bool needToUpdateReplicateIdOnNextQueryIsDone=false;
8172 +bool PGR_Is_Sync_OID = false;
8174 +static int Master_Pid = 0;
8175 +static int Lifecheck_Pid = 0;
8176 +#endif /* USE_REPLICATION */
8179 * Postmaster main entry point
8180 @@ -375,6 +433,11 @@
8181 char *userDoption = NULL;
8184 +#ifdef USE_REPLICATION
8185 + PGR_Check_Lock.check_lock_conflict = false;
8186 + PGR_Check_Lock.status_lock_conflict = STATUS_OK;
8187 +#endif /* USE_REPLICATION */
8189 MyProcPid = PostmasterPid = getpid();
8191 IsPostmasterEnvironment = true;
8192 @@ -420,10 +483,24 @@
8193 * tcop/postgres.c (the option sets should not conflict)
8194 * and with the common help() function in main/main.c.
8196 - while ((opt = getopt(argc, argv, "A:B:c:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:")) != -1)
8197 + while ((opt = getopt(argc, argv, "A:B:c:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:URu")) != -1)
8201 +#ifdef USE_REPLICATION
8203 + PGR_Recovery_Option = true;
8204 + PGR_recovery_mode = PGR_HOT_RECOVERY;
8207 + PGR_Recovery_Option = true;
8208 + PGR_recovery_mode = PGR_COLD_RECOVERY;
8211 + PGR_Recovery_Option = true;
8212 + PGR_recovery_mode = PGR_WITHOUT_BACKUP;
8214 +#endif /* USE_REPLICATION */
8216 SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
8218 @@ -696,6 +773,30 @@
8220 CreateDataDirLockFile(true);
8222 +#ifdef USE_REPLICATION
8223 + if (PGR_Get_Conf_Data( DataDir, CLUSTER_CONF_FILE ) == STATUS_OK)
8225 + if (PGR_Init_Replicate_Server_Data() != STATUS_OK)
8227 + fprintf(stderr,"PGR_Init_Replicate_Server_Data failed\n");
8228 + ExitPostmaster(0);
8230 + PGR_Set_Replicate_Server_Socket();
8231 + PGR_Free_Conf_Data();
8232 + if ((PGR_Recovery_Option) &&
8233 + (PGR_recovery_mode != PGR_HOT_RECOVERY))
8235 + fprintf(stderr,"Start in recovery mode! \n");
8236 + fprintf(stderr,"Please wait until a data synchronization finishes from Master DB... \n");
8237 + if (PGR_Recovery_Main(PGR_recovery_mode) != STATUS_OK)
8239 + fprintf(stderr,"PGR_Recovery_Main() failed with cold recovery\n");
8240 + ExitPostmaster(0);
8244 +#endif /* USE_REPLICATION */
8247 * If timezone is not set, determine what the OS uses. (In theory this
8248 * should be done during GUC initialization, but because it can take as
8249 @@ -960,6 +1061,21 @@
8251 StartupPID = StartupDataBase();
8253 +#ifdef USE_REPLICATION
8254 + Master_Pid = PGR_Master_Main();
8255 + if (Master_Pid < 0)
8257 + elog(DEBUG1,"PGR_Master_Main failed");
8258 + ExitPostmaster(1);
8260 + Lifecheck_Pid = PGR_Lifecheck_Main();
8261 + if (Lifecheck_Pid < 0)
8263 + elog(DEBUG1,"PGR_Lifecheck_Main failed");
8264 + ExitPostmaster(1);
8266 +#endif /* USE_REPLICATION */
8268 status = ServerLoop();
8271 @@ -1133,6 +1249,60 @@
8272 last_touch_time = time(NULL);
8274 nSockets = initMasks(&readmask);
8275 +#ifdef USE_REPLICATION
8276 + if (PGR_Recovery_Option)
8279 + pid = fork_process();
8280 + if (pid == 0) /* child */
8282 + fprintf(stderr,"Start in recovery mode! \n");
8283 + fprintf(stderr,"Please wait until a data synchronization finishes from Master DB... \n");
8284 + IsUnderPostmaster = true; /* we are a postmaster subprocess now */
8286 + /* Close the postmaster's sockets */
8287 + ClosePostmasterPorts(false);
8288 + /* Lose the postmaster's on-exit routines and port connections */
8290 + /* Release postmaster's working memory context */
8291 + MemoryContextSwitchTo(TopMemoryContext);
8292 + MemoryContextDelete(PostmasterContext);
8293 + PostmasterContext = NULL;
8294 + if (PGR_recovery_mode == PGR_HOT_RECOVERY)
8296 + if (PGR_Recovery_Main(PGR_recovery_mode) != STATUS_OK)
8298 + elog(DEBUG1,"PGR_Recovery_Main() failed with hot recovery.");
8299 + ExitPostmaster(1);
8304 + if (PGR_recovery_queue_data_req() != STATUS_OK)
8306 + elog(DEBUG1,"PGR_recovery_queue_data_req failed");
8307 + ExitPostmaster(1);
8310 + PGR_recovery_finish_send();
8311 + PGR_Recovery_Option = false;
8312 + fprintf(stderr,"OK! The data synchronization with Master DB was finished. \n");
8314 + ExitPostmaster(0);
8318 + ExitPostmaster(1);
8321 + if (PGR_password != NULL)
8323 + if(PGR_password->password != NULL)
8324 + memset(PGR_password->password,0,PASSWORD_MAX_LENGTH);
8325 + memset(PGR_password->md5Salt,0,sizeof(PGR_password->md5Salt));
8326 + memset(PGR_password->cryptSalt,0,sizeof(PGR_password->cryptSalt));
8328 +#endif /* USE_REPLICATION */
8332 @@ -1591,6 +1761,9 @@
8334 (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
8335 errmsg("sorry, too many clients already")));
8336 +#ifdef USE_REPLICATION
8337 + return STATUS_ERROR;
8342 @@ -1858,6 +2031,23 @@
8343 (errmsg_internal("postmaster received signal %d",
8344 postgres_signal_arg)));
8346 +#ifdef USE_REPLICATION
8347 + if (PGR_Get_Cluster_Status() == STATUS_RECOVERY)
8349 + PGR_recovery_error_send();
8350 + PGR_Recovery_Option = false;
8352 + if (Master_Pid > 0)
8354 + kill (Master_Pid,postgres_signal_arg);
8356 + if (Lifecheck_Pid > 0)
8358 + kill (Lifecheck_Pid,postgres_signal_arg);
8361 +#endif /* USE_REPLICATION */
8363 switch (postgres_signal_arg)
8366 @@ -3452,6 +3642,16 @@
8367 * MUST -- vadim 05-10-1999
8370 +#ifdef USE_REPLICATION
8371 + if (PGR_Get_Cluster_Status() == STATUS_RECOVERY)
8373 + write_stderr("sorry, recovery failed.");
8374 + PGR_recovery_error_send();
8375 + PGR_Recovery_Option = false;
8378 +#endif /* USE_REPLICATION */
8383 diff -aruN postgresql-8.2.4/src/backend/storage/large_object/inv_api.c pgcluster-1.7.0rc7/src/backend/storage/large_object/inv_api.c
8384 --- postgresql-8.2.4/src/backend/storage/large_object/inv_api.c 2006-09-07 17:37:25.000000000 +0200
8385 +++ pgcluster-1.7.0rc7/src/backend/storage/large_object/inv_api.c 2007-02-18 22:52:16.000000000 +0100
8387 #include "utils/fmgroids.h"
8388 #include "utils/resowner.h"
8390 +#ifdef USE_REPLICATION
8391 +#include "replicate.h"
8392 +#endif /* USE_REPLICATION */
8396 * All accesses to pg_largeobject and its index make use of a single Relation
8398 * use. We can use the index on pg_largeobject for checking OID
8399 * uniqueness, even though it has additional columns besides OID.
8401 +#ifdef USE_REPLICATION
8402 + PGR_Is_Sync_OID = true;
8403 +#endif /* USE_REPLICATION */
8404 if (!OidIsValid(lobjId))
8409 CommandCounterIncrement();
8411 +#ifdef USE_REPLICATION
8412 + PGR_Is_Sync_OID = false;
8413 +#endif /* USE_REPLICATION */
8417 diff -aruN postgresql-8.2.4/src/backend/storage/lmgr/deadlock.c pgcluster-1.7.0rc7/src/backend/storage/lmgr/deadlock.c
8418 --- postgresql-8.2.4/src/backend/storage/lmgr/deadlock.c 2006-09-23 01:20:13.000000000 +0200
8419 +++ pgcluster-1.7.0rc7/src/backend/storage/lmgr/deadlock.c 2007-02-18 22:52:16.000000000 +0100
8421 #include "storage/proc.h"
8422 #include "utils/memutils.h"
8424 +#ifdef USE_REPLICATION
8425 +#include "replicate.h"
8426 +#endif /* USE_REPLICATION */
8428 /* One edge in the waits-for graph */
8430 @@ -217,6 +220,13 @@
8431 if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges))
8432 elog(FATAL, "deadlock seems to have disappeared");
8434 +#ifdef USE_REPLICATION
8435 + if (PGR_Notice_Conflict() == STATUS_ERROR)
8439 + PGR_Lock_Noticed =true;
8441 return true; /* cannot find a non-deadlocked state */
8444 @@ -426,6 +436,18 @@
8448 +#ifdef USE_REPLICATION
8450 + * In PGCluster mode, conflicts with procs that have a younger rep-id do not
8451 + * matter. They are also processed in the younger proc's CheckDeadLock().
8452 + * This is necessary to make sure all nodes have the same deadlock order.
8453 + * So only the youngest (by rep-id) process is ever rolled back by a deadlock.
8455 + if ( MyProc->replicationId!=0 &&
8456 + MyProc -> replicationId < checkProc->replicationId)
8461 * Have we already seen this proc?
8463 diff -aruN postgresql-8.2.4/src/backend/storage/lmgr/lmgr.c pgcluster-1.7.0rc7/src/backend/storage/lmgr/lmgr.c
8464 --- postgresql-8.2.4/src/backend/storage/lmgr/lmgr.c 2006-10-04 02:29:57.000000000 +0200
8465 +++ pgcluster-1.7.0rc7/src/backend/storage/lmgr/lmgr.c 2007-02-18 22:52:16.000000000 +0100
8467 #include "utils/inval.h"
8468 #include "utils/lsyscache.h"
8470 +#ifdef USE_REPLICATION
8471 +#include "replicate.h"
8472 +#endif /* USE_REPLICATION */
8475 * RelationInitLockInfo
8476 @@ -476,9 +479,16 @@
8478 SET_LOCKTAG_TRANSACTION(tag, xid);
8480 +#ifdef USE_REPLICATION
8481 + if (!LockAcquire(&tag, ExclusiveLock, false,false))
8482 + elog(ERROR, "XactLockTableWait: LockAcquire failed");
8484 + LockRelease(&tag, ExclusiveLock,false);
8486 (void) LockAcquire(&tag, ShareLock, false, false);
8488 LockRelease(&tag, ShareLock, false);
8489 +#endif /* USE_REPLICATION */
8491 if (!TransactionIdIsInProgress(xid))
8493 @@ -635,3 +645,37 @@
8495 return false; /* default case */
8498 +#ifdef USE_REPLICATION
8500 + * XactLockTableWaitForCluster
8502 + * Wait for the specified transaction to commit or abort.
8505 +XactLockTableWaitForCluster(TransactionId xid,Buffer buffer)
8508 + TransactionId myxid = GetCurrentTransactionId();
8510 + Assert(!TransactionIdEquals( xid, myxid ));
8512 + LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
8514 + SET_LOCKTAG_TRANSACTION(tag, xid);
8516 + if (!LockAcquire(&tag, ExclusiveLock, false,false))
8517 + elog(ERROR, "XactLockTableWait: LockAcquire failed");
8519 + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
8521 + LockRelease(&tag, ExclusiveLock,false);
8524 + * Transaction was committed/aborted/crashed - we have to update
8525 + * pg_clog if transaction is still marked as running.
8527 + if (!TransactionIdDidCommit(xid) && !TransactionIdDidAbort(xid))
8528 + TransactionIdAbort(xid);
8530 +#endif /*USE_REPLICATION*/
8531 diff -aruN postgresql-8.2.4/src/backend/storage/lmgr/lock.c pgcluster-1.7.0rc7/src/backend/storage/lmgr/lock.c
8532 --- postgresql-8.2.4/src/backend/storage/lmgr/lock.c 2006-10-04 02:29:57.000000000 +0200
8533 +++ pgcluster-1.7.0rc7/src/backend/storage/lmgr/lock.c 2007-02-18 22:52:16.000000000 +0100
8535 #include "utils/ps_status.h"
8536 #include "utils/resowner.h"
8538 +#ifdef USE_REPLICATION
8539 +#include "storage/lmgr.h"
8540 +#include "replicate.h"
8541 +#endif /* USE_REPLICATION */
8543 /* This configuration variable is used to set the lock table size */
8544 int max_locks_per_xact; /* set by guc.c */
8545 @@ -737,6 +741,10 @@
8546 status = LockCheckConflicts(lockMethodTable, lockmode,
8547 lock, proclock, MyProc);
8549 +#ifdef USE_REPLICATION
8550 + PGR_Check_Lock.status_lock_conflict = status;
8551 + PGR_Check_Lock.deadlock = false;
8552 +#endif /* USE_REPLICATION */
8553 if (status == STATUS_OK)
8555 /* No conflict with held or previously requested locks */
8556 @@ -746,6 +754,17 @@
8559 Assert(status == STATUS_FOUND);
8560 +#ifdef USE_REPLICATION
8561 + if ((PGR_Need_Notice == true) &&
8562 + (PGR_Check_Lock.check_lock_conflict == true))
8564 + if (!PGR_Lock_Noticed && PGR_Notice_Conflict() == STATUS_ERROR)
8568 + PGR_Lock_Noticed = true;
8570 +#endif /* USE_REPLICATION */
8573 * We can't acquire the lock immediately. If caller specified no
8574 diff -aruN postgresql-8.2.4/src/backend/storage/lmgr/proc.c pgcluster-1.7.0rc7/src/backend/storage/lmgr/proc.c
8575 --- postgresql-8.2.4/src/backend/storage/lmgr/proc.c 2006-11-21 21:59:52.000000000 +0100
8576 +++ pgcluster-1.7.0rc7/src/backend/storage/lmgr/proc.c 2007-02-18 22:52:16.000000000 +0100
8578 #include "storage/procarray.h"
8579 #include "storage/spin.h"
8581 +#ifdef USE_REPLICATION
8582 +#include "replicate.h"
8583 +#endif /* USE_REPLICATION */
8586 int DeadlockTimeout = 1000;
8588 MyProc->lwWaitLink = NULL;
8589 MyProc->waitLock = NULL;
8590 MyProc->waitProcLock = NULL;
8591 +#ifdef USE_REPLICATION
8592 + MyProc->replicationId = 0;
8594 for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
8595 SHMQueueInit(&(MyProc->myProcLocks[i]));
8598 MyProc->lwWaitLink = NULL;
8599 MyProc->waitLock = NULL;
8600 MyProc->waitProcLock = NULL;
8601 +#ifdef USE_REPLICATION
8602 + MyProc->replicationId = 0;
8604 for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
8605 SHMQueueInit(&(MyProc->myProcLocks[i]));
8607 @@ -737,6 +746,17 @@
8611 +#ifdef USE_REPLICATION
8612 + if(proc->replicationId == 0 ||
8613 + (MyProc->replicationId > proc->replicationId &&
8614 + proc->heldLocks & aheadRequests) ) {
8615 + elog(DEBUG1,"origin's RId = %d , MyProc->RId = %d , skip",proc->replicationId,MyProc->replicationId);
8616 + aheadRequests |= (1 << proc->waitLockMode);
8617 + proc = (PGPROC *) MAKE_PTR(proc->links.next);
8622 /* Break out of loop to put myself before him */
8625 @@ -752,8 +772,21 @@
8629 +#ifdef USE_REPLICATION
8630 + proc = (PGPROC *) &(waitQueue->links);
8631 + for (i = 0; i < waitQueue->size+1; i++){
8632 + elog(DEBUG1,"origin's RId = %d , MyProc->RId = %d",proc->replicationId,MyProc->replicationId);
8633 + if(proc->replicationId == 0 ||
8634 + MyProc->replicationId > proc->replicationId) {
8635 + proc= (PGPROC *) MAKE_PTR(proc->links.next);
8641 /* I hold no locks, so I can't push in front of anyone. */
8642 proc = (PGPROC *) &(waitQueue->links);
8643 +#endif /* USE_REPLICATION */
8647 @@ -776,7 +809,11 @@
8648 * CheckDeadLock's recovery code, except that we shouldn't release the
8649 * semaphore since we haven't tried to lock it yet.
8651 +#ifdef USE_REPLICATION
8652 + if (early_deadlock && proc->replicationId < MyProc->replicationId)
8657 RemoveFromWaitQueue(MyProc, hashcode);
8658 return STATUS_ERROR;
8659 @@ -976,6 +1013,9 @@
8663 +#ifdef USE_REPLICATION
8664 + bool pgr_notice = false;
8665 +#endif /* USE_REPLICATION */
8668 * Acquire exclusive lock on the entire shared lock data structures. Must
8669 @@ -1047,6 +1087,10 @@
8673 +#ifdef USE_REPLICATION
8674 + pgr_notice = true;
8678 * Release locks acquired at head of routine. Order is not critical, so
8679 * do it back-to-front to avoid waking another CheckDeadLock instance
8680 @@ -1055,6 +1099,12 @@
8682 for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
8683 LWLockRelease(FirstLockMgrLock + i);
8684 +#ifdef USE_REPLICATION
8685 + if (pgr_notice == true)
8687 + PGR_Notice_DeadLock();
8693 @@ -1110,6 +1160,15 @@
8695 TimestampTz fin_time;
8696 struct itimerval timeval;
8697 +#ifdef USE_REPLICATION
8700 + if (ReplicateCurrentTime != NULL)
8702 + useFlag = ReplicateCurrentTime->useFlag;
8703 + ReplicateCurrentTime->useFlag = DATA_INIT;
8705 +#endif /* USE_REPLICATION */
8707 if (is_statement_timeout)
8709 @@ -1154,6 +1213,12 @@
8710 fin_time = GetCurrentTimestamp();
8711 fin_time = TimestampTzPlusMilliseconds(fin_time, delayms);
8712 deadlock_timeout_active = true;
8713 +#ifdef USE_REPLICATION
8714 + if (ReplicateCurrentTime != NULL)
8716 + ReplicateCurrentTime->useFlag = useFlag;
8718 +#endif /* USE_REPLICATION */
8719 if (fin_time >= statement_fin_time)
8722 @@ -1167,6 +1232,12 @@
8723 MemSet(&timeval, 0, sizeof(struct itimerval));
8724 timeval.it_value.tv_sec = delayms / 1000;
8725 timeval.it_value.tv_usec = (delayms % 1000) * 1000;
8726 +#ifdef USE_REPLICATION
8727 + if (ReplicateCurrentTime != NULL)
8729 + ReplicateCurrentTime->useFlag = useFlag;
8731 +#endif /* USE_REPLICATION */
8732 if (setitimer(ITIMER_REAL, &timeval, NULL))
8735 @@ -1232,12 +1303,30 @@
8736 CheckStatementTimeout(void)
8739 +#ifdef USE_REPLICATION
8741 +#endif /* USE_REPLICATION */
8743 if (!statement_timeout_active)
8744 return true; /* do nothing if not active */
8746 +#ifdef USE_REPLICATION
8747 + if (ReplicateCurrentTime != NULL)
8749 + useFlag = ReplicateCurrentTime->useFlag;
8750 + ReplicateCurrentTime->useFlag = DATA_INIT;
8752 +#endif /* USE_REPLICATION */
8754 now = GetCurrentTimestamp();
8756 +#ifdef USE_REPLICATION
8757 + if (ReplicateCurrentTime != NULL)
8759 + ReplicateCurrentTime->useFlag = useFlag;
8761 +#endif /* USE_REPLICATION */
8763 if (now >= statement_fin_time)
8766 diff -aruN postgresql-8.2.4/src/backend/tcop/postgres.c pgcluster-1.7.0rc7/src/backend/tcop/postgres.c
8767 --- postgresql-8.2.4/src/backend/tcop/postgres.c 2007-01-04 01:58:01.000000000 +0100
8768 +++ pgcluster-1.7.0rc7/src/backend/tcop/postgres.c 2007-02-18 22:52:16.000000000 +0100
8773 +#ifdef USE_REPLICATION
8774 +#include "replicate.h"
8775 +#endif /* USE_REPLICATION */
8778 extern char *optarg;
8781 /* wait N seconds to allow attach from a debugger */
8782 int PostAuthDelay = 0;
8785 +#ifdef USE_REPLICATION
8786 +bool PGR_Not_Replication_Query = false;
8787 +#endif /* USE_REPLICATION */
8791 @@ -753,6 +759,24 @@
8792 bool was_logged = false;
8795 +#ifdef USE_REPLICATION
8796 + char * query_ptr = NULL;
8797 + char * null_ptr = NULL;
8801 + PGR_Reliable_Mode_Wait = false;
8802 + query_ptr = (char *)query_string;
8803 + if (PGR_Is_Replicated_Query == false)
8805 + PGR_Is_Replicated_Query = PGR_Is_Replicated_Command(query_ptr);
8807 + PGR_Retry_Query.query_string = (char *)query_string;
8808 + PGR_Retry_Query.query_len = strlen(query_string);
8809 + PGR_Retry_Query.cmdSts = CMD_STS_OTHER;
8810 + PGR_Retry_Query.cmdType = CMD_TYPE_OTHER;
8811 +#endif /* USE_REPLICATION */
8814 * Report query to various monitoring facilities.
8816 @@ -831,6 +855,18 @@
8817 DestReceiver *receiver;
8820 +#ifdef USE_REPLICATION
8821 + PGR_Not_Replication_Query = false;
8822 + PGR_Reliable_Mode_Wait = false;
8824 + PGR_Retry_Query.query_string = NULL;
8825 + PGR_Retry_Query.query_len = 0;
8826 + PGR_Retry_Query.cmdSts = CMD_STS_OTHER;
8827 + PGR_Retry_Query.cmdType = CMD_TYPE_OTHER;
8828 + PGR_Retry_Query.useFlag = DATA_INIT;
8829 + PGR_Lock_Noticed = false;
8830 +#endif /* USE_REPLICATION */
8833 * Get the command name for use in status display (it also becomes the
8834 * default completion tag, down inside PortalRun). Set ps_status and
8835 @@ -853,10 +889,232 @@
8837 if (IsAbortedTransactionBlockState() &&
8838 !IsTransactionExitStmt(parsetree))
8840 +#ifdef USE_REPLICATION
8841 + Transaction_Mode = 0;
8844 (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
8845 errmsg("current transaction is aborted, "
8846 "commands ignored until end of transaction block")));
8849 +#ifdef USE_REPLICATION
8850 + Query_String = NULL;
8851 + query_ptr = PGR_Remove_Comment(query_ptr);
8852 + PGR_Check_Lock.dest = TO_FRONTEND;
8853 + PGR_Need_Notice = false;
8854 + PGR_Check_Lock.check_lock_conflict = false;
8856 + /* skip replication while recovery mode is running */
8857 + if (PGR_Get_Cluster_Status() == STATUS_RECOVERY)
8860 + PGR_Not_Replication_Query = true;
8862 + PGR_Is_Replicated_Query = true;
8863 + if (!strcmp(commandTag,"SELECT"))
8865 + if (PGR_Is_System_Command(query_ptr))
8867 + status = PGR_Call_System_Command(query_ptr);
8868 + if (status == STATUS_SKIP_QUERY)
8870 + EndCommand(PGR_ALREADY_REPLICATED_NOTICE_CMD,dest);
8875 + EndCommand("SYSTEM_COMMAND",dest);
8880 + Transaction_Mode = PGR_Set_Transaction_Mode(Transaction_Mode,commandTag);
8881 + if (Transaction_Mode > 0)
8883 + PGR_Need_Notice = true;
8884 + PGR_Check_Lock.check_lock_conflict = true;
8886 + goto Skip_Replication;
8890 + if (!xact_started)
8892 + start_xact_command();
8893 + xact_started = true;
8896 + if (skip_cnt == 0)
8898 + skip_cnt = PGR_Is_Skip_Replication(query_ptr);
8900 + null_ptr = PGR_scan_terminate (query_ptr);
8901 + if(null_ptr != NULL)
8905 + Transaction_Mode = PGR_Set_Transaction_Mode(Transaction_Mode,commandTag);
8906 + if ((PGR_Is_Replicated_Query ) ||
8917 + PGR_Copy_Data_Need_Replicate = false;
8918 + if (!strncmp(commandTag,"SELECT",strlen("SELECT")))
8920 + if (PGR_Is_System_Command(query_ptr))
8922 + status = PGR_Call_System_Command(query_ptr);
8923 + if (status == STATUS_SKIP_QUERY)
8925 + EndCommand(PGR_ALREADY_REPLICATED_NOTICE_CMD,dest);
8930 + EndCommand("SYSTEM_COMMAND",dest);
8935 + PGR_Check_Lock.status_lock_conflict = STATUS_OK;
8936 + PGR_Check_Lock.dest = TO_FRONTEND;
8940 + PGR_Copy_Data_Need_Replicate = false;
8942 + /* check cluster db status */
8944 + if ((PGR_Get_Cluster_Status() == STATUS_RECOVERY) &&
8945 + (PGR_Not_Replication_Query == false) &&
8946 + (Transaction_Mode == 0 ) )
8948 + elog(WARNING, "This query is not permitted while recovery db ");
8949 + if(null_ptr != NULL)
8952 + query_ptr = null_ptr +1;
8957 + if (PGR_Is_Stand_Alone() == true)
8959 + if (PGR_Stand_Alone->permit == PERMIT_READ_ONLY)
8961 + if (!strcmp(commandTag, "SHOW")) {
8962 + VariableShowStmt *stmt = (VariableShowStmt *)parsetree;
8963 + if (!strcmp(stmt->name, "replication_server")) {
8964 + PGR_Not_Replication_Query = true;
8968 + if (PGR_Not_Replication_Query == false)
8969 + elog(ERROR, "This query is not permitted when all replication servers fell down ");
8972 + else if ((PGRforceLoadBalance == false) &&
8973 + ((PGR_Not_Replication_Query == false ) ||
8974 + (!strcmp(commandTag,"SELECT"))))
8976 + status = PGR_replication(query_ptr,dest,parsetree,commandTag);
8977 + if (status == STATUS_REPLICATED)
8981 + finish_xact_command();
8982 + xact_started = false;
8984 + CommandCounterIncrement();
8987 + else if (status == STATUS_ERROR)
8989 + if (!strcmp(commandTag, "SHOW")) {
8990 + VariableShowStmt *stmt = (VariableShowStmt *)parsetree;
8991 + if (!strcmp(stmt->name, "replication_server")) {
8992 + PGR_Not_Replication_Query = true;
8995 + else if (PGR_Stand_Alone->permit == PERMIT_READ_ONLY)
8997 + elog(ERROR, "This query is not permitted when all replication servers fell down ");
9000 + else if (status == STATUS_DEADLOCK_DETECT)
9002 + PGR_Need_Notice = false;
9003 + elog(ERROR, "postmaster deadlock detected");
9006 + else if (status == STATUS_REPLICATION_ABORT)
9008 + PGR_Need_Notice = false;
9009 + elog(ERROR, "replication server should be down, transaction aborted.");
9012 + else if (status != STATUS_CONTINUE)
9014 + PGR_Check_Lock.dest = TO_FRONTEND;
9018 + PGR_Check_Lock.dest = TO_REPLICATION_SERVER;
9019 + PGR_Reliable_Mode_Wait = true;
9023 + if(null_ptr != NULL)
9026 + query_ptr = null_ptr +1;
9028 + if (!PGR_Is_Replicated_Query )
9030 + if ((!strcmp(commandTag,"BEGIN")) ||
9031 + (!strcmp(commandTag, "START TRANSACTION")) ||
9032 + (Transaction_Mode == 0 ) )
9034 + PGR_Reload_Start_Time();
9037 + if (((IsA(parsetree, TransactionStmt)) ||
9038 + (Transaction_Mode > 0) ||
9039 + (Create_Temp_Table_Mode == true) ||
9040 + (Session_Authorization_Mode == true)) ||
9041 + (!strcmp(commandTag,"COPY")))
9043 + PGR_Need_Notice = true;
9044 + PGR_Check_Lock.check_lock_conflict = true;
9048 + if (PGR_Not_Replication_Query == false)
9050 + PGR_Need_Notice = true;
9051 + PGR_Check_Lock.check_lock_conflict = true;
9055 + if ((PGR_Is_Replicated_Query ) &&
9056 + (!strncmp(commandTag, "SELECT",strlen("SELECT"))))
9058 + PGR_Need_Notice = true;
9059 + PGR_Check_Lock.check_lock_conflict = true;
9064 +#endif /* USE_REPLICATION */
9066 /* Make sure we are in a transaction command */
9067 start_xact_command();
9068 @@ -983,7 +1241,44 @@
9069 * command the client sent, regardless of rewriting. (But a command
9070 * aborted by error will not send an EndCommand report at all.)
9072 +#ifdef USE_REPLICATION
9074 + * In Non-CONTROL LOCK CONFLICT mode, we *MUST NOT* send the command tag twice.
9075 + * So, if it was already sent for lock notification, we do not send the
9076 + * tag here. The same applies to ReadyForQuery.
9078 + if(!(PGR_Is_Replicated_Query && PGR_Lock_Noticed))
9080 EndCommand(completionTag, dest);
9082 +#ifdef USE_REPLICATION
9083 + if(PGR_Is_Replicated_Query &&
9084 + needToUpdateReplicateIdOnNextQueryIsDone) {
9085 + ++(ReplicationLog_Info.PGR_Replicate_ID);
9087 + if (CurrentReplicateServer != NULL)
9089 + /* set replicate id in this system */
9090 + ++(CurrentReplicateServer->replicate_id);
9092 + elog(DEBUG1,"increased replicate_id to %d",CurrentReplicateServer->replicate_id);
9093 + needToUpdateReplicateIdOnNextQueryIsDone=false;
9096 + if (PGR_Get_Cluster_Status() != STATUS_RECOVERY)
9098 + if ((PGR_Need_Notice == true) &&
9099 + (PGRforceLoadBalance == false))
9101 + PGR_Notice_Transaction_Query_Done();
9103 + if ((Transaction_Mode == 0) &&
9104 + (ReplicateCurrentTime != NULL))
9106 + ReplicateCurrentTime->use_seed = 1;
9110 } /* end loop over parsetrees */
9113 @@ -1144,11 +1439,15 @@
9115 if (IsAbortedTransactionBlockState() &&
9116 !IsTransactionExitStmt(parsetree))
9118 +#ifdef USE_REPLICATION
9119 + Transaction_Mode = 0;
9122 (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
9123 errmsg("current transaction is aborted, "
9124 "commands ignored until end of transaction block")));
9128 * OK to analyze, rewrite, and plan this query. Note that the
9129 * originally specified parameter set is not required to be complete,
9130 @@ -1382,11 +1681,15 @@
9131 if (IsAbortedTransactionBlockState() &&
9132 (!IsTransactionExitStmtList(pstmt->query_list) ||
9135 +#ifdef USE_REPLICATION
9136 + Transaction_Mode = 0;
9139 (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
9140 errmsg("current transaction is aborted, "
9141 "commands ignored until end of transaction block")));
9145 * Create the portal. Allow silent replacement of an existing portal only
9146 * if the unnamed portal is specified.
9147 @@ -1769,11 +2072,15 @@
9149 if (IsAbortedTransactionBlockState() &&
9150 !IsTransactionExitStmtList(portal->parseTrees))
9152 +#ifdef USE_REPLICATION
9153 + Transaction_Mode = 0;
9156 (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
9157 errmsg("current transaction is aborted, "
9158 "commands ignored until end of transaction block")));
9161 /* Check for cancel signal before we start execution */
9162 CHECK_FOR_INTERRUPTS();
9164 @@ -2101,11 +2408,15 @@
9166 if (IsAbortedTransactionBlockState() &&
9167 PreparedStatementReturnsTuples(pstmt))
9169 +#ifdef USE_REPLICATION
9170 + Transaction_Mode = 0;
9173 (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
9174 errmsg("current transaction is aborted, "
9175 "commands ignored until end of transaction block")));
9178 if (whereToSendOutput != DestRemote)
9179 return; /* can't actually do anything... */
9181 @@ -2171,11 +2482,15 @@
9183 if (IsAbortedTransactionBlockState() &&
9186 +#ifdef USE_REPLICATION
9187 + Transaction_Mode = 0;
9190 (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
9191 errmsg("current transaction is aborted, "
9192 "commands ignored until end of transaction block")));
9195 if (whereToSendOutput != DestRemote)
9196 return; /* can't actually do anything... */
9198 @@ -2332,6 +2647,9 @@
9199 * backend. This is necessary precisely because we don't clean up our
9200 * shared memory state.
9202 +#ifdef USE_REPLICATION
9204 +#endif /* USE_REPLICATION */
9208 @@ -2369,6 +2687,9 @@
9212 +#ifdef USE_REPLICATION
9214 +#endif /* USE_REPLICATION */
9218 @@ -2383,6 +2704,9 @@
9220 authdie(SIGNAL_ARGS)
9222 +#ifdef USE_REPLICATION
9224 +#endif /* USE_REPLICATION */
9228 @@ -3369,6 +3693,14 @@
9229 pgstat_report_activity("<IDLE>");
9232 +#ifdef USE_REPLICATION
9234 + * In Non-CONTROL LOCK CONFLICT mode, we *MUST NOT* send the command tag twice.
9235 + * So, if it was already sent for lock notification, we do not send the
9236 + * tag again. The same applies to ReadyForQuery, which is guarded here.
9238 + if(!(PGR_Is_Replicated_Query && PGR_Lock_Noticed))
9240 ReadyForQuery(whereToSendOutput);
9241 send_ready_for_query = false;
9243 @@ -3409,6 +3741,26 @@
9244 if (ignore_till_sync && firstchar != EOF)
9247 +#ifdef USE_REPLICATION
9248 + if ((firstchar == CMD_TYPE_P_PARSE) ||
9249 + (firstchar == CMD_TYPE_P_BIND) ||
9250 + (firstchar == CMD_TYPE_P_DESCRIBE) ||
9251 + (firstchar == CMD_TYPE_P_EXECUTE) ||
9252 + (firstchar == CMD_TYPE_P_SYNC) ||
9253 + (firstchar == CMD_TYPE_P_CLOSE))
9255 + if (PGR_Send_Input_Message(firstchar, &input_message) != STATUS_OK)
9257 + if ((PGR_Is_Stand_Alone() == true) &&
9258 + (PGR_Stand_Alone->permit == PERMIT_READ_ONLY))
9260 + elog(WARNING, "This query is not permitted when all replication servers fell down ");
9265 +#endif /* USE_REPLICATION */
9269 case 'Q': /* simple query */
9270 @@ -3622,6 +3974,27 @@
9274 +#ifdef USE_REPLICATION
9275 + if (PGRforceLoadBalance == false)
9277 + if (PGR_Is_Replicated_Query == false)
9279 + PGR_Noticed_Abort = true;
9280 + PGRsend_system_command(CMD_STS_TRANSACTION_ABORT, CMD_TYPE_FRONTEND_CLOSED);
9282 + else if ((Transaction_Mode >= 1) && (PGR_Noticed_Abort == false))
9284 + if (PGR_Did_Commit_Transaction() == true)
9286 + pgstat_report_activity("commit");
9287 + exec_simple_query("commit");
9292 + PGR_Notice_Transaction_Query_Aborted();
9294 +#endif /* USE_REPLICATION */
9296 * Reset whereToSendOutput to prevent ereport from attempting
9297 * to send any more messages to client.
9298 diff -aruN postgresql-8.2.4/src/backend/tcop/pquery.c pgcluster-1.7.0rc7/src/backend/tcop/pquery.c
9299 --- postgresql-8.2.4/src/backend/tcop/pquery.c 2006-10-04 02:29:58.000000000 +0200
9300 +++ pgcluster-1.7.0rc7/src/backend/tcop/pquery.c 2007-02-18 22:52:16.000000000 +0100
9302 #include "tcop/utility.h"
9303 #include "utils/memutils.h"
9305 +#ifdef USE_REPLICATION
9306 +#include "replicate.h"
9307 +#endif /* USE_REPLICATION */
9310 * ActivePortal is the currently executing Portal (the most closely nested,
9311 @@ -188,6 +191,19 @@
9312 strcpy(completionTag, "???");
9315 +#ifdef USE_REPLICATION
9316 + if ((PGR_Is_Replicated_Query == true ) &&
9317 + (PGR_Get_Cluster_Status() != STATUS_RECOVERY))
9320 + * A replicated *SELECT* query is used ONLY to replicate
9321 + * locks and function execution. Its results will all
9322 + * be discarded by the pgrp processes,
9323 + * so we don't need to send them.
9325 + dest = None_Receiver;
9327 +#endif /*USE_REPLICATION */
9330 /* Now take care of any queued AFTER triggers */
9331 diff -aruN postgresql-8.2.4/src/backend/tcop/utility.c pgcluster-1.7.0rc7/src/backend/tcop/utility.c
9332 --- postgresql-8.2.4/src/backend/tcop/utility.c 2006-10-04 02:29:58.000000000 +0200
9333 +++ pgcluster-1.7.0rc7/src/backend/tcop/utility.c 2007-02-18 22:52:16.000000000 +0100
9335 #include "utils/guc.h"
9336 #include "utils/syscache.h"
9338 +#ifdef USE_REPLICATION
9339 +#include "replicate.h"
9340 +#endif /* USE_REPLICATION */
9343 * Error-checking support for DROP commands
9344 @@ -1289,29 +1292,48 @@
9348 +#ifdef USE_REPLICATION
9349 + PGR_Not_Replication_Query = true;
9350 +#endif /* USE_REPLICATION */
9353 case T_TransactionStmt:
9355 TransactionStmt *stmt = (TransactionStmt *) parsetree;
9357 +#ifdef USE_REPLICATION
9358 + bool isInTransaction=IsTransactionBlock();
9359 +#endif /* USE_REPLICATION */
9363 case TRANS_STMT_BEGIN:
9365 +#ifdef USE_REPLICATION
9366 + PGR_Not_Replication_Query=isInTransaction;
9367 +#endif /* USE_REPLICATION */
9370 case TRANS_STMT_START:
9371 tag = "START TRANSACTION";
9372 +#ifdef USE_REPLICATION
9373 + PGR_Not_Replication_Query=isInTransaction;
9374 +#endif /* USE_REPLICATION */
9377 case TRANS_STMT_COMMIT:
9379 +#ifdef USE_REPLICATION
9380 + PGR_Not_Replication_Query=!isInTransaction;
9381 +#endif /* USE_REPLICATION */
9384 case TRANS_STMT_ROLLBACK:
9385 case TRANS_STMT_ROLLBACK_TO:
9387 +#ifdef USE_REPLICATION
9388 + PGR_Not_Replication_Query=!isInTransaction;
9389 +#endif /* USE_REPLICATION */
9392 case TRANS_STMT_SAVEPOINT:
9393 @@ -1343,10 +1365,16 @@
9395 case T_DeclareCursorStmt:
9396 tag = "DECLARE CURSOR";
9397 +#ifdef USE_REPLICATION
9398 + PGR_Not_Replication_Query = true;
9399 +#endif /* USE_REPLICATION */
9402 case T_ClosePortalStmt:
9403 tag = "CLOSE CURSOR";
9404 +#ifdef USE_REPLICATION
9405 + PGR_Not_Replication_Query = true;
9406 +#endif /* USE_REPLICATION */
9410 @@ -1355,6 +1383,9 @@
9412 tag = (stmt->ismove) ? "MOVE" : "FETCH";
9414 +#ifdef USE_REPLICATION
9415 + PGR_Not_Replication_Query = true;
9416 +#endif /* USE_REPLICATION */
9419 case T_CreateDomainStmt:
9420 @@ -1677,10 +1708,16 @@
9424 +#ifdef USE_REPLICATION
9425 + PGR_Not_Replication_Query = true;
9426 +#endif /* USE_REPLICATION */
9431 +#ifdef USE_REPLICATION
9432 + PGR_Not_Replication_Query = true;
9433 +#endif /* USE_REPLICATION */
9436 case T_VariableSetStmt:
9437 @@ -1689,6 +1726,14 @@
9439 case T_VariableShowStmt:
9441 +#ifdef USE_REPLICATION
9443 + VariableShowStmt *stmt = (VariableShowStmt *)parsetree;
9444 + if (strcasecmp(stmt->name, "replication_server")) {
9445 + PGR_Not_Replication_Query = true;
9448 +#endif /* USE_REPLICATION */
9451 case T_VariableResetStmt:
9452 @@ -1755,10 +1800,16 @@
9454 case T_CheckPointStmt:
9456 +#ifdef USE_REPLICATION
9457 + PGR_Not_Replication_Query = true;
9458 +#endif /* USE_REPLICATION */
9463 +#ifdef USE_REPLICATION
9464 + PGR_Not_Replication_Query = true;
9465 +#endif /* USE_REPLICATION */
9468 case T_CreateConversionStmt:
9469 @@ -1783,14 +1834,35 @@
9473 +#ifdef USE_REPLICATION
9474 + if ((PGRnotReplicatePreparedSelect == true) &&
9475 + (PGR_is_select_prepare_query() == true))
9477 + PGR_Not_Replication_Query = true;
9479 +#endif /* USE_REPLICATION */
9484 +#ifdef USE_REPLICATION
9485 + if ((PGRnotReplicatePreparedSelect == true) &&
9486 + (PGR_is_select_prepared_statement((PrepareStmt *)parsetree) == true))
9488 + PGR_Not_Replication_Query = true;
9490 +#endif /* USE_REPLICATION */
9493 case T_DeallocateStmt:
9495 +#ifdef USE_REPLICATION
9496 + if ((PGRnotReplicatePreparedSelect == true) &&
9497 + (PGR_is_select_prepared_statement((PrepareStmt *)parsetree) == true))
9499 + PGR_Not_Replication_Query = true;
9501 +#endif /* USE_REPLICATION */
9505 @@ -1800,6 +1872,13 @@
9509 +#ifdef USE_REPLICATION
9510 + if(PGRforceLoadBalance == true)
9512 + PGR_Not_Replication_Query = true;
9514 +#endif /* USE_REPLICATION */
9519 @@ -1835,7 +1914,12 @@
9520 tag = "SELECT FOR SHARE";
9525 +#ifdef USE_REPLICATION
9526 + PGR_Not_Replication_Query = true;
9527 +#endif /* USE_REPLICATION */
9532 @@ -1853,6 +1937,9 @@
9533 elog(WARNING, "unrecognized commandType: %d",
9534 (int) parsetree->commandType);
9536 +#ifdef USE_REPLICATION
9537 + PGR_Not_Replication_Query = true;
9538 +#endif /* USE_REPLICATION */
9542 diff -aruN postgresql-8.2.4/src/backend/utils/adt/float.c pgcluster-1.7.0rc7/src/backend/utils/adt/float.c
9543 --- postgresql-8.2.4/src/backend/utils/adt/float.c 2006-10-05 03:40:45.000000000 +0200
9544 +++ pgcluster-1.7.0rc7/src/backend/utils/adt/float.c 2007-02-18 22:52:16.000000000 +0100
9546 #include "utils/array.h"
9547 #include "utils/builtins.h"
9549 +#ifdef USE_REPLICATION
9550 +#include "replicate.h"
9551 +#endif /* USE_REPLICATION */
9554 /* from my RH5.2 gcc math.h file - thomas 2000-04-03 */
9555 @@ -1886,7 +1889,11 @@
9558 /* result [0.0 - 1.0) */
9559 +#ifdef USE_REPLICATION
9560 + result = ((double) PGR_Random()) / ((double) MAX_RANDOM_VALUE + 1);
9562 result = (double) random() / ((double) MAX_RANDOM_VALUE + 1);
9563 +#endif /* USE_REPLICATION */
9565 PG_RETURN_FLOAT8(result);
9567 diff -aruN postgresql-8.2.4/src/backend/utils/adt/nabstime.c pgcluster-1.7.0rc7/src/backend/utils/adt/nabstime.c
9568 --- postgresql-8.2.4/src/backend/utils/adt/nabstime.c 2006-07-14 16:52:24.000000000 +0200
9569 +++ pgcluster-1.7.0rc7/src/backend/utils/adt/nabstime.c 2007-02-18 22:52:16.000000000 +0100
9571 #include "utils/builtins.h"
9572 #include "utils/nabstime.h"
9574 +#ifdef USE_REPLICATION
9575 +#include "replicate.h"
9576 +#endif /* USE_REPLICATION */
9578 #define MIN_DAYNUM (-24856) /* December 13, 1901 */
9579 #define MAX_DAYNUM 24854 /* January 18, 2038 */
9585 +#ifdef USE_REPLICATION
9586 + struct timeval tp;
9587 + PGR_GetTimeOfDay(&tp,NULL);
9591 +#endif /* USE_REPLICATION */
9592 return (AbsoluteTime) now;
9595 @@ -1031,9 +1041,14 @@
9599 +#ifdef USE_REPLICATION
9600 + struct timeval tp;
9601 + PGR_GetTimeOfDay(&tp,NULL);
9605 PG_RETURN_ABSOLUTETIME(INVALID_ABSTIME);
9608 PG_RETURN_ABSOLUTETIME((AbsoluteTime) sec);
9611 @@ -1588,7 +1603,11 @@
9615 +#ifdef USE_REPLICATION
9616 + PGR_GetTimeOfDay(&tp,NULL);
9618 gettimeofday(&tp, NULL);
9619 +#endif /* USE_REPLICATION */
9620 tt = (pg_time_t) tp.tv_sec;
9621 pg_strftime(templ, sizeof(templ), "%a %b %d %H:%M:%S.%%06d %Y %Z",
9622 pg_localtime(&tt, global_timezone));
9623 diff -aruN postgresql-8.2.4/src/backend/utils/adt/ri_triggers.c pgcluster-1.7.0rc7/src/backend/utils/adt/ri_triggers.c
9624 --- postgresql-8.2.4/src/backend/utils/adt/ri_triggers.c 2006-10-04 02:29:59.000000000 +0200
9625 +++ pgcluster-1.7.0rc7/src/backend/utils/adt/ri_triggers.c 2007-02-18 22:52:16.000000000 +0100
9627 #include "utils/typcache.h"
9628 #include "miscadmin.h"
9630 +#ifdef USE_REPLICATION
9631 +#include "replicate.h"
9632 +#endif /* USE_REPLICATION */
9636 @@ -271,8 +274,18 @@
9639 quoteRelationName(pkrelname, pk_rel);
9640 +#ifdef USE_REPLICATION
9641 + if (PGRcheckConstraintWithLock)
9642 + snprintf(querystr, sizeof(querystr), "SELECT 1 FROM ONLY %s x FOR UPDATE OF x",
9645 + snprintf(querystr, sizeof(querystr), "SELECT 1 FROM ONLY %s x ",
9649 snprintf(querystr, sizeof(querystr), "SELECT 1 FROM ONLY %s x FOR SHARE OF x",
9651 +#endif /* USE_REPLICATION */
9653 /* Prepare and save the plan */
9654 qplan = ri_PlanCheck(querystr, 0, NULL,
9656 queryoids[i] = SPI_gettypeid(fk_rel->rd_att,
9657 qkey.keypair[i][RI_KEYPAIR_FK_IDX]);
9659 +#ifdef USE_REPLICATION
9660 + if (PGRcheckConstraintWithLock)
9661 +#endif /* USE_REPLICATION */
9662 strcat(querystr, " FOR SHARE OF x");
9664 /* Prepare and save the plan */
9666 queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
9667 qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
9669 +#ifdef USE_REPLICATION
9670 + if (PGRcheckConstraintWithLock)
9671 +#endif /* USE_REPLICATION */
9672 strcat(querystr, " FOR SHARE OF x");
9674 /* Prepare and save the plan */
9676 queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
9677 qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
9679 +#ifdef USE_REPLICATION
9680 + if (PGRcheckConstraintWithLock)
9681 +#endif /* USE_REPLICATION */
9682 strcat(querystr, " FOR SHARE OF x");
9684 /* Prepare and save the plan */
9686 queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
9687 qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
9689 +#ifdef USE_REPLICATION
9690 + if (PGRcheckConstraintWithLock)
9691 +#endif /* USE_REPLICATION */
9692 strcat(querystr, " FOR SHARE OF x");
9694 /* Prepare and save the plan */
9695 @@ -1428,6 +1453,9 @@
9696 queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
9697 qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
9699 +#ifdef USE_REPLICATION
9700 + if (PGRcheckConstraintWithLock)
9701 +#endif /* USE_REPLICATION */
9702 strcat(querystr, " FOR SHARE OF x");
9704 /* Prepare and save the plan */
9705 @@ -1607,6 +1635,9 @@
9706 queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
9707 qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
9709 +#ifdef USE_REPLICATION
9710 + if (PGRcheckConstraintWithLock)
9711 +#endif /* USE_REPLICATION */
9712 strcat(querystr, " FOR SHARE OF x");
9714 /* Prepare and save the plan */
9715 diff -aruN postgresql-8.2.4/src/backend/utils/adt/timestamp.c pgcluster-1.7.0rc7/src/backend/utils/adt/timestamp.c
9716 --- postgresql-8.2.4/src/backend/utils/adt/timestamp.c 2006-11-11 02:14:19.000000000 +0100
9717 +++ pgcluster-1.7.0rc7/src/backend/utils/adt/timestamp.c 2007-02-18 22:52:16.000000000 +0100
9719 #error -ffast-math is known to break this code
9722 +#ifdef USE_REPLICATION
9723 +#include "replicate.h"
9724 +#endif /* USE_REPLICATION */
9726 /* Set at postmaster start */
9727 TimestampTz PgStartTime;
9728 @@ -948,7 +951,11 @@
9732 +#ifdef USE_REPLICATION
9733 + PGR_GetTimeOfDay(&tp,NULL);
9735 gettimeofday(&tp, NULL);
9738 result = (TimestampTz) tp.tv_sec -
9739 ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY);
9740 diff -aruN postgresql-8.2.4/src/backend/utils/error/assert.c pgcluster-1.7.0rc7/src/backend/utils/error/assert.c
9741 --- postgresql-8.2.4/src/backend/utils/error/assert.c 2006-03-05 16:58:46.000000000 +0100
9742 +++ pgcluster-1.7.0rc7/src/backend/utils/error/assert.c 2007-02-18 22:52:16.000000000 +0100
9747 +#ifdef USE_REPLICATION
9748 +#include "replicate.h"
9749 +#endif /* USE_REPLICATION */
9752 * ExceptionalCondition - Handles the failure of an Assert()
9755 fileName, lineNumber);
9758 +#ifdef USE_REPLICATION
9759 + if ((PGR_Check_Lock.dest == TO_REPLICATION_SERVER ) &&
9760 + (PGR_Need_Notice == true))
9762 + PGR_Notice_Transaction_Query_Aborted();
9764 + if (PGR_Copy_Data_Need_Replicate)
9766 + PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
9768 +#endif /* USE_REPLICATION */
9770 #ifdef SLEEP_ON_ASSERT
9773 diff -aruN postgresql-8.2.4/src/backend/utils/error/elog.c pgcluster-1.7.0rc7/src/backend/utils/error/elog.c
9774 --- postgresql-8.2.4/src/backend/utils/error/elog.c 2006-11-28 13:54:42.000000000 +0100
9775 +++ pgcluster-1.7.0rc7/src/backend/utils/error/elog.c 2007-02-18 22:52:16.000000000 +0100
9777 #include "utils/memutils.h"
9778 #include "utils/ps_status.h"
9780 +#ifdef USE_REPLICATION
9781 +#include "replicate.h"
9782 +#endif /* USE_REPLICATION */
9784 /* Global variables */
9785 ErrorContextCallback *error_context_stack = NULL;
9786 @@ -314,6 +317,16 @@
9787 MemoryContext oldcontext;
9788 ErrorContextCallback *econtext;
9790 +#ifdef USE_REPLICATION
9792 + bool parse_error_flag = false;
9794 + if ((edata->message) && (strstr(edata->message,"parse error") != NULL))
9796 + parse_error_flag = true;
9798 +#endif /* USE_REPLICATION */
9801 CHECK_STACK_DEPTH();
9803 @@ -363,6 +376,24 @@
9804 * handler should reset it to something else soon.
9807 +#ifdef USE_REPLICATION
9808 + if (parse_error_flag)
9810 + if ((PGR_Check_Lock.dest != TO_FRONTEND) &&
9811 + (Transaction_Mode > 0))
9813 + PGR_Force_Replicate_Query();
9816 + if (PGR_Copy_Data_Need_Replicate)
9818 + PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
9820 + else if (PGR_Need_Notice == true)
9822 + PGR_Notice_Transaction_Query_Done();
9824 +#endif /* USE_REPLICATION */
9828 @@ -377,7 +408,16 @@
9829 * client_min_messages above FATAL, so don't look at output_to_client.
9831 if (elevel >= FATAL && whereToSendOutput == DestRemote)
9833 +#ifdef USE_REPLICATION
9834 + if (PGR_Copy_Data_Need_Replicate)
9836 + PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
9838 +#endif /* USE_REPLICATION */
9839 pq_endcopyout(true);
9843 /* Emit the message to the right places */
9845 @@ -417,6 +457,34 @@
9846 if (PG_exception_stack == NULL && whereToSendOutput == DestRemote)
9847 whereToSendOutput = DestNone;
9849 +#ifdef USE_REPLICATION
9850 + if (CurrentReplicateServer != NULL)
9852 + if (PGR_Need_Notice == true)
9854 + PGR_Notice_Transaction_Query_Aborted();
9856 + if (PGR_Copy_Data_Need_Replicate)
9858 + PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
9862 + if ((!PGR_Is_Replicated_Query ) &&
9863 + (PGR_Check_Lock.dest != TO_FRONTEND) &&
9864 + (PGR_Reliable_Mode_Wait == true) &&
9865 + (CurrentReplicateServer->response_mode == PGR_RELIABLE_MODE))
9867 + status = PGR_Recv_Trigger(0);
9871 + if (TransactionSock != -1)
9873 + close (TransactionSock);
9874 + TransactionSock = -1;
9876 +#endif /* USE_REPLICATION */
9878 * fflush here is just to improve the odds that we get to see the
9879 * error message, in case things are so hosed that proc_exit crashes.
9880 @@ -436,6 +504,34 @@
9882 if (elevel >= PANIC)
9884 +#ifdef USE_REPLICATION
9885 + if (CurrentReplicateServer != NULL)
9887 + if (PGR_Need_Notice == true)
9889 + PGR_Notice_Transaction_Query_Aborted();
9891 + if (PGR_Copy_Data_Need_Replicate)
9893 + PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
9897 + if ((!PGR_Is_Replicated_Query ) &&
9898 + (PGR_Check_Lock.dest != TO_FRONTEND) &&
9899 + (PGR_Reliable_Mode_Wait == true) &&
9900 + (CurrentReplicateServer->response_mode == PGR_RELIABLE_MODE))
9902 + status = PGR_Recv_Trigger(PGR_Replication_Timeout);
9906 + if (TransactionSock != -1)
9908 + close (TransactionSock);
9909 + TransactionSock = -1;
9911 +#endif /* USE_REPLICATION */
9913 * Serious crash time. Postmaster will observe SIGABRT process exit
9914 * status and kill the other backends too.
9915 diff -aruN postgresql-8.2.4/src/backend/utils/fmgr/fmgr.c pgcluster-1.7.0rc7/src/backend/utils/fmgr/fmgr.c
9916 --- postgresql-8.2.4/src/backend/utils/fmgr/fmgr.c 2006-10-04 02:30:01.000000000 +0200
9917 +++ pgcluster-1.7.0rc7/src/backend/utils/fmgr/fmgr.c 2007-02-18 22:52:16.000000000 +0100
9919 #include "utils/fmgrtab.h"
9920 #include "utils/lsyscache.h"
9921 #include "utils/syscache.h"
9922 +#ifdef USE_REPLICATION
9923 +#include "replicate.h"
9924 +#endif /* USE_REPLICATION */
9927 * Declaration for old-style function pointer type. This is now used only
9928 @@ -218,7 +221,12 @@
9929 ReleaseSysCache(procedureTuple);
9933 +#ifdef USE_REPLICATION
9934 + if (PGR_Replicate_Function_Call() != STATUS_OK)
9938 +#endif /* USE_REPLICATION */
9939 switch (procedureStruct->prolang)
9941 case INTERNALlanguageId:
9942 diff -aruN postgresql-8.2.4/src/backend/utils/mb/mbutils.c pgcluster-1.7.0rc7/src/backend/utils/mb/mbutils.c
9943 --- postgresql-8.2.4/src/backend/utils/mb/mbutils.c 2006-10-04 02:30:02.000000000 +0200
9944 +++ pgcluster-1.7.0rc7/src/backend/utils/mb/mbutils.c 2007-02-18 22:52:16.000000000 +0100
9946 #include "utils/memutils.h"
9947 #include "utils/syscache.h"
9949 +#ifdef USE_REPLICATION
9950 +#include "replicate.h"
9951 +#endif /* USE_REPLICATION */
9953 * We handle for actual FE and BE encoding setting encoding-identificator
9954 * and encoding-name too. It prevent searching and conversion from encoding
9955 @@ -442,6 +445,11 @@
9959 +#ifdef USE_REPLICATION
9960 + if (PGR_Is_Replicated_Query)
9961 + return (char *)src;
9962 +#endif /* USE_REPLICATION */
9964 if (is_client_to_server)
9966 src_encoding = ClientEncoding->encoding;
9967 diff -aruN postgresql-8.2.4/src/backend/utils/misc/guc.c pgcluster-1.7.0rc7/src/backend/utils/misc/guc.c
9968 --- postgresql-8.2.4/src/backend/utils/misc/guc.c 2006-11-29 15:50:07.000000000 +0100
9969 +++ pgcluster-1.7.0rc7/src/backend/utils/misc/guc.c 2007-02-18 22:52:16.000000000 +0100
9974 +#ifdef USE_REPLICATION
9975 +#include "replicate.h"
9976 +#endif /* USE_REPLICATION */
9978 #include "access/gin.h"
9979 #include "access/twophase.h"
9982 char *session_authorization_string;
9984 +#ifdef USE_REPLICATION
9985 +static void ShowReplicationServerConfig(DestReceiver *dest);
9986 +#endif /* USE_REPLICATION */
9989 * Displayable names for context types (enum GucContext)
9990 @@ -970,6 +976,40 @@
9991 &pg_krb_caseins_users,
9994 +#ifdef USE_REPLICATION
9996 + {"pgr_force_loadbalance", PGC_USERSET, CLIENT_CONN_STATEMENT,
9997 + gettext_noop("force loadbalance mode"),
10000 + &PGRforceLoadBalance,
10001 + false, NULL, NULL
10004 + {"check_constraint_with_lock", PGC_USERSET, CLIENT_CONN_STATEMENT,
10005 + gettext_noop("check constrain with lock"),
10008 + &PGRcheckConstraintWithLock,
10009 + false, NULL, NULL
10012 + {"auto_lock_table", PGC_USERSET, CLIENT_CONN_STATEMENT,
10013 + gettext_noop("auto lock table"),
10016 + &PGRautoLockTable,
10020 + {"not_replicate_prepared_select", PGC_USERSET, CLIENT_CONN_STATEMENT,
10021 + gettext_noop("not replicate the prepared as select"),
10024 + &PGRnotReplicatePreparedSelect,
10025 + false, NULL, NULL
10030 {"escape_string_warning", PGC_USERSET, COMPAT_OPTIONS_PREVIOUS,
10031 @@ -4830,6 +4870,10 @@
10033 if (pg_strcasecmp(name, "all") == 0)
10034 ShowAllGUCConfig(dest);
10035 +#ifdef USE_REPLICATION
10036 + else if (pg_strcasecmp(name, "replication_server") == 0)
10037 + ShowReplicationServerConfig(dest);
10040 ShowGUCConfigOption(name, dest);
10042 @@ -6512,5 +6556,72 @@
10046 +#ifdef USE_REPLICATION
10048 + * SHOW REPLICATION SERVER command
10051 +ShowReplicationServerConfig(DestReceiver *dest)
10053 + TupOutputState *tstate;
10054 + TupleDesc tupdesc;
10056 + char buffer[256];
10057 + ReplicateServerInfo *sp;
10059 + /* need a tuple descriptor representing four TEXT columns: status, host_name, port_num, recovery_port_num */
10060 + tupdesc = CreateTemplateTupleDesc(4, false);
10061 + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "status",
10062 + TEXTOID, -1, 0 );
10063 + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "host_name",
10064 + TEXTOID, -1, 0 );
10065 + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "port_num",
10066 + TEXTOID, -1, 0 );
10067 + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "recovery_port_num",
10068 + TEXTOID, -1, 0 );
10070 + /* prepare for projection of tuples */
10071 + tstate = begin_tup_output_tupdesc(dest, tupdesc);
10073 + sp = ReplicateServerData;
10074 + while (sp->useFlag != DATA_END) {
10075 + if (PGR_Check_Replicate_Server_Status(sp) == STATUS_ERROR) {
10076 + PGR_Set_Replication_Server_Status(sp, DATA_ERR);
10082 + sp = ReplicateServerData;
10083 + while (sp->useFlag != DATA_END) {
10084 + if (sp->useFlag == DATA_USE) {
10085 + values[0] = "ALIVE";
10086 + } else if (sp->useFlag == DATA_ERR) {
10087 + values[0] = "DEAD";
10088 + } else if (sp->useFlag == DATA_INIT) {
10089 + values[0] = "STANDBY";
10091 + values[0] = "UNKNOWN";
10094 + values[1] = (char *) sp->hostName;
10096 + snprintf(buffer, sizeof(buffer), "%d", sp->portNumber);
10097 + values[2] = pstrdup(buffer);
10099 + snprintf(buffer, sizeof(buffer), "%d", sp->recoveryPortNumber);
10100 + values[3] = pstrdup(buffer);
10102 + do_tup_output(tstate, values);
10104 + pfree(values[2]);
10105 + pfree(values[3]);
10110 + end_tup_output(tstate);
10112 +#endif /* USE_REPLICATION */
10114 #include "guc-file.c"
10115 diff -aruN postgresql-8.2.4/src/backend/utils/misc/postgresql.conf.sample pgcluster-1.7.0rc7/src/backend/utils/misc/postgresql.conf.sample
10116 --- postgresql-8.2.4/src/backend/utils/misc/postgresql.conf.sample 2007-01-20 22:42:06.000000000 +0100
10117 +++ pgcluster-1.7.0rc7/src/backend/utils/misc/postgresql.conf.sample 2007-02-18 22:52:16.000000000 +0100
10118 @@ -469,3 +469,12 @@
10119 #---------------------------------------------------------------------------
10121 #custom_variable_classes = '' # list of custom variable class names
10124 +#---------------------------------------------------------------------------
10126 +#---------------------------------------------------------------------------
10128 +# auto_lock_table = true
10129 +# check_constraint_with_lock = false
10130 +# not_replicate_prepared_select = false
10131 diff -aruN postgresql-8.2.4/src/bin/initdb/initdb.c pgcluster-1.7.0rc7/src/bin/initdb/initdb.c
10132 --- postgresql-8.2.4/src/bin/initdb/initdb.c 2006-10-04 20:58:08.000000000 +0200
10133 +++ pgcluster-1.7.0rc7/src/bin/initdb/initdb.c 2007-02-18 22:52:16.000000000 +0100
10134 @@ -122,6 +122,11 @@
10135 static int n_buffers = 50;
10136 static int n_fsm_pages = 20000;
10138 +#ifdef USE_REPLICATION
10139 +static char *cluster_conf_file;
10140 +static char *pgreplicate_conf_file;
10141 +static char *pglb_conf_file;
10142 +#endif /* USE_REPLICATION */
10144 * Warning messages for authentication methods
10146 @@ -1352,6 +1357,14 @@
10150 +#ifdef USE_REPLICATION
10151 + /* cluster.conf */
10152 + conflines = readfile(cluster_conf_file);
10153 + snprintf(path, sizeof(path), "%s/cluster.conf", pg_data);
10154 + writefile(path, conflines);
10155 + chmod(path, 0600);
10157 +#endif /* USE_REPLICATION */
10161 @@ -2712,6 +2725,11 @@
10162 set_input(&info_schema_file, "information_schema.sql");
10163 set_input(&features_file, "sql_features.txt");
10164 set_input(&system_views_file, "system_views.sql");
10165 +#ifdef USE_REPLICATION
10166 + set_input(&cluster_conf_file, "cluster.conf.sample");
10167 + set_input(&pgreplicate_conf_file, "pgreplicate.conf.sample");
10168 + set_input(&pglb_conf_file, "pglb.conf.sample");
10169 +#endif /* USE_REPLICATION */
10171 set_info_version();
10173 @@ -2730,6 +2748,16 @@
10174 desc_file, shdesc_file,
10176 hba_file, ident_file);
10177 +#ifdef USE_REPLICATION
10179 + "PGCLUSTER_VERSION=%s\n"
10180 + "CLUSTER_CONF_SAMPLE=%s\nPGREPLICATE_CONF_SAMPLE=%s\n"
10181 + "PGLB_CONF_SAMPLE=%s\n",
10182 + PGCLUSTER_VERSION,
10183 + cluster_conf_file,
10184 + pgreplicate_conf_file,
10186 +#endif /* USE_REPLICATION */
10190 @@ -2744,6 +2772,11 @@
10191 check_input(info_schema_file);
10192 check_input(features_file);
10193 check_input(system_views_file);
10194 +#ifdef USE_REPLICATION
10195 + check_input(cluster_conf_file);
10196 + check_input(pgreplicate_conf_file);
10197 + check_input(pglb_conf_file);
10198 +#endif /* USE_REPLICATION */
10202 diff -aruN postgresql-8.2.4/src/bin/pg_dump/pg_dump.c pgcluster-1.7.0rc7/src/bin/pg_dump/pg_dump.c
10203 --- postgresql-8.2.4/src/bin/pg_dump/pg_dump.c 2006-10-10 01:36:59.000000000 +0200
10204 +++ pgcluster-1.7.0rc7/src/bin/pg_dump/pg_dump.c 2007-02-18 22:52:16.000000000 +0100
10205 @@ -119,6 +119,9 @@
10206 /* flag to turn on/off dollar quoting */
10207 static int disable_dollar_quoting = 0;
10209 +#ifdef USE_REPLICATION
10210 + bool nonReplicate=true; /* when true, "set pgr_force_loadbalance to on" is issued before BEGIN. NOTE(review): defaults to true and the option handler in this patch also assigns true, so --non-replicate appears to change nothing -- confirm intended default */
10213 static void help(const char *progname);
10214 static void expand_schema_name_patterns(SimpleStringList *patterns,
10215 @@ -235,6 +238,9 @@
10216 {"column-inserts", no_argument, NULL, 'D'},
10217 {"host", required_argument, NULL, 'h'},
10218 {"ignore-version", no_argument, NULL, 'i'},
10219 +#ifdef USE_REPLICATION
10220 + {"non-replicate", no_argument ,NULL, 'r'},
10222 {"no-reconnect", no_argument, NULL, 'R'},
10223 {"oids", no_argument, NULL, 'o'},
10224 {"no-owner", no_argument, NULL, 'O'},
10225 @@ -368,6 +374,11 @@
10229 +#ifdef USE_REPLICATION
10231 + nonReplicate = true;
10235 /* no-op, still accepted for backwards compatibility */
10237 @@ -553,6 +564,11 @@
10239 * Start serializable transaction to dump consistent data.
10241 +#ifdef USE_REPLICATION
10242 + if(nonReplicate) {
10243 + do_sql_command(g_conn, "set pgr_force_loadbalance to on");
10245 +#endif /* USE_REPLICATION */
10246 do_sql_command(g_conn, "BEGIN");
10248 do_sql_command(g_conn, "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE");
10249 @@ -751,6 +767,9 @@
10250 printf(_(" -o, --oids include OIDs in dump\n"));
10251 printf(_(" -O, --no-owner skip restoration of object ownership\n"
10252 " in plain text format\n"));
10253 +#ifdef USE_REPLICATION
10254 + printf(_(" -r, --non-replicate No queries replicate. Available only in pgcluster.\n"));
10256 printf(_(" -s, --schema-only dump only the schema, no data\n"));
10257 printf(_(" -S, --superuser=NAME specify the superuser user name to use in\n"
10258 " plain text format\n"));
10259 diff -aruN postgresql-8.2.4/src/bin/pg_dump/pg_dumpall.c pgcluster-1.7.0rc7/src/bin/pg_dump/pg_dumpall.c
10260 --- postgresql-8.2.4/src/bin/pg_dump/pg_dumpall.c 2006-11-21 23:19:46.000000000 +0100
10261 +++ pgcluster-1.7.0rc7/src/bin/pg_dump/pg_dumpall.c 2007-02-18 22:52:16.000000000 +0100
10263 {"oids", no_argument, NULL, 'o'},
10264 {"no-owner", no_argument, NULL, 'O'},
10265 {"port", required_argument, NULL, 'p'},
10266 +#ifdef USE_REPLICATION
10267 + {"non-replicate", no_argument ,NULL, 'r'},
10269 {"password", no_argument, NULL, 'W'},
10270 {"schema-only", no_argument, NULL, 's'},
10271 {"superuser", required_argument, NULL, 'S'},
10272 @@ -161,7 +164,7 @@
10274 pgdumpopts = createPQExpBuffer();
10276 - while ((c = getopt_long(argc, argv, "acdDgh:ioOp:sS:U:vWxX:", long_options, &optindex)) != -1)
10277 + while ((c = getopt_long(argc, argv, "acdDgh:ioOp:rsS:U:vWxX:", long_options, &optindex)) != -1)
10281 @@ -215,6 +218,11 @@
10285 +#ifdef USE_REPLICATION
10287 + appendPQExpBuffer(pgdumpopts, " --non-replicate"); /* forward the long form: it is the spelling this patch registers in pg_dump's long_options */
10289 +#endif /* USE_REPLICATION */
10291 schema_only = true;
10292 appendPQExpBuffer(pgdumpopts, " -s");
10293 @@ -397,6 +405,9 @@
10294 printf(_("\nConnection options:\n"));
10295 printf(_(" -h, --host=HOSTNAME database server host or socket directory\n"));
10296 printf(_(" -p, --port=PORT database server port number\n"));
10297 +#ifdef USE_REPLICATION
10298 + printf(_(" -r, --non-replicate No queries replicate. Available only in pgcluster.\n"));
10299 +#endif /* USE_REPLICATION */
10300 printf(_(" -U, --username=NAME connect as specified database user\n"));
10301 printf(_(" -W, --password force password prompt (should happen automatically)\n"));
10303 diff -aruN postgresql-8.2.4/src/include/commands/prepare.h pgcluster-1.7.0rc7/src/include/commands/prepare.h
10304 --- postgresql-8.2.4/src/include/commands/prepare.h 2006-10-04 02:30:08.000000000 +0200
10305 +++ pgcluster-1.7.0rc7/src/include/commands/prepare.h 2007-02-18 22:52:16.000000000 +0100
10307 extern bool PreparedStatementReturnsTuples(PreparedStatement *stmt);
10308 extern List *FetchPreparedStatementTargetList(PreparedStatement *stmt);
10310 +#ifdef USE_REPLICATION
10311 +extern bool PGR_is_select_prepared_statement(PrepareStmt *stmt);
10312 +#endif /* USE_REPLICATION */
10314 #endif /* PREPARE_H */
10315 diff -aruN postgresql-8.2.4/src/include/pg_config.h.in pgcluster-1.7.0rc7/src/include/pg_config.h.in
10316 --- postgresql-8.2.4/src/include/pg_config.h.in 2006-11-06 04:44:38.000000000 +0100
10317 +++ pgcluster-1.7.0rc7/src/include/pg_config.h.in 2007-02-18 22:52:17.000000000 +0100
10318 @@ -673,3 +673,7 @@
10319 /* Define to empty if the keyword `volatile' does not work. Warning: valid
10320 code using `volatile' can become incorrect without. Disable with care. */
10323 +/* PGCluster version */
10324 +#undef PGCLUSTER_VERSION
10326 diff -aruN postgresql-8.2.4/src/include/replicate.h pgcluster-1.7.0rc7/src/include/replicate.h
10327 --- postgresql-8.2.4/src/include/replicate.h 1970-01-01 01:00:00.000000000 +0100
10328 +++ pgcluster-1.7.0rc7/src/include/replicate.h 2007-02-18 22:52:17.000000000 +0100
10330 +/*-------------------------------------------------------------------------
10333 + * Primary include file for replicate server .c files
10335 + * This should be the first file included by replicate modules.
10337 + *-------------------------------------------------------------------------
10339 +#ifndef REPLICATE_H
10340 +#define REPLICATE_H
10342 +#ifndef _SYS_TIME_H
10343 +#include <sys/time.h>
10345 +#include "tcop/dest.h"
10346 +#include "storage/proc.h"
10347 +#include "lib/stringinfo.h"
10348 +#include "replicate_com.h"
10350 +#define STAND_ALONE_TAG "When_Stand_Alone"
10351 +#define NOT_REPLICATE_INFO_TAG "Not_Replicate_Info"
10352 +#define DB_NAME_TAG "DB_Name"
10353 +#define TABLE_NAME_TAG "Table_Name"
10354 +#define RSYNC_PATH_TAG "Rsync_Path"
10355 +#define RSYNC_OPTION_TAG "Rsync_Option"
10356 +#define RSYNC_COMPRESS_TAG "Rsync_Compress"
10357 +#define PG_DUMP_PATH_TAG "Pg_Dump_Path"
10359 +#define CLUSTER_CONF_FILE "cluster.conf"
10360 +#define DEFAULT_RSYNC "/usr/bin/rsync"
10361 +#define DEFAULT_PG_DUMP "/usr/local/pgsql/bin/pg_dump"
10362 +#define NOT_SESSION_AUTHORIZATION (0)
10363 +#define SESSION_AUTHORIZATION_BEGIN (1)
10364 +#define SESSION_AUTHORIZATION_END (2)
10366 +#define READ_ONLY_IF_STAND_ALONE "read_only"
10367 +#define READ_WRITE_IF_STAND_ALONE "read_write"
10368 +#define PERMIT_READ_ONLY (1)
10369 +#define PERMIT_READ_WRITE (2)
10370 +#define STATUS_REPLICATED (3)
10371 +#define STATUS_CONTINUE (4)
10372 +#define STATUS_CONTINUE_SELECT (5)
10373 +#define STATUS_NOT_REPLICATE (6)
10374 +#define STATUS_SKIP_QUERY (7)
10375 +#define STATUS_RECOVERY (11)
10376 +#define STATUS_REPLICATION_ABORT (98)
10377 +#define STATUS_DEADLOCK_DETECT (99)
10379 +#define TO_REPLICATION_SERVER (0)
10380 +#define TO_FRONTEND (1)
10382 +#define PGR_DEADLOCK_DETECTION_MSG "deadlock detected!"
10383 +#define PGR_REPLICATION_ABORT_MSG "replication aborted!"
10384 +#define SKIP_QUERY_1 "begin; select getdatabaseencoding(); commit"
10385 +#define SKIP_QUERY_2 "BEGIN; SELECT usesuper FROM pg_catalog.pg_user WHERE usename = '%s'; COMMIT"
10386 +#define SKIP_QUERY_3 "SET autocommit TO 'on'"
10387 +#define SKIP_QUERY_4 "SET search_path = public"
10388 +#define SYS_QUERY_1 "set pgr_force_loadbalance to on"
10390 +#define PGR_1ST_RECOVERY (1)
10391 +#define PGR_2ND_RECOVERY (2)
10392 +#define PGR_COLD_RECOVERY (1)
10393 +#define PGR_HOT_RECOVERY (2)
10394 +#define PGR_WITHOUT_BACKUP (3)
10396 +#define PGR_MESSAGE_OTHER (0)
10397 +#define PGR_MESSAGE_SELECT (1)
10398 +#define PGR_MESSAGE_PREPARE (2)
10399 +#define PGR_MESSAGE_EXECUTE (3)
10400 +#define PGR_MESSAGE_DEALLOCATE (4)
10404 + bool is_stand_alone;
10406 +} PGR_Stand_Alone_Type;
10410 + char db_name[DBNAME_MAX_LENGTH];
10411 + char table_name[TABLENAME_MAX_LENGTH];
10412 +} PGR_Not_Replicate_Type;
10416 + bool check_lock_conflict;
10418 + int status_lock_conflict;
10420 +} PGR_Check_Lock_Type;
10424 + char * query_string;
10429 +} PGR_Retry_Query_Type;
10432 +/* replication log */
10434 + uint32_t PGR_Replicate_ID;
10435 + uint32_t PGR_Request_ID;
10436 +} PGR_ReplicationLog_Info;
10441 + char cryptSalt[2];
10442 +} PGR_Password_Info;
10444 +extern char * Query_String;
10445 +extern int TransactionQuery;
10446 +extern int Transaction_Mode;
10447 +extern bool PGR_Noticed_Abort;
10448 +extern bool Session_Authorization_Mode;
10449 +extern bool Create_Temp_Table_Mode;
10450 +extern int RecoveryPortNumber;
10451 +extern char * RsyncPath;
10452 +extern char * RsyncOption;
10453 +extern bool RsyncCompress;
10454 +extern char * PgDumpPath;
10455 +extern int TransactionSock;
10456 +extern ReplicateNow * ReplicateCurrentTime;
10457 +extern CopyData * PGRCopyData;
10458 +extern bool PGR_Copy_Data_Need_Replicate;
10459 +extern PGR_Stand_Alone_Type * PGR_Stand_Alone;
10460 +extern PGR_Not_Replicate_Type * PGR_Not_Replicate;
10461 +extern int PGR_Not_Replicate_Rec_Num;
10462 +extern bool autocommit;
10463 +extern bool PGR_Is_Replicated_Query;
10464 +extern PGR_Check_Lock_Type PGR_Check_Lock;
10465 +extern int PGR_Sock_To_Replication_Server;
10466 +extern bool PGR_Need_Notice;
10467 +extern bool PGR_Lock_Noticed;
10468 +extern bool PGR_Recovery_Option;
10469 +extern int PGR_recovery_mode;
10470 +extern ReplicateServerInfo * CurrentReplicateServer;
10471 +extern ReplicateServerInfo * LastReplicateServer;
10472 +extern char * PGRSelfHostName;
10473 +extern int PGR_Pending_Sem_Num;
10474 +extern int PGR_Response_Mode;
10475 +extern bool PGR_Reliable_Mode_Wait;
10476 +extern PGR_Retry_Query_Type PGR_Retry_Query;
10477 +extern bool needToUpdateReplicateIdOnNextQueryIsDone;
10478 +extern PGR_ReplicationLog_Info ReplicationLog_Info;
10479 +extern bool PGR_Not_Replication_Query;
10480 +extern bool PGR_Is_Sync_OID;
10481 +extern PGR_Password_Info * PGR_password;
10483 +/* backend/utils/misc/guc.c */
10484 +extern bool PGRforceLoadBalance;
10485 +extern bool PGRcheckConstraintWithLock;
10486 +extern bool PGRautoLockTable;
10487 +extern bool PGRnotReplicatePreparedSelect;
10489 +/* in backend/libpq/replicate.c */
10490 +extern int PGR_Init_Replicate_Server_Data(void);
10491 +extern int PGR_Set_Replicate_Server_Socket(void);
10492 +extern int PGR_get_replicate_server_socket ( ReplicateServerInfo * sp , int socket_type );
10493 +extern ReplicateServerInfo * PGR_get_replicate_server_info(void);
10494 +extern ReplicateServerInfo * PGR_check_replicate_server_info(void);
10495 +extern char * PGR_Send_Replicate_Command(char * query_string, int query_len, char cmdSts ,char cmdType);
10496 +extern bool PGR_Is_Replicated_Command(char * query);
10497 +extern int Xlog_Check_Replicate(int operation);
10498 +extern int PGR_Replicate_Function_Call(void);
10499 +extern void PGR_delete_shm(void);
10500 +extern int PGR_replication(char * query_string, CommandDest dest, Node *parsetree, const char * commandTag);
10501 +extern bool PGR_Is_System_Command(char * query);
10502 +extern int PGR_Call_System_Command(char * command);
10503 +extern int PGR_GetTimeOfDay(struct timeval *tp,struct timezone *tpz);
10504 +extern long PGR_Random(void);
10505 +extern int PGR_Set_Current_Time(char * sec, char * usec);
10506 +extern int PGR_Send_Copy(CopyData * copy, int end);
10507 +extern CopyData * PGR_Set_Copy_Data(CopyData * copy, char *str, int len, int end);
10508 +extern char * PGR_scan_terminate( char * str);
10509 +extern bool PGR_Is_Stand_Alone(void);
10510 +extern void PGR_Send_Message_To_Frontend(char * msg);
10511 +extern void PGR_Notice_Transaction_Query_Done(void);
10512 +extern void PGR_Notice_Transaction_Query_Aborted(void);
10513 +extern int PGRsend_system_command(char cmdSts, char cmdType);
10514 +extern int PGR_Notice_Conflict(void);
10515 +extern int PGR_Recv_Trigger (int user_timeout);
10516 +extern void PGR_Set_Replication_Server_Status( ReplicateServerInfo * sp, int status);
10517 +extern int PGR_Is_Skip_Replication(char * query);
10518 +extern bool PGR_Did_Commit_Transaction(void);
10519 +extern int PGR_Set_Transaction_Mode(int mode,const char * commandTag);
10520 +extern char * PGR_Remove_Comment(char * str);
10521 +extern void PGR_Force_Replicate_Query(void);
10522 +extern void PGR_Notice_DeadLock(void);
10523 +extern void PGR_Set_Cluster_Status(int status);
10524 +extern int PGR_Get_Cluster_Status(void);
10525 +extern int PGR_Check_Replicate_Server_Status(ReplicateServerInfo * sp);
10526 +extern int PGR_lo_import(char * filename);
10527 +extern int PGR_lo_create(int flags);
10528 +extern int PGR_lo_open(Oid lobjId,int32 mode);
10529 +extern int PGR_lo_close(int32 fd);
10530 +extern int PGR_lo_write(int fd, char *buf, int len);
10531 +extern int PGR_lo_lseek(int32 fd, int32 offset, int32 whence);
10532 +extern int PGR_lo_unlink(Oid lobjId);
10533 +extern uint32_t PGRget_replication_id(void);
10534 +extern Oid PGRGetNewObjectId(Oid last_id);
10535 +extern int PGR_Send_Input_Message(char cmdType,StringInfo input_message);
10536 +extern bool PGR_is_select_prepare_query(void);
10537 +extern char * PGR_get_md5salt(char * md5Salt, char * string);
10538 +extern int PGR_recv_replicate_result(int sock,char * result,int user_timeout);
10540 +/* in backend/libpq/recovery.c */
10541 +extern int PGR_Master_Main(void);
10542 +extern int PGR_Recovery_Main(int mode);
10543 +extern int PGR_recovery_error_send(void);
10544 +extern int PGR_recovery_finish_send(void);
10545 +extern int PGR_recovery_queue_data_req(void);
10547 +/* in backend/libpq/lifecheck.c */
10548 +extern int PGR_Lifecheck_Main(void);
10550 +/* in backend/access/transam/xact.c */
10551 +extern void PGR_Reload_Start_Time(void);
10552 +#endif /* REPLICATE_H */
10553 diff -aruN postgresql-8.2.4/src/include/replicate_com.h pgcluster-1.7.0rc7/src/include/replicate_com.h
10554 --- postgresql-8.2.4/src/include/replicate_com.h 1970-01-01 01:00:00.000000000 +0100
10555 +++ pgcluster-1.7.0rc7/src/include/replicate_com.h 2007-03-01 16:27:15.000000000 +0100
10557 +/*-------------------------------------------------------------------------
10560 + * Primary include file for replicate server .c files
10562 + * This should be the first file included by replicate modules.
10564 + *-------------------------------------------------------------------------
10566 +#ifndef REPLICATE_COM_H
10567 +#define REPLICATE_COM_H 1
10569 +#ifndef _SYS_TYPES_H
10570 +#include <sys/types.h>
10572 +#ifndef _INTTYPES_H
10573 +#include <inttypes.h>
10575 +#ifndef _NETINET_IN_H
10576 +#include <netinet/in.h>
10580 +#include "pg_config.h"
10582 +/* default values */
10583 +#define DEFAULT_PGLB_PORT (6001)
10584 +#define DEFAULT_PGLB_RECOVERY_PORT (6101)
10585 +#define DEFAULT_PGLB_LIFECHECK_PORT (6201)
10586 +#define DEFAULT_CLUSTER_PORT (5432)
10587 +#define DEFAULT_CLUSTER_RECOVERY_PORT (7101)
10588 +#define DEFAULT_CLUSTER_LIFECHECK_PORT (7201)
10589 +#define DEFAULT_PGRP_PORT (8001)
10590 +#define DEFAULT_PGRP_RECOVERY_PORT (8101)
10591 +#define DEFAULT_PGRP_LIFECHECK_PORT (8201)
10592 +#define DEFAULT_PGRP_RLOG_PORT (8301)
10593 +#define MAX_DB_SERVER (32)
10595 +/**************************
10597 +* Packet ID definition *
10599 +***************************/
10600 +/*=========================
10601 + Replication packet id
10602 +===========================*/
10603 +#define CMD_SYS_REPLICATE 'R'
10604 +/*-------------------------
10606 +--------------------------*/
10607 +#define CMD_STS_SET_SESSION_AUTHORIZATION 'S'
10608 +#define CMD_STS_TRANSACTION 'T'
10609 +#define CMD_STS_TEMP_TABLE 'E'
10610 +#define CMD_STS_QUERY 'Q'
10611 +#define CMD_STS_OTHER 'O'
10613 +#define CMD_TYPE_VACUUM 'V'
10614 +#define CMD_TYPE_ANALYZE 'A'
10615 +#define CMD_TYPE_REINDEX 'N'
10616 +#define CMD_TYPE_SELECT 'S'
10617 +#define CMD_TYPE_EXPLAIN 'X'
10618 +#define CMD_TYPE_SET 'T'
10619 +#define CMD_TYPE_RESET 't'
10620 +#define CMD_TYPE_INSERT 'I'
10621 +#define CMD_TYPE_DELETE 'D'
10622 +#define CMD_TYPE_EXECUTE 'U' /* NOTE(review): same byte as CMD_TYPE_UPDATE on the next line; the two cannot be told apart by value -- confirm this is intended */
10623 +#define CMD_TYPE_UPDATE 'U'
10624 +#define CMD_TYPE_BEGIN 'B'
10625 +#define CMD_TYPE_COMMIT 'E'
10626 +#define CMD_TYPE_ROLLBACK 'R'
10627 +#define CMD_TYPE_CONNECTION_CLOSE 'x'
10628 +#define CMD_TYPE_SESSION_AUTHORIZATION_BEGIN 'a'
10629 +#define CMD_TYPE_SESSION_AUTHORIZATION_END 'b'
10630 +#define CMD_TYPE_SAVEPOINT 's'
10631 +#define CMD_TYPE_ROLLBACK_TO_SAVEPOINT 'r'
10632 +#define CMD_TYPE_RELEASE_SAVEPOINT 'l'
10633 +#define CMD_TYPE_OTHER 'O'
10635 +/*=========================
10636 + System call packet id
10637 +===========================*/
10638 +#define CMD_SYS_CALL 'S'
10639 +#define CMD_SYS_PREREPLICATE 'Z'
10641 +#define CMD_STS_NOTICE 'N'
10642 +#define CMD_STS_RESPONSE 'R'
10643 +#define CMD_STS_TRANSACTION_ABORT 'A'
10644 +#define CMD_STS_QUERY_SUSPEND 'P'
10645 +#define CMD_STS_QUERY_DONE 'D'
10647 +#define CMD_TYPE_COMMIT_CONFIRM 'c'
10648 +#define CMD_TYPE_QUERY_CONFIRM 'q'
10649 +#define CMD_TYPE_DEADLOCK_DETECT 'd'
10650 +#define CMD_TYPE_FRONTEND_CLOSED 'x'
10652 +/*----------------------------
10654 +------------------------------*/
10655 +#define CMD_STS_COPY 'C'
10657 +#define CMD_TYPE_COPY 'C'
10658 +#define CMD_TYPE_COPY_DATA 'd'
10659 +#define CMD_TYPE_COPY_DATA_END 'e'
10661 +/*----------------------------
10663 +------------------------------*/
10664 +#define CMD_STS_LARGE_OBJECT 'L'
10666 +#define CMD_TYPE_LO_IMPORT 'I'
10667 +#define CMD_TYPE_LO_CREATE 'C'
10668 +#define CMD_TYPE_LO_OPEN 'O'
10669 +#define CMD_TYPE_LO_WRITE 'W'
10670 +#define CMD_TYPE_LO_LSEEK 'S'
10671 +#define CMD_TYPE_LO_CLOSE 'X'
10672 +#define CMD_TYPE_LO_UNLINK 'U'
10674 +/*-------------------------
10675 + Prepare/Params Query
10676 +--------------------------*/
10677 +#define CMD_STS_PREPARE 'P'
10679 +#define CMD_TYPE_P_PARSE 'P'
10680 +#define CMD_TYPE_P_BIND 'B'
10681 +#define CMD_TYPE_P_EXECUTE 'E'
10682 +#define CMD_TYPE_P_FASTPATH 'F'
10683 +#define CMD_TYPE_P_CLOSE 'C'
10684 +#define CMD_TYPE_P_DESCRIBE 'D'
10685 +#define CMD_TYPE_P_FLUSH 'H'
10686 +#define CMD_TYPE_P_SYNC 'S'
10688 +/*=========================
10689 + Lifecheck packet id
10690 +===========================*/
10691 +#define CMD_SYS_LIFECHECK 'W'
10692 +#define CMD_STS_LOADBALANCER 'A'
10693 +#define CMD_STS_CLUSTER 'B'
10694 +#define CMD_STS_REPLICATOR 'C'
10696 +#define PGR_TRANSACTION_SOCKET (0)
10697 +#define PGR_QUERY_SOCKET (1)
10699 +#define DATA_FREE (0)
10700 +#define DATA_INIT (1)
10701 +#define DATA_USE (2)
10702 +#define DATA_ERR (90)
10703 +#define DATA_END (-1)
10704 +#define HOSTNAME_MAX_LENGTH (128)
10705 +#define DBNAME_MAX_LENGTH (128)
10706 +#define USERNAME_MAX_LENGTH (128)
10707 +#define PASSWORD_MAX_LENGTH (128)
10708 +#define TABLENAME_MAX_LENGTH (128)
10709 +#define PATH_MAX_LENGTH (256)
10710 +#define MAX_SERVER_NUM (128)
10711 +#define MAX_RETRY_TIMES (3)
10712 +#define MAX_SOCKET_QUEUE (100000)
10713 +#define TRANSACTION_ERROR_RESULT "TRANSACTION_ERROR"
10714 +#define REPLICATE_SERVER_SHM_KEY (1020)
10715 +/* target -> replicate */
10716 +#define RECOVERY_PREPARE_REQ (1)
10717 +/* replicate -> master */
10718 +#define RECOVERY_PGDATA_REQ (2)
10719 +/* master -> replicate */
10720 +#define RECOVERY_PGDATA_ANS (3)
10721 +/* replicate -> target */
10722 +#define RECOVERY_PREPARE_ANS (4)
10723 +/* target -> replicate */
10724 +#define RECOVERY_START_REQ (5)
10725 +/* replicate -> master */
10726 +#define RECOVERY_FSYNC_REQ (6)
10727 +/* master -> replicate */
10728 +#define RECOVERY_FSYNC_ANS (7)
10729 +/* replicate -> target */
10730 +#define RECOVERY_START_ANS (8)
10731 +/* target -> replicate */
10732 +#define RECOVERY_QUEUE_DATA_REQ (9)
10733 +/* replicate -> target */
10734 +#define RECOVERY_QUEUE_DATA_ANS (10)
10735 +/* target -> replicate */
10736 +#define RECOVERY_FINISH (11)
10738 +#define RECOVERY_ERROR_OCCUPIED (100)
10739 +#define RECOVERY_ERROR_CONNECTION (101)
10740 +#define RECOVERY_ERROR_TARGET_ONLY (102)
10741 +#define RECOVERY_ERROR_ANS (200)
10743 +/* lifecheck ask from cluster db */
10744 +#define LIFECHECK_ASK_FROM_CLUSTER (1)
10745 +/* lifecheck response from replication server */
10746 +#define LIFECHECK_RES_FROM_REPLICATOR (2)
10747 +/* lifecheck ask from replication server */
10748 +#define LIFECHECK_ASK_FROM_REPLICATOR (3)
10749 +/* lifecheck response from cluster db */
10750 +#define LIFECHECK_RES_FROM_CLUSTER (4)
10752 +#define REPLICATION_SERVER_INFO_TAG "Replicate_Server_Info"
10753 +#define HOST_NAME_TAG "Host_Name"
10754 +#define PORT_TAG "Port"
10755 +#define RECOVERY_PORT_TAG "Recovery_Port"
10756 +#define LIFECHECK_PORT_TAG "LifeCheck_Port"
10757 +#define TIMEOUT_TAG "Replication_Timeout"
10758 +#define LIFECHECK_TIMEOUT_TAG "LifeCheck_Timeout"
10759 +#define LIFECHECK_INTERVAL_TAG "LifeCheck_Interval"
10761 +#define RECOVERY_INIT (0)
10762 +#define RECOVERY_PREPARE_START (1)
10763 +#define RECOVERY_START_1 (2)
10764 +#define RECOVERY_CLEARED (3)
10765 +#define RECOVERY_WAIT_CLEAN (10)
10766 +#define RECOVERY_ERROR (99)
10768 +/* response mode */
10769 +#define PGR_FAST_MODE (0)
10770 +#define PGR_NORMAL_MODE (1)
10771 +#define PGR_RELIABLE_MODE (2)
10773 +#define RECOVERY_TIMEOUT (600)
10774 +#ifndef COMPLETION_TAG_BUFSIZE
10775 +#define COMPLETION_TAG_BUFSIZE (128)
10778 +/* replicate log type */
10779 +#define FROM_R_LOG_TYPE (1)
10780 +#define FROM_C_DB_TYPE (2)
10781 +#define CONNECTION_SUSPENDED_TYPE (3)
10783 +#define PGR_SYSTEM_COMMAND_FUNC "PGR_SYSTEM_COMMAND_FUNCTION"
10784 +#define PGR_STARTUP_REPLICATION_SERVER_FUNC_NO (1)
10785 +#define PGR_CHANGE_REPLICATION_SERVER_FUNC_NO (2)
10786 +#define PGR_SET_CURRENT_TIME_FUNC_NO (3)
10787 +#define PGR_NOTICE_DEADLOCK_DETECTION_FUNC_NO (4)
10788 +#define PGR_TRANSACTION_CONFIRM_ANSWER_FUNC_NO (5)
10789 +#define PGR_RELIABLE_MODE_DONE_FUNC_NO (6)
10790 +#define PGR_NOTICE_ABORT_FUNC_NO (7)
10791 +#define PGR_SET_CURRENT_REPLICATION_QUERY_ID_NO (8)
10792 +#define PGR_QUERY_CONFIRM_ANSWER_FUNC_NO (9)
10793 +#define PGR_GET_OID_FUNC_NO (10)
10794 +#define PGR_SET_OID_FUNC_NO (11)
10796 +#define PGR_CMD_ARG_NUM (10)
10797 +#define PGR_LOCK_CONFLICT_NOTICE_CMD "PGR_LOCK_CONFLICT_NOTICE_CMD"
10798 +#define PGR_DEADLOCK_DETECT_NOTICE_CMD "PGR_DEADLOCK_DETECT_NOTICE_CMD"
10799 +#define PGR_QUERY_DONE_NOTICE_CMD "PGR_QUERY_DONE_NOTICE_CMD"
10800 +#define PGR_QUERY_ABORTED_NOTICE_CMD "PGR_QUERY_ABORTED_NOTICE_CMD"
10801 +#define PGR_RETRY_LOCK_QUERY_CMD "PGR_RETRY_LOCK_QUERY_CMD"
10802 +#define PGR_NOT_YET_REPLICATE_NOTICE_CMD "PGR_NOT_YET_REPLICATE_NOTICE_CMD"
10803 +#define PGR_ALREADY_REPLICATED_NOTICE_CMD "PGR_ALREADY_REPLICATED_NOTICE_CMD"
10804 +#define PGR_NOT_YET_COMMIT (0)
10805 +#define PGR_ALREADY_COMMITTED (1)
10807 +#define COPYBUFSIZ (8192)
10808 +#define MAX_WORDS (24)
10809 +#define MAX_WORD_LETTERS (48)
10810 +#define PGR_MESSAGE_BUFSIZE (128)
10811 +#define INT_LENGTH (12)
10812 +#define PGR_MAX_COUNTER (0x0FFFFFFF)
10813 +#define PGR_GET_OVER_FLOW_FILTER (0xF0000000)
10814 +#define PGR_GET_DATA_FILTER (0x0FFFFFFF)
10815 +#define PGR_SET_OVER_FLOW (0x10000000)
10816 +#define PGR_MIN_COUNTER (0x0000000F)
10818 +#define STRCMP(x,y) (strncmp(x,y,strlen(y)))
10820 +/* life check target */
10821 +#define SYN_TO_LOAD_BALANCER (0)
10822 +#define SYN_TO_CLUSTER_DB (1)
10823 +#define SYN_TO_REPLICATION_SERVER (2)
10824 +#define LIFE_CHECK_TRY_COUNT (2)
10825 +#define LIFE_CHECK_STOP (0)
10826 +#define LIFE_CHECK_START (1)
10828 +#ifndef HAVE_UNION_SEMUN
10831 + struct semid_ds *buf;
10832 + unsigned short int *array;
10833 + struct seminfo *__buf;
10837 +typedef struct ReplicateHeaderType
10850 + E:commit/rollback/end
10854 + -- kind of replication log --
10855 + 1: send from replication log
10856 + 2: send from cluster db (should be retry)
10857 + 3: connection suspended
10861 + uint32_t query_size;
10862 + char from_host[HOSTNAME_MAX_LENGTH];
10863 + char dbName[DBNAME_MAX_LENGTH];
10864 + char userName[USERNAME_MAX_LENGTH];
10865 + struct timeval tv;
10866 + uint32_t query_id;
10867 + int isAutoCommit; /* 0 if autocommit is off. 1 if autocommit is on */
10868 + uint32_t request_id;
10869 + uint32_t replicate_id;
10870 + char password[PASSWORD_MAX_LENGTH];
10872 + char cryptSalt[2];
10873 + char dummySalt[2];
10874 +} ReplicateHeader;
10876 +typedef struct RecoveryPacketType
10878 + uint16_t packet_no; /*
10879 + 1:start recovery prepare
10882 + 4:send master info
10883 + 5:start queueing query
10889 + uint16_t max_connect;
10891 + uint16_t recoveryPort;
10892 + char hostName[HOSTNAME_MAX_LENGTH];
10893 + char pg_data[PATH_MAX_LENGTH];
10894 + char userName[USERNAME_MAX_LENGTH];
10908 +typedef struct ReplicateServerInfoType
10910 + uint32_t useFlag;
10911 + char hostName[HOSTNAME_MAX_LENGTH];
10912 + uint16_t portNumber;
10913 + uint16_t recoveryPortNumber;
10914 + uint16_t lifecheckPortNumber;
10915 + uint16_t RLogPortNumber;
10917 + uint32_t rlog_sock;
10918 + uint32_t replicate_id;
10919 + uint16_t response_mode;
10920 + uint16_t retry_count;
10921 +} ReplicateServerInfo;
10924 +typedef struct ReplicateNowType
10926 + uint32_t replicate_id;
10932 + struct timeval tp;
10935 +typedef struct CopyDataType
10938 + char copy_data[COPYBUFSIZ];
10941 +typedef struct ClusterDBInfoType
10960 +extern ConfDataType * ConfData_Top;
10961 +extern ConfDataType * ConfData_End;
10962 +extern ReplicateServerInfo * ReplicateServerData;
10963 +extern ClusterDBInfo * ClusterDBData;
10964 +extern int ReplicateServerShmid;
10965 +extern int ClusterDBShmid;
10966 +extern bool PGR_Under_Replication_Server;
10967 +extern int PGR_Replication_Timeout;
10968 +extern int PGR_Lifecheck_Timeout;
10969 +extern int PGR_Lifecheck_Interval;
10971 +/* in backend/libpq/replicate_com.c */
10972 +extern int PGR_Create_Socket_Connect(int * fdP, char * hostName , unsigned short portNumber);
10973 +extern void PGR_Close_Sock(int * sock);
10974 +extern int PGR_Create_Socket_Bind(int * fdP, char * hostName , unsigned short portNumber);
10975 +extern int PGR_Create_Acception(int fd, int * sockP, char * hostName , unsigned short portNumber);
10976 +extern int PGR_Free_Conf_Data(void);
10977 +extern int PGR_Get_Conf_Data(char * dir , char * fname);
10978 +extern void PGRset_recovery_packet_no(RecoveryPacket * packet, int packet_no);
10979 +extern unsigned int PGRget_ip_by_name(char * host);
10980 +extern int PGRget_time_value(char *str);
10982 +extern void PGRwrite_log_file(FILE * fp, const char * fmt,...);
10983 +extern void show_debug(const char * fmt,...);
10984 +extern void show_error(const char * fmt,...);
10988 +#endif /* REPLICATE_COM_H */
10989 diff -aruN postgresql-8.2.4/src/include/storage/lmgr.h pgcluster-1.7.0rc7/src/include/storage/lmgr.h
10990 --- postgresql-8.2.4/src/include/storage/lmgr.h 2006-08-18 18:09:13.000000000 +0200
10991 +++ pgcluster-1.7.0rc7/src/include/storage/lmgr.h 2007-02-18 22:52:17.000000000 +0100
10995 #include "storage/lock.h"
10996 +#include "storage/bufmgr.h"
10997 #include "utils/rel.h"
11001 /* Knowledge about which locktags describe temp objects */
11002 extern bool LockTagIsTemp(const LOCKTAG *tag);
11004 +extern void XactLockTableWaitForCluster(TransactionId xid,Buffer buffer);
11005 #endif /* LMGR_H */
11006 diff -aruN postgresql-8.2.4/src/include/storage/proc.h pgcluster-1.7.0rc7/src/include/storage/proc.h
11007 --- postgresql-8.2.4/src/include/storage/proc.h 2006-10-04 02:30:10.000000000 +0200
11008 +++ pgcluster-1.7.0rc7/src/include/storage/proc.h 2007-02-18 22:52:17.000000000 +0100
11010 SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS];
11012 struct XidCache subxids; /* cache for subtransaction XIDs */
11013 +#ifdef USE_REPLICATION
11014 + unsigned int replicationId; /* id for replication. */
11018 /* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
11019 diff -aruN postgresql-8.2.4/src/interfaces/libpq/Makefile pgcluster-1.7.0rc7/src/interfaces/libpq/Makefile
11020 --- postgresql-8.2.4/src/interfaces/libpq/Makefile 2006-12-28 01:01:12.000000000 +0100
11021 +++ pgcluster-1.7.0rc7/src/interfaces/libpq/Makefile 2007-02-18 22:52:17.000000000 +0100
11024 OBJS= fe-auth.o fe-connect.o fe-exec.o fe-misc.o fe-print.o fe-lobj.o \
11025 fe-protocol2.o fe-protocol3.o pqexpbuffer.o pqsignal.o fe-secure.o \
11026 - md5.o ip.o wchar.o encnames.o noblock.o pgstrcasecmp.o thread.o \
11027 + dllist.o md5.o ip.o wchar.o encnames.o noblock.o pgstrcasecmp.o thread.o \
11028 $(filter crypt.o getaddrinfo.o inet_aton.o open.o snprintf.o strerror.o strlcpy.o, $(LIBOBJS))
11030 ifeq ($(PORTNAME), cygwin)
11032 encnames.c wchar.c : % : $(backend_src)/utils/mb/%
11033 rm -f $@ && $(LN_S) $< .
11035 +dllist.c : % : $(backend_src)/lib/dllist.c
11036 + rm -f $@ && $(LN_S) $< .
11039 # We need several not-quite-identical variants of .DEF files to build libpq
11040 # DLLs for Windows. These are made from the single source file exports.txt.
11041 @@ -169,7 +172,7 @@
11042 rm -f '$(DESTDIR)$(includedir)/libpq-fe.h' '$(DESTDIR)$(includedir_internal)/libpq-int.h' '$(DESTDIR)$(includedir_internal)/pqexpbuffer.h' '$(DESTDIR)$(datadir)/pg_service.conf.sample'
11044 clean distclean: clean-lib
11045 - rm -f $(OBJS) pg_config_paths.h crypt.c getaddrinfo.c inet_aton.c noblock.c open.c pgstrcasecmp.c snprintf.c strerror.c strlcpy.c thread.c md5.c ip.c encnames.c wchar.c pthread.h exports.list
11046 + rm -f $(OBJS) pg_config_paths.h crypt.c getaddrinfo.c inet_aton.c noblock.c open.c pgstrcasecmp.c snprintf.c strerror.c strlcpy.c thread.c md5.c ip.c encnames.c wchar.c pthread.h exports.list dllist.c
11047 rm -f pg_config_paths.h # Might be left over from a Win32 client-only build
11049 maintainer-clean: distclean
11050 diff -aruN postgresql-8.2.4/src/interfaces/libpq/fe-auth.c pgcluster-1.7.0rc7/src/interfaces/libpq/fe-auth.c
11051 --- postgresql-8.2.4/src/interfaces/libpq/fe-auth.c 2006-10-04 02:30:12.000000000 +0200
11052 +++ pgcluster-1.7.0rc7/src/interfaces/libpq/fe-auth.c 2007-02-18 22:52:17.000000000 +0100
11054 #include "fe-auth.h"
11055 #include "libpq/md5.h"
11057 +#ifdef USE_REPLICATION
11058 +#include "replicate_com.h"
11059 +bool PGR_Under_Replication_Server = false;
11060 +#endif /* USE_REPLICATION */
11064 @@ -412,6 +416,19 @@
11066 return STATUS_ERROR;
11068 +#ifdef USE_REPLICATION
11069 + if (PGR_Under_Replication_Server)
11072 + * When this module is called from the replication server,
11073 + * there is no need to encrypt the password.
11074 + * Since the password was already encrypted at the Cluster DB
11076 + int size = 2 * (MD5_PASSWD_LEN + 1);
11077 + memset(crypt_pwd,0, size);
11078 + strncpy(crypt_pwd,password, size - 1); /* buffer was zeroed above; leaving the last byte untouched keeps it NUL-terminated */
11080 +#endif /* USE_REPLICATION */
11083 case AUTH_REQ_CRYPT:
11084 diff -aruN postgresql-8.2.4/src/makefiles/Makefile.aix pgcluster-1.7.0rc7/src/makefiles/Makefile.aix
11085 --- postgresql-8.2.4/src/makefiles/Makefile.aix 2006-09-19 17:36:08.000000000 +0200
11086 +++ pgcluster-1.7.0rc7/src/makefiles/Makefile.aix 2007-02-18 22:52:17.000000000 +0100
11088 $(CC) $(LDFLAGS) $(LDFLAGS_SL) -o $@ $*.o -Wl,-bE:$*$(EXPSUFF) $(SHLIB_LINK)
11091 +CFLAGS += -pthread
11092 +LDFLAGS += -L/usr/lib/threads
11093 diff -aruN postgresql-8.2.4/src/makefiles/Makefile.freebsd pgcluster-1.7.0rc7/src/makefiles/Makefile.freebsd
11094 --- postgresql-8.2.4/src/makefiles/Makefile.freebsd 2006-04-19 18:32:08.000000000 +0200
11095 +++ pgcluster-1.7.0rc7/src/makefiles/Makefile.freebsd 2007-02-18 22:52:17.000000000 +0100
11102 diff -aruN postgresql-8.2.4/src/makefiles/Makefile.hpux pgcluster-1.7.0rc7/src/makefiles/Makefile.hpux
11103 --- postgresql-8.2.4/src/makefiles/Makefile.hpux 2006-02-07 18:36:13.000000000 +0100
11104 +++ pgcluster-1.7.0rc7/src/makefiles/Makefile.hpux 2007-02-18 22:52:17.000000000 +0100
11106 # correctly in the LP64 data model.
11107 LIBS := -lxnet $(LIBS)
11109 +# add thread lib for PGCluster
11110 +LIBS := -lpthread $(LIBS)
11112 # Set up rpath so that the executables don't need SHLIB_PATH to be set.
11113 # (Note: --disable-rpath is a really bad idea on this platform...)
11114 ifeq ($(with_gnu_ld), yes)
11115 diff -aruN postgresql-8.2.4/src/makefiles/Makefile.linux pgcluster-1.7.0rc7/src/makefiles/Makefile.linux
11116 --- postgresql-8.2.4/src/makefiles/Makefile.linux 2005-12-09 22:19:36.000000000 +0100
11117 +++ pgcluster-1.7.0rc7/src/makefiles/Makefile.linux 2007-02-18 22:52:17.000000000 +0100
11119 $(CC) -shared -o $@ $<
11123 diff -aruN postgresql-8.2.4/src/makefiles/Makefile.netbsd pgcluster-1.7.0rc7/src/makefiles/Makefile.netbsd
11124 --- postgresql-8.2.4/src/makefiles/Makefile.netbsd 2006-04-19 18:32:08.000000000 +0200
11125 +++ pgcluster-1.7.0rc7/src/makefiles/Makefile.netbsd 2007-02-18 22:52:17.000000000 +0100
11131 diff -aruN postgresql-8.2.4/src/makefiles/Makefile.openbsd pgcluster-1.7.0rc7/src/makefiles/Makefile.openbsd
11132 --- postgresql-8.2.4/src/makefiles/Makefile.openbsd 2006-04-19 18:32:08.000000000 +0200
11133 +++ pgcluster-1.7.0rc7/src/makefiles/Makefile.openbsd 2007-02-18 22:52:17.000000000 +0100
11139 diff -aruN postgresql-8.2.4/src/makefiles/Makefile.solaris pgcluster-1.7.0rc7/src/makefiles/Makefile.solaris
11140 --- postgresql-8.2.4/src/makefiles/Makefile.solaris 2005-12-09 22:19:36.000000000 +0100
11141 +++ pgcluster-1.7.0rc7/src/makefiles/Makefile.solaris 2007-02-18 22:52:17.000000000 +0100
11143 $(LD) -G -Bdynamic -o $@ $<
11147 diff -aruN postgresql-8.2.4/src/makefiles/Makefile.sunos4 pgcluster-1.7.0rc7/src/makefiles/Makefile.sunos4
11148 --- postgresql-8.2.4/src/makefiles/Makefile.sunos4 2002-09-05 00:54:18.000000000 +0200
11149 +++ pgcluster-1.7.0rc7/src/makefiles/Makefile.sunos4 2007-02-18 22:52:17.000000000 +0100
11151 $(LD) -assert pure-text -Bdynamic -o $@ $<
11155 diff -aruN postgresql-8.2.4/src/pgcluster/Makefile pgcluster-1.7.0rc7/src/pgcluster/Makefile
11156 --- postgresql-8.2.4/src/pgcluster/Makefile 1970-01-01 01:00:00.000000000 +0100
11157 +++ pgcluster-1.7.0rc7/src/pgcluster/Makefile 2007-02-18 22:52:17.000000000 +0100
11159 +#-------------------------------------------------------------------------
11161 +# Makefile for src/pgcluster (server programs)
11163 +#-------------------------------------------------------------------------
11165 +subdir = src/pgcluster
11166 +top_builddir = ../..
11167 +include $(top_builddir)/src/Makefile.global
11169 +DIRS := libpgc pgrp pglb tool
11171 +all install installdirs uninstall depend distprep:
11172 + @for dir in $(DIRS); do $(MAKE) -C $$dir $@ || exit; done
11174 +clean distclean maintainer-clean:
11175 + -@for dir in $(DIRS); do $(MAKE) -C $$dir $@; done
11176 diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/Makefile pgcluster-1.7.0rc7/src/pgcluster/libpgc/Makefile
11177 --- postgresql-8.2.4/src/pgcluster/libpgc/Makefile 1970-01-01 01:00:00.000000000 +0100
11178 +++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/Makefile 2007-02-18 22:52:17.000000000 +0100
11180 +#-------------------------------------------------------------------------
11183 +# Makefile for libpgc subsystem (common library for replication server)
11185 +#-------------------------------------------------------------------------
11187 +subdir = src/pgcluster/libpgc
11188 +top_builddir = ../../..
11189 +include $(top_builddir)/src/Makefile.global
11191 +OBJS = sem.o show.o signal.o
11196 + $(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS)
11199 + $(CC) -MM $(CFLAGS) *.c >depend
11204 + rm -f SUBSYS.o $(OBJS)
11206 +ifeq (depend,$(wildcard depend))
11209 diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/libpgc.h pgcluster-1.7.0rc7/src/pgcluster/libpgc/libpgc.h
11210 --- postgresql-8.2.4/src/pgcluster/libpgc/libpgc.h 1970-01-01 01:00:00.000000000 +0100
11211 +++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/libpgc.h 2007-02-18 22:52:17.000000000 +0100
11213 +/*-------------------------------------------------------------------------
11216 + * external definition of the function for pgreplicate and pglb
11218 + * This should be the first file included by replicate modules.
11220 + *-------------------------------------------------------------------------
11225 +#include <stdio.h>
11227 +/* character length of IP address */
11228 +#define ADDRESS_LENGTH (24)
11230 +/* logging file data tag in configuration file */
11231 +#define LOG_INFO_TAG "Log_File_Info"
11232 +#define FILE_NAME_TAG "File_Name"
11233 +#define FILE_SIZE_TAG "File_Size"
11234 +#define LOG_ROTATION_TAG "Rotate"
11237 + char file_name[256];
11243 +extern LogFileInf * LogFileData;
11244 +/* external definition of the function in sem.c */
11245 +extern void PGRsem_unlock( int semid, short sem_num );
11246 +extern void PGRsem_lock( int semid, short sem_num );
11248 +/* external definition of the function in show.c */
11249 +extern FILE * PGRopen_log_file(char * fname, int max_size, int rotation);
11250 +extern void PGRclose_log_file(FILE * fp);
11251 +extern void show_debug(const char * fmt,...);
11252 +extern void show_error(const char * fmt,...);
11253 +extern void PGRwrite_log_file(FILE * fp, const char * fmt,...);
11255 +/* external definition of the function in signal.c */
11256 +typedef void (*PGRsighandler)(int);
11257 +extern PGRsighandler PGRsignal(int signo, PGRsighandler sighandler);
11259 +#endif /* LIBPGC_H */
11260 diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/sem.c pgcluster-1.7.0rc7/src/pgcluster/libpgc/sem.c
11261 --- postgresql-8.2.4/src/pgcluster/libpgc/sem.c 1970-01-01 01:00:00.000000000 +0100
11262 +++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/sem.c 2007-02-18 22:52:17.000000000 +0100
11264 +/*--------------------------------------------------------------------
11269 + * This file is composed of the functions to call with the source
11270 + * at pgreplicate for the semaphore control.
11272 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
11273 + *--------------------------------------------------------------------
11275 +#include <stdio.h>
11276 +#include <unistd.h>
11277 +#include <sys/types.h>
11278 +#include <errno.h>
11279 +#include <sys/ipc.h>
11280 +#include <sys/sem.h>
11281 +#include <signal.h>
11283 +extern void show_debug(const char * fmt,...);
11285 +void PGRsem_unlock( int semid, short sem_num );
11286 +void PGRsem_lock( int semid, short sem_num );
11288 +#define PGR_SEM_UNLOCK_WAIT_MSEC (100)
11289 +#define PGR_SEM_LOCK_WAIT_MSEC (500)
11292 +PGRsem_unlock( int semid, short sem_num )
11295 + struct sembuf sops;
11297 + sops.sem_num = sem_num;
11299 + /*sops.sem_flg = IPC_NOWAIT;*/
11300 + sops.sem_flg = 0;
11303 + status = semop(semid, &sops, 1);
11304 + if ((status == -1) && (errno != EINTR))
11306 + usleep(PGR_SEM_UNLOCK_WAIT_MSEC);
11308 + } while (status == -1);
11312 +PGRsem_lock( int semid, short sem_num )
11315 + struct sembuf sops;
11317 + sops.sem_num = sem_num;
11318 + sops.sem_op = -1;
11319 + /*sops.sem_flg = IPC_NOWAIT;*/
11320 + sops.sem_flg = 0;
11323 + status = semop(semid, &sops, 1);
11324 + if ((status == -1) && (errno != EINTR))
11326 + usleep(PGR_SEM_LOCK_WAIT_MSEC);
11328 + } while (status == -1);
11331 diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/show.c pgcluster-1.7.0rc7/src/pgcluster/libpgc/show.c
11332 --- postgresql-8.2.4/src/pgcluster/libpgc/show.c 1970-01-01 01:00:00.000000000 +0100
11333 +++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/show.c 2007-02-18 22:52:17.000000000 +0100
11335 +/*--------------------------------------------------------------------
11340 + * This file is composed of the logging and debug functions
11342 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
11343 + *--------------------------------------------------------------------
11345 +#include <sys/time.h>
11346 +#include <sys/types.h>
11347 +#include <sys/stat.h>
11348 +#include <stdio.h>
11349 +#include <stdarg.h>
11350 +#include <stdlib.h>
11351 +#include <errno.h>
11352 +#include <string.h>
11354 +#include <unistd.h>
11355 +#include "libpgc.h"
11357 +#define TIMESTAMP_SIZE 19 /* format `YYYY-MM-DD HH:MM:SS' */
11359 +/*--------------------------------------
11360 + * PROTOTYPE DECLARATION
11361 + *--------------------------------------
11363 +static char* get_current_timestamp(void);
11364 +static int file_rotation(char * fname, int max_rotation);
11366 +FILE * PGRopen_log_file(char * fname, int max_size, int rotation);
11367 +void PGRclose_log_file(FILE * fp);
11368 +void show_debug(const char * fmt,...);
11369 +void show_error(const char * fmt,...);
11370 +void PGRwrite_log_file(FILE * fp, const char * fmt,...);
11372 +extern int Debug_Print;
11373 +extern int Log_Print;
11375 +LogFileInf * LogFileData = NULL;
11378 +get_current_timestamp(void)
11381 + static char buf[TIMESTAMP_SIZE + 1];
11383 + now = time(NULL);
11384 + strftime(buf, sizeof(buf),
11385 + "%Y-%m-%d %H:%M:%S", localtime(&now));
11390 +show_debug(const char * fmt,...)
11398 + timestamp = get_current_timestamp();
11399 + fprintf(stdout,"%s [%d] DEBUG:",timestamp, getpid());
11400 + va_start(ap,fmt);
11401 + vfprintf(stdout,fmt,ap);
11403 + fprintf(stdout,"\n");
11405 + if ((Log_Print) && (LogFileData != NULL))
11407 + FILE * fp = NULL;
11408 + fp = PGRopen_log_file(LogFileData->file_name, LogFileData->max_size, LogFileData->rotation);
11409 + va_start(ap,fmt);
11410 + vsnprintf(buf,sizeof(buf),fmt,ap);
11412 + PGRwrite_log_file(fp, "%s", buf); /* buf is already-formatted text, so pass it as data, never as the format string */
11413 + PGRclose_log_file(fp);
11419 +show_error(const char * fmt,...)
11422 + char buf[256], *timestamp;
11426 + timestamp = get_current_timestamp();
11427 + fprintf(stderr,"%s [%d] ERROR:",timestamp, getpid());
11428 + va_start(ap,fmt);
11429 + vfprintf(stderr,fmt,ap);
11431 + fprintf(stderr,"\n");
11434 + if ((Log_Print) && (LogFileData != NULL))
11436 + FILE * fp = NULL;
11437 + fp = PGRopen_log_file(LogFileData->file_name, LogFileData->max_size, LogFileData->rotation);
11438 + va_start(ap,fmt);
11439 + vsnprintf(buf,sizeof(buf),fmt,ap);
11441 + PGRwrite_log_file(fp, "%s", buf); /* buf is already-formatted text, so pass it as data, never as the format string */
11442 + PGRclose_log_file(fp);
11447 +PGRwrite_log_file(FILE * fp, const char * fmt,...)
11459 + if (time(&t) < 0)
11463 + snprintf(log,sizeof(log),"%s ",ctime(&t));
11464 + p = strchr(log,'\n');
11469 + va_start(ap,fmt);
11470 + vsnprintf(buf,sizeof(buf),fmt,ap);
11473 + strcat(log,"\n");
11474 + if (fputs(log,fp) >= 0)
11481 +PGRopen_log_file(char * fname, int max_size, int rotation)
11486 + if (fname == NULL)
11488 + return (FILE *)NULL;
11491 + if (max_size > 0)
11493 + rtn = stat(fname,&st);
11496 + if (st.st_size > max_size)
11498 + if (file_rotation(fname, rotation) < 0)
11500 + return (FILE *)NULL;
11505 + return (fopen(fname,"a"));
11509 +PGRclose_log_file(FILE * fp)
11519 +file_rotation(char * fname, int max_rotation)
11521 + char * func = "file_rotation()";
11525 + char old_fname[256];
11526 + char new_fname[256];
11528 + if ((fname == NULL) || (max_rotation < 0))
11533 + for ( i = max_rotation ; i > 1 ; i -- )
11535 + sprintf(old_fname,"%s.%d",fname,i-1);
11536 + rtn = stat(old_fname,&st);
11539 + sprintf(new_fname,"%s.%d",fname,i);
11540 + rtn = rename(old_fname, new_fname);
11543 + show_error("%s:rotate failed: (%s)",func,strerror(errno));
11548 + if (max_rotation > 0)
11550 + sprintf(new_fname,"%s.1",fname);
11551 + rtn = rename(fname, new_fname);
11555 + rtn = unlink(fname);
11561 diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/signal.c pgcluster-1.7.0rc7/src/pgcluster/libpgc/signal.c
11562 --- postgresql-8.2.4/src/pgcluster/libpgc/signal.c 1970-01-01 01:00:00.000000000 +0100
11563 +++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/signal.c 2007-02-18 22:52:17.000000000 +0100
11565 +/*--------------------------------------------------------------------
11570 + * This file is composed of the functions to set signal handler
11572 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
11573 + *--------------------------------------------------------------------
11576 +#include <signal.h>
11577 +#include "pg_config.h"
11578 +#include "libpgc.h"
11581 + * Set up a signal handler
11584 +PGRsignal(int signo, PGRsighandler sighandler)
11586 +#if !defined(HAVE_POSIX_SIGNALS)
11587 + return signal(signo, sighandler); /* non-POSIX fallback: install the handler passed by the caller */
11589 + struct sigaction act,
11592 + act.sa_handler = sighandler;
11593 + sigemptyset(&act.sa_mask);
11594 + act.sa_flags = 0;
11595 + if (sigaction(signo, &act, &oact) < 0)
11597 + return oact.sa_handler;
11598 +#endif /* !HAVE_POSIX_SIGNALS */
11600 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/AUTHORS pgcluster-1.7.0rc7/src/pgcluster/pglb/AUTHORS
11601 --- postgresql-8.2.4/src/pgcluster/pglb/AUTHORS 1970-01-01 01:00:00.000000000 +0100
11602 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/AUTHORS 2007-02-18 22:52:17.000000000 +0100
11606 +pglb was written by Atsushi Mitani.
11607 +pglb is based on pg_pool which is written by Tatsuo Ishii.
11608 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/COPYING pgcluster-1.7.0rc7/src/pgcluster/pglb/COPYING
11609 --- postgresql-8.2.4/src/pgcluster/pglb/COPYING 1970-01-01 01:00:00.000000000 +0100
11610 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/COPYING 2007-02-18 22:52:17.000000000 +0100
11612 +Copyright (c) 2003-2006 Atsushi Mitani
11614 +Permission to use, copy, modify, and distribute this software and
11615 +its documentation for any purpose and without fee is hereby
11616 +granted, provided that the above copyright notice appear in all
11617 +copies and that both that copyright notice and this permission
11618 +notice appear in supporting documentation, and that the name of the
11619 +author not be used in advertising or publicity pertaining to
11620 +distribution of the software without specific, written prior
11621 +permission. The author makes no representations about the
11622 +suitability of this software for any purpose. It is provided "as
11623 +is" without express or implied warranty.
11625 +Portions copyright (c) 2003-2006, Tatsuo Ishii
11626 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/Makefile pgcluster-1.7.0rc7/src/pgcluster/pglb/Makefile
11627 --- postgresql-8.2.4/src/pgcluster/pglb/Makefile 1970-01-01 01:00:00.000000000 +0100
11628 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/Makefile 2007-02-18 22:52:17.000000000 +0100
11630 +#-------------------------------------------------------------------------
11632 +# Makefile for src/pgcluster/pglb
11634 +#-------------------------------------------------------------------------
11636 +subdir = src/pgcluster/pglb
11637 +top_builddir = ../../..
11638 +include $(top_builddir)/src/Makefile.global
11640 +OBJS= child.o cluster_table.o load_balance.o main.o pool_auth.o \
11641 + pool_connection_pool.o pool_process_query.o pool_stream.o \
11642 + pool_params.o recovery.o socket.o lifecheck.o
11644 +EXTRA_OBJS = $(top_builddir)/src/backend/libpq/replicate_com.o ../libpgc/SUBSYS.o
11646 +CFLAGS += -DPRINT_DEBUG
11647 +override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) -DBINDIR=\"$(bindir)\"
11651 +pglb: $(OBJS) $(libpq_builddir)/libpq.a
11652 + $(CC) $(CFLAGS) $(OBJS) $(EXTRA_OBJS) $(libpq) $(LDFLAGS) $(LIBS) -o $@
11654 +install: all installdirs
11655 + $(INSTALL_PROGRAM) pglb$(X) $(DESTDIR)$(bindir)/pglb$(X)
11656 + $(INSTALL_DATA) pglb.conf.sample $(DESTDIR)$(datadir)/pglb.conf.sample
11659 + $(mkinstalldirs) $(DESTDIR)$(bindir)
11660 + $(mkinstalldirs) $(DESTDIR)$(datadir)
11663 + rm -f $(addprefix $(DESTDIR)$(bindir)/, pglb$(X))
11664 + rm -f $(DESTDIR)$(datadir)/pglb.conf.sample
11666 +clean distclean maintainer-clean:
11667 + rm -f pglb$(X) $(OBJS)
11668 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/child.c pgcluster-1.7.0rc7/src/pgcluster/pglb/child.c
11669 --- postgresql-8.2.4/src/pgcluster/pglb/child.c 1970-01-01 01:00:00.000000000 +0100
11670 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/child.c 2007-02-18 22:52:17.000000000 +0100
11672 +/*--------------------------------------------------------------------
11677 + * This file is composed of the functions to call with the source
11678 + * at child process of pglb.
11680 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
11681 + * Portions Copyright (c) 2003-2006, Tatsuo Ishii
11682 + *--------------------------------------------------------------------
11685 + * Permission to use, copy, modify, and distribute this software and
11686 + * its documentation for any purpose and without fee is hereby
11687 + * granted, provided that the above copyright notice appear in all
11688 + * copies and that both that copyright notice and this permission
11689 + * notice appear in supporting documentation, and that the name of the
11690 + * author not be used in advertising or publicity pertaining to
11691 + * distribution of the software without specific, written prior
11692 + * permission. The author makes no representations about the
11693 + * suitability of this software for any purpose. It is provided "as
11694 + * is" without express or implied warranty.
11697 +#include "postgres.h"
11699 +#include <stdio.h>
11700 +#include <string.h>
11701 +#include <unistd.h>
11702 +#include <signal.h>
11703 +#include <sys/wait.h>
11704 +#include <sys/time.h>
11705 +#include <ctype.h>
11706 +#include <sys/types.h>
11707 +#include <sys/stat.h>
11708 +#include <sys/socket.h>
11709 +#include <sys/ipc.h>
11710 +#include <netdb.h>
11711 +#include <netinet/in.h>
11712 +#include <errno.h>
11713 +#include <fcntl.h>
11715 +#include <sys/param.h>
11716 +#include <arpa/inet.h>
11717 +#include <sys/file.h>
11719 +#ifdef HAVE_NETINET_TCP_H
11720 +#include <netinet/tcp.h>
11723 +#ifdef HAVE_CRYPT_H
11724 +#include <crypt.h>
11727 +#include "postgres_fe.h"
11728 +#include "libpq/pqcomm.h"
11730 +#include "replicate_com.h"
11733 +/*--------------------------------------
11734 + * GLOBAL VARIABLE DECLARATION
11735 + *--------------------------------------
11737 +POOL_CONNECTION * Frontend = NULL;
11739 +/*--------------------------------------
11740 + * PROTOTYPE DECLARATION
11741 + *--------------------------------------
11743 +int PGRpre_fork_children(ClusterTbl * ptr);
11744 +int PGRpre_fork_child(ClusterTbl * ptr);
11745 +int PGRdo_child( int use_pool);
11746 +int PGRcreate_child(ClusterTbl * cluster_p);
11747 +pid_t PGRscan_child_tbl(ClusterTbl * cluster_p);
11748 +void notice_backend_error(void);
11749 +void do_pooling_child(int sig);
11750 +int PGRset_status_to_child_tbl(pid_t pid, int status);
11751 +int PGRadd_child_tbl(ClusterTbl * cluster_p, pid_t pid, int status);
11752 +int PGRget_child_status(pid_t pid);
11753 +void PGRreturn_connection_full_error(void);
11754 +void PGRreturn_no_connection_error(void);
11755 +void PGRquit_children_on_cluster(int rec_no);
11758 +static void set_nonblock(int fd);
11760 +static void unset_nonblock(int fd);
11761 +static POOL_CONNECTION *do_accept(int unix_fd, int inet_fd);
11762 +static PGR_StartupPacket *read_startup_packet(POOL_CONNECTION *cp);
11763 +static int send_startup_packet(POOL_CONNECTION_POOL_SLOT *cp);
11764 +static void cancel_request(CancelPacket *sp, int secondary_backend);
11765 +static POOL_CONNECTION_POOL *connect_backend(PGR_StartupPacket *sp, POOL_CONNECTION *frontend);
11766 +static int send_params(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
11767 +static void child_end(int sig);
11768 +static void PGRreturn_with_error(char *msg);
11771 +/*--------------------------------------------------------------------
11773 + * PGRpre_fork_children()
11775 + * pre-fork child processes
11777 + * ClusterTbl * ptr: pointer of cluster server table (I)
11780 + * NG: STATUS_ERROR
11781 + *--------------------------------------------------------------------
11784 +PGRpre_fork_children(ClusterTbl * ptr)
11790 + return STATUS_ERROR;
11793 + while ((ptr->useFlag != TBL_END) && (cnt < ClusterNum))
11795 + PGRpre_fork_child(ptr);
11799 + return STATUS_OK;
11802 +/*--------------------------------------------------------------------
11804 + * PGRpre_fork_child()
11806 + * pre-fork child process
11808 + * ClusterTbl * ptr: pointer of cluster server table (I)
11811 + * NG: STATUS_ERROR
11812 + *--------------------------------------------------------------------
11815 +PGRpre_fork_child(ClusterTbl * ptr)
11822 + return STATUS_ERROR;
11824 + if (ptr->useFlag == TBL_END)
11826 + return STATUS_ERROR;
11828 + for ( i = 0 ; i < ptr->max_connect * Max_Pool ; i ++)
11830 + pid = PGRcreate_child(ptr);
11832 + return STATUS_OK;
11834 +/*--------------------------------------------------------------------
11838 + * execute child process
11840 + * int use_pool: usage flag of connection pooling (I)
11843 + * NG: STATUS_ERROR
11844 + *--------------------------------------------------------------------
11847 +PGRdo_child( int use_pool)
11849 + char * func = "PGRdo_child()";
11851 + PGR_StartupPacket *sp = NULL;
11852 + POOL_CONNECTION *frontend = NULL;
11853 + POOL_CONNECTION_POOL *backend = NULL;
11855 + int connection_reuse = 1;
11856 + int ssl_request = 0;
11860 +#ifdef PRINT_DEBUG
11861 + show_debug("%s:I am %d",func, pid);
11864 + /* set up signal handlers */
11865 + PGRsignal(SIGALRM, SIG_DFL);
11866 + PGRsignal(SIGTERM, child_end);
11867 + PGRsignal(SIGHUP, child_end);
11868 + PGRsignal(SIGINT, child_end);
11869 + PGRsignal(SIGUSR1, SIG_IGN);
11870 + PGRsignal(SIGUSR2, SIG_IGN);
11873 + /* set listen fds to non-blocking */
11874 + set_nonblock(Frontend_FD.unix_fd);
11875 + set_nonblock(Frontend_FD.inet_fd);
11879 + /* perform accept() */
11880 + frontend = do_accept(Frontend_FD.unix_fd,Frontend_FD.inet_fd);
11881 + if (frontend == NULL)
11883 + /* accept() failed. return to the accept() loop */
11884 + PGRset_status_to_child_tbl(pid,TBL_FREE);
11885 + return STATUS_ERROR;
11888 + /* set frontend fd back to blocking mode */
11889 + unset_nonblock(frontend->fd);
11891 + /* read the startup packet */
11896 + free(sp->startup_packet);
11897 + free(sp->database);
11902 + sp = read_startup_packet(frontend);
11905 + /* failed to read the startup packet. return to the
11907 + pool_close(frontend);
11908 + PGRset_status_to_child_tbl(pid,TBL_FREE);
11909 + return STATUS_ERROR;
11911 + PGRset_status_to_child_tbl(pid,TBL_ACCEPT);
11913 + /* cancel request? */
11914 + if (sp->major == 1234 && sp->minor == 5678)
11916 + cancel_request((CancelPacket *)sp->startup_packet, 0);
11917 + pool_close(frontend);
11918 + return STATUS_ERROR;
11922 + if (sp->major == 1234 && sp->minor == 5679)
11924 + /* SSL not supported */
11925 +#ifdef PRINT_DEBUG
11926 + show_debug("%s:SSLRequest: sent N; retry startup",func);
11928 + if (ssl_request && use_pool)
11930 + pool_close(frontend);
11931 + return STATUS_ERROR;
11935 + * say to the frontend "we do not support SSL"
11936 + * note that this is not a NOTICE response even though it is an 'N'!
11938 + pool_write_and_flush(frontend, "N", 1);
11940 + goto retry_startup;
11944 + * Ok, negotiation with frontend has been done. Let's go to the next step.
11947 + * if there's no connection associated with user and database,
11948 + * we need to connect to the backend and send the startup packet.
11951 + if ((backend = pool_get_cp(sp->user, sp->database, sp->major)) == NULL)
11953 + connection_reuse = 0;
11955 + if ((backend = connect_backend(sp, frontend)) == NULL)
11958 + PGRset_status_on_cluster_tbl(TBL_ERROR,CurrentCluster);
11959 + return STATUS_ERROR;
11961 + goto retry_accept;
11966 + /* reuse existing connection to backend */
11968 + if (pool_do_reauth(frontend, backend))
11970 + pool_close(frontend);
11971 + return STATUS_ERROR;
11974 + if (MAJOR(backend) == 3)
11976 + if (send_params(frontend, backend))
11978 + pool_close(frontend);
11979 + return STATUS_ERROR;
11983 + /* send ReadyForQuery to frontend */
11984 + pool_write(frontend, "Z", 1);
11986 + if (MAJOR(backend) == 3)
11992 + pool_write(frontend, &len, sizeof(len));
11993 + tstate = TSTATE(backend);
11994 + pool_write(frontend, &tstate, 1);
11997 + if (pool_flush(frontend) < 0)
11999 + pool_close(frontend);
12000 + return STATUS_ERROR;
12005 + /* query process loop */
12008 + POOL_STATUS status;
12010 + status = pool_process_query(frontend, backend, 0);
12014 + /* client exits */
12016 + /* do not cache connection to template0, template1, regression */
12017 + if (!strcmp(sp->database, "template0") || !strcmp(sp->database, "template1") ||
12018 + !strcmp(sp->database, "regression") || use_pool == NOT_USE_CONNECTION_POOL)
12020 + pool_close(frontend);
12021 + pool_send_frontend_exits(backend);
12022 + pool_discard_cp(sp->user, sp->database, sp->major);
12026 + POOL_STATUS status1;
12028 + /* send reset request to backend */
12029 + status1 = pool_process_query(frontend, backend, 1);
12030 + pool_close(frontend);
12032 + /* if we detect errors on resetting connection, we need to discard
12033 + * this connection since it might be in unknown status
12035 + if (status1 != POOL_CONTINUE)
12036 + pool_discard_cp(sp->user, sp->database, sp->major);
12038 + pool_connection_pool_timer(backend);
12042 + /* error occurred. discard backend connection pool
12043 + and disconnect connection to the frontend */
12045 + show_error("%s:do_child: exits with status 1 due to error",func);
12048 + /* fatal error occurred. just exit myself... */
12050 + show_error("%s:do_child: fatal error occured",func);
12051 + notice_backend_error();
12054 + /* not implemented yet */
12056 + do_accept(Frontend_FD.unix_fd,Frontend_FD.inet_fd);
12057 +#ifdef PRINT_DEBUG
12058 + show_debug("%s:accept while idle",func);
12066 + if (status != POOL_CONTINUE)
12069 + if ((status == POOL_ERROR) ||
12070 + (status == POOL_FATAL))
12072 + PGRset_status_to_child_tbl(pid,TBL_FREE);
12073 + return STATUS_ERROR;
12075 + PGRset_status_to_child_tbl(pid,TBL_INIT);
12076 + return STATUS_OK;
12079 +/*--------------------------------------------------------------------
12081 + * PGRcreate_child()
12083 + * create child process
12085 + * ClusterTbl * cluster_p: pointer of cluster server table (I)
12088 + * NG: STATUS_ERROR
12089 + *--------------------------------------------------------------------
12092 +PGRcreate_child(ClusterTbl * cluster_p)
12094 + char * func = "PGRcreate_child()";
12097 + if (cluster_p == NULL)
12098 + return STATUS_ERROR;
12100 +#ifdef PRINT_DEBUG
12101 + show_debug("%s:create child [%d@%s]",func,cluster_p->port,cluster_p->hostName);
12103 + PGRsignal(SIGCHLD,PGRrecreate_child);
12104 + pgid = getpgid((pid_t)0);
12108 + show_error("%s:fork() failed. (%s)",func,strerror(errno));
12109 + return STATUS_ERROR;
12113 + CurrentCluster = cluster_p;
12114 + if (pool_init_cp())
12116 + show_error("%s:pool_init_cp failed",func);
12119 + PGRsignal(SIGCHLD,PGRchild_wait);
12120 + PGRsignal(SIGTERM, child_end);
12121 + PGRsignal(SIGHUP, child_end);
12122 + PGRsignal(SIGINT, child_end);
12123 + PGRsignal(SIGUSR1,do_pooling_child);
12124 + setpgid((pid_t)0,pgid);
12128 + PGRsignal(SIGUSR1,do_pooling_child);
12130 +#ifdef PRINT_DEBUG
12131 + show_debug("%s:create child end [%d@%s]",func,cluster_p->port,cluster_p->hostName);
12133 + child_end(SIGTERM);
12137 + PGRadd_child_tbl(cluster_p,pid,TBL_INIT);
12142 +/*--------------------------------------------------------------------
12144 + * PGRscan_child_tbl()
12146 + * get a child process id that is waiting for connection
12147 + * with the cluster server
12149 + * ClusterTbl * cluster_p: pointer of cluster server table (I)
12151 + * OK: child process id
12153 + *--------------------------------------------------------------------
12156 +PGRscan_child_tbl(ClusterTbl * cluster_p)
12158 + char * func = "PGRscan_child_tbl()";
12161 + if ( cluster_p == NULL)
12163 + show_error("%s:Cluster_Tbl is not initialize",func);
12164 + return STATUS_ERROR;
12169 + show_error("%s:Child_Tbl is not initialize",func);
12170 + return STATUS_ERROR;
12172 + while(p->useFlag != TBL_END)
12179 + if ((p->useFlag == TBL_INIT) &&
12180 + (p->rec_no == cluster_p->rec_no))
12182 + p->useFlag = TBL_USE;
12190 +/* notify the parent of a backend connection error using SIGUSR2 */
12192 +notice_backend_error(void)
12194 + pid_t pid = getpid();
12196 + PGRset_status_to_child_tbl(pid,TBL_ERROR);
12197 + PGRset_status_on_cluster_tbl(TBL_ERROR_NOTICE,CurrentCluster);
12200 + kill(parent, SIGUSR2);
12207 + * start up pooling child process
12210 +do_pooling_child(int sig)
12212 + char * func = "do_pooling_child()";
12217 + rtn = PGRdo_child(USE_CONNECTION_POOL);
12218 + PGRrelease_connection(CurrentCluster);
12219 + if (rtn != STATUS_OK)
12221 + show_error("%s:PGRdo_child failed",func);
12222 + child_end(SIGTERM);
12228 + * set status in child process table
12231 +PGRset_status_to_child_tbl(pid_t pid, int status)
12233 + char * func = "PGRset_status_to_child_tbl()";
12239 + show_error("%s:Child_Tbl is not initialize",func);
12240 + return STATUS_ERROR;
12242 + while(p->useFlag != TBL_END)
12244 + if (p->pid == pid)
12246 + p->useFlag = status;
12247 + return STATUS_OK;
12251 + return STATUS_ERROR;
12255 + * add child process data in child process table
12258 +PGRadd_child_tbl(ClusterTbl * cluster_p, pid_t pid, int status)
12260 + char * func = "PGRadd_child_tbl()";
12264 + if ( cluster_p == NULL)
12266 + show_error("%s:Cluster_Tbl is not initialize",func);
12267 + return STATUS_ERROR;
12271 + show_error("%s:Child_Tbl is not initialize",func);
12272 + return STATUS_ERROR;
12274 + while(p->useFlag != TBL_END)
12276 + if ((p->useFlag == TBL_FREE) ||
12277 + (p->useFlag == TBL_ERROR))
12279 + p->useFlag = status;
12280 + p->rec_no = cluster_p->rec_no;
12282 + return STATUS_OK;
12286 + return STATUS_ERROR;
12290 +PGRget_child_status(pid_t pid)
12292 + char * func = "PGRget_child_status()";
12298 + show_error("%s:Child_Tbl is not initialize",func);
12299 + return STATUS_ERROR;
12302 + while (p->useFlag != TBL_END)
12304 + if (p->pid == pid)
12306 + return p->useFlag;
12310 + return STATUS_ERROR;
12314 +PGRreturn_connection_full_error(void)
12316 + PGRreturn_with_error( "Sorry, backend connection is full\n");
12320 +PGRreturn_no_connection_error(void) {
12321 + PGRreturn_with_error("pglb could not connect to server: no cluster available.\n");
12325 +PGRreturn_with_error (char *msg)
12327 + PGR_StartupPacket *sp = NULL;
12328 + POOL_CONNECTION *frontend = NULL;
12331 + /* perform accept() */
12332 + frontend = do_accept(Frontend_FD.unix_fd,Frontend_FD.inet_fd);
12333 + if (frontend == NULL)
12335 + /* accept() failed. return to the accept() loop */
12338 + sp = read_startup_packet(frontend);
12341 + /* failed to read the startup packet. return to the
12343 + pool_close(frontend);
12346 + pool_write_and_flush(frontend, "E", 1);
12347 + pool_write_and_flush(frontend, msg, strlen(msg)+1);
12348 + pool_close(frontend);
12353 +PGRquit_children_on_cluster(int rec_no)
12355 + char * func = "PGRquit_children_on_cluster()";
12358 + if (Child_Tbl == NULL)
12362 + PGRsignal(SIGCHLD,SIG_IGN);
12364 + while(p->useFlag != TBL_END)
12366 + if (p->rec_no == rec_no)
12368 + if (kill (p->pid,SIGTERM) == -1)
12370 + show_error("%s:could not stop pid: %d (%s)",func,p->pid,strerror(errno));
12373 + PGRchild_wait(SIGTERM);
12374 + p->useFlag = DATA_FREE;
12378 + if (Use_Connection_Pool)
12380 + PGRsignal(SIGCHLD,PGRrecreate_child);
12384 + PGRsignal(SIGCHLD,PGRchild_wait);
12388 +/* -------------------------------------------------------------------
12389 + * private functions
12390 + * -------------------------------------------------------------------
12395 + * set non-block flag
12397 +static void set_nonblock(int fd)
12399 + char* func = "set_nonblock()";
12402 + /* set fd to non-blocking */
12403 + var = fcntl(fd, F_GETFL, 0);
12406 + show_error("%s:fcntl failed. %s", func,strerror(errno));
12407 + child_end(SIGTERM);
12409 + if (fcntl(fd, F_SETFL, var | O_NONBLOCK) == -1)
12411 + show_error("%s:fcntl failed. %s", func,strerror(errno));
12412 + child_end(SIGTERM);
12418 + * unset non-block flag
12420 +static void unset_nonblock(int fd)
12422 + char * func = "unset_nonblock()";
12425 + /* set fd back to blocking (clear O_NONBLOCK) */
12426 + var = fcntl(fd, F_GETFL, 0);
12429 + show_error("%s,fcntl failed. %s", func,strerror(errno));
12430 + child_end(SIGTERM);
12432 + if (fcntl(fd, F_SETFL, var & ~O_NONBLOCK) == -1)
12434 + show_error("%s,fcntl failed. %s", func,strerror(errno));
12435 + child_end(SIGTERM);
12441 +* perform accept() and return a pool connection opened on the new fd
12443 +static POOL_CONNECTION *do_accept(int unix_fd, int inet_fd)
12445 + char * func = "do_accept()";
12448 + struct sockaddr addr;
12449 + socklen_t addrlen;
12453 + POOL_CONNECTION *cp;
12454 +#ifdef ACCEPT_PERFORMANCE
12455 + struct timeval now1, now2;
12456 + static long atime;
12460 + FD_ZERO(&readmask);
12461 + FD_SET(unix_fd, &readmask);
12463 + FD_SET(inet_fd, &readmask);
12465 + fds = select(Max(unix_fd, inet_fd)+1, &readmask, NULL, NULL, NULL);
12468 + if (errno == EAGAIN || errno == EINTR)
12471 + show_error("%s:select() failed. reason %s",func, strerror(errno));
12478 + if (FD_ISSET(unix_fd, &readmask))
12483 + if (FD_ISSET(inet_fd, &readmask))
12490 + * Note that some SysV systems do not work here. For those
12491 + * systems, we need some locking mechanism for the fd.
12493 + addrlen = sizeof(addr);
12495 +#ifdef ACCEPT_PERFORMANCE
12496 + gettimeofday(&now1,0);
12498 + afd = accept(fd, &addr, &addrlen);
12502 + * "Resource temporarily unavailable" (EAGAIN or EWOULDBLOCK)
12503 + * can be silently ignored.
12505 + if (errno != EAGAIN && errno != EWOULDBLOCK)
12506 + show_error("%s:accept() failed. reason: %s",func, strerror(errno));
12509 +#ifdef ACCEPT_PERFORMANCE
12510 + gettimeofday(&now2,0);
12511 + atime += (now2.tv_sec - now1.tv_sec)*1000000 + (now2.tv_usec - now1.tv_usec);
12513 + if (cnt % 100 == 0)
12515 + show_error("%s:cnt: %d atime: %ld",func, cnt, atime);
12518 +#ifdef PRINT_DEBUG
12519 + show_debug("%s:I am %d accept fd %d",func, getpid(), afd);
12522 + /* set NODELAY and KEEPALIVE options if INET connection */
12527 + if (setsockopt(afd, IPPROTO_TCP, TCP_NODELAY,
12531 + show_error("%s:do_accept: setsockopt() failed: %s",func, strerror(errno));
12535 + if (setsockopt(afd, SOL_SOCKET, SO_KEEPALIVE,
12539 + show_error("%s:do_accept: setsockopt() failed: %s", func,strerror(errno));
12545 + if ((cp = pool_open(afd)) == NULL)
12554 +* read startup packet
12556 +static PGR_StartupPacket *read_startup_packet(POOL_CONNECTION *cp)
12558 + char * func = "read_startup_packet()";
12559 + PGR_StartupPacket *sp;
12560 + PGR_StartupPacket_v2 *sp2;
12565 + sp = (PGR_StartupPacket *)malloc(sizeof(PGR_StartupPacket));
12568 + show_error("%s:read_startup_packet: out of memory",func);
12572 + /* read startup packet length */
12573 + if (pool_read(cp, &len, sizeof(len)))
12578 + len = ntohl(len);
12579 + len -= sizeof(len);
12583 + show_error("%s:read_startup_packet: incorrect packet length (%d)", func,len);
12588 + sp->startup_packet = calloc(len, 1);
12589 + if (!sp->startup_packet)
12591 + show_error("%s:read_startup_packet: out of memory",func);
12596 + /* read startup packet */
12597 + if (pool_read(cp, sp->startup_packet, len))
12604 + memcpy(&protov, sp->startup_packet, sizeof(protov));
12605 + sp->major = ntohl(protov)>>16;
12606 + sp->minor = ntohl(protov) & 0x0000ffff;
12607 + p = sp->startup_packet;
12609 + switch(sp->major)
12611 + case PROTO_MAJOR_V2: /* V2 */
12612 + sp2 = (PGR_StartupPacket_v2 *)(sp->startup_packet);
12614 + sp->database = calloc(SM_DATABASE+1, 1);
12615 + if (!sp->database)
12617 + show_error("%s:read_startup_packet: out of memory",func);
12621 + strncpy(sp->database, sp2->database, SM_DATABASE);
12623 + sp->user = calloc(SM_USER+1, 1);
12626 + show_error("%s:read_startup_packet: out of memory",func);
12630 + strncpy(sp->user, sp2->user, SM_USER);
12634 + case PROTO_MAJOR_V3: /* V3 */
12635 + p += sizeof(int); /* skip protocol version info */
12639 + if (!strcmp("user", p))
12641 + p += (strlen(p) + 1);
12642 + sp->user = strdup(p);
12645 + show_error("%s:read_startup_packet: out of memory",func);
12650 + else if (!strcmp("database", p))
12652 + p += (strlen(p) + 1);
12653 + sp->database = strdup(p);
12654 + if (!sp->database)
12656 + show_error("%s:read_startup_packet: out of memory",func);
12661 + p += (strlen(p) + 1);
12665 + case 1234: /* cancel or SSL request */
12666 + /* set dummy database, user info */
12667 + sp->database = calloc(1, 1);
12668 + if (!sp->database)
12670 + show_error("%s:read_startup_packet: out of memory",func);
12674 + sp->user = calloc(1, 1);
12677 + show_error("%s:read_startup_packet: out of memory",func);
12684 + show_error("%s:read_startup_packet: invalid major no: %d",func, sp->major);
12689 +#ifdef PRINT_DEBUG
12690 + show_debug("%s:Protocol Major: %d Minor: %d database: %s user: %s",
12691 + func,sp->major, sp->minor, sp->database, sp->user);
12698 +* send startup packet
12700 +static int send_startup_packet(POOL_CONNECTION_POOL_SLOT *cp)
12704 + len = htonl(cp->sp->len + sizeof(len));
12705 + pool_write(cp->con, &len, sizeof(len));
12706 + return pool_write_and_flush(cp->con, cp->sp->startup_packet, cp->sp->len);
12710 + * process cancel request
12712 +static void cancel_request(CancelPacket *sp, int secondary_backend)
12714 + char * func = "cancel_request()";
12717 + POOL_CONNECTION *con;
12718 + char hostName[128];
12720 +#ifdef PRINT_DEBUG
12721 + show_debug("%s:Cancel request received",func);
12724 + if (CurrentCluster == NULL)
12728 + if (gethostname(hostName,sizeof(hostName)) < 0)
12730 + show_error("%s:gethostname() failed. (%s)",func,strerror(errno));
12733 + if (secondary_backend)
12735 + if (PGRis_same_host(hostName,CurrentCluster->hostName))
12736 + fd = connect_unix_domain_socket(1);
12738 + fd = connect_inet_domain_socket(1);
12742 + if (PGRis_same_host(hostName,CurrentCluster->hostName))
12743 + fd = connect_unix_domain_socket(0);
12745 + fd = connect_inet_domain_socket(0);
12750 + show_error("%s:Could not create socket for sending cancel request",func);
12754 + con = pool_open(fd);
12758 + len = htonl(sizeof(len) + sizeof(CancelPacket));
12759 + pool_write(con, &len, sizeof(len));
12761 + if (pool_write_and_flush(con, sp, sizeof(CancelPacket)) < 0)
12762 + show_error("%s:Could not send cancel request packet",func);
12766 +static POOL_CONNECTION_POOL *connect_backend(PGR_StartupPacket *sp, POOL_CONNECTION *frontend)
12768 + char * func ="connect_backend()";
12769 + POOL_CONNECTION_POOL *backend;
12771 + /* connect to the backend */
12772 + backend = pool_create_cp();
12773 + if (backend == NULL)
12775 + pool_send_error_message(frontend, sp->major, "XX000", "connection cache is full", "",
12776 + "increace max_pool", __FILE__, __LINE__);
12777 + pool_close(frontend);
12781 + /* mark this is a backend connection */
12782 + backend->slots[0]->con->isbackend = 1;
12784 + * save startup packet info
12786 + backend->slots[0]->sp = sp;
12788 + if (pool_config_replication_enabled)
12790 + backend->slots[1]->con->isbackend = 1;
12791 + backend->slots[1]->con->issecondary_backend = 1;
12793 + * save startup packet info
12795 + backend->slots[1]->sp = sp;
12798 + /* send startup packet */
12799 + if (send_startup_packet(backend->slots[0]) < 0)
12801 + show_error("%s:do_child: fails to send startup packet to the backend",func);
12802 + pool_close(frontend);
12806 + /* send startup packet */
12807 + if (pool_config_replication_enabled)
12809 + if (send_startup_packet(backend->slots[1]) < 0)
12811 + show_error("%s:do_child: fails to send startup packet to the secondary backend",func);
12812 + pool_close(frontend);
12818 + * do authentication stuff
12820 + if (pool_do_auth(frontend, backend))
12822 + pool_close(frontend);
12823 + pool_discard_cp(sp->user, sp->database, sp->major);
12829 +static int send_params(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend)
12831 + char * func = "send_params()";
12833 + char *name, *value;
12834 + int len, sendlen;
12837 + while (pool_get_param(&MASTER(backend)->params, index++, &name, &value) == 0)
12839 + pool_write(frontend, "S", 1);
12840 + len = sizeof(sendlen) + strlen(name) + 1 + strlen(value) + 1;
12841 + sendlen = htonl(len);
12842 + pool_write(frontend, &sendlen, sizeof(sendlen));
12843 + pool_write(frontend, name, strlen(name) + 1);
12844 + pool_write(frontend, value, strlen(value) + 1);
12847 + if (pool_flush(frontend))
12849 + show_error("%s:pool_send_params: pool_flush() failed",func);
12856 + * ending function of child process
12859 +child_end(int sig)
12861 + PGRsignal(sig,SIG_IGN);
12866 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/cluster_table.c pgcluster-1.7.0rc7/src/pgcluster/pglb/cluster_table.c
12867 --- postgresql-8.2.4/src/pgcluster/pglb/cluster_table.c 1970-01-01 01:00:00.000000000 +0100
12868 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/cluster_table.c 2007-02-18 22:52:17.000000000 +0100
12870 +/*--------------------------------------------------------------------
12875 + * This file is composed of the functions to use a cluster table.
12877 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
12878 + *--------------------------------------------------------------------
12881 + * Permission to use, copy, modify, and distribute this software and
12882 + * its documentation for any purpose and without fee is hereby
12883 + * granted, provided that the above copyright notice appear in all
12884 + * copies and that both that copyright notice and this permission
12885 + * notice appear in supporting documentation, and that the name of the
12886 + * author not be used in advertising or publicity pertaining to
12887 + * distribution of the software without specific, written prior
12888 + * permission. The author makes no representations about the
12889 + * suitability of this software for any purpose. It is provided "as
12890 + * is" without express or implied warranty.
12893 +#include <stdio.h>
12894 +#include <stdarg.h>
12895 +#include <string.h>
12896 +#include <stdlib.h>
12897 +#include <unistd.h>
12898 +#include <signal.h>
12899 +#include <sys/wait.h>
12900 +#include <sys/time.h>
12901 +#include <ctype.h>
12902 +#include <sys/types.h>
12903 +#include <sys/stat.h>
12904 +#include <sys/socket.h>
12905 +#include <sys/ipc.h>
12906 +#include <sys/shm.h>
12907 +#include <sys/sem.h>
12908 +#include <sys/msg.h>
12909 +#include <netdb.h>
12910 +#include <netinet/in.h>
12911 +#include <errno.h>
12912 +#include <fcntl.h>
12914 +#include <sys/param.h>
12915 +#include <sys/select.h>
12916 +#include <arpa/inet.h>
12917 +#include <sys/file.h>
12919 +#ifdef HAVE_NETINET_TCP_H
12920 +#include <netinet/tcp.h>
12923 +#include "replicate_com.h"
12927 +/*--------------------------------------
12928 + * PROTOTYPE DECLARATION
12929 + *--------------------------------------
12931 +int PGRis_cluster_alive(void) ;
12932 +ClusterTbl * PGRscan_cluster(void);
12933 +void PGRset_key_of_cluster(ClusterTbl * ptr, RecoveryPacket * packet);
12934 +ClusterTbl * PGRadd_cluster_tbl (ClusterTbl * conf_data);
12935 +ClusterTbl * PGRset_status_on_cluster_tbl (int status, ClusterTbl * ptr);
12936 +ClusterTbl * PGRsearch_cluster_tbl(ClusterTbl * conf_data);
12938 +static int set_cluster_tbl(ClusterTbl * ptr , ClusterTbl * conf_data);
12939 +static ClusterTbl * search_free_cluster_tbl(void );
12940 +static void write_cluster_status_file(ClusterTbl * ptr);
12942 +int PGRis_cluster_alive(void)
12944 + ClusterTbl * ptr = NULL;
12946 + ptr = Cluster_Tbl;
12948 + PGRsem_lock(ClusterSemid,MAX_DB_SERVER);
12949 + while (ptr->useFlag != TBL_END)
12951 + if ((ptr->useFlag == TBL_USE) || (ptr->useFlag == TBL_INIT))
12957 + PGRsem_unlock(ClusterSemid,MAX_DB_SERVER);
12958 + return use==0 ? STATUS_ERROR : STATUS_OK;
12962 +PGRscan_cluster(void)
12964 + char * func = "PGRscan_cluster";
12965 + ClusterTbl * ptr = NULL;
12966 + ClusterTbl * rtn = NULL;
12967 + int min_use_rate = 100;
12968 + int use_rate = 0;
12972 + ptr = Cluster_Tbl;
12975 + show_error("%s:Cluster Table is not initialize",func);
12976 + return (ClusterTbl *)NULL;
12978 +#ifdef PRINT_DEBUG
12979 + show_debug("%s:%d ClusterDB can be used",func,ClusterNum);
12981 + PGRsem_lock(ClusterSemid,MAX_DB_SERVER);
12982 + while ((cnt <= ClusterNum) && (ptr->useFlag != TBL_END))
12984 +#ifdef PRINT_DEBUG
12985 + show_debug("%s:%s [%d],useFlag->%d max->%d use_num->%d\n",
12986 + func, ptr->hostName,ptr->port,ptr->useFlag,ptr->max_connect,ptr->use_num);
12989 + if ((ptr->useFlag != TBL_USE) && (ptr->useFlag != TBL_INIT))
12994 + if (ptr->max_connect <= ptr->use_num)
12999 + if (ptr->use_num > 0)
13001 + use_rate = ptr->use_num * 100 / ptr->max_connect ;
13009 + if (min_use_rate > use_rate)
13011 + min_use_rate = use_rate;
13019 + if (rtn->useFlag == TBL_INIT)
13021 + PGRset_status_on_cluster_tbl (TBL_USE,rtn);
13024 + PGRsem_unlock(ClusterSemid,MAX_DB_SERVER);
13029 +PGRset_key_of_cluster(ClusterTbl * ptr, RecoveryPacket * packet)
13031 + int max_connect = 0;
13034 + memset(ptr,0,sizeof(ClusterTbl));
13035 + memcpy(ptr->hostName,packet->hostName,sizeof(ptr->hostName));
13036 + max_connect = ntohs(packet->max_connect);
13037 + if (max_connect >= 0)
13039 + ptr->max_connect = max_connect;
13043 + ptr->max_connect = DEFAULT_CONNECT_NUM;
13045 + port = ntohs(packet->port);
13048 + ptr->port = port;
13052 + ptr->port = DEFAULT_PORT;
13057 +PGRadd_cluster_tbl (ClusterTbl * conf_data)
13059 + char * func = "PGRadd_cluster_tbl()";
13060 + ClusterTbl * ptr;
13062 + ptr = PGRsearch_cluster_tbl(conf_data);
13063 + if ((ptr != NULL) &&
13064 + ((ptr->useFlag == TBL_USE ) || ((ptr->useFlag == TBL_INIT))))
13066 + ptr->max_connect = conf_data->max_connect;
13067 + ptr->use_num = 0;
13071 + ptr = search_free_cluster_tbl();
13072 + if (ptr == (ClusterTbl *) NULL)
13074 + show_error("%s:no more free space in cluster table",func);
13075 + return (ClusterTbl *)NULL;
13077 + if (ClusterNum < Max_DB_Server)
13079 + set_cluster_tbl( ptr, conf_data);
13082 + return (ClusterTbl *)NULL;
13086 +PGRset_status_on_cluster_tbl (int status, ClusterTbl * ptr)
13088 +#ifdef PRINT_DEBUG
13089 + char * func = "PGRset_status_on_cluster_tbl()";
13092 + if (ptr != (ClusterTbl*)NULL)
13094 + if (ptr->useFlag != status)
13096 +#ifdef PRINT_DEBUG
13097 + show_debug("%s:host:%s port:%d max:%d use:%d status%d",
13098 + func, ptr->hostName,ptr->port,ptr->max_connect,ptr->useFlag,status);
13100 + ptr->useFlag = status;
13101 + write_cluster_status_file(ptr);
13102 + if (status == TBL_INIT)
13104 + if (ClusterNum < Max_DB_Server)
13107 + else if (status != TBL_STOP)
13109 + if (ClusterNum > 0)
13118 +write_cluster_status_file(ClusterTbl * ptr)
13120 + switch( ptr->useFlag)
13123 + PGRwrite_log_file(StatusFp,"port(%d) host:%s free",
13128 + PGRwrite_log_file(StatusFp,"port(%d) host:%s initialize",
13133 + PGRwrite_log_file(StatusFp,"port(%d) host:%s start use",
13138 + PGRwrite_log_file(StatusFp,"port(%d) host:%s error",
13143 + PGRwrite_log_file(StatusFp,"port(%d) host:%s end",
13151 +PGRsearch_cluster_tbl(ClusterTbl * conf_data)
13157 + ptr = Cluster_Tbl;
13158 + while ((cnt <= ClusterNum) && (rec_num < Max_DB_Server))
13160 + if (ptr->port > 0)
13162 + if ((!strcmp(ptr->hostName,conf_data->hostName)) &&
13163 + (ptr->port == conf_data->port))
13167 + if ((ptr->useFlag == TBL_USE) || (ptr->useFlag == TBL_INIT))
13175 + return (ClusterTbl *)NULL;
13179 +set_cluster_tbl(ClusterTbl * ptr , ClusterTbl * conf_data)
13183 + rec_no = ptr->rec_no;
13184 + memcpy(ptr->hostName,conf_data->hostName,sizeof(ptr->hostName));
13185 + ptr->max_connect = conf_data->max_connect;
13186 + ptr->port = conf_data->port;
13187 + ptr->use_num = conf_data->use_num;
13188 + ptr->rate = conf_data->rate;
13189 + PGRset_status_on_cluster_tbl (TBL_INIT, ptr);
13191 + return STATUS_OK;
13194 +static ClusterTbl *
13195 +search_free_cluster_tbl(void )
13200 + ptr = Cluster_Tbl;
13201 + while ((cnt <= ClusterNum ) && (cnt < Max_DB_Server))
13203 + if ((ptr->useFlag == TBL_FREE) || (ptr->useFlag == TBL_ERROR))
13210 + return (ClusterTbl *)NULL;
13213 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/lifecheck.c pgcluster-1.7.0rc7/src/pgcluster/pglb/lifecheck.c
13214 --- postgresql-8.2.4/src/pgcluster/pglb/lifecheck.c 1970-01-01 01:00:00.000000000 +0100
13215 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/lifecheck.c 2007-03-01 16:27:15.000000000 +0100
13217 +/*--------------------------------------------------------------------
13222 + * This file is composed of the functions to call with the source
13223 + * at pgreplicate for the lifecheck.
13225 + * Portions Copyright (c) 2003-2007, Atsushi Mitani
13226 + *--------------------------------------------------------------------
13228 +#include "postgres.h"
13229 +#include "postgres_fe.h"
13231 +#include <pthread.h>
13232 +#include <stdio.h>
13233 +#include <stdarg.h>
13234 +#include <sys/types.h>
13235 +#include <fcntl.h>
13236 +#include <errno.h>
13237 +#include <ctype.h>
13239 +#include <sys/ipc.h>
13240 +#include <sys/shm.h>
13241 +#include <sys/sem.h>
13242 +#include <sys/msg.h>
13243 +#include <signal.h>
13245 +#include "libpq-fe.h"
13246 +#include "libpq-int.h"
13247 +#include "fe-auth.h"
13249 +#include <sys/socket.h>
13250 +#include <unistd.h>
13251 +#include <netdb.h>
13252 +#include <arpa/inet.h>
13254 +#ifdef HAVE_NETINET_TCP_H
13255 +#include <netinet/tcp.h>
13258 +#ifdef HAVE_SYS_SELECT_H
13259 +#include <sys/select.h>
13263 +#ifdef HAVE_CRYPT_H
13264 +#include <crypt.h>
13269 +#include "mb/pg_wchar.h"
13272 +#include "access/xact.h"
13273 +#include "lib/dllist.h"
13274 +#include "libpq/pqformat.h"
13275 +#include "replicate_com.h"
13278 +#define PING_DB "template1"
13279 +#define PING_QUERY "SELECT 1"
13281 +static ClusterTbl * PGR_Cluster_DB_4_Lifecheck = (ClusterTbl*)NULL;
13283 +/*--------------------------------------
13284 + * PROTOTYPE DECLARATION
13285 + *--------------------------------------
13287 +int PGRlifecheck_main(int fork_wait_time);
13288 +PGconn * PGRcreateConn( char * host, char * port,char * database, char * userName, char * password, char * md5Salt, char * cryptSalt );
13290 +static bool is_started_loadbalance(void);
13291 +static void set_timeout(SIGNAL_ARGS);
13292 +static int lifecheck_loop(void);
13293 +static int ping_cluster(PGconn * conn);
13294 +static void set_cluster_status(ClusterTbl * host_ptr, int status);
13297 +PGRlifecheck_main(int fork_wait_time)
13299 + bool started = false;
13303 + pgid = getpgid(0);
13307 + return STATUS_OK;
13311 + * in child process,
13312 + * call recovery module
13316 + PGRsignal(SIGHUP, PGRexit_subprocess);
13317 + PGRsignal(SIGTERM, PGRexit_subprocess);
13318 + PGRsignal(SIGINT, PGRexit_subprocess);
13319 + PGRsignal(SIGQUIT, PGRexit_subprocess);
13320 + PGRsignal(SIGALRM, set_timeout);
13322 + if (fork_wait_time > 0) {
13323 + sleep(fork_wait_time);
13326 + if (PGRuserName == NULL)
13328 + PGRuserName = getenv("LOGNAME");
13329 + if (PGRuserName == NULL)
13331 + PGRuserName = getenv("USER");
13332 + if (PGRuserName == NULL)
13333 + PGRuserName = "postgres";
13339 + started = is_started_loadbalance();
13342 + /* wait next lifecheck as interval */
13343 + sleep(PGR_Lifecheck_Interval);
13347 + /* life check to all cluster dbs */
13348 + lifecheck_loop();
13350 + /* wait next lifecheck as interval */
13351 + sleep(PGR_Lifecheck_Interval);
13353 + return STATUS_OK;
13357 +is_started_loadbalance(void)
13359 + ClusterTbl * host_ptr = (ClusterTbl*)NULL;
13361 + host_ptr = Cluster_Tbl;
13362 + if (host_ptr == NULL)
13366 + while(host_ptr->useFlag != TBL_END)
13368 + if (host_ptr->useFlag == TBL_USE)
13378 +set_timeout(SIGNAL_ARGS)
13380 + if (PGR_Cluster_DB_4_Lifecheck != NULL)
13382 + set_cluster_status( PGR_Cluster_DB_4_Lifecheck, TBL_ERROR);
13384 + PGRsignal(SIGALRM, set_timeout);
13388 +lifecheck_loop(void)
13390 + ClusterTbl * host_ptr = (ClusterTbl*)NULL;
13392 + char * host = NULL;
13393 + PGconn * conn = NULL;
13395 + host_ptr = Cluster_Tbl;
13396 + if (host_ptr == NULL)
13398 + return STATUS_ERROR;
13401 + while(host_ptr->useFlag != TBL_END)
13404 + * check the status of the cluster DB
13406 + if ((host_ptr->useFlag != TBL_USE) || (host_ptr->useFlag != TBL_INIT))
13411 + snprintf(port,sizeof(port),"%d", host_ptr->port);
13412 + host = (char *)(host_ptr->hostName);
13413 + /* set host data */
13414 + PGR_Cluster_DB_4_Lifecheck = host_ptr;
13416 + /* set alarm as lifecheck timeout */
13417 + alarm(PGR_Lifecheck_Timeout);
13420 + conn = PGRcreateConn(host,port, PING_DB ,PGRuserName,"","","");
13421 + if ((conn != NULL) &&
13422 + (ping_cluster(conn) == STATUS_OK))
13424 + set_cluster_status(host_ptr,TBL_USE);
13428 + set_cluster_status(host_ptr,TBL_ERROR);
13430 + /* reset alarm */
13438 + return STATUS_OK;
13442 +ping_cluster(PGconn * conn)
13445 + PGresult * res = (PGresult *)NULL;
13447 + res = PQexec(conn, PING_QUERY );
13449 + status = PQresultStatus(res);
13454 + if ((status == PGRES_NONFATAL_ERROR ) ||
13455 + (status == PGRES_FATAL_ERROR ))
13457 + return STATUS_ERROR;
13459 + return STATUS_OK;
13463 +PGRcreateConn( char * host, char * port,char * database, char * userName, char * password, char * md5Salt, char * cryptSalt ) /* open a libpq connection, retrying while it is CONNECTION_BAD; returns NULL once the retry limit is exceeded */
13466 + PGconn * conn = NULL;
13469 + memset(pwd,0,sizeof(pwd));
13470 + if (*password != '\0') /* NOTE(review): password is dereferenced without a NULL check; the visible caller passes "" */
13472 + if ((strncmp(password,"md5",3) == 0) && (md5Salt != NULL)) /* NOTE(review): four salt bytes are read below; confirm md5Salt always holds at least 4 bytes */
13474 + snprintf(pwd,sizeof(pwd),"%s(%d)(%d)(%d)(%d)",password, /* bounded: an over-long password can no longer overflow pwd */
13475 + *md5Salt,*(md5Salt+1),*(md5Salt+2),*(md5Salt+3));
13479 + strncpy(pwd,password,sizeof(pwd) - 1); /* pwd was zeroed above, so the last byte stays '\0' */
13482 + conn = PQsetdbLogin(host, port, NULL, NULL, database, userName, pwd);
13483 + /* check to see that the backend Connection was successfully made */
13485 + while (PQstatus(conn) == CONNECTION_BAD)
13487 + if (conn != NULL)
13492 + conn = PQsetdbLogin(host, port, NULL, NULL, database, userName, pwd);
13493 + if (cnt > PGLB_CONNECT_RETRY_TIME ) /* give up after too many attempts */
13495 + if (conn != NULL)
13500 + return (PGconn *)NULL;
13503 + if(PQstatus(conn) == CONNECTION_BAD && h_errno==2) /* NOTE(review): 2 is presumably TRY_AGAIN from <netdb.h> - confirm */
13505 + usleep(PGR_SEND_WAIT_MSEC);
13508 + else if(!strncasecmp(PQerrorMessage(conn),"FATAL: Sorry, too many clients already",30) ||
13509 + !strncasecmp(PQerrorMessage(conn),"FATAL: Non-superuser connection limit",30) )
13511 + usleep(PGR_SEND_WAIT_MSEC);
13514 + else if(!strncasecmp(PQerrorMessage(conn),"FATAL: The database system is starting up",40) )
13516 + usleep(PGR_SEND_WAIT_MSEC);
13520 + usleep(PGR_SEND_WAIT_MSEC);
13528 +set_cluster_status(ClusterTbl * host_ptr, int status) /* record a lifecheck result for host_ptr, counting consecutive TBL_ERROR results */
13530 + if (host_ptr == NULL)
13532 + if (status == TBL_ERROR)
13534 + host_ptr->retry_count ++;
13535 + if (host_ptr->retry_count > PGLB_CONNECT_RETRY_TIME ) /* publish the error only after the retry limit is exceeded */
13537 + PGRset_status_on_cluster_tbl(status, host_ptr);
13542 + host_ptr->retry_count = 0; /* any non-error status resets the failure counter */
13543 + PGRset_status_on_cluster_tbl(status, host_ptr);
13546 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/load_balance.c pgcluster-1.7.0rc7/src/pgcluster/pglb/load_balance.c
13547 --- postgresql-8.2.4/src/pgcluster/pglb/load_balance.c 1970-01-01 01:00:00.000000000 +0100
13548 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/load_balance.c 2007-02-18 22:52:17.000000000 +0100
13550 +/*--------------------------------------------------------------------
13555 + * This file contains the load balance functions,
13556 + * with or without connection pooling
13558 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
13559 + *--------------------------------------------------------------------
13562 + * Permission to use, copy, modify, and distribute this software and
13563 + * its documentation for any purpose and without fee is hereby
13564 + * granted, provided that the above copyright notice appear in all
13565 + * copies and that both that copyright notice and this permission
13566 + * notice appear in supporting documentation, and that the name of the
13567 + * author not be used in advertising or publicity pertaining to
13568 + * distribution of the software without specific, written prior
13569 + * permission. The author makes no representations about the
13570 + * suitability of this software for any purpose. It is provided "as
13571 + * is" without express or implied warranty.
13574 +#include "postgres.h"
13575 +#include <stdio.h>
13576 +#include <stdlib.h>
13577 +#include <string.h>
13578 +#include <unistd.h>
13579 +#include <signal.h>
13580 +#include <sys/wait.h>
13581 +#include <sys/time.h>
13582 +#include <ctype.h>
13583 +#include <sys/types.h>
13584 +#include <sys/stat.h>
13585 +#include <sys/socket.h>
13586 +#include <sys/ipc.h>
13587 +#include <sys/sem.h>
13588 +#include <netdb.h>
13589 +#include <netinet/in.h>
13590 +#include <errno.h>
13591 +#include <fcntl.h>
13593 +#include <sys/param.h>
13594 +#include <sys/select.h>
13595 +#include <arpa/inet.h>
13596 +#include <sys/file.h>
13598 +#ifdef HAVE_NETINET_TCP_H
13599 +#include <netinet/tcp.h>
13602 +#include "replicate_com.h"
13605 +/*--------------------------------------
13606 + * PROTOTYPE DECLARATION
13607 + *--------------------------------------
13609 +int PGRload_balance(void);
13610 +int PGRload_balance_with_pool(void);
13611 +char PGRis_connection_full(ClusterTbl * ptr);
13612 +void PGRrelease_connection(ClusterTbl * ptr);
13613 +void PGRchild_wait(int sig);
13615 +/*--------------------------------------------------------------------
13617 + * PGRload_balance()
13619 + * load balance module that uses normal (non-pooled) connections
13624 + * NG: STATUS_ERROR
13625 + *--------------------------------------------------------------------
13628 +PGRload_balance(void) /* pick a cluster server and serve the client in a child process without the connection pool */
13630 + char * func = "PGRload_balance()";
13634 + ClusterTbl * cluster_p = NULL;
13636 + PGRsignal(SIGCHLD, PGRchild_wait);
13637 + /* get the least loaded cluster server info */
13638 + cluster_p = PGRscan_cluster();
13640 + while (cluster_p == NULL ) /* retry until a cluster server is returned or the retry limit is hit */
13642 + if ( count > PGLB_CONNECT_RETRY_TIME)
13644 + show_error("%s:no cluster available",func);
13645 + return STATUS_ERROR;
13647 + cluster_p = PGRscan_cluster();
13651 + pgid = getpgid((pid_t)0); /* remember the process group so the child can rejoin it via setpgid() below */
13655 + show_error("%s:fork() failed. (%s)",func,strerror(errno));
13660 + setpgid((pid_t)0,pgid);
13661 + CurrentCluster = cluster_p;
13663 + if (pool_init_cp())
13665 + show_error("%s:pool_init_cp failed",func);
13668 + PGRsem_lock(ClusterSemid,cluster_p->rec_no);
13669 + if (PGRget_child_status(getpid()) == STATUS_ERROR) /* not registered yet: add this process to the child table */
13671 + PGRadd_child_tbl(cluster_p, getpid(), TBL_USE);
13673 + PGRsem_unlock(ClusterSemid,cluster_p->rec_no);
13674 + PGRdo_child(NOT_USE_CONNECTION_POOL );
13675 + PGRrelease_connection(cluster_p);
13676 + PGRset_status_to_child_tbl(getpid(), TBL_FREE);
13679 + else if (pid > 0)
13681 + PGRsem_lock(ClusterSemid,cluster_p->rec_no);
13682 + if (PGRget_child_status(pid) == STATUS_ERROR) /* register the child here too if it has not done so itself */
13684 + PGRadd_child_tbl(cluster_p, pid, TBL_USE);
13686 + PGRsem_unlock(ClusterSemid,cluster_p->rec_no);
13687 + status = PGRget_child_status(pid);
13688 + while (status == TBL_USE) /* poll the child table until the child's status is no longer TBL_USE */
13690 + status = PGRget_child_status(pid);
13693 + return STATUS_OK;
13697 + return STATUS_ERROR;
13701 +/*--------------------------------------------------------------------
13703 + * PGRload_balance_with_pool()
13705 + * load balance module that uses the connection pooling system
13710 + * NG: STATUS_ERROR
13711 + *--------------------------------------------------------------------
13714 +PGRload_balance_with_pool(void) /* pick a cluster server and hand the client to an existing child process via SIGUSR1 */
13716 + char * func = "PGRload_balance_with_pool()";
13719 + ClusterTbl * cluster_p = NULL;
13720 + int status = TBL_USE;
13722 + /* get the least loaded cluster server info */
13723 + cluster_p = PGRscan_cluster();
13725 + while (cluster_p == NULL ) /* retry until a cluster server is returned or the retry limit is hit */
13727 + if ( count > PGLB_CONNECT_RETRY_TIME)
13729 + show_error("%s:no cluster available",func);
13730 + PGRreturn_no_connection_error();
13731 + return STATUS_ERROR;
13733 + cluster_p = PGRscan_cluster();
13736 + pid = PGRscan_child_tbl(cluster_p);
13737 + if ((pid == 0) || (pid == STATUS_ERROR))
13739 + show_error("%s:no child process available",func);
13740 + return STATUS_ERROR;
13742 + kill(pid,SIGUSR1); /* signal the selected child */
13744 + status = PGRget_child_status(pid);
13745 + while (status == TBL_USE) /* poll the child table until the child's status is no longer TBL_USE */
13747 + status = PGRget_child_status(pid);
13751 + return STATUS_OK;
13756 +PGRis_connection_full(ClusterTbl * ptr) /* compares ptr->use_num with ptr->max_connect under the per-record semaphore */
13764 + PGRsem_lock(ClusterSemid,ptr->rec_no);
13765 + if (ptr->max_connect > ptr->use_num) /* true while the connection limit has not been reached */
13769 + PGRsem_unlock(ClusterSemid,ptr->rec_no);
13774 +PGRrelease_connection(ClusterTbl * ptr) /* acts on ptr->use_num while holding semaphore index MAX_DB_SERVER */
13780 + PGRsem_lock(ClusterSemid,MAX_DB_SERVER); /* NOTE(review): PGRis_connection_full locks index ptr->rec_no instead - confirm the two are meant to differ */
13781 + if (ptr->use_num > 0)
13785 + PGRsem_unlock(ClusterSemid,MAX_DB_SERVER);
13789 +PGRchild_wait(int sig) /* SIGCHLD handler: reap exited children without blocking */
13795 + pid = waitpid(-1,&ret,WNOHANG);
13796 + if ((pid <= 0) && (WTERMSIG(ret) > 0)) /* NOTE(review): when pid <= 0 waitpid may not have written ret - confirm ret is initialised */
13800 + } while(pid > 0); /* keep reaping while waitpid returns a child pid */
13802 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/main.c pgcluster-1.7.0rc7/src/pgcluster/pglb/main.c
13803 --- postgresql-8.2.4/src/pgcluster/pglb/main.c 1970-01-01 01:00:00.000000000 +0100
13804 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/main.c 2007-03-01 16:27:15.000000000 +0100
13806 +/*--------------------------------------------------------------------
13811 + * This file is composed of the main function of pglb.
13813 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
13814 + *--------------------------------------------------------------------
13817 + * Permission to use, copy, modify, and distribute this software and
13818 + * its documentation for any purpose and without fee is hereby
13819 + * granted, provided that the above copyright notice appear in all
13820 + * copies and that both that copyright notice and this permission
13821 + * notice appear in supporting documentation, and that the name of the
13822 + * author not be used in advertising or publicity pertaining to
13823 + * distribution of the software without specific, written prior
13824 + * permission. The author makes no representations about the
13825 + * suitability of this software for any purpose. It is provided "as
13826 + * is" without express or implied warranty.
13829 +#include "postgres.h"
13830 +#include <stdio.h>
13831 +#include <stdarg.h>
13832 +#include <string.h>
13833 +#include <stdlib.h>
13834 +#include <unistd.h>
13835 +#include <signal.h>
13836 +#include <sys/wait.h>
13837 +#include <sys/time.h>
13838 +#include <sys/types.h>
13839 +#include <sys/stat.h>
13840 +#include <sys/socket.h>
13841 +#include <sys/ipc.h>
13842 +#include <sys/shm.h>
13843 +#include <sys/sem.h>
13844 +#include <sys/msg.h>
13845 +#include <netdb.h>
13846 +#include <errno.h>
13847 +#include <fcntl.h>
13849 +#include <sys/param.h>
13850 +#include <sys/select.h>
13851 +#include <netinet/in.h>
13852 +#include <arpa/inet.h>
13853 +#include <sys/file.h>
13854 +#include <arpa/inet.h>
13856 +#ifdef HAVE_NETINET_TCP_H
13857 +#include <netinet/tcp.h>
13860 +#ifdef HAVE_CRYPT_H
13861 +#include <crypt.h>
13864 +#ifdef HAVE_GETOPT_H
13865 +#include <getopt.h>
13868 +#include "replicate_com.h"
13873 +#define IPC_NMAXSEM (32)
13874 +/*--------------------------------------
13875 + * GLOBAL VARIABLE DECLARATION
13876 + *--------------------------------------
13878 +/* for replicate_com.h */
13879 +ConfDataType * ConfData_Top = (ConfDataType *)NULL;
13880 +ConfDataType * ConfData_End = (ConfDataType *)NULL;
13881 +int MapTableShmid = -1;
13882 +int LifeCheckStartShmid = -1;
13883 +char * LifeCheckStartFlag = NULL;
13884 +int LifeCheckTimeOut = 10;
13885 +FILE * StatusFp = (FILE *)NULL;
13886 +char * PGRStatusFileName = NULL;
13887 +char * PGRLogFileName = NULL;
13888 +char * PGRuserName = NULL;
13889 +int Log_Print = 0;
13890 +int Debug_Print = 0;
13892 +char * ResolvedName = NULL;
13893 +int Recv_Port_Number = 0;
13894 +int Recovery_Port_Number = 0;
13895 +uint16_t LifeCheck_Port_Number = 0;
13896 +int Use_Connection_Pool = 0;
13898 +int Connection_Life_Time = 0;
13899 +int Max_DB_Server = 0;
13900 +int MaxBackends = 0;
13901 +ClusterTbl * Cluster_Tbl = (ClusterTbl *)NULL;
13902 +int ClusterNum = 0;
13903 +int ClusterShmid = 0;
13904 +int ClusterSemid = 0;
13905 +ChildTbl * Child_Tbl = (ChildTbl *)NULL;
13906 +int ChildShmid = 0;
13907 +char * PGR_Data_Path = NULL;
13908 +char * PGR_Write_Path = NULL;
13909 +char * Backend_Socket_Dir = NULL;
13910 +FrontSocket Frontend_FD;
13911 +ClusterTbl * CurrentCluster = NULL;
13912 +int PGR_Lifecheck_Timeout = 3;
13913 +int PGR_Lifecheck_Interval = 15;
13915 +int fork_wait_time = 0;
13917 +extern char *optarg;
13919 +/*--------------------------------------
13920 + * PROTOTYPE DECLARATION
13921 + *--------------------------------------
13923 +static int init_pglb(char * path);
13924 +static void pglb_exit(int signal_args);
13925 +static void load_balance_main(void);
13926 +static void daemonize(void);
13927 +static void write_pid_file(void);
13928 +static void stop_pglb(void);
13929 +static int is_exist_pid_file(void);
13930 +static ClusterTbl * scan_cluster_by_pid(pid_t pid);
13931 +static void usage(void);
13932 +static void close_child(int signal_args);
13934 +void PGRrecreate_child(int signal_args);
13935 +void PGRexit_subprocess(int sig);
13937 +/*--------------------------------------------------------------------
13941 + * Reading of the setup file
13942 + * and the initialization of the memory area.
13944 + * char * path: path of the setup file (I)
13947 + * NG: STATUS_ERROR
13948 + *--------------------------------------------------------------------
13951 +init_pglb(char * path)
13953 + char * func = "init_pglb()";
13955 + ConfDataType * conf;
13956 + ClusterTbl cluster_tbl[MAX_DB_SERVER];
13960 + int max_connect = 0;
13961 + union semun sem_arg;
13965 + * read configuration file
13967 + if (path == NULL)
13971 + if (PGR_Get_Conf_Data(path,PGLB_CONF_FILE) != STATUS_OK)
13973 + show_error("%s:PGR_Get_Conf_Data failed",func);
13974 + return STATUS_ERROR;
13977 + size = sizeof(LogFileInf);
13978 + LogFileData = (LogFileInf *) malloc(size);
13979 + if (LogFileData == NULL)
13981 + show_error("%s:malloc() failed. reason: %s", func,strerror(errno));
13982 + return STATUS_ERROR;
13984 + memset(LogFileData,0,size);
13986 + /* cluster db status file open */
13987 + if (PGRStatusFileName == NULL)
13989 + snprintf(fname,sizeof(fname),"%s/%s",PGR_Write_Path,PGLB_STATUS_FILE);
13993 + memcpy(fname,PGRStatusFileName,sizeof(fname));
13995 + StatusFp = fopen(fname, "a");
13996 + if (StatusFp == NULL)
13998 + show_error("%s:open() %s file failed. (%s)",
13999 + func,fname, strerror(errno));
14003 + Backend_Socket_Dir = malloc(128);
14004 + if (Backend_Socket_Dir == NULL)
14006 + show_error("%s:malloc() failed. (%s)",func,strerror(errno));
14007 + return STATUS_ERROR;
14009 + memset(Backend_Socket_Dir,0,128);
14010 + /* set initiarize data */
14011 + strcpy(Backend_Socket_Dir,"/tmp");
14013 + Connection_Life_Time = 0;
14014 + Use_Connection_Pool = 0;
14016 + conf = ConfData_Top;
14017 + while (conf != (ConfDataType *)NULL)
14019 + /* get cluster db servers name */
14020 + if (!strcmp(conf->table,CLUSTER_SERVER_TAG))
14022 + rec_no = conf->rec_no;
14023 + if (!strcmp(conf->key,HOST_NAME_TAG))
14025 + memcpy(cluster_tbl[rec_no].hostName,conf->value,sizeof(cluster_tbl[rec_no].hostName));
14026 + conf = (ConfDataType*)conf->next;
14029 + if (!strcmp(conf->key,PORT_TAG))
14031 + cluster_tbl[rec_no].port = atoi(conf->value);
14032 + conf = (ConfDataType*)conf->next;
14035 + if (!strcmp(conf->key,MAX_CONNECT_TAG))
14037 + cluster_tbl[rec_no].max_connect = atoi(conf->value);
14038 + conf = (ConfDataType*)conf->next;
14042 + /* get logging file data */
14043 + else if (!strcmp(conf->table, LOG_INFO_TAG))
14045 + if (!strcmp(conf->key, FILE_NAME_TAG))
14047 + strncpy(LogFileData->file_name, conf->value ,sizeof(LogFileData->file_name));
14048 + LogFileData->fp = NULL;
14049 + conf = (ConfDataType*)conf->next;
14052 + if (!strcmp(conf->key, FILE_SIZE_TAG))
14057 + len = strlen(conf->value);
14058 + ptr = conf->value;
14059 + for (i = 0; i < len ; i ++,ptr++)
14061 + if ((! isdigit(*ptr)) && (! isspace(*ptr)))
14071 + unit = 1024*1024;
14075 + unit = 1024*1024*1024;
14082 + LogFileData->max_size = atoi(conf->value) * unit;
14083 + conf = (ConfDataType*)conf->next;
14086 + if (!strcmp(conf->key, LOG_ROTATION_TAG))
14088 + LogFileData->rotation = atoi(conf->value);
14089 + conf = (ConfDataType*)conf->next;
14095 + if (!strcmp(conf->key,HOST_NAME_TAG))
14098 + ip=PGRget_ip_by_name(conf->value);
14099 + if (ResolvedName == NULL)
14101 + ResolvedName = malloc(ADDRESS_LENGTH);
14103 + if (ResolvedName == NULL)
14109 + memset(ResolvedName,0,ADDRESS_LENGTH);
14112 + sprintf(ResolvedName,
14115 + (ip >> 8) & 0xff ,
14116 + (ip >> 16) & 0xff ,
14117 + (ip >> 24) & 0xff );
14118 + conf = (ConfDataType*)conf->next;
14121 + /* get port number for receive querys */
14122 + else if (!strcmp(conf->key,RECV_PORT_TAG))
14124 + Recv_Port_Number = atoi(conf->value);
14125 + conf = (ConfDataType*)conf->next;
14128 + /* get port number for recovery session */
14129 + else if (!strcmp(conf->key,RECOVERY_PORT_TAG))
14131 + Recovery_Port_Number = atoi(conf->value);
14132 + conf = (ConfDataType*)conf->next;
14135 + else if (!strcmp(conf->key,MAX_CLUSTER_TAG))
14137 + Max_DB_Server = atoi(conf->value);
14138 + conf = (ConfDataType*)conf->next;
14141 + else if (!strcmp(conf->key,USE_CONNECTION_POOL_TAG))
14143 + if (!strcmp(conf->value,"yes"))
14145 + Use_Connection_Pool = 1;
14147 + conf = (ConfDataType*)conf->next;
14150 + else if (!strcmp(conf->key,MAX_POOL_TAG))
14152 + Max_Pool = atoi(conf->value);
14153 + if (Max_Pool < 0)
14155 + conf = (ConfDataType*)conf->next;
14158 + else if (!strcmp(conf->key,CONNECTION_LIFE_TIME))
14160 + Connection_Life_Time = atoi(conf->value);
14161 + if (Connection_Life_Time < 0)
14162 + Connection_Life_Time = 0;
14163 + conf = (ConfDataType*)conf->next;
14166 + else if (!strcmp(conf->key,BACKEND_SOCKET_DIR_TAG))
14168 + strncpy(Backend_Socket_Dir,conf->value,128);
14169 + conf = (ConfDataType*)conf->next;
14172 + else if (!STRCMP(conf->key,LIFECHECK_TIMEOUT_TAG))
14174 + /* get lifecheck timeout */
14175 + PGR_Lifecheck_Timeout = PGRget_time_value(conf->value);
14176 + if ((PGR_Lifecheck_Timeout < 1) || (PGR_Lifecheck_Timeout > 3600))
14178 + show_error("%s is out of range. It should be between 1sec-1hr.\n",LIFECHECK_TIMEOUT_TAG);
14179 + return STATUS_ERROR;
14181 + conf = (ConfDataType*)conf->next;
14184 + else if (!STRCMP(conf->key,LIFECHECK_INTERVAL_TAG))
14186 + /* get lifecheck interval */
14187 + PGR_Lifecheck_Interval = PGRget_time_value(conf->value);
14188 + if ((PGR_Lifecheck_Interval < 1) || (PGR_Lifecheck_Interval > 3600))
14190 + show_error("%s is out of range. It should between 1sec-1hr.\n",LIFECHECK_INTERVAL_TAG);
14191 + return STATUS_ERROR;
14193 + conf = (ConfDataType*)conf->next;
14197 + conf = (ConfDataType*)conf->next;
14199 + if (Max_DB_Server <= 0)
14201 + show_error("%s:Max_DB_Server is wrong value. %s/%s file should be broken",func, path, PGLB_CONF_FILE);
14204 + /* shared memory allocation for cluster table */
14205 + size = sizeof(ClusterTbl) * Max_DB_Server;
14207 + ClusterShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
14208 + if (ClusterShmid < 0)
14210 + show_error("%s:ClusterShm shmget() failed. (%s)", func,strerror(errno));
14211 + return STATUS_ERROR;
14213 + Cluster_Tbl = (ClusterTbl *)shmat(ClusterShmid,0,0);
14214 + if (Cluster_Tbl == (ClusterTbl *)-1)
14216 + show_error("%s:shmat() failed. (%s)", func,strerror(errno));
14217 + return STATUS_ERROR;
14219 + memset(Cluster_Tbl,0,size);
14221 + if ((ClusterSemid = semget(IPC_PRIVATE,MAX_DB_SERVER+1,IPC_CREAT | IPC_EXCL | 0600)) < 0)
14223 + show_error("%s:semget() failed. (%s)",func,strerror(errno));
14224 + return STATUS_ERROR;
14226 + for ( i = 0 ; i <= MAX_DB_SERVER ; i ++)
14228 + semctl(ClusterSemid, i, GETVAL, sem_arg);
14230 + semctl(ClusterSemid, i, SETVAL, sem_arg);
14233 + /* set cluster db server name into cluster db server table */
14234 + for ( i = 0 ; i < Max_DB_Server ; i ++)
14236 + (Cluster_Tbl + i)->rec_no = i;
14238 + (Cluster_Tbl + i)->useFlag = TBL_END;
14240 + for ( i = 0 ; i <= rec_no ; i ++)
14242 + cluster_tbl[i].use_num = 0;
14243 + cluster_tbl[i].rate = 0;
14244 + if (cluster_tbl[i].max_connect < 0)
14246 + cluster_tbl[i].max_connect = 0;
14248 + if (max_connect < cluster_tbl[i].max_connect)
14250 + max_connect = cluster_tbl[i].max_connect;
14252 + PGRadd_cluster_tbl(&cluster_tbl[i]);
14255 + /* shared memory allocation for children table */
14256 + size = sizeof(ChildTbl) * (Max_DB_Server + 1) * max_connect * Max_Pool;
14257 +#ifdef PRINT_DEBUG
14258 + show_debug("%s:Child_Tbl size is[%d]",func,size);
14261 + ChildShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
14262 + if (ChildShmid < 0)
14264 + show_error("%s:ChildShm shmget() failed. (%s)",func, strerror(errno));
14265 + return STATUS_ERROR;
14267 + Child_Tbl = (ChildTbl *)shmat(ChildShmid,0,0);
14268 + if (Child_Tbl == (ChildTbl *)-1)
14270 + show_error("%s:shmat() failed. (%s)", func,strerror(errno));
14271 + return STATUS_ERROR;
14273 + memset(Child_Tbl, 0, size);
14274 + (Child_Tbl + ( Max_DB_Server * max_connect * Max_Pool) -1)->useFlag = TBL_END;
14276 + PGR_Free_Conf_Data();
14278 + return STATUS_OK;
14281 +/*--------------------------------------------------------------------
14285 + * Closing of pglb process
14287 + * int signal_args: signal number (I)
14290 + *--------------------------------------------------------------------
14293 +pglb_exit(int signal_args) /* shutdown handler: forward the signal to the process group, reap children and release IPC, file and socket resources */
14298 + Child_Tbl->useFlag = TBL_END; /* NOTE(review): dereferences Child_Tbl; confirm it cannot still be NULL when a signal arrives before init_pglb() */
14299 + PGRsignal(SIGCHLD,SIG_IGN);
14300 + PGRsignal(signal_args,SIG_IGN); /* ignore the signal ourselves before re-sending it to the group */
14301 + kill (0,signal_args); /* pid 0 = every process in our process group */
14302 + while (wait(NULL) > 0 ) /* reap all children */
14305 + if (ClusterShmid > 0)
14307 + rtn = shmdt((char *)Cluster_Tbl);
14308 + shmctl(ClusterShmid,IPC_RMID,(struct shmid_ds *)NULL);
14309 + ClusterShmid = 0;
14310 + Cluster_Tbl = NULL;
14312 + if (ChildShmid > 0)
14314 + rtn = shmdt((char *)Child_Tbl);
14315 + shmctl(ChildShmid,IPC_RMID,(struct shmid_ds *)NULL);
14317 + Child_Tbl = NULL;
14319 + if (ClusterSemid > 0)
14321 + semctl(ClusterSemid, 0, IPC_RMID); /* removes the whole semaphore set */
14322 + ClusterSemid = 0;
14325 + if (StatusFp != NULL)
14327 + fflush(StatusFp);
14328 + fclose(StatusFp);
14330 + if (Frontend_FD.unix_fd != 0)
14332 + close(Frontend_FD.unix_fd);
14333 + Frontend_FD.unix_fd = 0;
14334 + snprintf(fname, sizeof(fname), "%s/.s.PGSQL.%d", Backend_Socket_Dir,Recv_Port_Number); /* path of the unix-domain socket file */
14337 + if (Frontend_FD.inet_fd != 0)
14339 + close(Frontend_FD.inet_fd);
14340 + Frontend_FD.inet_fd = 0;
14345 + snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGLB_PID_FILE); /* path of the pid file */
14348 + if (ResolvedName != NULL)
14350 + free(ResolvedName);
14351 + ResolvedName = NULL;
14356 +/*--------------------------------------------------------------------
14358 + * load_balance_main()
14360 + * This is a main module of load balance function
14365 + *--------------------------------------------------------------------
14368 +load_balance_main(void) /* create the listening sockets, then wait for clients with select() and dispatch them to a load-balance routine */
14370 + char * func = "load_balance_main()";
14375 + Frontend_FD.unix_fd = PGRcreate_unix_domain_socket(Backend_Socket_Dir, Recv_Port_Number);
14376 + if (Frontend_FD.unix_fd < 0)
14378 + show_error("%s:PGRcreate_unix_domain_socket failed",func);
14379 + pglb_exit(SIGTERM);
14381 + Frontend_FD.inet_fd = PGRcreate_recv_socket(ResolvedName, Recv_Port_Number);
14382 + if (Frontend_FD.inet_fd < 0)
14384 + show_error("%s:PGRcreate_recv_socket failed",func);
14385 + pglb_exit(SIGTERM);
14387 + if (Use_Connection_Pool) /* pooled mode: pre-fork children and re-create them on SIGCHLD */
14389 + PGRsignal(SIGCHLD,PGRrecreate_child);
14390 + rtn = PGRpre_fork_children(Cluster_Tbl);
14391 + if (rtn != STATUS_OK)
14393 + show_error("%s:PGRpre_fork_children failed",func);
14394 + pglb_exit(SIGTERM);
14401 + struct timeval timeout;
14403 + timeout.tv_sec = 60; /* select() wakes up at least once a minute */
14404 + timeout.tv_usec = 0;
14407 + * Wait for something to happen.
14410 + FD_SET(Frontend_FD.unix_fd,&rmask);
14411 + if(Frontend_FD.inet_fd)
14412 + FD_SET(Frontend_FD.inet_fd,&rmask);
14413 + rtn = select(Max(Frontend_FD.unix_fd, Frontend_FD.inet_fd) + 1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
14416 + if(PGRis_cluster_alive() == STATUS_ERROR) {
14417 + show_error("%s:all clusters were dead.",func);
14418 + PGRreturn_no_connection_error();
14423 + if (Use_Connection_Pool)
14425 + status = PGRload_balance_with_pool();
14429 + status = PGRload_balance();
14431 + if (status != STATUS_OK)
14433 + show_error("%s:load balance process failed",func);
14434 + if ( count > PGLB_CONNECT_RETRY_TIME)
14436 + show_error("%s:no cluster available",func);
14437 + PGRreturn_connection_full_error();
14451 +/*--------------------------------------------------------------------
14455 + * Daemonize this process
14460 + *--------------------------------------------------------------------
14465 + char * func = "daemonize()";
14470 + if (pid == (pid_t) -1) /* the process-creation call failed */
14472 + show_error("%s:fork() failed. (%s)",func, strerror(errno));
14474 + return; /* not reached */
14476 + else if (pid > 0) /* parent side */
14481 +#ifdef HAVE_SETSID
14482 + if (setsid() < 0) /* start a new session */
14484 + show_error("%s:setsid() failed. (%s)", func,strerror(errno));
14489 + i = open("/dev/null", O_RDWR);
14497 +/*--------------------------------------------------------------------
14499 + * write_pid_file()
14501 + * The process ID is written in the file.
14502 + * This process ID is used when stopping pglb.
14507 + *--------------------------------------------------------------------
14510 +write_pid_file(void) /* write this process's pid as decimal text to PGR_Write_Path/PGLB_PID_FILE */
14512 + char * func = "write_pid_file()";
14515 + char pidbuf[128];
14517 + snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGLB_PID_FILE);
14518 + fd = fopen(fname, "w"); /* fd is a FILE stream here, not a descriptor */
14521 + show_error("%s:open() %s file failed. (%s)",
14522 + func,fname, strerror(errno));
14525 + snprintf(pidbuf, sizeof(pidbuf), "%d", getpid());
14526 + fwrite(pidbuf, strlen(pidbuf), 1, fd);
14529 + show_error("%s:fwrite() %s file failed. (%s)",
14530 + func,fname, strerror(errno));
14536 +/*--------------------------------------------------------------------
14540 + * Stop the pglb process
14545 + *--------------------------------------------------------------------
14550 + char * func = "stop_pglb()";
14553 + char pidbuf[128];
14556 + if (PGR_Write_Path == NULL)
14558 + PGR_Write_Path = "."; /* fall back to the current directory */
14560 + snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGLB_PID_FILE);
14561 + fd = fopen(fname, "r");
14564 + show_error("%s:open() %s file failed. (%s)",
14565 + func,fname, strerror(errno));
14568 + memset(pidbuf,0,sizeof(pidbuf));
14569 + fread(pidbuf, 1, sizeof(pidbuf) - 1, fd); /* read at most 127 bytes so the zeroed buffer always stays NUL-terminated for atoi() */
14571 + pid = atoi(pidbuf);
14572 + if (kill (pid,SIGTERM) == -1) /* NOTE(review): an empty or garbled pid file yields pid 0, and kill(0, ...) signals the caller's whole process group */
14574 + show_error("%s:could not stop pid: %d (%s)",func,pid,strerror(errno));
14580 +/*--------------------------------------------------------------------
14582 + * is_exist_pid_file()
14584 + * Check existence of pid file.
14588 + * 1: the pid file exists
14589 + * 0: the pid file does not exist
14590 + *--------------------------------------------------------------------
14593 +is_exist_pid_file(void) /* test with stat() whether PGR_Write_Path/PGLB_PID_FILE exists */
14598 + snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGLB_PID_FILE);
14599 + if (stat(fname,&buf) == 0)
14601 + /* pid file exists */
14606 + /* pid file does not exist */
14612 +/*--------------------------------------------------------------------
14614 + * PGRrecreate_child()
14616 + * re-create a child process after it has exited
14618 + * int signal_args: signal number (expecting the SIGCHLD)
14621 + *--------------------------------------------------------------------
14624 +PGRrecreate_child(int signal_args) /* SIGCHLD handler for pooled mode: reap each exited child and start a replacement */
14628 + ClusterTbl * cluster_p;
14633 +#ifdef HAVE_WAITPID
14634 + while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
14637 + while ((pid = wait3(&status, WNOHANG, NULL)) > 0)
14640 + cluster_p = scan_cluster_by_pid(pid); /* NOTE(review): the lookup can fail (it logs "not found"); confirm PGRcreate_child tolerates that result */
14641 + pid = PGRcreate_child(cluster_p);
14643 + if ((pid < 0) && (errno == EINTR))
14647 +/*--------------------------------------------------------------------
14651 + * Terminate the child processes of a failed cluster server
14653 + * int signal_args: signal number (expecting the SIGUSR2)
14656 + *--------------------------------------------------------------------
14659 +close_child(int signal_args) /* SIGUSR2 handler: mark TBL_ERROR_NOTICE clusters as TBL_ERROR and SIGTERM the matching children */
14661 + char * func = "close_child()";
14662 + ChildTbl * child;
14663 + ClusterTbl * cluster;
14666 + if (( Cluster_Tbl == NULL) || (Child_Tbl == NULL))
14668 + show_error("%s:Cluster_Tbl or Child_Tbl is not initialize",func);
14671 + cluster = Cluster_Tbl;
14672 + while(cluster->useFlag != TBL_END)
14674 + if (cluster->useFlag == TBL_ERROR_NOTICE)
14676 + rec_no = cluster->rec_no; /* remember which cluster record is affected */
14677 + PGRset_status_on_cluster_tbl(TBL_ERROR,cluster);
14686 + child = Child_Tbl;
14687 + while(child->useFlag != TBL_END)
14689 + if (child->rec_no == rec_no) /* child belongs to the affected cluster record */
14691 + if (kill (child->pid,SIGTERM) == -1)
14693 + show_error("%s:could not stop pid: %d (%s)",func,child->pid,strerror(errno));
14696 + PGRchild_wait(signal_args);
14697 + child->useFlag = DATA_FREE;
14701 + PGRsignal(SIGUSR2, close_child); /* re-install this handler */
14704 +/*--------------------------------------------------------------------
14706 + * scan_cluster_by_pid()
14708 + * get cluster server record from child process id
14710 + * pid_t pid: child process id (I)
14712 + * OK: pointer of cluster table
14714 + *--------------------------------------------------------------------
14716 +static ClusterTbl * /* returns the cluster table entry whose rec_no matches the child entry for 'pid' */
14717 +scan_cluster_by_pid(pid_t pid)
14719 + char * func = "scan_cluster_by_pid()";
14720 + ChildTbl * child_p;
14721 + ClusterTbl * cluster_p;
14724 + child_p = Child_Tbl;
14725 + if (child_p == NULL)
14727 + show_error("%s:Child Table is not initialize",func);
14730 + cluster_p = Cluster_Tbl;
14731 + if (cluster_p == NULL)
14733 + show_error("%s:Cluster Table is not initialize",func);
14737 + while (child_p->useFlag != TBL_END) /* step 1: find the child entry with this pid */
14739 + if (child_p->pid == pid)
14745 + if (child_p->useFlag == TBL_END) /* reached the sentinel: pid is not in the table */
14747 + show_error("%s:pid:%d not found in child table",func,pid);
14752 + while ((cluster_p->useFlag != TBL_END) && (cnt < ClusterNum)) /* step 2: find the cluster entry with the same rec_no */
14754 + if (cluster_p->rec_no == child_p->rec_no)
14756 + return cluster_p;
14764 +/*--------------------------------------------------------------------
14768 + * show usage of pglb
14773 + *--------------------------------------------------------------------
14780 + path = getenv("PGDATA"); /* PGDATA supplies the default config path shown in the help text */
14781 + if (path == NULL)
14783 + fprintf(stderr,"pglb version [%s]\n",PGLB_VERSION);
14784 + fprintf(stderr,"A load balancer for PostgreSQL\n\n");
14785 + fprintf(stderr,"usage: pglb [-D path_of_config_file] [-W path_of_work_files] [-n][-v][-h][stop | restart]\n"); /* NOTE(review): main() also accepts -U, -w and -l, which this synopsis omits */
14786 + fprintf(stderr," config file default path: %s/%s\n",path, PGLB_CONF_FILE);
14787 + fprintf(stderr," -l: print error logs in the log file.\n");
14788 + fprintf(stderr," -n: don't run in daemon mode.\n");
14789 + fprintf(stderr," -v: debug mode. need '-n' flag\n");
14790 + fprintf(stderr," -h: print this help\n");
14791 + fprintf(stderr," stop: stop pglb\n");
14792 + fprintf(stderr," restart: restart pglb\n");
14795 +/*--------------------------------------------------------------------
14799 + * main module of pglb
14801 + * int argc: number of parameter
14802 + * char ** argv: value of parameter
14805 + *--------------------------------------------------------------------
14808 +main(int argc, char ** argv) /* pglb entry point: install signal handlers, parse options, handle stop/restart, then initialise and run the balancer */
14811 + char * r_path = NULL;
14812 + char * w_path = NULL;
14815 + PGRsignal(SIGHUP, pglb_exit);
14816 + PGRsignal(SIGINT, pglb_exit);
14817 + PGRsignal(SIGQUIT, pglb_exit);
14818 + PGRsignal(SIGTERM, pglb_exit);
14819 + PGRsignal(SIGALRM, SIG_IGN); /* ignored */
14820 + PGRsignal(SIGPIPE, SIG_IGN); /* ignored */
14821 + PGRsignal(SIGTTIN, SIG_IGN); /* ignored */
14822 + PGRsignal(SIGTTOU, SIG_IGN); /* ignored */
14823 + PGRsignal(SIGCHLD,PGRchild_wait);
14824 + PGRsignal(SIGUSR1, SIG_IGN); /* ignored */
14825 + PGRsignal(SIGUSR2, close_child); /* close child process */
14826 + r_path = getenv("PGDATA"); /* default configuration directory */
14827 + if (r_path == NULL)
14830 + while ((opt = getopt(argc, argv, "U:D:W:w:lvnh")) != -1) /* U, D, W and w take an argument */
14840 + PGRuserName = strdup(optarg);
14859 + fork_wait_time = atoi(optarg);
14860 + if (fork_wait_time < 0)
14861 + fork_wait_time = 0;
14881 + PGR_Data_Path = r_path;
14882 + if (w_path == NULL)
14884 + PGR_Write_Path = PGR_Data_Path; /* work files default to the data path */
14888 + PGR_Write_Path = w_path;
14891 + if (optind == (argc-1) &&
14892 + ((!strcmp(argv[optind],"stop")) ||
14893 + (!strcmp(argv[optind],"restart"))))
14896 + if (!strcmp(argv[optind],"stop"))
14901 + else if (optind == argc) /* no positional argument: normal start */
14903 + if (is_exist_pid_file())
14905 + fprintf(stderr,"pid file %s/%s found. is another pglb running?", PGR_Write_Path, PGLB_PID_FILE);
14909 + else if (optind < argc) /* unexpected extra arguments */
14919 + write_pid_file();
14921 + if (init_pglb(PGR_Data_Path) != STATUS_OK)
14926 + /* call recovery process */
14927 + PGRrecovery_main(fork_wait_time);
14929 + /* call lifecheck process */
14930 + PGRlifecheck_main(fork_wait_time);
14932 + /* start loadbalance module */
14933 + load_balance_main();
14935 + return STATUS_OK;
14939 +PGRexit_subprocess(int sig)
14943 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pglb.conf.sample pgcluster-1.7.0rc7/src/pgcluster/pglb/pglb.conf.sample
14944 --- postgresql-8.2.4/src/pgcluster/pglb/pglb.conf.sample 1970-01-01 01:00:00.000000000 +0100
14945 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pglb.conf.sample 2007-02-18 22:52:17.000000000 +0100
14947 +#============================================================
14948 +# Load Balance Server configuration file
14949 +#-------------------------------------------------------------
14951 +#-------------------------------------------------------------
14952 +# This file controls:
14953 +# o which hosts are DB cluster servers
14954 +# o which port to use to connect to each DB cluster server
14955 +# o how many connections are allowed on each DB server
14956 +#============================================================
14957 +#-------------------------------------------------------------
14958 +# set cluster DB server information
14959 +# o Host_Name : Hostname of Cluster
14960 +# Please write a host name by FQDN or IP address.
14961 +# o Port : Connection port for postmaster
14962 +# o Max_Connect : Maximum number of connections to postmaster
14963 +#-------------------------------------------------------------
14964 +#<Cluster_Server_Info>
14965 +# <Host_Name> master.pgcluster.org </Host_Name>
14966 +# <Port> 5432 </Port>
14967 +# <Max_Connect> 32 </Max_Connect>
14968 +#</Cluster_Server_Info>
14969 +#<Cluster_Server_Info>
14970 +# <Host_Name> post2.pgcluster.org </Host_Name>
14971 +# <Port> 5432 </Port>
14972 +# <Max_Connect> 32 </Max_Connect>
14973 +#</Cluster_Server_Info>
14974 +#<Cluster_Server_Info>
14975 +# <Host_Name> post3.pgcluster.org </Host_Name>
14976 +# <Port> 5432 </Port>
14977 +# <Max_Connect> 32 </Max_Connect>
14978 +#</Cluster_Server_Info>
14979 +#-------------------------------------------------------------
14980 +# set Load Balance server information
14981 +# o Host_Name : The host name of this load balance server
14982 +# Please write a host name by FQDN or IP address.
14983 +# o Backend_Socket_Dir : Unix domain socket path for the backend
14984 +# o Receive_Port : Connection port from client
14985 +# o Recovery_Port : Connection port for recovery process
14986 +# o Max_Cluster_Num : Maximum number of cluster DB servers
14987 +# o Use_Connection_Pooling : Use connection pool [yes/no]
14988 +# o LifeCheck_Timeout : Timeout of the lifecheck response
14989 +# o LifeCheck_Interval : Interval time of the lifecheck
14991 +# 10s -- 10 seconds
14992 +# 10min -- 10 minutes
14994 +#-------------------------------------------------------------
14995 +<Host_Name> loadbalancer.pgcluster.org </Host_Name>
14996 +<Backend_Socket_Dir> /tmp </Backend_Socket_Dir>
14997 +<Receive_Port> 5432 </Receive_Port>
14998 +<Recovery_Port> 6001 </Recovery_Port>
14999 +<Max_Cluster_Num> 128 </Max_Cluster_Num>
15000 +<Use_Connection_Pooling> no </Use_Connection_Pooling>
15001 +<LifeCheck_Timeout> 3s </LifeCheck_Timeout>
15002 +<LifeCheck_Interval> 15s </LifeCheck_Interval>
15003 +#-------------------------------------------------------------
15004 +# Log file settings
15006 +# o File_Name : Log file name with full path
15007 +# o File_Size : Maximum size of each log file
15008 +# Please specify in a number and unit(K or M)
15012 +# o Rotate : Rotation times
15013 +# If specified 0, old versions are removed.
15014 +#-------------------------------------------------------------
15016 + <File_Name> /tmp/pglb.log </File_Name>
15017 + <File_Size> 1M </File_Size>
15018 + <Rotate> 3 </Rotate>
15020 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pglb.h pgcluster-1.7.0rc7/src/pgcluster/pglb/pglb.h
15021 --- postgresql-8.2.4/src/pgcluster/pglb/pglb.h 1970-01-01 01:00:00.000000000 +0100
15022 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pglb.h 2007-03-01 16:27:49.000000000 +0100
15024 +/*--------------------------------------------------------------------
15028 + * Portions Copyright (c) 2003-2006 Atsushi Mitani
15029 + *--------------------------------------------------------------------
15034 +#define PGLB_VERSION "1.7.0rc7"
15036 +#include "../libpgc/libpgc.h"
15043 + * define this if you do not want to issue RESET ALL at each new
15044 + * connection. Also you need to define this for 7.1 or prior
15045 + * PostgreSQL since they do not support RESET ALL
15047 +#undef NO_RESET_ALL
15049 +/* undef this if you have problems with non blocking accept() */
15050 +#define NONE_BLOCK
15052 +#define POOLMAXPATHLEN 8192
15054 +/* configuration file name */
15055 +#define POOL_CONF_FILE_NAME "pgpool.conf"
15057 +/* pid file directory */
15058 +#define DEFAULT_LOGDIR "/tmp"
15060 +/* Unix domain socket directory */
15061 +#define DEFAULT_SOCKET_DIR "/tmp"
15063 +/* pid file name */
15064 +#define PID_FILE_NAME "pgpool.pid"
15066 +/* strict mode comment in SQL */
15067 +#define STRICT_MODE_STR "/*STRICT*/"
15068 +#define STRICT_MODE(s) (strncasecmp((s), STRICT_MODE_STR, strlen(STRICT_MODE_STR)) == 0)
15071 + POOL_CONTINUE = 0,
15078 +/* protocol major version numbers */
15079 +#define PROTO_MAJOR_V2 2
15080 +#define PROTO_MAJOR_V3 3
15083 + * startup packet definitions (v2) stolen from PostgreSQL
15085 +#define SM_DATABASE 64
15086 +#define SM_USER 32
15087 +#define SM_OPTIONS 64
15088 +#define SM_UNUSED 64
15091 +typedef struct PGR_StartupPacket_v2
15093 + int protoVersion; /* Protocol version */
15094 + char database[SM_DATABASE]; /* Database name */
15095 + char user[SM_USER]; /* User name */
15096 + char options[SM_OPTIONS]; /* Optional additional args */
15097 + char unused[SM_UNUSED]; /* Unused */
15098 + char tty[SM_TTY]; /* Tty for debug output */
15099 +} PGR_StartupPacket_v2;
15101 +/* startup packet info */
15104 + char *startup_packet; /* raw startup packet without packet length (malloced area) */
15105 + int len; /* raw startup packet length */
15106 + int major; /* protocol major version */
15107 + int minor; /* protocol minor version */
15108 + char *database; /* database name in startup_packet (malloced area) */
15109 + char *user; /* user name in startup_packet (malloced area) */
15110 +} PGR_StartupPacket;
15112 +typedef struct CancelPacket
15114 + int protoVersion; /* Protocol version */
15115 + int pid; /* backend process id */
15116 + int key; /* cancel key */
15120 + * configuration parameters
15123 + int inetdomain; /* should we make an INET domain socket too? */
15124 + int port; /* port # to bind */
15125 + char *socket_dir; /* pgpool socket directory */
15126 + char *backend_host_name; /* backend host name */
15127 + int backend_port; /* backend port # */
15128 + char *secondary_backend_host_name; /* secondary backend host name */
15129 + int secondary_backend_port; /* secondary backend port # */
15130 + int num_init_children; /* # of children initially pre-forked */
15131 + int child_life_time; /* if idle for this seconds, child exits */
15132 + int connection_life_time; /* if idle for this seconds, connection closes */
15133 + int max_pool; /* max # of connection pool per child */
15134 + char *logdir; /* logging directory */
15135 + char *backend_socket_dir; /* Unix domain socket directory for the PostgreSQL server */
15136 + int replication_mode; /* replication mode */
15137 + int replication_strict; /* if non 0, wait for completion of the
15138 + query sent to master to avoid deadlock */
15140 + * if secondary does not respond in this milli seconds, abort this session.
15141 + * this is not compatible with replication_strict = 1. 0 means no timeout.
15143 + int replication_timeout;
15145 + int load_balance_mode; /* load balance mode */
15147 + /* followings do not exist in the configuration file */
15148 + char *current_backend_host_name; /* current backend host name */
15149 + int current_backend_port; /* current backend port # */
15150 + int replication_enabled; /* replication mode enabled */
15152 + int replication_stop_on_mismatch; /* if there's a data mismatch between master and secondary
15153 + * start degeneration to stop replication mode
15157 +#define MAX_PASSWORD_SIZE (1024)
15160 + int num; /* number of entries */
15161 + char **names; /* parameter names */
15162 + char **values; /* values */
15166 + * stream connection structure
15169 + int fd; /* fd for connection */
15170 + FILE *write_fd; /* stream write connection */
15172 + char *hp; /* pending data buffer head address */
15173 + int po; /* pending data offset */
15174 + int bufsz; /* pending data buffer size */
15175 + int len; /* pending data length */
15177 + char *sbuf; /* buffer for pool_read_string */
15178 + int sbufsz; /* its size in bytes */
15180 + char *buf2; /* buffer for pool_read2 */
15181 + int bufsz2; /* its size in bytes */
15183 + int isbackend; /* this connection is for backend if non 0 */
15184 + int issecondary_backend; /* this connection is for secondary backend if non 0 */
15186 + char tstate; /* transaction state (V3 only) */
15189 + * the following are remembered so that an authenticated connection can be re-used
15191 + int auth_kind; /* 3: clear text password, 4: crypt password, 5: md5 password */
15192 + int pwd_size; /* password (sent back from frontend) size in host order */
15193 + char password[MAX_PASSWORD_SIZE]; /* password (sent back from frontend) */
15194 + char salt[4]; /* password salt */
15197 + * the following are used to remember the current session parameter status.
15198 + * re-used connection will need them (V3 only)
15200 + ParamStatus params;
15202 + int no_forward; /* if non 0, do not write to frontend */
15204 +} POOL_CONNECTION;
15207 + * connection pool structure
15210 + PGR_StartupPacket *sp; /* startup packet info */
15211 + int pid; /* backend pid */
15212 + int key; /* cancel key */
15213 + POOL_CONNECTION *con;
15214 + time_t closetime; /* absolute time in second when the connection closed
15215 + * if 0, that means the connection is under use.
15217 +} POOL_CONNECTION_POOL_SLOT;
15219 +#define MAX_CONNECTION_SLOTS 2
15222 + int num; /* number of slots */
15223 + POOL_CONNECTION_POOL_SLOT *slots[MAX_CONNECTION_SLOTS];
15224 +} POOL_CONNECTION_POOL;
15226 +#define MASTER_CONNECTION(p) ((p)->slots[0])
15227 +#define SECONDARY_CONNECTION(p) ((p)->slots[1])
15228 +#define MASTER(p) (MASTER_CONNECTION(p)->con) /* POOL_CONNECTION of slot 0 */
15229 +#define SECONDARY(p) (SECONDARY_CONNECTION(p)->con) /* POOL_CONNECTION of slot 1 */
15230 +#define MAJOR(p) (MASTER_CONNECTION(p)->sp->major) /* protocol major version from slot 0's startup packet */
15231 +#define TSTATE(p) (MASTER(p)->tstate) /* transaction state of slot 0's connection; still usable as an lvalue */
15233 +#define Max(x, y) ((x) > (y) ? (x) : (y))
15234 +#define Min(x, y) ((x) < (y) ? (x) : (y))
15247 + char hostName[HOSTNAME_MAX_LENGTH];
15248 + unsigned short port;
15249 + short max_connect;
15267 +#define UNIX_DOMAIN_FD (0)
15268 +#define INET_DOMAIN_FD (1)
15274 +#define pool_config_inetdomain (0)
15275 +#define pool_config_replication_mode (0)
15276 +#define pool_config_replication_strict (0)
15277 +#define pool_config_replication_timeout (0)
15278 +#define pool_config_replication_enabled (0)
15279 +#define pool_config_load_balance_mode (0)
15280 +#define pool_config_replication_stop_on_mismatch (0)
15281 +#define pool_config_port (Recv_Port_Number)
15282 +#define pool_config_socket_dir (Backend_Socket_Dir)
15283 +#define pool_config_backend_host_name (CurrentCluster->hostName)
15284 +#define pool_config_backend_port (CurrentCluster->port)
15285 +#define pool_config_secondary_backend_host_name (CurrentCluster->hostName)
15286 +#define pool_config_secondary_backend_port (CurrentCluster->port)
15287 +#define pool_config_num_init_children (CurrentCluster->max_connect)
15288 +#define pool_config_child_life_time (Connection_Life_Time)
15289 +#define pool_config_connection_life_time (Connection_Life_Time)
15290 +#define pool_config_max_pool (Max_Pool)
15291 +#define pool_config_logdir "./"
15292 +#define pool_config_backend_socket_dir (Backend_Socket_Dir)
15293 +#define pool_config_current_backend_host_name (CurrentCluster->hostName)
15294 +#define pool_config_current_backend_port (CurrentCluster->port)
15295 +#define REPLICATION (0) /* constant 0 here: every "&& REPLICATION" test in the pool code is always false */
15296 +#define IN_LOAD_BALANCE (0) /* constant 0 here, like REPLICATION */
15301 +#define MAX_DB_SERVER (32)
15302 +#define PGLB_MAX_SOCKET_QUEUE (10000)
15303 +#define CLUSTER_TBL_SHM_KEY (1010)
15304 +#define PGLB_CONNECT_RETRY_TIME (3)
15305 +#define DEFAULT_CONNECT_NUM (32)
15306 +#define DEFAULT_PORT (5432)
15307 +#define BUF_SIZE (16384)
15308 +#define TBL_FREE (0)
15309 +#define TBL_INIT (1)
15310 +#define TBL_USE (2)
15311 +#define TBL_STOP (3)
15312 +#define TBL_ACCEPT (10)
15313 +#define TBL_ERROR_NOTICE (98)
15314 +#define TBL_ERROR (99)
15315 +#define TBL_END (-1)
15316 +#define STATUS_OK (0)
15317 +#define STATUS_ERROR (-1)
15318 +#ifdef RECOVERY_PREPARE_REQ
15319 +#define ADD_DB RECOVERY_PREPARE_REQ
15321 +#define ADD_DB (1)
15323 +#ifdef RECOVERY_PGDATA_ANS
15324 +#define STOP_DB RECOVERY_PGDATA_ANS
15326 +#define STOP_DB (3)
15328 +#ifdef RECOVERY_FINISH
15329 +#define START_DB RECOVERY_FINISH
15331 +#define START_DB (9)
15333 +#define DELETE_DB (99)
15334 +#define QUERY_TERMINATE (0x00)
15335 +#define RESPONSE_TERMINATE (0x5a)
15336 +#define PGLB_CONF_FILE "pglb.conf"
15337 +#define PGLB_PID_FILE "pglb.pid"
15338 +#define PGLB_STATUS_FILE "pglb.sts"
15339 +#define PGLB_LOG_FILE "pglb.log"
15340 +#define CLUSTER_SERVER_TAG "Cluster_Server_Info"
15341 +#define MAX_CONNECT_TAG "Max_Connect"
15342 +#define RECOVERY_PORT_TAG "Recovery_Port"
15343 +#define RECV_PORT_TAG "Receive_Port"
15344 +#define MAX_CLUSTER_TAG "Max_Cluster_Num"
15345 +#define USE_CONNECTION_POOL_TAG "Use_Connection_Pooling"
15346 +#define MAX_POOL_TAG "Max_Pool_Each_Server"
15347 +#define BACKEND_SOCKET_DIR_TAG "Backend_Socket_Dir"
15348 +#define CONNECTION_LIFE_TIME "Connection_Life_Time"
15349 +#define NOT_USE_CONNECTION_POOL (0)
15350 +#define USE_CONNECTION_POOL (1)
15352 +#define PGR_SEND_RETRY_CNT (100)
15353 +#define PGR_SEND_WAIT_MSEC (500)
15354 +#define PGR_RECV_RETRY_CNT (100)
15355 +#define PGR_RECV_WAIT_MSEC (500)
15357 +extern int Recv_Port_Number;
15358 +extern int Recovery_Port_Number;
15359 +extern uint16_t LifeCheck_Port_Number;
15360 +extern int Use_Connection_Pool;
15361 +extern int Max_Pool;
15362 +extern int Connection_Life_Time;
15363 +extern int Msg_Id;
15364 +extern ClusterTbl * Cluster_Tbl;
15365 +extern int Max_DB_Server;
15366 +extern int MaxBackends;
15367 +extern char * Backend_Socket_Dir;
15368 +extern int ClusterShmid;
15369 +extern int ClusterSemid;
15370 +extern int ChildShmid;
15371 +extern int ClusterNum;
15372 +extern ChildTbl * Child_Tbl;
15373 +extern char * PGR_Data_Path;
15374 +extern char * PGR_Write_Path;
15375 +extern char * Backend_Socket_Dir;
15376 +extern FrontSocket Frontend_FD;
15377 +extern FILE * StatusFp;
15378 +extern char * ResolvedName;
15379 +extern char * PGRuserName;
15382 +extern POOL_CONNECTION * Frontend;
15383 +extern ClusterTbl * CurrentCluster;
15385 +extern char * Function;
15387 +extern POOL_CONNECTION_POOL *pool_connection_pool; /* connection pool */
15389 +/* extern of main.c */
15390 +extern void PGRrecreate_child(int signal_args);
15391 +extern void PGRexit_subprocess(int sig);
15393 +/* extern of child.c */
15394 +extern int PGRpre_fork_children(ClusterTbl * ptr);
15395 +extern int PGRpre_fork_child(ClusterTbl * ptr);
15396 +extern int PGRdo_child( int use_pool);
15397 +extern int PGRcreate_child(ClusterTbl * cluster_p);
15398 +extern pid_t PGRscan_child_tbl(ClusterTbl * cluster_p);
15399 +extern void notice_backend_error(void);
15400 +extern void do_pooling_child(int sig);
15401 +extern int PGRset_status_to_child_tbl(pid_t pid, int status);
15402 +extern int PGRadd_child_tbl(ClusterTbl * cluster_p, pid_t pid, int status);
15403 +extern int PGRget_child_status(pid_t pid);
15404 +extern void PGRreturn_connection_full_error(void);
15405 +extern void PGRreturn_no_connection_error(void);
15406 +extern void PGRquit_children_on_cluster(int rec_no);
15408 +/* extern of cluster_table.c */
15409 +extern int PGRis_cluster_alive(void) ;
15410 +extern ClusterTbl * PGRscan_cluster(void);
15411 +extern void PGRset_key_of_cluster(ClusterTbl * ptr, RecoveryPacket * packet);
15412 +extern ClusterTbl * PGRadd_cluster_tbl (ClusterTbl * conf_data);
15413 +extern ClusterTbl * PGRset_status_on_cluster_tbl (int status, ClusterTbl * ptr);
15414 +extern ClusterTbl * PGRsearch_cluster_tbl(ClusterTbl * conf_data);
15416 +/* extern of load_balance.c */
15417 +extern int PGRload_balance(void);
15418 +extern int PGRload_balance_with_pool(void);
15419 +extern char PGRis_connection_full(ClusterTbl * ptr);
15420 +extern void PGRuse_connection(ClusterTbl * ptr);
15421 +extern void PGRrelease_connection(ClusterTbl * ptr);
15422 +extern void PGRchild_wait(int sig);
15424 +/* extern of recovery.c */
15425 +extern void PGRrecovery_main(int fork_wait_time); /* called from main() with the fork wait time given on the command line */
15427 +/* extern of socket.c */
15428 +extern int PGRcreate_unix_domain_socket(char * sock_dir, unsigned short port);
15429 +extern int PGRcreate_recv_socket(char * hostName , unsigned short portNumber);
15430 +extern int PGRcreate_acception(int fd, char * hostName , unsigned short portNumber);
15431 +extern void PGRclose_sock(int * sock);
15432 +extern int PGRread_byte(int sock,char * buf,int len, int flag);
15433 +extern int PGRcreate_cluster_socket( int * sock, ClusterTbl * ptr );
15435 +/* extern of pool_auth.c */
15436 +extern int pool_do_auth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp);
15437 +extern int pool_do_reauth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp);
15438 +extern int pool_read_message_length(POOL_CONNECTION_POOL *cp);
15439 +extern signed char pool_read_kind(POOL_CONNECTION_POOL *cp);
15441 +/* extern of pool_connection_pool.c */
15442 +extern int pool_init_cp(void);
15443 +extern POOL_CONNECTION_POOL *pool_get_cp(char *user, char *database, int protoMajor);
15444 +extern void pool_discard_cp(char *user, char *database, int protoMajor);
15445 +extern POOL_CONNECTION_POOL *pool_create_cp(void);
15446 +extern void pool_connection_pool_timer(POOL_CONNECTION_POOL *backend);
15447 +extern void pool_backend_timer_handler(int sig);
15448 +extern int connect_inet_domain_socket(int secondary_backend);
15449 +extern int connect_unix_domain_socket(int secondary_backend);
15450 +extern char PGRis_same_host(char * host1, char * host2);
15451 +extern void pool_finish(void);
15453 +/* extern of pool_process_query.c */
15454 +extern POOL_STATUS pool_process_query(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, int connection_reuse);
15455 +extern POOL_STATUS ErrorResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
15456 +extern void pool_enable_timeout(void); /* (void): a real prototype, so calls with arguments are rejected */
15457 +extern void pool_disable_timeout(void); /* (void): a real prototype, so calls with arguments are rejected */
15458 +extern int pool_check_fd(POOL_CONNECTION *cp, int notimeout);
15459 +extern void pool_send_frontend_exits(POOL_CONNECTION_POOL *backend);
15460 +extern POOL_STATUS SimpleForwardToFrontend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
15461 +extern POOL_STATUS SimpleForwardToBackend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
15462 +extern POOL_STATUS ParameterStatus(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
15463 +extern void pool_send_error_message(POOL_CONNECTION *frontend, int protoMajor, char *code, char *message, char *detail, char *hint, char *file, int line);
15465 +/* extern of pool_params.c */
15466 +extern int pool_init_params(ParamStatus *params);
15467 +extern void pool_discard_params(ParamStatus *params);
15468 +extern char *pool_find_name(ParamStatus *params, char *name, int *pos);
15469 +extern int pool_get_param(ParamStatus *params, int index, char **name, char **value);
15470 +extern int pool_add_param(ParamStatus *params, char *name, char *value);
15471 +extern void pool_param_debug_print(ParamStatus *params);
15473 +/* extern of pool_stream.c */
15474 +extern POOL_CONNECTION *pool_open(int fd);
15475 +extern void pool_close(POOL_CONNECTION *cp);
15476 +extern int pool_read(POOL_CONNECTION *cp, void *buf, int len);
15477 +extern char *pool_read2(POOL_CONNECTION *cp, int len);
15478 +extern int pool_write(POOL_CONNECTION *cp, void *buf, int len);
15479 +extern int pool_flush(POOL_CONNECTION *cp);
15480 +extern int pool_write_and_flush(POOL_CONNECTION *cp, void *buf, int len);
15481 +extern char *pool_read_string(POOL_CONNECTION *cp, int *len, int line);
15484 + * external prototype in show.c
15486 +extern void show_error(const char * fmt,...);
15487 +extern void show_debug(const char * fmt,...);
15488 +extern void PGRwrite_log_file(FILE * fp, const char * fmt,...);
15491 + * external prototype in lifecheck.c
15493 +extern int PGRlifecheck_main(int fork_wait_time);
15495 +#endif /* PGLB_H */
15496 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pool_auth.c pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_auth.c
15497 --- postgresql-8.2.4/src/pgcluster/pglb/pool_auth.c 1970-01-01 01:00:00.000000000 +0100
15498 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_auth.c 2007-02-18 22:52:17.000000000 +0100
15500 +/*--------------------------------------------------------------------
15505 + * authentication stuff
15507 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
15508 + * Portions Copyright (c) 2003-2006, Tatsuo Ishii
15509 + *--------------------------------------------------------------------
15512 + * Permission to use, copy, modify, and distribute this software and
15513 + * its documentation for any purpose and without fee is hereby
15514 + * granted, provided that the above copyright notice appear in all
15515 + * copies and that both that copyright notice and this permission
15516 + * notice appear in supporting documentation, and that the name of the
15517 + * author not be used in advertising or publicity pertaining to
15518 + * distribution of the software without specific, written prior
15519 + * permission. The author makes no representations about the
15520 + * suitability of this software for any purpose. It is provided "as
15521 + * is" without express or implied warranty.
15524 +#include <sys/types.h>
15525 +#include <netinet/in.h>
15526 +#include <sys/param.h>
15527 +#include <arpa/inet.h>
15528 +#include <errno.h>
15529 +#include <string.h>
15530 +#include <sys/time.h>
15531 +#include <stdio.h>
15532 +#include "replicate_com.h"
15535 +int pool_do_auth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp);
15536 +int pool_do_reauth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp);
15537 +int pool_read_message_length(POOL_CONNECTION_POOL *cp);
15538 +signed char pool_read_kind(POOL_CONNECTION_POOL *cp);
15540 +static POOL_STATUS pool_send_auth_ok(POOL_CONNECTION *frontend, int pid, int key, int protoMajor);
15541 +static int do_clear_text_password(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor);
15542 +static int do_crypt(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor);
15543 +static int do_md5(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor);
15546 +* do authentication against backend. if success return 0 otherwise non 0.
15548 +int pool_do_auth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp)
15550 + char * func = "pool_do_auth()";
15552 + signed char kind;
15558 + protoMajor = MAJOR(cp);
15560 + kind = pool_read_kind(cp);
15566 + /* error response? */
15569 + /* we assume error response at this stage is likely version
15570 + * protocol mismatch (v3 frontend vs. v2 backend). So we throw
15571 + * a V2 protocol error response in the hope that v3 frontend
15572 + * will negotiate again using v2 protocol.
15574 + show_error("%s:pool_do_auth: maybe protocol version mismatch (current version %d)",func, protoMajor);
15575 + ErrorResponse(frontend, cp);
15578 + else if (kind != 'R')
15580 + show_error("%s:pool_do_auth: expect \"R\" got %c",func, kind);
15585 + * message length (v3 only) */
15586 + if (protoMajor == PROTO_MAJOR_V3 && pool_read_message_length(cp) < 0)
15592 + * read authentication request kind.
15594 + * 0: authentication ok
15597 + * 3: clear text password
15598 + * 4: crypt password
15599 + * 5: md5 password
15600 + * 6: scm credential
15602 + * in replication mode, we only support kind = 0, 3. this is because "salt"
15603 + * cannot be replicated among master and secondary.
15604 + * in non replication mode, we support kind = 0, 3, 4, 5
15607 + status = pool_read(MASTER(cp), &pid, sizeof(pid));
15610 + show_error("%s:pool_do_auth: read authentication kind failed",func);
15616 + status = pool_read(SECONDARY(cp), &pid1, sizeof(pid1));
15620 + show_error("%s:pool_do_auth: read authentication kind from secondary failed",func);
15625 + pid = ntohl(pid);
15630 + if (protoMajor == PROTO_MAJOR_V3)
15634 + pool_write(frontend, "R", 1);
15635 + msglen = htonl(8);
15636 + pool_write(frontend, &msglen, sizeof(msglen));
15637 + msglen = htonl(0);
15638 + if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
15643 + MASTER(cp)->auth_kind = 0;
15646 + /* clear text password authentication? */
15647 + else if (pid == 3)
15649 +#ifdef PRINT_DEBUG
15650 + show_debug("%s:trying clear text password authentication",func);
15653 + pid = do_clear_text_password(MASTER(cp), frontend, 0, protoMajor);
15655 + if (pid >= 0 && REPLICATION)
15657 + pid = do_clear_text_password(SECONDARY(cp), frontend, 0, protoMajor);
15661 + /* crypt authentication? */
15662 + else if (pid == 4)
15664 +#ifdef PRINT_DEBUG
15665 + show_debug("%s:trying crypt authentication",func);
15668 + pid = do_crypt(MASTER(cp), frontend, 0, protoMajor);
15670 + if (pid >= 0 && REPLICATION)
15672 + pid = do_crypt(SECONDARY(cp), frontend, 0, protoMajor);
15676 + /* md5 authentication? */
15677 + else if (pid == 5)
15679 +#ifdef PRINT_DEBUG
15680 + show_debug("%s:trying md5 authentication",func);
15683 + pid = do_md5(MASTER(cp), frontend, 0, protoMajor);
15685 + if (pid >= 0 && REPLICATION)
15687 + pid = do_md5(SECONDARY(cp), frontend, 0, protoMajor);
15693 + show_error("%s:pool_do_auth: backend does not return authenticaton ok",func);
15698 + * authentication ok. now read pid and secret key from the
15701 + kind = pool_read_kind(cp);
15707 + /* error response? */
15710 + if (protoMajor == PROTO_MAJOR_V2)
15711 + ErrorResponse(frontend, cp);
15713 + SimpleForwardToFrontend(kind, frontend, cp);
15716 + else if (kind != 'K')
15718 + if (protoMajor == PROTO_MAJOR_V3)
15720 + /* process parameter status */
15721 + while (kind == 'S')
15723 + if (ParameterStatus(frontend, cp) != POOL_CONTINUE)
15726 + pool_flush(frontend);
15728 + kind = pool_read_kind(cp);
15731 + show_error("%s:pool_do_auth: failed to read kind while processing ParamterStatus",func);
15738 + show_error("%s:pool_do_auth: expect \"K\" got %c",func, kind);
15744 + * message length (V3 only)
15746 + if (protoMajor == PROTO_MAJOR_V3 && (length = pool_read_message_length(cp)) != 12)
15748 + show_error("%s:pool_do_auth: invalid messages length(%d) for BackendKeyData",func, length);
15753 + * OK, read pid and secret key
15757 + pool_read(MASTER(cp), &pid, sizeof(pid));
15758 + MASTER_CONNECTION(cp)->pid = pid;
15761 + pool_read(MASTER(cp), &key, sizeof(key));
15762 + MASTER_CONNECTION(cp)->key = key;
15766 + pool_read(SECONDARY(cp), &pid1, sizeof(pid1));
15767 + SECONDARY_CONNECTION(cp)->pid = pid1; /* store the secondary's own backend pid, not the master's */
15770 + pool_read(SECONDARY(cp), &key1, sizeof(key1));
15771 + SECONDARY_CONNECTION(cp)->key = key1; /* store the secondary's own cancel key, not the master's */
15774 + return (pool_send_auth_ok(frontend, pid, key, protoMajor));
15778 +* do re-authentication for reused connection. if success return 0 otherwise non 0.
15780 +int pool_do_reauth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp)
15782 + char * func = "pool_do_reauth()";
15786 + protoMajor = MAJOR(cp);
15788 + switch(MASTER(cp)->auth_kind)
15796 + /* clear text password */
15797 + status = do_clear_text_password(MASTER(cp), frontend, 1, protoMajor);
15801 + /* crypt password */
15802 + status = do_crypt(MASTER(cp), frontend, 1, protoMajor);
15806 + /* md5 password */
15807 + status = do_md5(MASTER(cp), frontend, 1, protoMajor);
15811 + show_error("%s: unknown authentication request code %d",
15812 + func,MASTER(cp)->auth_kind);
15818 + if (protoMajor == PROTO_MAJOR_V3)
15822 + pool_write(frontend, "R", 1);
15823 + msglen = htonl(8);
15824 + pool_write(frontend, &msglen, sizeof(msglen));
15825 + msglen = htonl(0);
15826 + if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
15834 +#ifdef PRINT_DEBUG
15835 + show_debug("%s: authentication failed",func);
15840 + return (pool_send_auth_ok(frontend, MASTER_CONNECTION(cp)->pid, MASTER_CONNECTION(cp)->key, protoMajor) != POOL_CONTINUE);
15844 +* send authentication ok to frontend. if success return 0 otherwise non 0.
15846 +static POOL_STATUS pool_send_auth_ok(POOL_CONNECTION *frontend, int pid, int key, int protoMajor)
15851 + if (protoMajor == PROTO_MAJOR_V2)
15853 + /* return "Authentication OK" to the frontend */
15855 + pool_write(frontend, &kind, 1);
15857 + if (pool_write_and_flush(frontend, &len, sizeof(len)) < 0)
15863 + /* send backend key data */
15865 + pool_write(frontend, &kind, 1);
15866 + if (protoMajor == PROTO_MAJOR_V3)
15869 + pool_write(frontend, &len, sizeof(len));
15871 + pool_write(frontend, &pid, sizeof(pid));
15872 + if (pool_write_and_flush(frontend, &key, sizeof(key)) < 0)
15881 + * perform clear text password authentication
15883 +static int do_clear_text_password(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor)
15885 + char * func = "do_clear_text_password()";
15887 + static char password[MAX_PASSWORD_SIZE];
15893 + if (!backend->issecondary_backend)
15895 + pool_write(frontend, "R", 1); /* authenticaton */
15896 + if (protoMajor == PROTO_MAJOR_V3)
15899 + pool_write(frontend, &len, sizeof(len));
15901 + kind = htonl(3); /* clear text password authentication */
15902 + pool_write_and_flush(frontend, &kind, sizeof(kind)); /* indicating clear text password authentication */
15904 + /* read password packet */
15905 + if (protoMajor == PROTO_MAJOR_V2)
15907 + if (pool_read(frontend, &size, sizeof(size)))
15909 + show_error("%s: failed to read password packet size",func);
15917 + if (pool_read(frontend, &k, sizeof(k)))
15919 + show_error("%s: failed to read password packet \"p\"",func);
15924 + show_error("%s:packet does not start with \"p\"",func);
15927 + if (pool_read(frontend, &size, sizeof(size)))
15929 + show_error("%s: failed to read password packet size",func);
15934 + if ((ntohl(size) - 4) > sizeof(password))
15936 + show_error("%s: password is too long (size: %d)",func, ntohl(size) - 4);
15940 + if (pool_read(frontend, password, ntohl(size) - 4))
15942 + show_error("%s: failed to read password (size: %d)",func, ntohl(size) - 4);
15947 + /* connection reusing? */
15950 + if ((ntohl(size) - 4) != backend->pwd_size)
15952 +#ifdef PRINT_DEBUG
15953 + show_debug("%s; password size does not match in re-authetication",func);
15958 + if (memcmp(password, backend->password, backend->pwd_size) != 0)
15960 +#ifdef PRINT_DEBUG
15961 + show_debug("%s; password does not match in re-authetication",func);
15969 + /* send password packet to backend */
15970 + if (protoMajor == PROTO_MAJOR_V3)
15971 + pool_write(backend, "p", 1);
15972 + pool_write(backend, &size, sizeof(size));
15973 + pool_write_and_flush(backend, password, ntohl(size) -4);
15974 + if (pool_read(backend, &response, sizeof(response)))
15976 + show_error("%s: failed to read authentication response",func);
15980 + if (response != 'R')
15982 +#ifdef PRINT_DEBUG
15983 + show_debug("%s: backend does not return R while processing clear text password authentication",func);
15988 + if (protoMajor == PROTO_MAJOR_V3)
15990 + if (pool_read(backend, &len, sizeof(len)))
15992 + show_error("%s: failed to read authentication packet size",func);
15996 + if (ntohl(len) != 8)
15998 + show_error("%s: incorrect authentication packet size (%d)",func, ntohl(len));
16003 + /* expect to read "Authentication OK" response. kind should be 0... */
16004 + if (pool_read(backend, &kind, sizeof(kind)))
16006 +#ifdef PRINT_DEBUG
16007 + show_debug("%s: failed to read Authentication OK response",func);
16012 + /* if authenticated, save info */
16013 + if (!reauth && kind == 0)
16015 + if (!backend->issecondary_backend && protoMajor == PROTO_MAJOR_V3)
16019 + pool_write(frontend, "R", 1);
16020 + msglen = htonl(8);
16021 + pool_write(frontend, &msglen, sizeof(msglen));
16022 + msglen = htonl(0);
16023 + if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
16029 + backend->auth_kind = 3;
16030 + backend->pwd_size = ntohl(size) - 4;
16031 + memcpy(backend->password, password, backend->pwd_size);
16037 + * perform crypt authentication
16039 +static int do_crypt(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor) /* relay a crypt-authentication exchange between frontend and backend; on a first successful login the password and salt are cached on backend */
16041 + char * func = "do_crypt()";
16044 + static char password[MAX_PASSWORD_SIZE]; /* static buffer: contents persist between calls, so this routine is not reentrant */
16052 + if (pool_read(backend, salt, sizeof(salt))) /* salt supplied by the backend */
16054 + show_error("%s: failed to read salt",func);
16060 + memcpy(salt, backend->salt, sizeof(salt)); /* alternative path: reuse the salt saved on this backend connection */
16064 + if (!backend->issecondary_backend) /* the frontend is only prompted on behalf of a non-secondary backend */
16066 + pool_write(frontend, "R", 1); /* authentication */
16067 + if (protoMajor == PROTO_MAJOR_V3)
16070 + pool_write(frontend, &len, sizeof(len)); /* V3 messages carry a length word after the kind byte */
16072 + kind = htonl(4); /* crypt authentication */
16073 + pool_write(frontend, &kind, sizeof(kind)); /* indicating crypt authentication */
16074 + pool_write_and_flush(frontend, salt, sizeof(salt)); /* salt */
16076 + /* read password packet */
16077 + if (protoMajor == PROTO_MAJOR_V2)
16079 + if (pool_read(frontend, &size, sizeof(size)))
16081 + show_error("%s: failed to read password packet size",func);
16089 + if (pool_read(frontend, &k, sizeof(k))) /* V3 path: one kind byte precedes the size */
16091 + show_error("%s: failed to read password packet",func);
16096 + show_error("%s: password packet does not start with \"p\"",func);
16099 + if (pool_read(frontend, &size, sizeof(size)))
16101 + show_error("%s: failed to read password packet size",func);
16106 + if ((ntohl(size) - 4) > sizeof(password)) /* size includes its own 4 bytes; the arithmetic is unsigned, so a size below 4 wraps to a huge value and is rejected as well */
16108 + show_error("%s: password is too long(size: %d)", func,ntohl(size) - 4);
16112 + if (pool_read(frontend, password, ntohl(size) - 4))
16114 + show_error("%s: failed to read password (size: %d)", func,ntohl(size) - 4);
16119 + /* connection reusing? */
16122 +#ifdef PRINT_DEBUG
16123 + show_debug("%s:size: %d saved_size: %d",func, (ntohl(size) - 4), backend->pwd_size);
16125 + if ((ntohl(size) - 4) != backend->pwd_size) /* compare against the length cached at first login */
16127 +#ifdef PRINT_DEBUG
16128 + show_debug("%s: password size does not match in re-authetication",func);
16133 + if (memcmp(password, backend->password, backend->pwd_size) != 0) /* then compare the bytes themselves */
16135 +#ifdef PRINT_DEBUG
16136 + show_debug("%s: password does not match in re-authetication",func);
16144 + /* send password packet to backend */
16145 + if (protoMajor == PROTO_MAJOR_V3)
16146 + pool_write(backend, "p", 1);
16147 + pool_write(backend, &size, sizeof(size)); /* size is forwarded as received, still in network byte order */
16148 + pool_write_and_flush(backend, password, ntohl(size) -4);
16149 + if (pool_read(backend, &response, sizeof(response)))
16151 + show_error("%s: failed to read authentication response",func);
16155 + if (response != 'R') /* anything other than an authentication message is treated as failure */
16157 +#ifdef PRINT_DEBUG
16158 + show_debug("%s: backend does not return R while processing crypt authentication(%02x) secondary: %d",func, response, backend->issecondary_backend);
16163 + if (protoMajor == PROTO_MAJOR_V3)
16165 + if (pool_read(backend, &len, sizeof(len)))
16167 + show_error("%s: failed to read authentication packet size",func);
16171 + if (ntohl(len) != 8) /* 4-byte length word plus 4-byte auth kind */
16173 + show_error("%s: incorrect authentication packet size (%d)",func, ntohl(len));
16178 + /* expect to read "Authentication OK" response. kind should be 0... */
16179 + if (pool_read(backend, &kind, sizeof(kind)))
16181 +#ifdef PRINT_DEBUG
16182 + show_debug("%s: failed to read Authentication OK response",func);
16187 + /* if authenticated, save info */
16188 + if (!reauth && kind == 0)
16190 + if (protoMajor == PROTO_MAJOR_V3)
16194 + pool_write(frontend, "R", 1); /* forward "Authentication OK" to the frontend: 'R', length 8, kind 0 */
16195 + msglen = htonl(8);
16196 + pool_write(frontend, &msglen, sizeof(msglen));
16197 + msglen = htonl(0);
16198 + if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
16203 + backend->auth_kind = 4; /* same code as the crypt kind sent to the frontend above */
16204 + backend->pwd_size = ntohl(size) - 4;
16205 + memcpy(backend->password, password, backend->pwd_size); /* length was bounded by sizeof(password) above; assumes backend->password is at least that large -- TODO confirm */
16206 + memcpy(backend->salt, salt, sizeof(salt)); /* kept so a later call can reuse it */
16212 + * perform MD5 authentication
16214 +static int do_md5(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor) /* relay an MD5-authentication exchange between frontend and backend; on a first successful login the password and salt are cached on backend */
16216 + char * func = "do_md5()";
16219 + static char password[MAX_PASSWORD_SIZE]; /* static buffer: contents persist between calls, so this routine is not reentrant */
16227 + if (pool_read(backend, salt, sizeof(salt))) /* salt supplied by the backend */
16229 + show_error("%s: failed to read salt",func);
16235 + memcpy(salt, backend->salt, sizeof(salt)); /* alternative path: reuse the salt saved on this backend connection */
16239 + if (!backend->issecondary_backend) /* the frontend is only prompted on behalf of a non-secondary backend */
16241 + pool_write(frontend, "R", 1); /* authentication */
16242 + if (protoMajor == PROTO_MAJOR_V3)
16245 + pool_write(frontend, &len, sizeof(len)); /* V3 messages carry a length word after the kind byte */
16248 + pool_write(frontend, &kind, sizeof(kind)); /* indicating MD5 */
16249 + pool_write_and_flush(frontend, salt, sizeof(salt)); /* salt */
16251 + /* read password packet */
16252 + if (protoMajor == PROTO_MAJOR_V2)
16254 + if (pool_read(frontend, &size, sizeof(size)))
16256 + show_error("%s: failed to read password packet size",func);
16264 + if (pool_read(frontend, &k, sizeof(k))) /* V3 path: one kind byte precedes the size */
16266 + show_error("%s: failed to read password packet \"p\"",func);
16271 + show_error("%s: password packet does not start with \"p\"",func);
16274 + if (pool_read(frontend, &size, sizeof(size)))
16276 + show_error("%s: failed to read password packet size",func);
16281 + if ((ntohl(size) - 4) > sizeof(password)) /* size includes its own 4 bytes; the arithmetic is unsigned, so a size below 4 wraps to a huge value and is rejected as well */
16283 + show_error("%s: password is too long(size: %d)",func, ntohl(size) - 4);
16287 + if (pool_read(frontend, password, ntohl(size) - 4))
16289 + show_error("%s: failed to read password (size: %d)",func, ntohl(size) - 4);
16294 + /* connection reusing? */
16297 + if ((ntohl(size) - 4) != backend->pwd_size) /* compare against the length cached at first login */
16299 +#ifdef PRINT_DEBUG
16300 + show_debug("%s; password size does not match in re-authetication",func);
16305 + if (memcmp(password, backend->password, backend->pwd_size) != 0) /* then compare the bytes themselves */
16307 +#ifdef PRINT_DEBUG
16308 + show_debug("%s; password does not match in re-authetication",func);
16316 + /* send password packet to backend */
16317 + if (protoMajor == PROTO_MAJOR_V3)
16318 + pool_write(backend, "p", 1);
16319 + pool_write(backend, &size, sizeof(size)); /* size is forwarded as received, still in network byte order */
16320 + pool_write_and_flush(backend, password, ntohl(size) -4);
16321 + if (pool_read(backend, &response, sizeof(response)))
16323 + show_error("%s: failed to read authentication response",func);
16327 + if (response != 'R') /* anything other than an authentication message is treated as failure */
16329 +#ifdef PRINT_DEBUG
16330 + show_debug("%s: backend does not return R while processing MD5 authentication %c", func,response);
16335 + if (protoMajor == PROTO_MAJOR_V3)
16337 + if (pool_read(backend, &len, sizeof(len)))
16339 + show_error("%s: failed to read authentication packet size",func);
16343 + if (ntohl(len) != 8) /* 4-byte length word plus 4-byte auth kind */
16345 + show_error("%s: incorrect authentication packet size (%d)",func, ntohl(len));
16350 + /* expect to read "Authentication OK" response. kind should be 0... */
16351 + if (pool_read(backend, &kind, sizeof(kind)))
16353 +#ifdef PRINT_DEBUG
16354 + show_debug("%s: failed to read Authentication OK response",func);
16359 + /* if authenticated, save info */
16360 + if (!reauth && kind == 0)
16362 + if (protoMajor == PROTO_MAJOR_V3)
16366 + pool_write(frontend, "R", 1); /* forward "Authentication OK" to the frontend: 'R', length 8, kind 0 */
16367 + msglen = htonl(8);
16368 + pool_write(frontend, &msglen, sizeof(msglen));
16369 + msglen = htonl(0);
16370 + if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
16375 + backend->auth_kind = 5; /* recorded so the method used for this connection is known later */
16376 + backend->pwd_size = ntohl(size) - 4;
16377 + memcpy(backend->password, password, backend->pwd_size); /* length was bounded by sizeof(password) above; assumes backend->password is at least that large -- TODO confirm */
16378 + memcpy(backend->salt, salt, sizeof(salt)); /* kept so a later call can reuse it */
16384 + * read message length (V3 only)
16386 +int pool_read_message_length(POOL_CONNECTION_POOL *cp) /* read the length word of the next message from the master backend and, on the path below, from the secondary as well */
16388 + char * func = "pool_read_message_length()";
16390 + int length, length1; /* length: master, length1: secondary */
16392 + status = pool_read(MASTER(cp), &length, sizeof(length));
16395 + show_error("%s: error while reading message length",func);
16398 + length = ntohl(length); /* wire value is in network byte order */
16402 + status = pool_read(SECONDARY(cp), &length1, sizeof(length1));
16405 + show_error("%s: error while reading message length from secondary backend",func);
16408 + length1 = ntohl(length1);
16410 + if (length != length1) /* both backends are expected to answer with identical lengths */
16412 + show_error("%s: length does not match between backends master(%d) secondary(%d)",
16413 + func,length, length1);
16420 + show_error("%s:read_message_length: invalid message length (%d)", func, length);
16427 +signed char pool_read_kind(POOL_CONNECTION_POOL *cp) /* read the one-byte message kind from the master backend and, on the path below, from the secondary as well */
16429 + char * func = "pool_read_kind()";
16431 + char kind, kind1; /* kind: master, kind1: secondary */
16433 + status = pool_read(MASTER(cp), &kind, sizeof(kind));
16436 + show_error("%s:read_message_kind: error while reading message kind",func);
16442 + status = pool_read(SECONDARY(cp), &kind1, sizeof(kind1));
16445 + show_error("%s: error while reading message kind from secondary backend",func);
16449 + if (kind != kind1) /* both backends are expected to answer with the same message kind */
16451 + show_error("%s: kind does not match between backends master(%d) secondary(%d)",
16452 + func, kind, kind1);
16459 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pool_connection_pool.c pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_connection_pool.c
16460 --- postgresql-8.2.4/src/pgcluster/pglb/pool_connection_pool.c 1970-01-01 01:00:00.000000000 +0100
16461 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_connection_pool.c 2007-02-18 22:52:17.000000000 +0100
16463 +/*--------------------------------------------------------------------
16465 + * pool_connection_pool.c
16468 + * connection pool stuff
16470 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
16471 + * Portions Copyright (c) 2003-2006, Tatsuo Ishii
16472 + *--------------------------------------------------------------------
16475 + * Permission to use, copy, modify, and distribute this software and
16476 + * its documentation for any purpose and without fee is hereby
16477 + * granted, provided that the above copyright notice appear in all
16478 + * copies and that both that copyright notice and this permission
16479 + * notice appear in supporting documentation, and that the name of the
16480 + * author not be used in advertising or publicity pertaining to
16481 + * distribution of the software without specific, written prior
16482 + * permission. The author makes no representations about the
16483 + * suitability of this software for any purpose. It is provided "as
16484 + * is" without express or implied warranty.
16487 +#include "postgres.h"
16488 +#include <sys/types.h>
16489 +#include <sys/socket.h>
16490 +#include <sys/time.h>
16491 +#include <sys/un.h>
16492 +#include <arpa/inet.h>
16493 +#include <netdb.h>
16494 +#include <stdio.h>
16495 +#include <stdlib.h>
16496 +#include <errno.h>
16497 +#include <signal.h>
16498 +#include <string.h>
16499 +#include <unistd.h>
16502 +#ifdef HAVE_NETINET_TCP_H
16503 +#include <netinet/tcp.h>
16506 +#include "replicate_com.h"
16509 +POOL_CONNECTION_POOL *pool_connection_pool; /* connection pool */
16511 +int pool_init_cp(void);
16512 +POOL_CONNECTION_POOL *pool_get_cp(char *user, char *database, int protoMajor);
16513 +void pool_discard_cp(char *user, char *database, int protoMajor);
16514 +POOL_CONNECTION_POOL *pool_create_cp(void);
16515 +void pool_connection_pool_timer(POOL_CONNECTION_POOL *backend);
16516 +void pool_backend_timer_handler(int sig);
16517 +int connect_inet_domain_socket(int secondary_backend);
16518 +int connect_unix_domain_socket(int secondary_backend);
16519 +char PGRis_same_host(char * host1, char * host2);
16520 +void pool_finish(void);
16523 +static POOL_CONNECTION_POOL_SLOT *create_cp(POOL_CONNECTION_POOL_SLOT *cp, int secondary_backend);
16524 +static POOL_CONNECTION_POOL *new_connection(POOL_CONNECTION_POOL *p);
16529 +* initialize connection pools. this should be called once at the startup.
16531 +int pool_init_cp(void) /* allocate and zero-fill the global pool_connection_pool array of Max_Pool entries */
16533 + char * func = "pool_init_cp()";
16534 + pool_connection_pool = (POOL_CONNECTION_POOL *)malloc(sizeof(POOL_CONNECTION_POOL)*Max_Pool);
16535 + if (pool_connection_pool == NULL)
16537 + show_error("%s: malloc() failed[%s]",func,strerror(errno));
16540 + memset(pool_connection_pool, 0, sizeof(POOL_CONNECTION_POOL)*Max_Pool); /* pool_create_cp() tests MASTER_CONNECTION(p) == NULL to find free entries; presumably this zeroing is what makes a fresh entry read as free */
16546 +* find connection by user and database
16548 +POOL_CONNECTION_POOL *pool_get_cp(char *user, char *database, int protoMajor) /* scan the pool for an entry whose master connection matches protocol major version, user and database */
16550 + char * func = "pool_get_cp()";
16553 + POOL_CONNECTION_POOL *p = pool_connection_pool;
16557 + show_error("%s: pool_connection_pool is not initialized",func);
16561 + for (i=0;i<Max_Pool;i++)
16563 + if (MASTER_CONNECTION(p) && /* skip entries without a master connection */
16564 + MASTER_CONNECTION(p)->sp->major == protoMajor &&
16565 + MASTER_CONNECTION(p)->sp->user != NULL && /* guard before strcmp() on user */
16566 + strcmp(MASTER_CONNECTION(p)->sp->user, user) == 0 &&
16567 + strcmp(MASTER_CONNECTION(p)->sp->database, database) == 0) /* NOTE(review): sp->database is not NULL-checked the way sp->user is */
16569 + /* mark this connection is under use */
16570 + MASTER_CONNECTION(p)->closetime = 0; /* the timer code treats a zero closetime as "no expiry pending" */
16579 + * disconnect and release a connection to the database
16581 +void pool_discard_cp(char *user, char *database, int protoMajor) /* look up the matching pool entry, free its startup-packet strings, close its connection and zero the entry */
16583 + char * func = "pool_discard_cp()";
16584 + POOL_CONNECTION_POOL *p = pool_get_cp(user, database, protoMajor); /* note: pool_get_cp() also resets closetime on the entry it returns */
16588 + show_error("%s: cannot get connection pool for user %s datbase %s", func,user, database);
16592 + free(MASTER_CONNECTION(p)->sp->user);
16593 + free(MASTER_CONNECTION(p)->sp->database);
16594 + free(MASTER_CONNECTION(p)->sp->startup_packet);
16595 + pool_close(MASTER_CONNECTION(p)->con);
16597 + memset(p, 0, sizeof(POOL_CONNECTION_POOL)); /* leaves no dangling pointers in the entry */
16602 +* create a connection pool by user and database
16604 +POOL_CONNECTION_POOL *pool_create_cp(void) /* return a new connection in the first free pool entry, or recycle the entry with the smallest closetime when none is free */
16606 + char * func = "pool_create_cp()";
16608 + time_t closetime; /* smallest closetime seen so far during the scan */
16609 + POOL_CONNECTION_POOL *oldestp; /* entry holding that smallest closetime */
16611 + POOL_CONNECTION_POOL *p = pool_connection_pool;
16615 + show_error("%s: pool_connection_pool is not initialized",func);
16619 + for (i=0; i<Max_Pool; i++)
16621 + if (MASTER_CONNECTION(p) == NULL) /* free entry found */
16622 + return new_connection(p);
16626 +#ifdef PRINT_DEBUG
16627 + show_debug("%s:no empty connection slot was found",func);
16631 + * no empty connection slot was found. look for the oldest connection and discard it.
16633 + oldestp = p = pool_connection_pool; /* restart from the first entry */
16634 + closetime = MASTER_CONNECTION(p)->closetime;
16635 + for (i=0; i<Max_Pool; i++)
16637 +#ifdef PRINT_DEBUG
16638 + show_debug("%s:user: %s database: %s closetime: %d",
16640 + MASTER_CONNECTION(p)->sp->user,
16641 + MASTER_CONNECTION(p)->sp->database,
16642 + MASTER_CONNECTION(p)->closetime); /* NOTE(review): closetime is compared with a time_t here but printed with %d -- check the field type */
16644 + if (MASTER_CONNECTION(p)->closetime < closetime) /* a smaller timestamp means the entry was released earlier */
16646 + closetime = MASTER_CONNECTION(p)->closetime;
16653 + pool_send_frontend_exits(p);
16655 +#ifdef PRINT_DEBUG
16656 + show_debug("%s:discarding old %d th connection. user: %s database: %s",
16658 + oldestp - pool_connection_pool, /* NOTE(review): pointer difference is ptrdiff_t but is printed with %d */
16659 + MASTER_CONNECTION(p)->sp->user,
16660 + MASTER_CONNECTION(p)->sp->database);
16663 + free(MASTER_CONNECTION(p)->sp->user);
16664 + free(MASTER_CONNECTION(p)->sp->database);
16665 + free(MASTER_CONNECTION(p)->sp->startup_packet);
16666 + pool_close(MASTER_CONNECTION(p)->con);
16668 + memset(p, 0, sizeof(POOL_CONNECTION_POOL)); /* wipe the entry before reusing it */
16670 + return new_connection(p);
16674 + * set backend connection close timer
16676 +void pool_connection_pool_timer(POOL_CONNECTION_POOL *backend) /* stamp backend with the current time and arm SIGALRM for Connection_Life_Time seconds when this code path reaches the end */
16678 +#ifdef PRINT_DEBUG
16679 + char * func = "pool_connection_pool_timer()";
16681 + POOL_CONNECTION_POOL *p = pool_connection_pool;
16684 +#ifdef PRINT_DEBUG
16685 + show_debug("%s:pool_connection_pool_timer: called",func);
16688 + MASTER_CONNECTION(backend)->closetime = time(NULL); /* set connection close time */
16690 + if (Connection_Life_Time == 0) /* zero life time is special-cased before any alarm is armed */
16693 + /* look for any other timeout */
16694 + for (i=0;i<Max_Pool;i++, p++)
16696 + if (!MASTER_CONNECTION(p)) /* entry has no master connection */
16698 + if (MASTER_CONNECTION(p)->sp->user == NULL) /* entry has no user recorded */
16701 + if (p != backend && MASTER_CONNECTION(p)->closetime) /* another entry already has a pending close time */
16705 + /* no other timer found. set my timer */
16706 +#ifdef PRINT_DEBUG
16707 + show_debug("%s: set alarm after %d seconds",func, Connection_Life_Time);
16709 + signal(SIGALRM, pool_backend_timer_handler);
16710 + alarm(Connection_Life_Time);
16714 + * backend connection close timer handler
16716 +void pool_backend_timer_handler(int sig) /* SIGALRM handler: discard pool entries whose closetime + Connection_Life_Time has passed and track the nearest remaining close time */
16718 +#define TMINTMAX 0x7fffffff
16720 +#ifdef PRINT_DEBUG
16721 + char * func = "pool_backend_timer_handler()";
16723 + POOL_CONNECTION_POOL *p = pool_connection_pool;
16726 + time_t nearest = TMINTMAX; /* sentinel: still TMINTMAX after the scan means no timer remains */
16728 + now = time(NULL);
16730 +#ifdef PRINT_DEBUG
16731 + show_debug("%s:called at %d", func,now);
16734 + for (i=0;i<Max_Pool;i++, p++)
16736 + if (!MASTER_CONNECTION(p)) /* entry has no master connection */
16738 + if (MASTER_CONNECTION(p)->sp->user == NULL) /* entry has no user recorded */
16741 + /* timer expire? */
16742 + if (MASTER_CONNECTION(p)->closetime) /* non-zero closetime: a close time is pending for this entry */
16744 +#ifdef PRINT_DEBUG
16745 + show_debug("%s: expire time: %d",
16747 + MASTER_CONNECTION(p)->closetime+Connection_Life_Time);
16750 + if (now >= (MASTER_CONNECTION(p)->closetime+Connection_Life_Time))
16752 + /* discard expired connection */
16753 +#ifdef PRINT_DEBUG
16754 + show_debug("%s: expires user %s database %s", func, MASTER_CONNECTION(p)->sp->user, MASTER_CONNECTION(p)->sp->database);
16757 + pool_send_frontend_exits(p); /* NOTE(review): this and the free()/pool_close() calls below run in signal-handler context; free() is not async-signal-safe */
16759 + free(MASTER_CONNECTION(p)->sp->user);
16760 + free(MASTER_CONNECTION(p)->sp->database);
16761 + free(MASTER_CONNECTION(p)->sp->startup_packet);
16762 + pool_close(MASTER_CONNECTION(p)->con);
16764 + memset(p, 0, sizeof(POOL_CONNECTION_POOL)); /* wipe the discarded entry */
16768 + /* look for nearest timer */
16769 + if (MASTER_CONNECTION(p)->closetime < nearest)
16770 + nearest = MASTER_CONNECTION(p)->closetime;
16775 + /* any remaining timer */
16776 + if (nearest != TMINTMAX)
16778 + nearest = Connection_Life_Time - (now - nearest); /* seconds left until the earliest remaining entry expires */
16779 + if (nearest <= 0)
16781 + signal(SIGALRM, pool_backend_timer_handler); /* re-install the handler for the next alarm */
16786 +int connect_inet_domain_socket(int secondary_backend) /* open a TCP socket with TCP_NODELAY and connect it to CurrentCluster->hostName at CurrentCluster->port; secondary_backend is not referenced on the lines shown here */
16788 + char * func = "connect_inet_domain_socket()";
16792 + struct sockaddr_in addr;
16793 + struct hostent *hp;
16795 + fd = socket(AF_INET, SOCK_STREAM, 0);
16798 + show_error("%s: socket() failed: %s",func, strerror(errno));
16802 + /* set nodelay */
16803 + if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
16807 + show_error("%s: setsockopt() failed: %s", func, strerror(errno));
16812 + memset((char *) &addr, 0, sizeof(addr));
16813 + ((struct sockaddr *)&addr)->sa_family = AF_INET;
16815 + addr.sin_port = htons(CurrentCluster->port); /* port must be in network byte order */
16816 + len = sizeof(struct sockaddr_in);
16818 + hp = gethostbyname(CurrentCluster->hostName); /* IPv4-only lookup; the result is checked for AF_INET below */
16820 + if ((hp == NULL) || (hp->h_addrtype != AF_INET))
16822 + show_error("%s: gethostbyname() failed: %s host: %s",func, strerror(errno), CurrentCluster->hostName); /* NOTE(review): gethostbyname() reports failures through h_errno, so strerror(errno) may print an unrelated message */
16826 + memmove((char *) &(addr.sin_addr),
16827 + (char *) hp->h_addr, /* first address returned by the resolver */
16830 + if (connect(fd, (struct sockaddr *)&addr, len) < 0)
16832 + show_error("%s: connect() failed: %s",func,strerror(errno));
16839 +int connect_unix_domain_socket(int secondary_backend) /* connect to the local postmaster through the Unix-domain socket Backend_Socket_Dir/.s.PGSQL.<port>; secondary_backend is not referenced on the lines shown here */
16841 + char * func = "connect_unix_domain_socket()";
16842 + struct sockaddr_un addr;
16847 + fd = socket(AF_UNIX, SOCK_STREAM, 0);
16850 + show_error("%s: socket() failed: %s", func,strerror(errno)); /* message fixed: the call that failed here is socket(), not setsockopt() */
16854 + port = CurrentCluster->port;
16855 + memset((char *) &addr, 0, sizeof(addr));
16856 + ((struct sockaddr *)&addr)->sa_family = AF_UNIX;
16857 + snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/.s.PGSQL.%d", /* snprintf keeps the path within sun_path; an over-long directory is truncated silently */
16858 + Backend_Socket_Dir,
16859 + CurrentCluster->port);
16860 +#ifdef PRINT_DEBUG
16861 + show_debug("%s:postmaster Unix domain socket: %s",func, addr.sun_path);
16864 + len = sizeof(struct sockaddr_un);
16866 + if (connect(fd, (struct sockaddr *)&addr, len) < 0)
16868 + show_error("%s: connect() failed: %s",func, strerror(errno));
16872 +#ifdef PRINT_DEBUG
16873 + show_debug("%s:connected to postmaster Unix domain socket: %s fd: %d", func,addr.sun_path, fd);
16878 +static POOL_CONNECTION_POOL_SLOT *create_cp(POOL_CONNECTION_POOL_SLOT *cp, int secondary_backend) /* connect to the current cluster backend (Unix socket when it is this host, TCP otherwise) and attach the connection to cp */
16880 + char * func = "create_cp()";
16882 + char hostName[HOSTNAME_MAX_LENGTH];
16884 + if (gethostname(hostName,sizeof(hostName)) < 0)
16886 + show_error("%s:gethostname() failed. (%s)",func,strerror(errno));
16889 + if (PGRis_same_host(hostName,CurrentCluster->hostName) == 1) /* backend runs on this machine */
16891 +#ifdef PRINT_DEBUG
16892 + show_debug("%s:[%s] [%s] is same",func,hostName,CurrentCluster->hostName);
16894 + fd = connect_unix_domain_socket(secondary_backend);
16898 + fd = connect_inet_domain_socket(secondary_backend);
16903 + /* fatal error, notice to parent and exit */
16904 + notice_backend_error();
16908 + cp->con = pool_open(fd); /* wrap the raw descriptor in a POOL_CONNECTION */
16909 + cp->closetime = 0; /* no close time pending for a fresh connection */
16913 +static POOL_CONNECTION_POOL *new_connection(POOL_CONNECTION_POOL *p) /* allocate the master slot of pool entry p, connect it to the backend and initialise its parameter store */
16915 + char * func = "new_connection()";
16916 + /* create master connection */
16917 + MASTER_CONNECTION(p) = malloc(sizeof(POOL_CONNECTION_POOL_SLOT));
16918 + if (MASTER_CONNECTION(p) == NULL)
16920 + show_error("%s: malloc() failed [%s]",func,strerror(errno));
16923 + create_cp(MASTER_CONNECTION(p), 0); /* 0 = not a secondary backend; NOTE(review): the return value is not checked on this line */
16925 + /* initialize Parameter Status save structure */
16926 + if (pool_init_params(&MASTER(p)->params))
16930 + p->num = 1; /* number of slots */
16935 +char PGRis_same_host(char * host1, char * host2) /* resolve both host names to IP values for comparison; create_cp() treats a result of 1 as "same host" */
16937 + unsigned int ip1, ip2; /* values returned by PGRget_ip_by_name() */
16939 + if ((host1 == NULL) || (host2 == NULL)) /* a missing name is handled before any lookup */
16943 + ip1 = PGRget_ip_by_name( host1);
16944 + ip2 = PGRget_ip_by_name( host2);
16952 +void pool_finish(void) /* release every pool entry (startup-packet strings and connection) and then the pool array itself */
16954 + char * func = "pool_finish()";
16957 + POOL_CONNECTION_POOL *p = pool_connection_pool;
16961 + show_error("%s:pool_connection_pool is not initialized",func);
16965 + for (i=0 ; i<Max_Pool ; i++)
16970 + if (MASTER_CONNECTION(p)->sp->user != NULL)
16972 + free(MASTER_CONNECTION(p)->sp->user);
16973 + MASTER_CONNECTION(p)->sp->user = NULL; /* pointers are cleared after free so nothing stale remains */
16975 + if (MASTER_CONNECTION(p)->sp->database != NULL)
16977 + free(MASTER_CONNECTION(p)->sp->database);
16978 + MASTER_CONNECTION(p)->sp->database = NULL;
16980 + if (MASTER_CONNECTION(p)->sp->startup_packet != NULL)
16982 + free(MASTER_CONNECTION(p)->sp->startup_packet);
16983 + MASTER_CONNECTION(p)->sp->startup_packet = NULL;
16986 + if (MASTER_CONNECTION(p)->con != NULL)
16988 + pool_close(MASTER_CONNECTION(p)->con);
16989 + MASTER_CONNECTION(p)->con = NULL;
16991 + memset(p, 0, sizeof(POOL_CONNECTION_POOL)); /* wipe the whole entry */
16994 + free((char *)pool_connection_pool);
16995 + pool_connection_pool = NULL; /* pool_get_cp()/pool_create_cp() report "not initialized" from here on */
16998 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pool_params.c pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_params.c
16999 --- postgresql-8.2.4/src/pgcluster/pglb/pool_params.c 1970-01-01 01:00:00.000000000 +0100
17000 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_params.c 2007-02-18 22:52:17.000000000 +0100
17002 +/*--------------------------------------------------------------------
17007 + * connection pool stuff
17009 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
17010 + * Portions Copyright (c) 2003-2006, Tatsuo Ishii
17011 + *--------------------------------------------------------------------
17014 + * Permission to use, copy, modify, and distribute this software and
17015 + * its documentation for any purpose and without fee is hereby
17016 + * granted, provided that the above copyright notice appear in all
17017 + * copies and that both that copyright notice and this permission
17018 + * notice appear in supporting documentation, and that the name of the
17019 + * author not be used in advertising or publicity pertaining to
17020 + * distribution of the software without specific, written prior
17021 + * permission. The author makes no representations about the
17022 + * suitability of this software for any purpose. It is provided "as
17023 + * is" without express or implied warranty.
17027 +#include <stdio.h>
17028 +#include <sys/time.h>
17030 +#include <stdlib.h>
17031 +#include <string.h>
17033 +#ifdef HAVE_NETINET_TCP_H
17034 +#include <netinet/tcp.h>
17037 +#include "replicate_com.h"
17040 +#define MAX_PARAM_ITEMS 128
17042 +int pool_init_params(ParamStatus *params);
17043 +void pool_discard_params(ParamStatus *params);
17044 +char *pool_find_name(ParamStatus *params, char *name, int *pos);
17045 +int pool_get_param(ParamStatus *params, int index, char **name, char **value);
17046 +int pool_add_param(ParamStatus *params, char *name, char *value);
17047 +void pool_param_debug_print(ParamStatus *params);
17050 + * initialize parameter structure
17052 +int pool_init_params(ParamStatus *params) /* allocate the name and value pointer tables, MAX_PARAM_ITEMS slots each */
17054 + char * func = "pool_init_params()";
17057 + params->names = malloc(MAX_PARAM_ITEMS*sizeof(char *));
17058 + if (params->names == NULL)
17060 + show_error("%s: cannot allocate memory",func);
17063 + params->values = malloc(MAX_PARAM_ITEMS*sizeof(char *));
17064 + if (params->values == NULL) /* NOTE(review): no release of params->names is visible on this failure path -- confirm it is not leaked */
17066 + show_error("%s: cannot allocate memory",func);
17073 + * discard parameter structure
17075 +void pool_discard_params(ParamStatus *params) /* free every stored name/value string and then the two pointer tables */
17079 + for (i=0;i<params->num;i++) /* only the first params->num slots hold allocated strings */
17081 + free(params->names[i]);
17082 + free(params->values[i]);
17084 + free(params->names);
17085 + free(params->values);
17089 + * find param value by name. if found, its value is returned
17090 + * also, pos is set
17091 + * if not found, NULL is returned
17093 +char *pool_find_name(ParamStatus *params, char *name, int *pos) /* linear search of the stored names for an exact, case-sensitive match */
17097 + for (i=0;i<params->num;i++)
17099 + if (!strcmp(name, params->names[i])) /* strcmp() == 0 means the names are identical */
17102 + return params->values[i];
17109 + * return name and value by index.
17111 +int pool_get_param(ParamStatus *params, int index, char **name, char **value) /* hand back pointers to the name and value stored at index */
17113 + if (index < 0 || index >= params->num) /* reject indexes outside the populated range */
17116 + *name = params->names[index]; /* pointers into the store, not copies */
17117 + *value = params->values[index];
17123 + * add or replace name/value pair
17125 +int pool_add_param(ParamStatus *params, char *name, char *value) /* overwrite the value of an existing name, or store a new name/value pair */
17127 + char * func = "pool_add_param()";
17130 + if (pool_find_name(params, name, &pos)) /* non-NULL result: name is already stored and pos identifies its slot */
17132 + /* name already exists */
17133 + if (strlen(params->values[pos]) < strlen(value)) /* grow the buffer only when the new value is longer */
17135 + params->values[pos] = realloc(params->values[pos], strlen(value) + 1); /* NOTE(review): assigning realloc()'s result directly loses the old buffer if it returns NULL */
17136 + if (params->values[pos] == NULL)
17138 + show_error("%s: cannot allocate memory",func);
17142 + strcpy(params->values[pos], value); /* safe: the buffer holds at least strlen(value) + 1 bytes at this point */
17148 + /* add name/value pair */
17149 + if (params->num >= MAX_PARAM_ITEMS) /* tables were sized for MAX_PARAM_ITEMS entries */
17151 + show_error("%s: no more room for num",func);
17154 + num = params->num; /* next unused slot */
17155 + params->names[num] = strdup(name);
17156 + if (params->names[num] == NULL)
17158 + show_error("%s: cannot allocate memory",func);
17161 + params->values[num] = strdup(value);
17162 + if (params->values[num] == NULL)
17164 + show_error("%s: cannot allocate memory",func);
17172 +void pool_param_debug_print(ParamStatus *params) /* log every stored name/value pair through show_debug() in PRINT_DEBUG builds */
17174 +#ifdef PRINT_DEBUG
17175 + char * func = "pool_param_debug_print()";
17179 + for (i=0;i<params->num;i++)
17181 +#ifdef PRINT_DEBUG
17182 + show_debug("%s: No.%d: name: %s value: %s",func, i, params->names[i], params->values[i]);
17186 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pool_process_query.c pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_process_query.c
17187 --- postgresql-8.2.4/src/pgcluster/pglb/pool_process_query.c 1970-01-01 01:00:00.000000000 +0100
17188 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_process_query.c 2007-02-18 22:52:17.000000000 +0100
17190 +/*--------------------------------------------------------------------
17192 + * pool_process_query.c
17195 + * query processing stuff
17197 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
17198 + * Portions Copyright (c) 2003-2006, Tatsuo Ishii
17199 + *--------------------------------------------------------------------
17202 + * Permission to use, copy, modify, and distribute this software and
17203 + * its documentation for any purpose and without fee is hereby
17204 + * granted, provided that the above copyright notice appear in all
17205 + * copies and that both that copyright notice and this permission
17206 + * notice appear in supporting documentation, and that the name of the
17207 + * author not be used in advertising or publicity pertaining to
17208 + * distribution of the software without specific, written prior
17209 + * permission. The author makes no representations about the
17210 + * suitability of this software for any purpose. It is provided "as
17211 + * is" without express or implied warranty.
17214 +#include <errno.h>
17215 +#include <sys/types.h>
17216 +#include <sys/time.h>
17217 +#include <arpa/inet.h>
17218 +#include <stdlib.h>
17219 +#include <unistd.h>
17220 +#include <string.h>
17221 +#include <netinet/in.h>
17223 +#include "postgres_fe.h"
17224 +#include "libpq/pqcomm.h"
17226 +#include "replicate_com.h"
17229 +POOL_STATUS pool_process_query(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, int connection_reuse);
17230 +POOL_STATUS ErrorResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17231 +void pool_enable_timeout(void);
17232 +void pool_disable_timeout(void);
17233 +int pool_check_fd(POOL_CONNECTION *cp, int notimeout);
17234 +void pool_send_frontend_exits(POOL_CONNECTION_POOL *backend);
17235 +POOL_STATUS SimpleForwardToFrontend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17236 +POOL_STATUS SimpleForwardToBackend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17237 +POOL_STATUS ParameterStatus(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17238 +void pool_send_error_message(POOL_CONNECTION *frontend, int protoMajor, char *code, char *message, char *detail, char *hint, char *file, int line);
17241 +static POOL_STATUS Query(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, char *query);
17242 +static POOL_STATUS ReadyForQuery(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, int send_ready);
17243 +static POOL_STATUS CompleteCommandResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17244 +static int RowDescription(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17245 +static POOL_STATUS AsciiRow(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, short num_fields);
17246 +static POOL_STATUS BinaryRow(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, short num_fields);
17247 +static POOL_STATUS CursorResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17248 +static POOL_STATUS NoticeResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17249 +static POOL_STATUS CopyInResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17250 +static POOL_STATUS CopyOutResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17251 +static POOL_STATUS CopyDataRows(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, int copyin);
17252 +static POOL_STATUS EmptyQueryResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17253 +static POOL_STATUS NotificationResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17254 +static POOL_STATUS FunctionCall(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17255 +static POOL_STATUS FunctionResultResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17256 +static POOL_STATUS ProcessFrontendResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17257 +static int synchronize(POOL_CONNECTION *cp);
17258 +static void process_reporting(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17259 +static int reset_backend(POOL_CONNECTION_POOL *backend, int qcnt);
17260 +static int load_balance_enabled(POOL_CONNECTION_POOL *backend, char *sql);
17261 +static void start_load_balance(POOL_CONNECTION_POOL *backend);
17262 +static void end_load_balance(POOL_CONNECTION_POOL *backend);
17264 +static POOL_CONNECTION_POOL_SLOT *slots[MAX_CONNECTION_SLOTS];
17266 +POOL_STATUS pool_process_query(POOL_CONNECTION *frontend,
17267 + POOL_CONNECTION_POOL *backend,
17268 + int connection_reuse)
17270 + char * func = "pool_process_query()";
17271 + char kind, kind1; /* packet kind (backend) */
17272 + char fkind; /* packet kind (frontend) */
17273 + short num_fields = 0;
17275 + fd_set writemask;
17276 + fd_set exceptmask;
17278 + POOL_STATUS status;
17279 + int state; /* 0: ok to issue commands 1: waiting for "ready for query" response */
17282 + frontend->no_forward = connection_reuse;
17288 + kind = kind1 = 0;
17291 + if (state == 0 && connection_reuse)
17295 + /* send query for resetting connection such as "ROLLBACK" "RESET ALL"... */
17296 + st = reset_backend(backend, qcnt);
17298 + if (st < 0) /* error? */
17301 + else if (st == 0) /* no query issued? */
17307 + else if (st == 1) /* more query remains */
17314 + else if (st == 2) /* no more qury */
17316 + frontend->no_forward = 0;
17317 + return POOL_CONTINUE;
17322 + if ((!REPLICATION && MASTER(backend)->len == 0 && frontend->len == 0) ||
17323 + (REPLICATION && MASTER(backend)->len == 0 &&
17324 + SECONDARY(backend)->len == 0
17325 + && frontend->len == 0))
17328 + struct timeval timeout;
17330 + timeout.tv_sec = 1;
17331 + timeout.tv_usec = 0;
17333 + FD_ZERO(&readmask);
17334 + FD_ZERO(&writemask);
17335 + FD_ZERO(&exceptmask);
17336 + if (!connection_reuse)
17337 + FD_SET(frontend->fd, &readmask);
17338 + FD_SET(MASTER(backend)->fd, &readmask);
17340 + FD_SET(SECONDARY(backend)->fd, &readmask);
17341 + if (!connection_reuse)
17342 + FD_SET(frontend->fd, &exceptmask);
17343 + FD_SET(MASTER(backend)->fd, &exceptmask);
17345 + if (connection_reuse)
17348 + fds = select(Max(SECONDARY(backend)->fd, MASTER(backend)->fd) + 1,
17349 + &readmask, &writemask, &exceptmask, NULL);
17351 + fds = select(MASTER(backend)->fd+1, &readmask, &writemask, &exceptmask, NULL);
17356 + fds = select(Max(SECONDARY(backend)->fd, /* nfds must be the highest watched fd plus one */
17357 + Max(frontend->fd, MASTER(backend)->fd)) + 1,
17358 + &readmask, &writemask, &exceptmask, NULL);
17360 + fds = select(Max(frontend->fd, MASTER(backend)->fd)+1,
17361 + &readmask, &writemask, &exceptmask, NULL);
17366 + if (errno == EINTR)
17369 + show_error("%s:select() failed. reason: %s",func, strerror(errno));
17370 + return POOL_ERROR;
17375 + return POOL_CONTINUE;
17378 + if (FD_ISSET(MASTER(backend)->fd, &readmask))
17380 + pool_read(MASTER(backend), &kind, 1);
17381 +#ifdef PRINT_DEBUG
17382 + show_debug("%s:read kind from backend %c", func,kind);
17386 + if (REPLICATION && FD_ISSET(SECONDARY(backend)->fd, &readmask))
17388 + pool_read(SECONDARY(backend), &kind1, 1);
17389 +#ifdef PRINT_DEBUG
17390 + show_debug("%s:read kind from secondary backend %c", func,kind1);
17394 + if (!connection_reuse && FD_ISSET(frontend->fd, &exceptmask))
17398 + if (FD_ISSET(MASTER(backend)->fd, &exceptmask))
17400 + return POOL_ERROR;
17403 + if (!connection_reuse && FD_ISSET(frontend->fd, &readmask))
17405 + status = ProcessFrontendResponse(frontend, backend);
17406 + if (status != POOL_CONTINUE)
17414 + if (MASTER(backend)->len > 0)
17416 + pool_read(MASTER(backend), &kind, 1);
17419 + pool_read(SECONDARY(backend), &kind1, 1);
17420 + if (kind == '\0' || kind != kind1)
17422 + show_error("%s: kind does not match between backends master(%c) secondary(%c)",
17423 + func, kind, kind1);
17424 + pool_send_error_message(frontend, MAJOR(backend), "XX000",
17425 + "kind mismatch between backends", "",
17426 + "check data consistency between master and secondary", __FILE__, __LINE__);
17428 + if (pool_config_replication_stop_on_mismatch)
17429 + return POOL_FATAL;
17431 + return POOL_ERROR;
17434 +#ifdef PRINT_DEBUG
17435 + show_debug("%s:read kind from backend pending data %c len: %d po: %d", func, kind, MASTER(backend)->len, MASTER(backend)->po);
17438 + if (frontend->len > 0)
17440 + status = ProcessFrontendResponse(frontend, backend);
17441 + if (status != POOL_CONTINUE)
17448 + /* this is the synchronous point */
17453 + pool_read(MASTER(backend), &kind, 1);
17457 + pool_read(SECONDARY(backend), &kind1, 1);
17459 + if (kind == '\0' || kind != kind1)
17461 + show_error("%s: kind does not match between backends master(%c) secondary(%c)",
17462 + func, kind, kind1);
17463 + pool_send_error_message(frontend, MAJOR(backend), "XX000",
17464 + "kind mismatch between backends", "",
17465 + "check data consistency between master and secondary", __FILE__, __LINE__);
17467 + if (pool_config_replication_stop_on_mismatch)
17468 + return POOL_FATAL;
17470 + return POOL_ERROR;
17475 + * Process backend response
17478 + if (MAJOR(backend) == PROTO_MAJOR_V3)
17483 + /* CopyIn response */
17484 + status = CopyInResponse(frontend, backend);
17487 + /* Parameter Status */
17488 + status = ParameterStatus(frontend, backend);
17491 + /* Ready for query */
17492 + status = ReadyForQuery(frontend, backend, 1);
17495 + status = SimpleForwardToFrontend(kind, frontend, backend);
17504 + /* Notification response */
17505 + status = NotificationResponse(frontend, backend);
17510 + status = BinaryRow(frontend, backend, num_fields);
17514 + /* Complete command response */
17515 + status = CompleteCommandResponse(frontend, backend);
17520 + status = AsciiRow(frontend, backend, num_fields);
17524 + /* Error Response */
17525 + status = ErrorResponse(frontend, backend);
17529 + /* CopyIn Response */
17530 + status = CopyInResponse(frontend, backend);
17534 + /* CopyOut Response */
17535 + status = CopyOutResponse(frontend, backend);
17539 + /* Empty Query Response */
17540 + status = EmptyQueryResponse(frontend, backend);
17544 + /* Notice Response */
17545 + status = NoticeResponse(frontend, backend);
17549 + /* CursorResponse */
17550 + status = CursorResponse(frontend, backend);
17554 + /* RowDescription */
17555 + status = RowDescription(frontend, backend);
17557 + return POOL_ERROR;
17559 + num_fields = status;
17560 + status = POOL_CONTINUE;
17564 + /* FunctionResultResponse and FunctionVoidResponse */
17565 + status = FunctionResultResponse(frontend, backend);
17569 + /* Ready for query */
17570 + status = ReadyForQuery(frontend, backend, 1);
17574 + show_error("%s:Unknown message type %c(%02x)",func, kind, kind);
17579 + if (status != POOL_CONTINUE)
17582 + if (kind == 'Z' && frontend->no_forward && state == 1)
17588 + return POOL_CONTINUE;
17591 +static POOL_STATUS Query(POOL_CONNECTION *frontend,
17592 + POOL_CONNECTION_POOL *backend, char *query)
17594 +#ifdef PRINT_DEBUG
17595 + char * func = "Query()";
17599 + static char *sq = "show pool_status";
17601 + if (query == NULL)
17603 + /* read actual query */
17604 + if (MAJOR(backend) == PROTO_MAJOR_V3)
17606 + if (pool_read(frontend, &len, sizeof(len)) < 0)
17608 + len = ntohl(len) - 4;
17609 + string = pool_read2(frontend, len);
17612 + string = pool_read_string(frontend, &len, 0);
17614 + if (string == NULL)
17619 + len = strlen(query)+1;
17623 +#ifdef PRINT_DEBUG
17624 + show_debug("%s: %s", func,string);
17627 + /* process status reporting? */
17628 + if (strncasecmp(sq, string, strlen(sq)) == 0)
17630 +#ifdef PRINT_DEBUG
17631 + show_debug("%s:process reporting",func);
17633 + process_reporting(frontend, backend);
17634 + return POOL_CONTINUE;
17637 + /* load balance trick */
17638 + if (load_balance_enabled(backend, string))
17639 + start_load_balance(backend);
17641 + /* forward the query to the backend */
17642 + pool_write(MASTER(backend), "Q", 1);
17644 + if (MAJOR(backend) == PROTO_MAJOR_V3)
17646 + int sendlen = htonl(len + 4);
17647 + pool_write(MASTER(backend), &sendlen, sizeof(sendlen));
17650 + if (pool_write_and_flush(MASTER(backend), string, len) < 0)
17657 + /* in "strict mode" we need to wait for master completing the query */
17658 + if (pool_config_replication_strict || STRICT_MODE(string))
17659 + if (synchronize(MASTER(backend)))
17662 + pool_write(SECONDARY(backend), "Q", 1);
17663 + if (MAJOR(backend) == PROTO_MAJOR_V3)
17665 + int sendlen = htonl(len + 4);
17666 + pool_write(SECONDARY(backend), &sendlen, sizeof(sendlen));
17669 + if (pool_write_and_flush(SECONDARY(backend), string, len) < 0)
17674 + return POOL_CONTINUE;
17677 +static POOL_STATUS ReadyForQuery(POOL_CONNECTION *frontend,
17678 + POOL_CONNECTION_POOL *backend, int send_ready)
17680 +#ifdef PRINT_DEBUG
17681 + char * func = "ReadyForQuery()";
17684 + pool_flush(frontend);
17688 + pool_write(frontend, "Z", 1);
17690 + if (MAJOR(backend) == PROTO_MAJOR_V3)
17693 + signed char state;
17695 + if ((len = pool_read_message_length(backend)) < 0)
17698 +#ifdef PRINT_DEBUG
17699 + show_debug("%s: message length: %d", func, len);
17702 + len = htonl(len);
17703 + pool_write(frontend, &len, sizeof(len));
17705 + state = pool_read_kind(backend);
17709 + /* set transaction state */
17710 +#ifdef PRINT_DEBUG
17711 + show_debug("%s: transaction state: %c", func, state);
17713 + MASTER(backend)->tstate = state;
17715 + SECONDARY(backend)->tstate = state;
17717 + pool_write(frontend, &state, 1);
17720 + if (pool_flush(frontend))
17724 + /* end load balance mode */
17725 + if (IN_LOAD_BALANCE)
17726 + end_load_balance(backend);
17728 + return ProcessFrontendResponse(frontend, backend);
17731 +static POOL_STATUS CompleteCommandResponse(POOL_CONNECTION *frontend,
17732 + POOL_CONNECTION_POOL *backend)
17734 + char * func = "CompleteCommandResponse()";
17735 + char *string, *string1;
17738 + /* read command tag */
17739 + string = pool_read_string(MASTER(backend), &len, 0);
17740 + if (string == NULL)
17745 + string1 = pool_read_string(SECONDARY(backend), &len1, 0);
17746 + if (string1 == NULL)
17751 + show_error("%s: message length does not match between master(%d \"%s\",) and secondary(%d \"%s\",)",
17752 + func, len, string, len1, string1);
17756 + /* forward to the frontend */
17757 + pool_write(frontend, "C", 1);
17758 +#ifdef PRINT_DEBUG
17759 + show_debug("%s: string: \"%s\"",func, string);
17761 + if (pool_write(frontend, string, len) < 0)
17765 + return POOL_CONTINUE;
17768 +static int RowDescription(POOL_CONNECTION *frontend,
17769 + POOL_CONNECTION_POOL *backend)
17771 + char * func = "RowDescription()";
17772 + short num_fields, num_fields1;
17775 + short size, size1;
17776 + char *string, *string1;
17780 + /* # of fields (could be 0) */
17781 + pool_read(MASTER(backend), &num_fields, sizeof(short));
17784 + pool_read(SECONDARY(backend), &num_fields1, sizeof(short));
17785 + if (num_fields != num_fields1)
17787 + show_error("%s: num_fields deos not match between backends master(%d) and secondary(%d)",
17788 + func, num_fields, num_fields1);
17789 + return POOL_FATAL;
17793 + /* forward it to the frontend */
17794 + pool_write(frontend, "T", 1);
17795 + pool_write(frontend, &num_fields, sizeof(short));
17797 + num_fields = ntohs(num_fields);
17798 + for (i = 0;i<num_fields;i++)
17801 + string = pool_read_string(MASTER(backend), &len, 0);
17802 + if (string == NULL)
17807 + string1 = pool_read_string(SECONDARY(backend), &len1, 0);
17808 + if (string1 == NULL) /* check the secondary's read result, not the master's again */
17812 + show_error("%s: field length deos not match between backends master(%d) and secondary(%d)",
17813 + func, ntohl(len), ntohl(len1));
17814 + return POOL_FATAL;
17818 + pool_write(frontend, string, len);
17821 + pool_read(MASTER(backend), &oid, sizeof(int));
17824 + pool_read(SECONDARY(backend), &oid1, sizeof(int));
17826 + /* we do not regard oid mismatch as fatal */
17829 + show_error("%s: field oid deos not match between backends master(%d) and secondary(%d)",
17830 + func, ntohl(oid), ntohl(oid1));
17833 + pool_write(frontend, &oid, sizeof(int));
17836 + pool_read(MASTER(backend), &size, sizeof(short));
17839 + pool_read(SECONDARY(backend), &size1, sizeof(short));
17840 + if (size != size1) /* compare the master's field size with the secondary's */
17842 + show_error("%s: field size deos not match between backends master(%d) and secondary(%d)",
17843 + func, ntohs(size), ntohs(size1));
17844 + return POOL_FATAL;
17847 +#ifdef PRINT_DEBUG
17848 + show_debug("%s: field size:%d", func, ntohs(size));
17850 + pool_write(frontend, &size, sizeof(short));
17853 + pool_read(MASTER(backend), &mod, sizeof(int));
17856 + pool_read(SECONDARY(backend), &mod1, sizeof(int));
17859 + show_error("%s: modifier deos not match between backends master(%d) and secondary(%d)",
17860 + func, ntohl(mod), ntohl(mod1));
17863 + pool_write(frontend, &mod, sizeof(int));
17866 + return num_fields;
17869 +static POOL_STATUS AsciiRow(POOL_CONNECTION *frontend,
17870 + POOL_CONNECTION_POOL *backend,
17871 + short num_fields)
17873 + char * func = "AsciiRow()";
17874 + static char nullmap[8192], nullmap1[8192];
17877 + unsigned char mask;
17880 + char msgbuf[1024];
17882 + pool_write(frontend, "D", 1);
17884 + nbytes = (num_fields + 7)/8;
17887 + return POOL_CONTINUE;
17890 + pool_read(MASTER(backend), nullmap, nbytes);
17891 + if (pool_write(frontend, nullmap, nbytes) < 0)
17896 + if (pool_read(SECONDARY(backend), nullmap1, nbytes) < 0)
17899 + if (memcmp(nullmap, nullmap1, nbytes))
17901 + /* XXX: NULLMAP maybe different among
17902 + backends. If we were a paranoid, we have to treat
17903 + this as a fatal error. However in the real world
17904 + we'd better to adapt this situation. Just throw a
17906 + show_error("%s: NULLMAP differ between master and secondary",func);
17912 + for (i = 0;i<num_fields;i++)
17918 + if (mask & nullmap[i/8])
17921 + if (pool_read(MASTER(backend), &size, sizeof(int)) < 0)
17925 + if (REPLICATION && (mask & nullmap1[i/8]))
17927 + /* XXX: field size maybe different among
17928 + backends. If we were a paranoid, we have to treat
17929 + this as a fatal error. However in the real world
17930 + we'd better to adapt this situation. Just throw a
17933 + if (pool_read(SECONDARY(backend), &size1, sizeof(int)) < 0)
17936 + if (size != size1)
17937 + show_error("%s: %d th field size does not match between master(%d) and secondary(%d)",
17938 + func, i, ntohl(size), ntohl(size1));
17939 + size1 = ntohl(size1) - 4;
17944 + if (mask & nullmap[i/8])
17946 + /* forward to frontend */
17947 + pool_write(frontend, &size, sizeof(int));
17948 + size = ntohl(size) - 4;
17950 + /* read and send actual data only when size > 0 */
17953 + buf = pool_read2(MASTER(backend), size);
17959 + if (REPLICATION && size1 > 0 && (mask & nullmap1[i/8]))
17961 + /* read and discard secondary data */
17962 + if (pool_read2(SECONDARY(backend), size1) == NULL)
17968 + pool_write(frontend, buf, size);
17969 + snprintf(msgbuf, sizeof(msgbuf), "%.*s", size, buf); /* bound the read: buf is not known to be NUL-terminated */
17970 +#ifdef PRINT_DEBUG
17971 + show_debug("%s: len: %d data: %s", func, size, msgbuf);
17978 + return POOL_CONTINUE;
17981 +static POOL_STATUS BinaryRow(POOL_CONNECTION *frontend,
17982 + POOL_CONNECTION_POOL *backend,
17983 + short num_fields)
17985 + char * func = "BinaryRow()";
17986 + static char nullmap[8192], nullmap1[8192];
17989 + unsigned char mask;
17993 + pool_write(frontend, "B", 1);
17995 + nbytes = (num_fields + 7)/8;
17998 + return POOL_CONTINUE;
18001 + pool_read(MASTER(backend), nullmap, nbytes);
18002 + if (pool_write(frontend, nullmap, nbytes) < 0)
18007 + if (pool_read(SECONDARY(backend), nullmap1, nbytes) < 0)
18010 + if (memcmp(nullmap, nullmap1, nbytes))
18012 + /* XXX: NULLMAP maybe different among
18013 + backends. If we were a paranoid, we have to treat
18014 + this as a fatal error. However in the real world
18015 + we'd better to adapt this situation. Just throw a
18017 + show_error("%s: NULLMAP differ between master and secondary",func);
18023 + for (i = 0;i<num_fields;i++)
18029 + if (mask & nullmap[i/8])
18032 + if (pool_read(MASTER(backend), &size, sizeof(int)) < 0)
18036 + if (REPLICATION && (mask & nullmap1[i/8]))
18038 + /* XXX: field size maybe different among
18039 + backends. If we were a paranoid, we have to treat
18040 + this as a fatal error. However in the real world
18041 + we'd better to adapt this situation. Just throw a
18044 + if (pool_read(SECONDARY(backend), &size1, sizeof(int)) < 0)
18047 + if (size != size1)
18048 + show_error("%s: %d th field size does not match between master(%d) and secondary(%d)",
18049 + func, i, ntohl(size), ntohl(size1));
18050 + size1 = ntohl(size1) - 4;
18055 + if (mask & nullmap[i/8])
18057 + /* forward to frontend */
18058 + pool_write(frontend, &size, sizeof(int));
18059 + size = ntohl(size) - 4;
18061 + /* read and send actual data only when size > 0 */
18064 + buf = pool_read2(MASTER(backend), size);
18070 + if (REPLICATION && size1 > 0 && (mask & nullmap1[i/8]))
18072 + /* read and discard secondary data */
18073 + if (pool_read2(SECONDARY(backend), size1) == NULL)
18078 + pool_write(frontend, buf, size);
18082 + return POOL_CONTINUE;
18085 +static POOL_STATUS CursorResponse(POOL_CONNECTION *frontend,
18086 + POOL_CONNECTION_POOL *backend)
18088 + char * func = "CursorResponse()";
18089 + char *string, *string1;
18092 + /* read cursor name */
18093 + string = pool_read_string(MASTER(backend), &len, 0);
18094 + if (string == NULL)
18098 + string1 = pool_read_string(SECONDARY(backend), &len1, 0);
18099 + if (string1 == NULL)
18103 + show_error("%s: length does not match between master(%d) and secondary(%d)",
18104 + func, len, len1);
18105 + show_error("%s: master(%s) secondary(%s)", func, string, string1);
18110 + /* forward to the frontend */
18111 + pool_write(frontend, "P", 1);
18112 + if (pool_write(frontend, string, len) < 0)
18116 + return POOL_CONTINUE;
18119 +POOL_STATUS ErrorResponse(POOL_CONNECTION *frontend,
18120 + POOL_CONNECTION_POOL *backend)
18125 + /* read error message */
18126 + string = pool_read_string(MASTER(backend), &len, 0);
18127 + if (string == NULL)
18131 + string = pool_read_string(SECONDARY(backend), &len, 0);
18132 + if (string == NULL)
18136 + /* forward to the frontend */
18137 + pool_write(frontend, "E", 1);
18138 + if (pool_write_and_flush(frontend, string, len) < 0)
18141 + return POOL_CONTINUE;
18144 +static POOL_STATUS NoticeResponse(POOL_CONNECTION *frontend,
18145 + POOL_CONNECTION_POOL *backend)
18147 + char *string, *string1;
18150 + /* read notice message */
18151 + string = pool_read_string(MASTER(backend), &len, 0);
18152 + if (string == NULL)
18156 + string1 = pool_read_string(SECONDARY(backend), &len1, 0);
18157 + if (string1 == NULL)
18161 + /* forward to the frontend */
18162 + pool_write(frontend, "N", 1);
18163 + if (pool_write_and_flush(frontend, string, len) < 0)
18167 + return POOL_CONTINUE;
18170 +static POOL_STATUS CopyInResponse(POOL_CONNECTION *frontend,
18171 + POOL_CONNECTION_POOL *backend)
18173 + POOL_STATUS status;
18175 + /* forward to the frontend */
18176 + if (MAJOR(backend) == PROTO_MAJOR_V3)
18178 + if (SimpleForwardToFrontend('G', frontend, backend) != POOL_CONTINUE)
18180 + if (pool_flush(frontend) != POOL_CONTINUE)
18184 + if (pool_write_and_flush(frontend, "G", 1) < 0)
18187 + status = CopyDataRows(frontend, backend, 1);
18191 +static POOL_STATUS CopyOutResponse(POOL_CONNECTION *frontend,
18192 + POOL_CONNECTION_POOL *backend)
18194 + POOL_STATUS status;
18196 + /* forward to the frontend */
18197 + if (MAJOR(backend) == PROTO_MAJOR_V3)
18199 + if (SimpleForwardToFrontend('H', frontend, backend) != POOL_CONTINUE)
18201 + if (pool_flush(frontend) != POOL_CONTINUE)
18205 + if (pool_write_and_flush(frontend, "H", 1) < 0)
18208 + status = CopyDataRows(frontend, backend, 0);
18212 +static POOL_STATUS CopyDataRows(POOL_CONNECTION *frontend,
18213 + POOL_CONNECTION_POOL *backend, int copyin)
18215 +#ifdef PRINT_DEBUG
18216 + char * func = "CopyDataRows()";
18221 +#ifdef PRINT_DEBUG
18230 + if (MAJOR(backend) == PROTO_MAJOR_V3)
18233 + POOL_STATUS status;
18235 + if (pool_read(frontend, &kind, 1) < 0)
18238 + status = SimpleForwardToBackend(kind, frontend, backend);
18239 + if (status == POOL_END)
18250 + string = pool_read_string(frontend, &len, 1);
18251 + if (string == NULL)
18258 + if (MAJOR(backend) == PROTO_MAJOR_V3)
18260 + signed char kind;
18261 + POOL_STATUS status;
18263 + if ((kind = pool_read_kind(backend)) < 0)
18266 + status = SimpleForwardToFrontend(kind, frontend, backend);
18267 + if (status == POOL_END)
18278 + string = pool_read_string(MASTER(backend), &len, 1);
18280 + string = pool_read_string(SECONDARY(backend), &len, 1);
18284 + if (string == NULL)
18287 +#ifdef PRINT_DEBUG
18288 + buf = malloc(len + 1);
18291 + show_error("CopyDataRows: malloc failed: %s", strerror(errno));
18294 + strncpy(buf, string, len);
18296 + show_debug("%s: copy line %d %d bytes :%s:",func, i++, len, buf);
18302 + pool_write(MASTER(backend), string, len);
18304 + pool_write(SECONDARY(backend), string, len);
18307 + pool_write(frontend, string, len);
18309 + if (len == 3) /* byte length of the three-character end-of-copy marker tested below, not a protocol version */
18311 + /* end of copy? */
18312 + if (string[0] == '\\' &&
18313 + string[1] == '.' &&
18314 + string[2] == '\n')
18323 + if (pool_flush(MASTER(backend)) <0)
18327 + if (pool_flush(SECONDARY(backend)) <0)
18332 + if (pool_flush(frontend) <0)
18335 + return POOL_CONTINUE;
18338 +static POOL_STATUS EmptyQueryResponse(POOL_CONNECTION *frontend,
18339 + POOL_CONNECTION_POOL *backend)
18343 + if (pool_read(MASTER(backend), &c, sizeof(c)) < 0)
18348 + if (pool_read(SECONDARY(backend), &c, sizeof(c)) < 0)
18352 + pool_write(frontend, "I", 1);
18353 + return pool_write_and_flush(frontend, "", 1);
18356 +static POOL_STATUS NotificationResponse(POOL_CONNECTION *frontend,
18357 + POOL_CONNECTION_POOL *backend)
18360 + char *condition, *condition1;
18363 + pool_write(frontend, "A", 1);
18365 + if (pool_read(MASTER(backend), &pid, sizeof(pid)) < 0)
18366 + return POOL_ERROR;
18370 + if (pool_read(SECONDARY(backend), &pid1, sizeof(pid1)) < 0)
18371 + return POOL_ERROR;
18374 + condition = pool_read_string(MASTER(backend), &len, 0);
18375 + if (condition == NULL)
18379 + condition1 = pool_read_string(SECONDARY(backend), &len1, 0);
18380 + if (condition1 == NULL)
18384 + pool_write(frontend, &pid, sizeof(pid));
18386 + return pool_write_and_flush(frontend, condition, len);
18389 +static POOL_STATUS FunctionCall(POOL_CONNECTION *frontend,
18390 + POOL_CONNECTION_POOL *backend)
18397 + pool_write(MASTER(backend), "F", 1);
18399 + pool_write(SECONDARY(backend), "F", 1);
18402 + if (pool_read(frontend, dummy, sizeof(dummy)) < 0)
18403 + return POOL_ERROR;
18404 + pool_write(MASTER(backend), dummy, sizeof(dummy));
18406 + pool_write(SECONDARY(backend), dummy, sizeof(dummy));
18408 + /* function object id */
18409 + if (pool_read(frontend, &oid, sizeof(oid)) < 0)
18410 + return POOL_ERROR;
18412 + pool_write(MASTER(backend), &oid, sizeof(oid));
18414 + pool_write(SECONDARY(backend), &oid, sizeof(oid));
18416 + /* number of arguments */
18417 + if (pool_read(frontend, &argn, sizeof(argn)) < 0)
18418 + return POOL_ERROR;
18419 + pool_write(MASTER(backend), &argn, sizeof(argn));
18421 + pool_write(SECONDARY(backend), &argn, sizeof(argn));
18423 + argn = ntohl(argn);
18425 + for (i=0;i<argn;i++)
18430 + /* length of each argument in bytes */
18431 + if (pool_read(frontend, &len, sizeof(len)) < 0)
18432 + return POOL_ERROR;
18434 + pool_write(MASTER(backend), &len, sizeof(len));
18436 + pool_write(SECONDARY(backend), &len, sizeof(len));
18438 + len = ntohl(len);
18440 + /* argument value itself */
18441 + if ((arg = pool_read2(frontend, len)) == NULL)
18442 + return POOL_ERROR;
18443 + pool_write(MASTER(backend), arg, len);
18445 + pool_write(SECONDARY(backend), arg, len);
18448 + if (pool_flush(MASTER(backend)))
18449 + return POOL_ERROR;
18451 + if (pool_flush(SECONDARY(backend)))
18452 + return POOL_ERROR;
18453 + return POOL_CONTINUE;
18456 +static POOL_STATUS FunctionResultResponse(POOL_CONNECTION *frontend,
18457 + POOL_CONNECTION_POOL *backend)
18463 + pool_write(frontend, "V", 1);
18465 + if (pool_read(MASTER(backend), &dummy, 1) < 0)
18466 + return POOL_ERROR;
18468 + if (pool_read(SECONDARY(backend), &dummy, 1) < 0)
18469 + return POOL_ERROR;
18471 + pool_write(frontend, &dummy, 1);
18473 + /* non empty result? */
18474 + if (dummy == 'G')
18476 + /* length of result in bytes */
18477 + if (pool_read(MASTER(backend), &len, sizeof(len)) < 0)
18478 + return POOL_ERROR;
18480 + if (pool_read(SECONDARY(backend), &len, sizeof(len)) < 0)
18481 + return POOL_ERROR;
18483 + pool_write(frontend, &len, sizeof(len));
18485 + len = ntohl(len);
18487 + /* result value itself */
18488 + if ((result = pool_read2(MASTER(backend), len)) == NULL)
18489 + return POOL_ERROR;
18491 + if (pool_read(SECONDARY(backend), result, len) < 0)
18492 + return POOL_ERROR;
18494 + pool_write(frontend, result, len);
18497 + /* unused ('0') */
18498 + if (pool_read(MASTER(backend), &dummy, 1) < 0)
18499 + return POOL_ERROR;
18501 + if (pool_read(SECONDARY(backend), &dummy, 1) < 0)
18502 + return POOL_ERROR;
18504 + pool_write(frontend, "0", 1);
18506 + return pool_flush(frontend);
18509 +static POOL_STATUS ProcessFrontendResponse(POOL_CONNECTION *frontend,
18510 + POOL_CONNECTION_POOL *backend)
18512 + char * func = "ProcessFrontendResponse()";
18514 + POOL_STATUS status;
18516 + if (frontend->len <= 0 && frontend->no_forward != 0)
18517 + return POOL_CONTINUE;
18519 + if (pool_read(frontend, &fkind, 1) < 0)
18521 + show_error("%s: failed to read kind",func);
18525 +#ifdef PRINT_DEBUG
18526 + show_debug("%s:read kind from frontend %c(%02x)", func, fkind, fkind);
18532 + if (MAJOR(backend) == PROTO_MAJOR_V3)
18535 + pool_read(frontend, &len, sizeof(len));
18537 + status = POOL_END;
18541 + status = Query(frontend, backend, NULL);
18545 + if (MAJOR(backend) == PROTO_MAJOR_V3)
18547 + status = SimpleForwardToBackend(fkind, frontend, backend);
18548 + if (pool_flush(MASTER(backend)))
18549 + status = POOL_ERROR;
18551 + if (pool_flush(SECONDARY(backend)))
18552 + status = POOL_ERROR;
18554 + else if (MAJOR(backend) == PROTO_MAJOR_V2 && fkind == 'F')
18555 + status = FunctionCall(frontend, backend);
18558 + show_error("%s: unknown message type %c(%02x)", func, fkind, fkind);
18559 + status = POOL_ERROR;
18567 +static int timeoutmsec;
18569 + * enable read timeout
18571 +void pool_enable_timeout(void)
18573 + timeoutmsec = pool_config_replication_timeout;
18577 + * disable read timeout
18579 +void pool_disable_timeout(void)
18585 + * wait until read data is ready
18587 +static int synchronize(POOL_CONNECTION *cp)
18589 + return pool_check_fd(cp, 1);
18593 + * wait until read data is ready
18594 + * if notimeout is non 0, wait forever.
18596 +int pool_check_fd(POOL_CONNECTION *cp, int notimeout)
18598 + char * func = "pool_check_fd()";
18600 + fd_set exceptmask;
18603 + struct timeval timeout;
18604 + struct timeval *tp;
18610 + FD_ZERO(&readmask);
18611 + FD_ZERO(&exceptmask);
18612 + FD_SET(fd, &readmask);
18613 + FD_SET(fd, &exceptmask);
18615 + if (notimeout || timeoutmsec == 0)
18619 + timeout.tv_sec = pool_config_replication_timeout / 1000; /* whole seconds of the millisecond setting */
18620 + timeout.tv_usec = (pool_config_replication_timeout % 1000) * 1000; /* remainder, kept below one second */
18624 + fds = select(fd+1, &readmask, NULL, &exceptmask, tp);
18628 + if (errno == EAGAIN || errno == EINTR)
18631 + show_error("%s: select() failed. reason %s",func, strerror(errno));
18635 + if (FD_ISSET(fd, &exceptmask))
18637 + show_error("%s: exception occurred",func);
18643 + show_error("%s: data is not ready tp->tv_sec %ld tp->tp_usec %ld", func, (long) tp->tv_sec, (long) tp->tv_usec); /* timeval fields are not int; cast to match the format */
18651 +static void process_reporting(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend)
18653 + static char *cursorname = "blank";
18654 + static short num_fields = 3;
18655 + static char *field_names[] = {"item", "value", "description"};
18656 + static int oid = 0;
18657 + static short fsize = -1;
18658 + static int mod = 0;
18665 + static char nullmap[2] = {0xff, 0xff};
18666 + int nbytes = (num_fields + 7)/8;
18668 +#define MAXVALLEN 512
18672 + char value[MAXVALLEN+1];
18674 + } POOL_REPORT_STATUS;
18676 +#define MAXITEMS 128
18678 + POOL_REPORT_STATUS status[MAXITEMS];
18686 + status[i].name = "inetdomain";
18687 + snprintf(status[i].value, MAXVALLEN, "%d", pool_config_inetdomain);
18688 + status[i].desc = "1 if accepting TCP/IP connection";
18691 + status[i].name = "port";
18692 + snprintf(status[i].value, MAXVALLEN, "%d", pool_config_port);
18693 + status[i].desc = "pgpool accepting port number";
18696 + status[i].name = "socket_dir";
18697 + snprintf(status[i].value, MAXVALLEN, "%s", pool_config_socket_dir);
18698 + status[i].desc = "pgpool socket directory";
18701 + status[i].name = "backend_host_name";
18702 + snprintf(status[i].value, MAXVALLEN, "%s", pool_config_backend_host_name);
18703 + status[i].desc = "master backend host name";
18706 + status[i].name = "backend_port";
18707 + snprintf(status[i].value, MAXVALLEN, "%d", pool_config_backend_port);
18708 + status[i].desc = "master backend port number";
18711 + status[i].name = "secondary_backend_host_name";
18712 + snprintf(status[i].value, MAXVALLEN, "%s", pool_config_secondary_backend_host_name);
18713 + status[i].desc = "secondary backend host name";
18716 + status[i].name = "secondary_backend_port";
18717 + snprintf(status[i].value, MAXVALLEN, "%d", pool_config_secondary_backend_port);
18718 + status[i].desc = "secondary backend port number";
18721 + status[i].name = "num_init_children";
18722 + snprintf(status[i].value, MAXVALLEN, "%d", pool_config_num_init_children);
18723 + status[i].desc = "# of children initially pre-forked";
18726 + status[i].name = "child_life_time";
18727 + snprintf(status[i].value, MAXVALLEN, "%d", pool_config_child_life_time);
18728 + status[i].desc = "if idle for this seconds, child exits (not implemented yet)";
18731 + status[i].name = "connection_life_time";
18732 + snprintf(status[i].value, MAXVALLEN, "%d", pool_config_connection_life_time);
18733 + status[i].desc = "if idle for this seconds, connection closes";
18736 + status[i].name = "max_pool";
18737 + snprintf(status[i].value, MAXVALLEN, "%d", pool_config_max_pool);
18738 + status[i].desc = "max # of connection pool per child";
18741 + status[i].name = "logdir";
18742 + snprintf(status[i].value, MAXVALLEN, "%s", pool_config_logdir);
18743 + status[i].desc = "logging directory";
18746 + status[i].name = "backend_socket_dir";
18747 + snprintf(status[i].value, MAXVALLEN, "%s", pool_config_backend_socket_dir);
18748 + status[i].desc = "Unix domain socket directory for the PostgreSQL server";
18751 + status[i].name = "replication_mode";
18752 + snprintf(status[i].value, MAXVALLEN, "%d", pool_config_replication_mode);
18753 + status[i].desc = "non 0 if operating in replication mode";
18756 + status[i].name = "replication_strict";
18757 + snprintf(status[i].value, MAXVALLEN, "%d", pool_config_replication_strict);
18758 + status[i].desc = "non 0 if operating in strict mode";
18761 + status[i].name = "replication_timeout";
18762 + snprintf(status[i].value, MAXVALLEN, "%d", pool_config_replication_timeout);
18763 + status[i].desc = "if secondary does not respond in this milli seconds, abort the session";
18766 + status[i].name = "current_backend_host_name";
18767 + snprintf(status[i].value, MAXVALLEN, "%s", pool_config_current_backend_host_name);
18768 + status[i].desc = "current master host name";
18771 + status[i].name = "current_backend_port";
18772 + snprintf(status[i].value, MAXVALLEN, "%d", pool_config_current_backend_port);
18773 + status[i].desc = "current master port #";
18776 + status[i].name = "replication_enabled";
18777 + snprintf(status[i].value, MAXVALLEN, "%d", pool_config_replication_enabled);
18778 + status[i].desc = "non 0 if actually operating in replication mode";
18781 + status[i].name = "load_balance_mode";
18782 + snprintf(status[i].value, MAXVALLEN, "%d", pool_config_load_balance_mode);
18783 + status[i].desc = "non 0 if operating in load balancing mode";
18786 + status[i].name = "replication_stop_on_mismatch";
18787 + snprintf(status[i].value, MAXVALLEN, "%d", pool_config_replication_stop_on_mismatch);
18788 + status[i].desc = "stop replication mode on fatal error";
18793 + if (MAJOR(backend) == PROTO_MAJOR_V2)
18795 + /* cursor response */
18796 + pool_write(frontend, "P", 1);
18797 + pool_write(frontend, cursorname, strlen(cursorname)+1);
18800 + /* row description */
18801 + pool_write(frontend, "T", 1);
18803 + if (MAJOR(backend) == PROTO_MAJOR_V3)
18805 + len = sizeof(num_fields) + sizeof(len);
18807 + for (i=0;i<num_fields;i++)
18809 + char *f = field_names[i];
18810 + len += strlen(f)+1;
18811 + len += sizeof(oid);
18812 + len += sizeof(colnum);
18813 + len += sizeof(oid);
18814 + len += sizeof(s);
18815 + len += sizeof(mod);
18816 + len += sizeof(s);
18819 + len = htonl(len);
18820 + pool_write(frontend, &len, sizeof(len));
18823 + n = htons(num_fields);
18824 + pool_write(frontend, &n, sizeof(short));
18826 + for (i=0;i<num_fields;i++)
18828 + char *f = field_names[i];
18830 + pool_write(frontend, f, strlen(f)+1); /* field name */
18832 + if (MAJOR(backend) == PROTO_MAJOR_V3)
18834 + pool_write(frontend, &oid, sizeof(oid)); /* table oid */
18835 + colnum = htons(i);
18836 + pool_write(frontend, &colnum, sizeof(colnum)); /* column number */
18839 + pool_write(frontend, &oid, sizeof(oid)); /* data type oid */
18840 + s = htons(fsize);
18841 + pool_write(frontend, &s, sizeof(fsize)); /* field size */
18842 + pool_write(frontend, &mod, sizeof(mod)); /* modifier */
18844 + if (MAJOR(backend) == PROTO_MAJOR_V3)
18847 + pool_write(frontend, &s, sizeof(fsize)); /* field format (text) */
18850 + pool_flush(frontend);
18852 + if (MAJOR(backend) == PROTO_MAJOR_V2)
18855 + for (i=0;i<nrows;i++)
18857 + pool_write(frontend, "D", 1);
18858 + pool_write_and_flush(frontend, nullmap, nbytes);
18860 + size = strlen(status[i].name);
18861 + hsize = htonl(size+4);
18862 + pool_write(frontend, &hsize, sizeof(hsize));
18863 + pool_write(frontend, status[i].name, size);
18865 + size = strlen(status[i].value);
18866 + hsize = htonl(size+4);
18867 + pool_write(frontend, &hsize, sizeof(hsize));
18868 + pool_write(frontend, status[i].value, size);
18870 + size = strlen(status[i].desc);
18871 + hsize = htonl(size+4);
18872 + pool_write(frontend, &hsize, sizeof(hsize));
18873 + pool_write(frontend, status[i].desc, size);
18879 + for (i=0;i<nrows;i++)
18881 + pool_write(frontend, "D", 1);
18882 + len = sizeof(len) + sizeof(nrows);
18883 + len += sizeof(int) + strlen(status[i].name);
18884 + len += sizeof(int) + strlen(status[i].value);
18885 + len += sizeof(int) + strlen(status[i].desc);
18886 + len = htonl(len);
18887 + pool_write(frontend, &len, sizeof(len));
18889 + pool_write(frontend, &s, sizeof(s));
18891 + len = htonl(strlen(status[i].name));
18892 + pool_write(frontend, &len, sizeof(len));
18893 + pool_write(frontend, status[i].name, strlen(status[i].name));
18895 + len = htonl(strlen(status[i].value));
18896 + pool_write(frontend, &len, sizeof(len));
18897 + pool_write(frontend, status[i].value, strlen(status[i].value));
18899 + len = htonl(strlen(status[i].desc));
18900 + pool_write(frontend, &len, sizeof(len));
18901 + pool_write(frontend, status[i].desc, strlen(status[i].desc));
18905 + /* complete command response */
18906 + pool_write(frontend, "C", 1);
18907 + if (MAJOR(backend) == PROTO_MAJOR_V3)
18909 + len = htonl(sizeof(len) + strlen("SELECT")+1);
18910 + pool_write(frontend, &len, sizeof(len));
18912 + pool_write(frontend, "SELECT", strlen("SELECT")+1);
18914 + /* ready for query */
18915 + pool_write(frontend, "Z", 1);
18916 + if (MAJOR(backend) == PROTO_MAJOR_V3)
18918 + len = htonl(sizeof(len) + 1);
18919 + pool_write(frontend, &len, sizeof(len));
18920 + pool_write(frontend, "I", 1);
18923 + pool_flush(frontend);
18926 +void pool_send_frontend_exits(POOL_CONNECTION_POOL *backend)
18930 + pool_write(MASTER(backend), "X", 1); /* 'X' starts the frontend exit (Terminate) message */
18932 + if (MAJOR(backend) == PROTO_MAJOR_V3)
18935 + pool_write(MASTER(backend), &len, sizeof(len));
18939 + * XXX we cannot call pool_flush() here since backend may already
18940 + * close the socket and pool_flush() automatically invokes fail
18941 + * over handler. This could happen in copy command (remember the
18942 + * famous "lost synchronization with server, resetting
18943 + * connection" message)
18945 + fflush(MASTER(backend)->write_fd);
18949 + pool_write(SECONDARY(backend), "X", 1);
18950 + if (MAJOR(backend) == PROTO_MAJOR_V3)
18953 + pool_write(SECONDARY(backend), &len, sizeof(len)); /* length word must follow its 'X' on the secondary stream, not the master */
18955 + fflush(SECONDARY(backend)->write_fd);
18960 + * -------------------------------------------------------
18962 + * -------------------------------------------------------
18964 +POOL_STATUS SimpleForwardToFrontend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend)
18966 + char * func = "SimpleForwardToFrontend()";
18971 + pool_write(frontend, &kind, 1);
18973 + status = pool_read(MASTER(backend), &len, sizeof(len));
18976 + show_error("%s: error while reading message length",func);
18982 + status = pool_read(SECONDARY(backend), &len1, sizeof(len1));
18985 + show_error("%s: error while reading message length from secondary backend",func);
18991 + show_error("%s: length does not match between backends master(%d) secondary(%d) kind:(%c)",
18992 + func, ntohl(len), ntohl(len1), kind);
18996 + pool_write(frontend, &len, sizeof(len));
18998 + len = ntohl(len);
19001 + p = pool_read2(MASTER(backend), len);
19007 + len1 = ntohl(len1);
19009 + if (pool_read2(SECONDARY(backend), len1) == NULL)
19013 + return pool_write(frontend, p, len);
19016 +POOL_STATUS SimpleForwardToBackend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend)
19022 + if (pool_write(MASTER(backend), &kind, 1))
19025 + if (pool_write(SECONDARY(backend), &kind, 1))
19028 + if (pool_read(frontend, &sendlen, sizeof(sendlen)))
19033 + len = ntohl(sendlen) - 4;
19035 + p = pool_read2(frontend, len);
19039 + if (pool_write(MASTER(backend), &sendlen, sizeof(sendlen)))
19041 + if (pool_write(MASTER(backend), p, len))
19046 + if (pool_write(SECONDARY(backend), &sendlen, sizeof(sendlen)))
19048 + if (pool_write(SECONDARY(backend), p, len))
19052 + return POOL_CONTINUE;
19055 +POOL_STATUS ParameterStatus(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend)
19057 +#ifdef PRINT_DEBUG
19058 + char * func = "ParameterStatus()";
19066 + pool_write(frontend, "S", 1);
19068 + len = pool_read_message_length(backend);
19074 + sendlen = htonl(len);
19075 + pool_write(frontend, &sendlen, sizeof(sendlen));
19079 + p = pool_read2(MASTER(backend), len);
19084 + value = p + strlen(name) + 1;
19086 +#ifdef PRINT_DEBUG
19087 + show_debug("%s:name: %s value: %s",func, name, value);
19090 + pool_add_param(&MASTER(backend)->params, name, value);
19092 +#ifdef PRINT_DEBUG
19093 + pool_param_debug_print(&MASTER(backend)->params);
19097 + if (pool_read2(SECONDARY(backend), len) == NULL)
19100 + return pool_write(frontend, p, len);
19105 + * reset backend status. return values are:
19106 + * 0: no query was issued 1: a query was issued 2: no more queries remain -1: error
19108 +static int reset_backend(POOL_CONNECTION_POOL *backend, int qcnt)
19110 +#ifdef NO_RESET_ALL
19111 + static char *queries[] = {"ABORT"};
19113 + static char *queries[] = {"ABORT", "RESET ALL"};
19117 + int qn = sizeof(queries)/sizeof(char *);
19119 + /* for PGCluster */
19120 + if (!Use_Connection_Pool)
19126 + query = queries[qcnt];
19128 + /* if transaction state is idle, we don't need to issue ABORT */
19129 + if (TSTATE(backend) == 'I' && !strcmp("ABORT", query))
19132 + if (Query(NULL, backend, query) != POOL_CONTINUE)
19139 + * return non 0 if load balance is possible
19141 +static int load_balance_enabled(POOL_CONNECTION_POOL *backend, char *sql)
19143 + if (pool_config_load_balance_mode &&
19145 + MAJOR(backend) == PROTO_MAJOR_V3 &&
19146 + TSTATE(backend) == 'I' &&
19147 + !strncasecmp(sql, "SELECT", 6))
19153 + * start load balance mode
19155 +static void start_load_balance(POOL_CONNECTION_POOL *backend)
19157 +#ifdef PRINT_DEBUG
19158 + char * func = "start_load_balance()";
19163 + /* save backend connection slots */
19164 + for (i=0;i<backend->num;i++)
19166 + slots[i] = backend->slots[i];
19169 + /* temporarily turn off replication mode */
19170 + /*REPLICATION = 0; */
19172 + /* choose a master at random */
19173 + master = random() % backend->num;
19174 + backend->slots[0] = slots[master];
19175 +#ifdef PRINT_DEBUG
19176 + show_debug("%s: selected master is %d", func,master);
19179 + /* start load balancing */
19180 + /*in_load_balance = 1;*/
19184 + * finish load balance mode
19186 +static void end_load_balance(POOL_CONNECTION_POOL *backend)
19190 + /* restore backend connection slots */
19191 + for (i=0;i<backend->num;i++)
19193 + backend->slots[i] = slots[i];
19196 + /* turn on replication mode */
19197 + /* REPLICATION = 1; */
19199 + /*in_load_balance = 0;*/
19200 +#ifdef PRINT_DEBUG
19201 + show_debug("end_load_balance: end load balance mode");
19206 + * send error message to frontend
19208 +void pool_send_error_message(POOL_CONNECTION *frontend, int protoMajor,
19216 +#define MAXDATA 1024
19217 +#define MAXMSGBUF 128
19218 + char * func = "pool_send_error_message()";
19220 + if (protoMajor == PROTO_MAJOR_V2)
19222 + pool_write(frontend, "E", 1);
19223 + pool_write_and_flush(frontend, message, strlen(message)+1);
19225 + else if (protoMajor == PROTO_MAJOR_V3)
19227 + char data[MAXDATA];
19228 + char msgbuf[MAXMSGBUF];
19235 + pool_write(frontend, "E", 1);
19237 + /* error level */
19238 + thislen = snprintf(msgbuf, MAXMSGBUF, "SERROR");
19239 + memcpy(data +len, msgbuf, thislen+1);
19240 + len += thislen + 1;
19243 + thislen = snprintf(msgbuf, MAXMSGBUF, "C%s", code);
19244 + memcpy(data +len, msgbuf, thislen+1);
19245 + len += thislen + 1;
19248 + thislen = snprintf(msgbuf, MAXMSGBUF, "M%s", message);
19249 + memcpy(data +len, msgbuf, thislen+1);
19250 + len += thislen + 1;
19253 + if (*detail != '\0')
19255 + thislen = snprintf(msgbuf, MAXMSGBUF, "D%s", detail);
19256 + memcpy(data +len, msgbuf, thislen+1);
19257 + len += thislen + 1;
19261 + if (*hint != '\0')
19263 + thislen = snprintf(msgbuf, MAXMSGBUF, "H%s", hint);
19264 + memcpy(data +len, msgbuf, thislen+1);
19265 + len += thislen + 1;
19269 + thislen = snprintf(msgbuf, MAXMSGBUF, "F%s", file);
19270 + memcpy(data +len, msgbuf, thislen+1);
19271 + len += thislen + 1;
19274 + thislen = snprintf(msgbuf, MAXMSGBUF, "L%d", line);
19275 + memcpy(data +len, msgbuf, thislen+1);
19276 + len += thislen + 1;
19280 + *(data + len) = '\0';
19283 + len = htonl(len + 4);
19284 + pool_write(frontend, &len, sizeof(len));
19285 + pool_write_and_flush(frontend, data, sendlen);
19288 + show_error("%s: unknown protocol major %d",func, protoMajor);
19290 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pool_stream.c pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_stream.c
19291 --- postgresql-8.2.4/src/pgcluster/pglb/pool_stream.c 1970-01-01 01:00:00.000000000 +0100
19292 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_stream.c 2007-02-18 22:52:17.000000000 +0100
19294 +/*--------------------------------------------------------------------
19299 + * stream I/O modules
19301 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
19302 + * Portions Copyright (c) 2003-2006, Tatsuo Ishii
19303 + *--------------------------------------------------------------------
19306 +* Permission to use, copy, modify, and distribute this software and
19307 +* its documentation for any purpose and without fee is hereby
19308 +* granted, provided that the above copyright notice appear in all
19309 +* copies and that both that copyright notice and this permission
19310 +* notice appear in supporting documentation, and that the name of the
19311 +* author not be used in advertising or publicity pertaining to
19312 +* distribution of the software without specific, written prior
19313 +* permission. The author makes no representations about the
19314 +* suitability of this software for any purpose. It is provided "as
19315 +* is" without express or implied warranty.
19318 +#include <stdio.h>
19319 +#include <stdlib.h>
19320 +#include <string.h>
19321 +#include <errno.h>
19322 +#include <sys/types.h>
19323 +#include <unistd.h>
19324 +#include <sys/time.h>
19326 +#include "postgres_fe.h"
19327 +#include "libpq/pqcomm.h"
19328 +#include "replicate_com.h"
19331 +#define READBUFSZ 1024
19333 +POOL_CONNECTION *pool_open(int fd);
19334 +void pool_close(POOL_CONNECTION *cp);
19335 +int pool_read(POOL_CONNECTION *cp, void *buf, int len);
19336 +char *pool_read2(POOL_CONNECTION *cp, int len);
19337 +int pool_write(POOL_CONNECTION *cp, void *buf, int len);
19338 +int pool_flush(POOL_CONNECTION *cp);
19339 +int pool_write_and_flush(POOL_CONNECTION *cp, void *buf, int len);
19340 +char *pool_read_string(POOL_CONNECTION *cp, int *len, int line);
19342 +static int mystrlen(char *str, int upper, int *flag);
19343 +static int mystrlinelen(char *str, int upper, int *flag);
19344 +static int save_pending_data(POOL_CONNECTION *cp, void *data, int len);
19345 +static int consume_pending_data(POOL_CONNECTION *cp, void *data, int len);
19349 +* open read/write file descriptors.
19350 +* returns POOL_CONNECTION on success otherwise NULL.
19352 +POOL_CONNECTION *pool_open(int fd)
19354 + POOL_CONNECTION *cp;
19356 + cp = (POOL_CONNECTION *)malloc(sizeof(POOL_CONNECTION));
19359 + show_error("pool_open: malloc failed: %s", strerror(errno));
19363 + memset(cp, 0, sizeof(*cp));
19365 + cp->write_fd = fdopen(fd, "w");
19366 + if (cp->write_fd == NULL)
19368 + show_error("pool_open: fdopen failed: %s",strerror(errno));
19373 + /* initialize pending data buffer */
19374 + cp->hp = malloc(READBUFSZ);
19375 + if (cp->hp == NULL)
19377 + show_error("pool_open: malloc failed");
19380 + cp->bufsz = READBUFSZ;
19393 +* close read/write file descriptors.
19395 +void pool_close(POOL_CONNECTION *cp)
19398 + fclose(cp->write_fd);
19404 + pool_discard_params(&cp->params);
19409 +* read len bytes from cp
19410 +* returns 0 on success otherwise -1.
19412 +int pool_read(POOL_CONNECTION *cp, void *buf, int len)
19414 + static char readbuf[READBUFSZ];
19416 + int consume_size;
19419 + consume_size = consume_pending_data(cp, buf, len);
19420 + len -= consume_size;
19421 + buf += consume_size;
19425 + if (cp->issecondary_backend)
19427 + if (pool_check_fd(cp, 0))
19429 + show_error("pool_read: secondary data is not ready. abort this session");
19434 + readlen = read(cp->fd, readbuf, READBUFSZ);
19435 + if (readlen == -1)
19437 + show_error("pool_read: read failed (%s)", strerror(errno));
19439 + if (cp->isbackend)
19441 + /* fatal error, notice to parent and exit */
19442 + notice_backend_error();
19450 + else if (readlen == 0)
19452 + show_error("pool_read: EOF encountered");
19454 + if (cp->isbackend)
19456 + /* fatal error, notice to parent and exit */
19457 + notice_backend_error();
19463 + * if backend offers authentication method, frontend could close connection
19469 + if (len < readlen)
19471 + /* overrun. we need to save remaining data to pending buffer */
19472 + if (save_pending_data(cp, readbuf+len, readlen-len))
19474 + memmove(buf, readbuf, len);
19478 + memmove(buf, readbuf, readlen);
19487 +* read exactly len bytes from cp
19488 +* returns buffer address on success otherwise NULL.
19490 +char *pool_read2(POOL_CONNECTION *cp, int len)
19495 + int consume_size;
19498 + req_size = cp->len + len;
19500 + if (req_size > cp->bufsz2)
19502 + alloc_size = ((req_size+1)/READBUFSZ+1)*READBUFSZ;
19503 + cp->buf2 = realloc(cp->buf2, alloc_size);
19504 + if (cp->buf2 == NULL)
19506 + show_error("pool_read2: failed to realloc");
19509 + cp->bufsz2 = alloc_size;
19514 + consume_size = consume_pending_data(cp, buf, len);
19515 + len -= consume_size;
19516 + buf += consume_size;
19520 + if (cp->issecondary_backend)
19522 + if (pool_check_fd(cp, 0))
19524 + show_error("pool_read2: secondary data is not ready. abort this session");
19529 + readlen = read(cp->fd, buf, len);
19530 + if (readlen == -1)
19532 + show_error("pool_read2: read failed (%s)", strerror(errno));
19534 + if (cp->isbackend)
19536 + /* fatal error, notice to parent and exit */
19537 + notice_backend_error();
19545 + else if (readlen == 0)
19547 + show_error("pool_read2: EOF encountered");
19549 + if (cp->isbackend)
19551 + /* fatal error, notice to parent and exit */
19552 + notice_backend_error();
19558 + * if backend offers authentication method, frontend could close connection
19572 +* write len bytes to cp
19573 +* returns 0 on success otherwise -1.
19575 +int pool_write(POOL_CONNECTION *cp, void *buf, int len)
19577 + if (!cp->no_forward)
19578 + fwrite(buf, len, 1, cp->write_fd);
19584 +* flush write buffer
19586 +int pool_flush(POOL_CONNECTION *cp)
19588 + if (fflush(cp->write_fd) != 0)
19590 + show_error("pool_flush: fflush failed (%s)", strerror(errno));
19592 + if (cp->isbackend)
19594 + notice_backend_error();
19606 +* combo of pool_write and pool_flush
19608 +int pool_write_and_flush(POOL_CONNECTION *cp, void *buf, int len)
19610 + if (pool_write(cp, buf, len))
19612 + return pool_flush(cp);
19616 + * read a string until EOF or a NUL byte is encountered.
19617 + * if line is not 0, read until new line is encountered.
19619 +char *pool_read_string(POOL_CONNECTION *cp, int *len, int line)
19626 + int consume_size;
19629 + static char pbuf[READBUFSZ];
19635 + /* initialize read buffer */
19636 + if (cp->sbufsz == 0)
19638 + cp->sbuf = malloc(READBUFSZ);
19639 + if (cp->sbuf == NULL)
19641 + show_error("pool_read_string: malloc failed");
19644 + cp->sbufsz = READBUFSZ;
19645 + *cp->sbuf = '\0';
19648 + /* any pending data? */
19652 + strlength = mystrlinelen(cp->hp+cp->po, cp->len, &flag);
19654 + strlength = mystrlen(cp->hp+cp->po, cp->len, &flag);
19656 + /* buffer is too small? */
19657 + if ((strlength + 1) > cp->sbufsz)
19659 + cp->sbufsz = ((strlength+1)/READBUFSZ+1)*READBUFSZ;
19660 + cp->sbuf = realloc(cp->sbuf, cp->sbufsz);
19661 + if (cp->sbuf == NULL)
19663 + show_error("pool_read_string: realloc failed");
19668 + /* consume pending and save to read string buffer */
19669 + consume_size = consume_pending_data(cp, cp->sbuf, strlength);
19671 + *len = strlength;
19673 + /* is the string null terminated? */
19674 + if (consume_size == strlength && !flag)
19676 + /* not null or line terminated.
19677 + * we need to read more since we have not encountered NULL or new line yet
19679 + readsize = cp->sbufsz - strlength;
19680 + readp = strlength;
19684 +#ifdef PRINT_DEBUG
19685 + show_debug("pool_read_string: read all from pending data. po:%d len:%d",
19686 + cp->po, cp->len);
19692 + readsize = cp->sbufsz;
19698 + readlen = read(cp->fd, cp->sbuf+readp, readsize);
19699 + if (readlen == -1)
19701 + show_error("pool_read_string: read() failed. reason:%s", strerror(errno));
19703 + if (cp->isbackend)
19705 + notice_backend_error();
19714 + if (readlen == 0)
19717 + /* check overrun */
19719 + strlength = mystrlinelen(cp->sbuf+readp, readlen, &flag);
19721 + strlength = mystrlen(cp->sbuf+readp, readlen, &flag);
19723 + if (strlength < readlen)
19725 + save_pending_data(cp, cp->sbuf+readp+strlength, readlen-strlength);
19726 + *len += strlength;
19727 +#ifdef PRINT_DEBUG
19728 + show_debug("pool_read_string: total result %d with pending data po:%d len:%d", *len, cp->po, cp->len);
19735 + /* encountered null or newline? */
19738 + /* ok we have read all data */
19739 +#ifdef PRINT_DEBUG
19740 + show_debug("pool_read_string: total result %d ", *len);
19745 + readp += readlen;
19746 + readsize = READBUFSZ;
19748 + if ((*len+readsize) > cp->sbufsz)
19750 + cp->sbufsz += READBUFSZ;
19752 + cp->sbuf = realloc(cp->sbuf, cp->sbufsz);
19753 + if (cp->sbuf == NULL)
19755 + show_error("pool_read_string: realloc failed");
19764 + * returns the byte length of str, including \0, no more than upper.
19765 + * if encountered \0, flag is set to non 0.
19767 + * mystrlen("abc", 2) returns 2
19768 + * mystrlen("abc", 3) returns 3
19769 + * mystrlen("abc", 4) returns 4
19770 + * mystrlen("abc", 5) returns 4
19772 +static int mystrlen(char *str, int upper, int *flag)
19778 + for (len = 0;len < upper; len++, str++)
19791 + * returns the byte length of str terminated by \n or \0 (including \n or \0), no more than upper.
19792 + * if encountered \0 or \n, flag is set to non 0.
19794 + * mystrlinelen("abc", 2) returns 2
19795 + * mystrlinelen("abc", 3) returns 3
19796 + * mystrlinelen("abc", 4) returns 4
19797 + * mystrlinelen("abc", 5) returns 4
19798 + * mystrlinelen("abcd\nefg", 4) returns 4
19799 + * mystrlinelen("abcd\nefg", 5) returns 5
19800 + * mystrlinelen("abcd\nefg", 6) returns 5
19802 +static int mystrlinelen(char *str, int upper, int *flag)
19808 + for (len = 0;len < upper; len++, str++)
19810 + if (!*str || *str == '\n')
19821 + * save pending data
19823 +static int save_pending_data(POOL_CONNECTION *cp, void *data, int len)
19826 + size_t realloc_size;
19830 + if (cp->len == 0)
19833 + reqlen = cp->po + cp->len + len;
19835 + /* pending buffer is enough? */
19836 + if (reqlen > cp->bufsz)
19838 + /* too small, enlarge it */
19839 + realloc_size = (reqlen/READBUFSZ+1)*READBUFSZ;
19840 + p = realloc(cp->hp, realloc_size);
19843 + show_error("save_pending_data: realloc failed");
19847 + cp->bufsz = realloc_size;
19851 + memmove(cp->hp + cp->po + cp->len, data, len);
19858 + * consume pending data. returns actually consumed data length.
19860 +static int consume_pending_data(POOL_CONNECTION *cp, void *data, int len)
19862 + int consume_size;
19864 + if (cp->len <= 0)
19867 + consume_size = Min(len, cp->len);
19868 + memmove(data, cp->hp + cp->po, consume_size);
19869 + cp->len -= consume_size;
19871 + if (cp->len <= 0)
19874 + cp->po += consume_size;
19876 + return consume_size;
19878 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/recovery.c pgcluster-1.7.0rc7/src/pgcluster/pglb/recovery.c
19879 --- postgresql-8.2.4/src/pgcluster/pglb/recovery.c 1970-01-01 01:00:00.000000000 +0100
19880 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/recovery.c 2007-02-18 22:52:17.000000000 +0100
19882 +/*--------------------------------------------------------------------
19887 + * This file contains the functions that pglb calls
19888 + * to receive and process recovery requests.
19890 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
19891 + *--------------------------------------------------------------------
19894 + * Permission to use, copy, modify, and distribute this software and
19895 + * its documentation for any purpose and without fee is hereby
19896 + * granted, provided that the above copyright notice appear in all
19897 + * copies and that both that copyright notice and this permission
19898 + * notice appear in supporting documentation, and that the name of the
19899 + * author not be used in advertising or publicity pertaining to
19900 + * distribution of the software without specific, written prior
19901 + * permission. The author makes no representations about the
19902 + * suitability of this software for any purpose. It is provided "as
19903 + * is" without express or implied warranty.
19906 +#include <stdio.h>
19907 +#include <string.h>
19908 +#include <stdlib.h>
19909 +#include <unistd.h>
19910 +#include <signal.h>
19911 +#include <sys/wait.h>
19912 +#include <ctype.h>
19913 +#include <sys/types.h>
19914 +#include <sys/stat.h>
19915 +#include <sys/socket.h>
19916 +#include <sys/ipc.h>
19917 +#include <sys/msg.h>
19918 +#include <netdb.h>
19919 +#include <netinet/in.h>
19920 +#include <errno.h>
19921 +#include <fcntl.h>
19923 +#include <sys/param.h>
19924 +#include <arpa/inet.h>
19925 +#include <sys/file.h>
19927 +#ifdef HAVE_NETINET_TCP_H
19928 +#include <netinet/tcp.h>
19931 +#ifdef HAVE_SYS_SELECT_H
19932 +#include <sys/select.h>
19934 +#include "replicate_com.h"
19938 +/*--------------------------------------
19939 + * PROTOTYPE DECLARATION
19940 + *--------------------------------------
19942 +void PGRrecovery_main(int fork_wait_time);
19944 +static int set_recovery(RecoveryPacket *packet);
19945 +static int receive_recovery(int fd);
19948 +/*--------------------------------------------------------------------
19950 + * PGRrecovery_main()
19952 + * main module of recovery function
19957 + *--------------------------------------------------------------------
19960 +PGRrecovery_main(int fork_wait_time)
19962 + char * func = "PGRrecovery_main()";
19968 + pgid = getpgid(0);
19975 + PGRsignal(SIGCHLD, SIG_DFL);
19976 + PGRsignal(SIGHUP, PGRexit_subprocess);
19977 + PGRsignal(SIGINT, PGRexit_subprocess);
19978 + PGRsignal(SIGQUIT, PGRexit_subprocess);
19979 + PGRsignal(SIGTERM, PGRexit_subprocess);
19980 + PGRsignal(SIGPIPE, SIG_IGN);
19982 + * in child process,
19983 + * call recovery module
19987 + if (fork_wait_time > 0) {
19988 +#ifdef PRINT_DEBUG
19989 + show_debug("recovery process: wait fork(): pid = %d", getpid());
19991 + sleep(fork_wait_time);
19994 + fd = PGRcreate_recv_socket(ResolvedName, Recovery_Port_Number);
19997 + show_error("%s:PGRcreate_recv_socket failed",func);
20004 + struct timeval timeout;
20006 + timeout.tv_sec = 60;
20007 + timeout.tv_usec = 0;
20010 + * Wait for something to happen (at most 60 seconds per select() call).
20013 + FD_SET(fd,&rmask);
20014 + rtn = select(fd+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
20015 + if (rtn > 0 && FD_ISSET(fd, &rmask)) /* rtn is -1 on error and 0 on timeout: only a positive count means fd is readable */
20017 + receive_recovery(fd);
20022 +/*--------------------------------------------------------------------
20026 + * check a recovery request from replication server
20031 + *--------------------------------------------------------------------
20034 +set_recovery(RecoveryPacket *packet)
20036 +#ifdef PRINT_DEBUG
20037 + char * func = "set_recovery()";
20039 + int status = STATUS_OK;
20041 + ClusterTbl * ptr;
20043 + PGRset_key_of_cluster(&key,packet);
20044 +#ifdef PRINT_DEBUG
20045 + show_debug("%s:received no:%d",func, ntohs(packet->packet_no));
20047 + switch (ntohs(packet->packet_no))
20049 + case RECOVERY_PREPARE_REQ:
20050 + /* add cluster db */
20051 +#ifdef PRINT_DEBUG
20052 + show_debug("%s:add_db host:%s port:%d max:%d",
20053 + func, packet->hostName,ntohs(packet->port),ntohs(packet->max_connect));
20055 + ptr = PGRsearch_cluster_tbl(&key);
20058 + ptr = PGRadd_cluster_tbl(&key);
20062 + PGRset_status_on_cluster_tbl(TBL_STOP,ptr);
20063 + if (Use_Connection_Pool)
20065 + signal(SIGCHLD,PGRrecreate_child);
20066 + status = PGRpre_fork_child(ptr);
20070 + case RECOVERY_FINISH:
20071 + /* start cluster db */
20072 + ptr = PGRsearch_cluster_tbl(&key);
20075 +#ifdef PRINT_DEBUG
20076 + show_debug("%s:start_db host:%s port:%d max:%d",
20077 + func,packet->hostName,ntohs(packet->port),ntohs(packet->max_connect));
20079 + PGRset_status_on_cluster_tbl(TBL_INIT,ptr);
20082 + case RECOVERY_PGDATA_ANS:
20083 + /* stop cluster db */
20084 + ptr = PGRsearch_cluster_tbl(&key);
20087 +#ifdef PRINT_DEBUG
20088 + show_debug("%s:stop_db host:%s port:%d max:%d",
20089 + func, packet->hostName,ntohs(packet->port),ntohs(packet->max_connect));
20091 + PGRset_status_on_cluster_tbl(TBL_STOP,ptr);
20094 + case RECOVERY_ERROR:
20095 + /* delete cluster db */
20096 + ptr = PGRsearch_cluster_tbl(&key);
20099 + PGRset_status_on_cluster_tbl(TBL_FREE,ptr);
20100 + if (Use_Connection_Pool)
20102 + PGRquit_children_on_cluster(ptr->rec_no);
20106 + /* cluster db has error */
20107 + case RECOVERY_ERROR_CONNECTION:
20108 + /* set error cluster db */
20109 + ptr = PGRsearch_cluster_tbl(&key);
20112 + PGRset_status_on_cluster_tbl(TBL_ERROR,ptr);
20113 + if (Use_Connection_Pool)
20115 + PGRquit_children_on_cluster(ptr->rec_no);
20120 + return STATUS_OK;
20124 +receive_recovery(int fd)
20126 + int status = STATUS_ERROR;
20128 + int recv_sock = -1;
20129 + RecoveryPacket packet;
20131 + recv_sock = PGRcreate_acception(fd,ResolvedName,Recovery_Port_Number);
20132 + if (recv_sock >= 0 )
20134 + memset(&packet,0, sizeof(RecoveryPacket));
20135 + r_size = PGRread_byte(recv_sock,(char *)&packet,sizeof(RecoveryPacket),MSG_WAITALL);
20136 + if ( r_size == sizeof(RecoveryPacket) )
20138 + status = set_recovery(&packet);
20141 + PGRclose_sock(&recv_sock);
20144 diff -aruN postgresql-8.2.4/src/pgcluster/pglb/socket.c pgcluster-1.7.0rc7/src/pgcluster/pglb/socket.c
20145 --- postgresql-8.2.4/src/pgcluster/pglb/socket.c 1970-01-01 01:00:00.000000000 +0100
20146 +++ pgcluster-1.7.0rc7/src/pgcluster/pglb/socket.c 2007-02-18 22:52:17.000000000 +0100
20148 +/*--------------------------------------------------------------------
20153 + * This file is composed of the communication modules
20155 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
20156 + *--------------------------------------------------------------------
20159 + * Permission to use, copy, modify, and distribute this software and
20160 + * its documentation for any purpose and without fee is hereby
20161 + * granted, provided that the above copyright notice appear in all
20162 + * copies and that both that copyright notice and this permission
20163 + * notice appear in supporting documentation, and that the name of the
20164 + * author not be used in advertising or publicity pertaining to
20165 + * distribution of the software without specific, written prior
20166 + * permission. The author makes no representations about the
20167 + * suitability of this software for any purpose. It is provided "as
20168 + * is" without express or implied warranty.
20171 +#include "postgres.h"
20172 +#include <stdio.h>
20173 +#include <string.h>
20174 +#include <stdlib.h>
20175 +#include <unistd.h>
20176 +#include <sys/wait.h>
20177 +#include <ctype.h>
20178 +#include <sys/types.h>
20179 +#include <sys/stat.h>
20180 +#include <sys/socket.h>
20181 +#include <sys/un.h>
20182 +#include <sys/ipc.h>
20183 +#include <netdb.h>
20184 +#include <errno.h>
20185 +#include <fcntl.h>
20187 +#include <sys/param.h>
20188 +#include <sys/file.h>
20189 +#include <netinet/in.h>
20190 +#include <arpa/inet.h>
20192 +#ifdef HAVE_SYS_SELECT_H
20193 +#include <sys/select.h>
20196 +#ifdef HAVE_NETINET_TCP_H
20197 +#include <netinet/tcp.h>
20200 +#include "replicate_com.h"
20204 +/*--------------------------------------
20205 + * PROTOTYPE DECLARATION
20206 + *--------------------------------------
20208 +int PGRcreate_unix_domain_socket(char * sock_dir, unsigned short port);
20209 +int PGRcreate_recv_socket(char * hostName , unsigned short portNumber);
20210 +int PGRcreate_acception(int fd, char * hostName , unsigned short portNumber);
20211 +void PGRclose_sock(int * sock);
20212 +int PGRread_byte(int sock,char * buf,int len, int flag);
20213 +int PGRcreate_cluster_socket( int * sock, ClusterTbl * ptr );
20215 +static int create_send_socket(int * fdP, char * hostName , unsigned short portNumber);
20219 +* create UNIX domain socket
20222 +PGRcreate_unix_domain_socket(char * sock_dir, unsigned short port)
20224 + char * func = "PGRcreate_unix_domain_socket()";
20225 + struct sockaddr_un addr;
20230 + /* set unix domain socket path */
20231 + fd = socket(AF_UNIX, SOCK_STREAM, 0);
20234 + show_error("%s:Failed to create UNIX domain socket. reason: %s",func, strerror(errno));
20237 + memset((char *) &addr, 0, sizeof(addr));
20238 + ((struct sockaddr *)&addr)->sa_family = AF_UNIX;
20239 + snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/.s.PGSQL.%d",sock_dir,port);
20240 + len = sizeof(struct sockaddr_un);
20241 + status = bind(fd, (struct sockaddr *)&addr, len);
20242 + if (status == -1)
20244 + show_error("%s: bind() failed. reason: %s", func, strerror(errno));
20248 + if (chmod(addr.sun_path, 0777) == -1)
20250 + show_error("%s: chmod() failed. reason: %s", func, strerror(errno));
20254 + status = listen(fd, PGLB_MAX_SOCKET_QUEUE);
20257 + show_error("%s: listen() failed. reason: %s", func, strerror(errno));
20264 +PGRcreate_recv_socket(char * hostName , unsigned short portNumber)
20266 + char * func = "PGRcreate_recv_socket()";
20269 + struct sockaddr_in addr;
20272 + if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
20274 + show_error("%s: socket() failed. (%s)", func, strerror(errno));
20277 + if ((setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one))) == -1)
20279 + PGRclose_sock(&fd);
20280 + show_error("%s: setsockopt() failed. (%s)",func, strerror(errno));
20283 + addr.sin_family = AF_INET;
20284 + if ((hostName == NULL) || (hostName[0] == '\0'))
20285 + addr.sin_addr.s_addr = htonl(INADDR_ANY);
20288 + struct hostent *hp;
20290 + hp = gethostbyname(hostName);
20291 + if ((hp == NULL) || (hp->h_addrtype != AF_INET))
20293 + PGRclose_sock(&fd);
20296 + memmove((char *) &(addr.sin_addr), (char *) hp->h_addr, hp->h_length);
20299 + addr.sin_port = htons(portNumber);
20300 + len = sizeof(struct sockaddr_in);
20302 + err = bind(fd, (struct sockaddr *) & addr, len);
20305 + PGRclose_sock(&fd);
20306 + show_error("%s: bind() failed. (%s)",func, strerror(errno));
20309 + err = listen(fd, PGLB_MAX_SOCKET_QUEUE);
20312 + PGRclose_sock(&fd);
20313 + show_error("%s: listen() failed. (%s)", func, strerror(errno));
20320 +PGRcreate_acception(int fd, char * hostName , unsigned short portNumber)
20322 + char * func = "PGRcreate_acception()";
20324 + struct sockaddr addr;
20329 + len = sizeof(struct sockaddr);
20331 + while ((sock = accept(fd,&addr,&len)) < 0)
20333 + show_error("%s:accept error",func);
20334 + PGRclose_sock(&fd);
20335 + if ( count > PGLB_CONNECT_RETRY_TIME)
20339 + fd = PGRcreate_recv_socket(hostName , portNumber);
20344 + while (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) < 0)
20346 + show_error("%s: setsockopt TCP_NODELAY error (%s)",func, strerror(errno));
20347 + if ( count > PGLB_CONNECT_RETRY_TIME)
20354 + while (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one)) < 0)
20356 + show_error("%s:setsockopt SO_KEEPALIVE error (%s)",func,strerror(errno));
20357 + if ( count > PGLB_CONNECT_RETRY_TIME)
20368 +PGRclose_sock(int * sock)
20375 +PGRread_byte(int sock,char * buf,int len, int flag)
20377 + char * func = "PGRread_byte()";
20380 + int read_size = 0;
20381 + int max_buf_size ;
20385 + max_buf_size = len;
20386 + read_ptr = (char*)buf;
20389 + r = recv(sock,read_ptr + read_size ,max_buf_size - read_size, flag);
20392 + if (errno == EINTR)
20397 + if (errno == EAGAIN)
20399 + return read_size;
20402 +#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
20403 + if (errno == EWOULDBLOCK)
20405 + show_error("%s:no data (%s)",func,strerror(errno));
20406 + return read_size;
20410 + if (errno == ECONNRESET)
20412 + PGRclose_sock(&sock);
20413 + show_error("%s:connection reset (%s)",func, strerror(errno));
20417 + show_error("%s:recv() failed. (%s)",func,strerror(errno));
20424 + if (max_buf_size == read_size)
20432 + return read_size;
20440 + return read_size;
20444 +PGRcreate_cluster_socket( int * sock, ClusterTbl * ptr )
20446 + char * func = "PGRcreate_cluster_socket()";
20447 + int status = STATUS_ERROR;
20450 + if (PGRis_connection_full(ptr) == 1)
20452 + return STATUS_ERROR;
20455 + if (ptr != (ClusterTbl *) NULL)
20457 + status = create_send_socket(sock, ptr->hostName, ptr->port) ;
20461 + show_error("%s:ClusterTbl is not initialize",func);
20467 +create_send_socket(int * fdP, char * hostName , unsigned short portNumber)
20469 + char * func = "create_send_socket()";
20472 + struct sockaddr_in addr;
20476 +#ifdef PRINT_DEBUG
20477 + show_debug("%s: host:%s port:%d",func, hostName,portNumber);
20480 + memset((char *)&addr,0,sizeof(addr));
20482 + if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
20486 + show_error("%s:socket() failed. (%s)",func, strerror(errno));
20487 + return STATUS_ERROR;
20489 + if ((setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one))) == -1)
20491 + PGRclose_sock(&fd);
20493 + show_error("%s:setsockopt() failed. (%s)", func, strerror(errno));
20494 + return STATUS_ERROR;
20495 + return STATUS_ERROR;
20497 + if ((setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one))) == -1)
20499 + PGRclose_sock(&fd);
20501 + show_error("%s:setsockopt() failed. (%s)", func, strerror(errno));
20502 + return STATUS_ERROR;
20504 + if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) < 0)
20506 + PGRclose_sock(&fd);
20508 + show_error("%s:setsockopt() failed. (%s)",func, strerror(errno));
20509 + return STATUS_ERROR;
20512 + addr.sin_family = AF_INET;
20513 + if ((hostName == NULL) || (hostName[0] == '\0'))
20514 + addr.sin_addr.s_addr = htonl(INADDR_ANY);
20517 + struct hostent *hp;
20519 + hp = gethostbyname(hostName);
20520 + if ((hp == NULL) || (hp->h_addrtype != AF_INET))
20522 + PGRclose_sock(&fd);
20524 + return STATUS_ERROR;
20526 + memmove((char *) &(addr.sin_addr), (char *) hp->h_addr, hp->h_length);
20529 + addr.sin_port = htons(portNumber);
20530 + len = sizeof(struct sockaddr_in);
20532 + if ((sock = connect(fd,(struct sockaddr*)&addr,len)) < 0)
20534 + PGRclose_sock(&fd);
20536 + return STATUS_ERROR;
20540 + return STATUS_OK;
20543 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/AUTHORS pgcluster-1.7.0rc7/src/pgcluster/pgrp/AUTHORS
20544 --- postgresql-8.2.4/src/pgcluster/pgrp/AUTHORS 1970-01-01 01:00:00.000000000 +0100
20545 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/AUTHORS 2007-02-18 22:52:17.000000000 +0100
20549 +pgrp was written by Atsushi Mitani
20550 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/COPYING pgcluster-1.7.0rc7/src/pgcluster/pgrp/COPYING
20551 --- postgresql-8.2.4/src/pgcluster/pgrp/COPYING 1970-01-01 01:00:00.000000000 +0100
20552 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/COPYING 2007-02-18 22:52:17.000000000 +0100
20554 +Copyright (c) 2003-2006 Atsushi Mitani
20556 +Permission to use, copy, modify, and distribute this software and
20557 +its documentation for any purpose and without fee is hereby
20558 +granted, provided that the above copyright notice appear in all
20559 +copies and that both that copyright notice and this permission
20560 +notice appear in supporting documentation, and that the name of the
20561 +author not be used in advertising or publicity pertaining to
20562 +distribution of the software without specific, written prior
20563 +permission. The author makes no representations about the
20564 +suitability of this software for any purpose. It is provided "as
20565 +is" without express or implied warranty.
20566 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/Makefile pgcluster-1.7.0rc7/src/pgcluster/pgrp/Makefile
20567 --- postgresql-8.2.4/src/pgcluster/pgrp/Makefile 1970-01-01 01:00:00.000000000 +0100
20568 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/Makefile 2007-02-18 22:52:17.000000000 +0100
20570 +#-------------------------------------------------------------------------
20572 +# Makefile for src/pgcluster/pgrp
20574 +#-------------------------------------------------------------------------
20576 +subdir = src/pgcluster/pgrp
20577 +top_builddir = ../../..
20578 +include $(top_builddir)/src/Makefile.global
20580 +# this setup is for V2 protocol
20581 +#OBJS= cascade.o conf.o main.o recovery.o replicate.o rlog.o
20582 +# this setup is for V3 protocol
20583 +OBJS= pqformat.o cascade.o conf.o main.o recovery.o replicate.o rlog.o lifecheck.o
20585 +EXTRA_OBJS = $(top_builddir)/src/backend/libpq/replicate_com.o ../libpgc/SUBSYS.o
20587 +CFLAGS += -DPRINT_DEBUG
20588 +override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) -DBINDIR=\"$(bindir)\"
20591 +pgreplicate: $(OBJS) $(libpq_builddir)/libpq.a
20592 + $(CC) $(CFLAGS) $(OBJS) $(EXTRA_OBJS) $(libpq) $(libpq_builddir)/libpq.a $(LDFLAGS) $(LIBS) -o $@
20594 +install: all installdirs
20595 + $(INSTALL_PROGRAM) pgreplicate$(X) $(DESTDIR)$(bindir)/pgreplicate$(X)
20596 + $(INSTALL_DATA) pgreplicate.conf.sample $(DESTDIR)$(datadir)/pgreplicate.conf.sample
20599 + $(mkinstalldirs) $(DESTDIR)$(bindir)
20600 + $(mkinstalldirs) $(DESTDIR)$(datadir)
20603 + rm -f $(addprefix $(DESTDIR)$(bindir)/, pgreplicate$(X))
20604 + rm -f $(DESTDIR)$(datadir)/pgreplicate.conf.sample
20606 +clean distclean maintainer-clean:
20607 + rm -f pgreplicate$(X) $(OBJS)
20611 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/cascade.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/cascade.c
20612 --- postgresql-8.2.4/src/pgcluster/pgrp/cascade.c 1970-01-01 01:00:00.000000000 +0100
20613 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/cascade.c 2007-02-18 22:52:17.000000000 +0100
20615 +/*--------------------------------------------------------------------
20620 + * This file is composed of the functions called from the pgreplicate
20621 + * source for backup and cascade.
20623 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
20624 + *--------------------------------------------------------------------
20626 +#ifdef USE_REPLICATION
20628 +#include "postgres.h"
20629 +#include "postgres_fe.h"
20631 +#include <stdio.h>
20632 +#include <unistd.h>
20633 +#ifdef HAVE_SYS_TYPES_H
20634 +#include <sys/types.h>
20636 +#ifdef HAVE_FCNTL_H
20637 +#include <fcntl.h>
20639 +#include <errno.h>
20640 +#include <ctype.h>
20642 +#include <sys/ipc.h>
20643 +#include <sys/shm.h>
20644 +#include <sys/sem.h>
20645 +#include <signal.h>
20646 +#include <sys/socket.h>
20647 +#ifdef HAVE_UNISTD_H
20648 +#include <unistd.h>
20650 +#include <netdb.h>
20651 +#ifdef HAVE_NETINET_TCP_H
20652 +#include <netinet/tcp.h>
20654 +#include <dirent.h>
20655 +#include <arpa/inet.h>
20657 +#ifdef HAVE_CRYPT_H
20658 +#include <crypt.h>
20662 +#include "mb/pg_wchar.h"
20665 +#include "libpq-fe.h"
20666 +#include "libpq-int.h"
20667 +#include "fe-auth.h"
20669 +#include "access/xact.h"
20670 +#include "replicate_com.h"
20671 +#include "pgreplicate.h"
20674 +static int count_cascade(int flag);
20675 +static void PGRinit_cascade_child(void);
20678 +static int fixup_socket_for_cascades(int *sock ,ReplicateServerInfo * target);
20679 +static ReplicateServerInfo * get_cascade_data(int * cnt, int flag);
20680 +static int add_cascade_data(ReplicateHeader * header, ReplicateServerInfo * add_data);
20681 +static int update_cascade_data(ReplicateHeader * header, ReplicateServerInfo * update_data);
20682 +static void write_cascade_status_file(ReplicateServerInfo * cascade);
20683 +static int notice_cascade_data(int sock);
20684 +static int notice_cascade_data_to_cluster_db(void);
20687 + * socket variables, moved from Cascade_Inf->(lower|upper)->sock.
20688 + * Cascade_Inf is in shared memory, so a socket stored there is not necessarily initialized in a given process, and cascade operations sometimes returned EBADF.
20689 + * 05/10/05 tanida@sraoss.co.jp
20692 +static int lsock=-1; /* socket for lower-cascade. */
20693 +static int usock=-1; /* socket for upper-cascade. */
20695 +/*--------------------------------------
20696 + * PROTOTYPE DECLARATION
20697 + *--------------------------------------
20702 +count_cascade(int flag)
20705 + int cascade_cnt = 0;
20706 + ReplicateServerInfo * cascade = NULL;
20708 + if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
20713 + /* count cascading replication servers */
20716 + case UPPER_CASCADE:
20717 + case ALL_CASCADE:
20718 + cascade = Cascade_Tbl;
20720 + case LOWER_CASCADE:
20721 + cascade = Cascade_Inf->myself;
20725 + if (cascade == NULL)
20729 + while (cascade->useFlag != DB_TBL_END)
20731 + if (cascade->useFlag == DB_TBL_USE)
20735 + if ((flag == UPPER_CASCADE) &&
20736 + (cascade == Cascade_Inf->myself))
20741 + if (cnt >= MAX_DB_SERVER -1 )
20747 + return cascade_cnt;
20751 +PGRinit_cascade_child(void) {
20752 + fixup_socket_for_cascades(&usock,NULL);
20753 + fixup_socket_for_cascades(&lsock,NULL);
20757 +static ReplicateServerInfo *
20758 +get_cascade_data(int * cnt, int flag)
20760 + char * func = "get_cascade_data()";
20762 + int loop_cnt = 0;
20764 + ReplicateServerInfo * buf = NULL;
20765 + ReplicateServerInfo * cascade = NULL;
20767 + size = sizeof(ReplicateServerInfo) * MAX_DB_SERVER;
20768 + buf = (ReplicateServerInfo *)malloc(size);
20769 + if (buf == (ReplicateServerInfo *)NULL)
20771 + show_error("%s:malloc failed: (%s)",func,strerror(errno));
20775 + memset(buf,0,size);
20779 + case UPPER_CASCADE:
20780 + case ALL_CASCADE:
20781 + cascade = Cascade_Tbl;
20783 + case LOWER_CASCADE:
20784 + cascade = Cascade_Inf->myself;
20793 + if (cascade == NULL)
20799 + PGRsem_lock(CascadeSemID,1);
20802 + while (cascade->useFlag != DB_TBL_END)
20804 + if (cascade->useFlag == DB_TBL_USE)
20806 + (buf + i)->useFlag = htonl(cascade->useFlag);
20807 + strncpy((buf + i)->hostName,cascade->hostName,sizeof(cascade->hostName));
20808 + (buf + i)->portNumber = htons(cascade->portNumber);
20809 + (buf + i)->recoveryPortNumber = htons(cascade->recoveryPortNumber);
20810 + (buf + i)->lifecheckPortNumber = htons(cascade->lifecheckPortNumber);
20813 + if ((flag == UPPER_CASCADE) &&
20814 + (cascade == Cascade_Inf->myself))
20819 + if (loop_cnt >= MAX_DB_SERVER -1 )
20823 + if (Cascade_Inf->end == cascade)
20830 + PGRsem_unlock(CascadeSemID,1);
20836 +update_cascade_data(ReplicateHeader * header, ReplicateServerInfo * update_data)
20838 + char * func = "update_cascade_data()";
20841 + ReplicateServerInfo * ptr = NULL;
20842 + ReplicateServerInfo * cascade = NULL;
20843 + char hostName[HOSTNAME_MAX_LENGTH];
20846 + show_debug("executing %s",func);
20847 + if ((header == NULL ) || ( update_data == NULL))
20849 + show_error("%s:receive data is wrong",func);
20850 + return STATUS_ERROR;
20852 + if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
20854 + show_error("%s:config data read error",func);
20855 + return STATUS_ERROR;
20859 + size = ntohl(header->query_size);
20860 + cnt = size / sizeof(ReplicateServerInfo);
20861 + if (cnt >= MAX_DB_SERVER)
20863 + show_error("%s:update cascade data is too large. it's more than %d", func,MAX_DB_SERVER);
20864 + return STATUS_ERROR;
20867 + Cascade_Inf->useFlag = DB_TBL_INIT;
20868 + fixup_socket_for_cascades(&usock,NULL);
20869 + fixup_socket_for_cascades(&lsock,NULL);
20871 + Cascade_Inf->upper = NULL;
20872 + Cascade_Inf->lower = NULL;
20874 + gethostname(hostName,sizeof(hostName));
20875 + ptr = update_data;
20876 + cascade = Cascade_Tbl;
20877 + memset(cascade,0,(sizeof(ReplicateServerInfo)*MAX_DB_SERVER));
20878 + Cascade_Inf->top = cascade;
20882 + cascade->useFlag = ntohl(ptr->useFlag);
20883 + strncpy(cascade->hostName,ptr->hostName,sizeof(cascade->hostName));
20884 + cascade->portNumber = ntohs(ptr->portNumber);
20885 + cascade->recoveryPortNumber = ntohs(ptr->recoveryPortNumber);
20886 + cascade->lifecheckPortNumber = ntohs(ptr->lifecheckPortNumber);
20888 + if ((!strncmp(cascade->hostName,hostName,sizeof(cascade->hostName))) &&
20889 + (cascade->portNumber == Port_Number) &&
20890 + (cascade->recoveryPortNumber == Recovery_Port_Number))
20892 + Cascade_Inf->myself = cascade;
20895 + Cascade_Inf->end = cascade;
20899 + cascade->useFlag = DB_TBL_END;
20901 + Cascade_Inf->useFlag = DB_TBL_USE;
20903 + return STATUS_OK;
20907 +add_cascade_data(ReplicateHeader * header, ReplicateServerInfo * add_data)
20909 + char *func = "add_cascade_data()";
20912 + ReplicateServerInfo * ptr = NULL;
20913 + ReplicateServerInfo * cascade = NULL;
20914 + char hostName[HOSTNAME_MAX_LENGTH];
20916 + if ((header == NULL ) || ( add_data == NULL))
20918 + show_error("%s:receive data is wrong",func);
20919 + return STATUS_ERROR;
20921 + if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
20923 + show_error("%s:config data read error",func);
20924 + return STATUS_ERROR;
20926 + size = ntohl(header->query_size);
20927 + cnt = size / sizeof(ReplicateServerInfo);
20928 + if (cnt >= MAX_DB_SERVER)
20930 + show_error("%s:addtional cascade data is too large. it's more than %d", func,MAX_DB_SERVER);
20931 + return STATUS_ERROR;
20934 + Cascade_Inf->useFlag = DB_TBL_INIT;
20935 + fixup_socket_for_cascades(&lsock,NULL);
20936 + Cascade_Inf->lower = NULL;
20938 + gethostname(hostName,sizeof(hostName));
20940 + cascade = Cascade_Inf->myself;
20944 + cascade->useFlag = ntohl(ptr->useFlag);
20945 + strncpy(cascade->hostName,ptr->hostName,sizeof(cascade->hostName));
20946 + cascade->portNumber = ntohs(ptr->portNumber);
20947 + cascade->recoveryPortNumber = ntohs(ptr->recoveryPortNumber);
20948 + cascade->lifecheckPortNumber = ntohs(ptr->lifecheckPortNumber);
20949 + cascade->replicate_id=-1;
20950 + cascade->response_mode=-1;
20952 + Cascade_Inf->end = cascade;
20954 + if ((!strncmp(cascade->hostName,hostName,sizeof(cascade->hostName))) &&
20955 + (cascade->portNumber == Port_Number) &&
20956 + (cascade->recoveryPortNumber == Recovery_Port_Number))
20963 + cascade->useFlag = DB_TBL_END;
20967 + Cascade_Inf->useFlag = DB_TBL_USE;
20968 + return STATUS_OK;
20972 +PGRstartup_cascade(void)
20974 + char * func = "PGRstartup_cascade()";
20976 + int status = STATUS_OK;
20977 + ReplicateHeader header;
20978 + ReplicateServerInfo * cascade = NULL;
20979 + ReplicateServerInfo * buf = NULL;
20981 + if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
20983 + show_error("%s:config data read error",func);
20984 + return STATUS_ERROR;
20987 + /* count lower server */
20988 + cascade = Cascade_Inf->myself;
20989 + if (cascade == NULL)
20991 + show_error("%s:cascade data initialize error",func);
20992 + return STATUS_ERROR;
20994 + buf = get_cascade_data(&cnt,LOWER_CASCADE);
20997 + show_error("%s:cascade data get error",func);
20998 + return STATUS_ERROR;
21001 + memset(&header,0,sizeof(ReplicateHeader));
21002 + header.cmdSys = CMD_SYS_CASCADE;
21003 + header.cmdSts = CMD_STS_TO_UPPER;
21004 + header.cmdType = CMD_TYPE_ADD;
21005 + header.query_size = htonl(sizeof(ReplicateServerInfo) * cnt);
21007 + status = PGRsend_upper_cascade(&header, (char *)buf);
21012 + if (status == STATUS_OK)
21014 + memset(&header,0,sizeof(ReplicateHeader));
21015 + buf = PGRrecv_cascade_answer( Cascade_Inf->upper, &header);
21018 + status=STATUS_ERROR;
21020 + else if((header.cmdSys == CMD_SYS_CASCADE) &&
21021 + (header.cmdSts == CMD_STS_TO_LOWER) &&
21022 + (header.cmdType == CMD_TYPE_UPDATE_ALL))
21024 + status = update_cascade_data(&header,buf);
21029 + show_debug("%s:startup packet result is %d",func,status);
21034 +PGRsend_lower_cascade(ReplicateHeader * header, char * query)
21038 + char * func = "PGRsend_lower_cascade()";
21039 + ReplicateServerInfo *lower = PGRget_lower_cascade();
21042 + while(lower!=NULL)
21045 + * check lower_cascade validity.
21049 + PGRsend_cascade(lsock,header,query)==STATUS_OK)
21051 + return STATUS_OK;
21056 + * current lower cascade is missing.
21057 + * fix socket , or go to next one.
21060 + while( lower!=NULL &&
21061 + fixup_socket_for_cascades(&lsock,lower)!=STATUS_OK)
21063 + show_error("%s:lower cascade maybe down,challenge new one.",func);
21064 + PGRset_cascade_server_status(lower,DB_TBL_ERROR);
21065 + lower =PGRget_lower_cascade();
21068 + Cascade_Inf->lower=lower;
21072 + return STATUS_ERROR;
21077 +PGRsend_upper_cascade(ReplicateHeader * header, char * query)
21079 + char * func = "PGRsend_upper_cascade()";
21080 + ReplicateServerInfo *upper = PGRget_upper_cascade();
21083 + while(upper!=NULL)
21086 + * check upper_cascade validity.
21090 + PGRsend_cascade(usock,header,query)==STATUS_OK)
21092 + return STATUS_OK;
21097 + * current upper cascade is missing.
21098 + * fix socket , or go to next one.
21101 + while( upper!=NULL &&
21102 + fixup_socket_for_cascades(&usock,upper)!=STATUS_OK)
21104 + show_error("%s:upper cascade maybe down,challenge new one.",func);
21105 + PGRset_cascade_server_status(upper,DB_TBL_ERROR);
21106 + upper =PGRget_upper_cascade();
21109 + Cascade_Inf->upper=upper;
21112 + return STATUS_ERROR;
21115 +ReplicateServerInfo *
21116 +PGRget_lower_cascade(void)
21118 + char * func = "PGRget_lower_cascade()";
21119 + ReplicateServerInfo * cascade = NULL;
21121 + if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
21123 + show_error("%s:config data read error",func);
21127 + /* count lower server */
21129 + cascade = Cascade_Inf->myself;
21130 + if (cascade == NULL)
21132 + show_error("%s:cascade data initialize error",func);
21135 + if (cascade->useFlag != DB_TBL_END)
21139 + while (cascade->useFlag != DB_TBL_END)
21141 +#ifdef PRINT_DEBUG
21142 + show_debug("%s:lower cascade search[%d]@[%s] use[%d]",
21144 + cascade->portNumber,
21145 + cascade->hostName,
21146 + cascade->useFlag);
21148 + if (cascade->useFlag == DB_TBL_USE)
21150 +#ifdef PRINT_DEBUG
21151 + show_debug("%s:find lower cascade",func);
21160 +ReplicateServerInfo *
21161 +PGRget_upper_cascade(void)
21163 + char * func = "PGRget_upper_cascade()";
21164 + ReplicateServerInfo * cascade = NULL;
21166 + if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
21168 + show_error("%s:config data read error",func);
21173 + /* count lower server */
21174 + cascade = Cascade_Inf->myself;
21175 + if ((cascade == NULL) || (Cascade_Inf->top == cascade))
21180 + while (cascade != NULL)
21182 + if (cascade->useFlag == DB_TBL_USE)
21186 + if (Cascade_Inf->top == cascade)
21196 +write_cascade_status_file(ReplicateServerInfo * cascade)
21198 + switch( cascade->useFlag)
21200 + case DB_TBL_FREE:
21201 + PGRwrite_log_file(StatusFp,"cascade(%s) port(%d) free",
21202 + cascade->hostName,
21203 + cascade->portNumber);
21205 + case DB_TBL_INIT:
21206 + PGRwrite_log_file(StatusFp,"cascade(%s) port(%d) initialize",
21207 + cascade->hostName,
21208 + cascade->portNumber);
21211 + PGRwrite_log_file(StatusFp,"cascade(%s) port(%d) start use",
21212 + cascade->hostName,
21213 + cascade->portNumber);
21215 + case DB_TBL_ERROR:
21216 + PGRwrite_log_file(StatusFp,"cascade(%s) port(%d) error",
21217 + cascade->hostName,
21218 + cascade->portNumber);
21221 + PGRwrite_log_file(StatusFp,"cascade(%s) port(%d) become top",
21222 + cascade->hostName,
21223 + cascade->portNumber);
21229 +PGRset_cascade_server_status(ReplicateServerInfo * cascade, int status)
21231 + if (cascade == NULL)
21235 + if (cascade->useFlag != status)
21237 + cascade->useFlag = status;
21238 + write_cascade_status_file(cascade);
21242 +ReplicateServerInfo *
21243 +PGRrecv_cascade_answer(ReplicateServerInfo * cascade,ReplicateHeader * header)
21245 + ReplicateServerInfo * answer = NULL;
21248 + if ((cascade == NULL) || (header == NULL))
21253 + /* FIXME: ReplicateServerInfo->sock must be removed in cascading. */
21254 + if(cascade == Cascade_Inf->upper )
21258 + else if (cascade == Cascade_Inf->lower )
21264 + show_debug("PGRrecv_cascade_answer:receiving packet from sock not belogs to cascade->upper / lower. maybe missing .");
21265 + sock=cascade->sock;
21267 + answer = (ReplicateServerInfo*)PGRread_packet(sock,header);
21272 +PGRsend_cascade(int sock , ReplicateHeader * header, char * query)
21274 + char * func ="PGRsend_cascade()";
21278 + int send_size = 0;
21283 + struct timeval timeout;
21284 + int query_size = 0;
21286 + /* check parameter */
21287 + if ((header == NULL) || (sock == -1))
21289 + return STATUS_ERROR;
21292 +#ifdef PRINT_DEBUG
21293 + show_debug("%s:PGRsend_cascade sock[%d]",func,sock);
21295 + query_size = ntohl(header->query_size);
21296 + header_size = sizeof(ReplicateHeader);
21297 + buf_size = header_size + query_size + 4;
21298 + buf = malloc(buf_size);
21299 + memset(buf,0,buf_size);
21301 + memcpy(buf,header,header_size);
21302 + if (query_size > 0)
21304 + memcpy((char *)(buf+header_size),query,query_size+1);
21310 + timeout.tv_sec = 10;
21311 + timeout.tv_usec = 0;
21314 + * Wait for something to happen.
21317 + FD_SET(sock,&wmask);
21318 + rtn = select(sock+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
21322 + if (errno == EINTR || errno == EAGAIN)
21325 + show_error("%s:select failed ,errno is %s",func , strerror(errno));
21327 + return STATUS_ERROR;
21330 + if (rtn && FD_ISSET(sock, &wmask))
21332 + s = send(sock,send_ptr + send_size,buf_size - send_size ,0);
21335 + if (errno == EINTR || errno == EAGAIN)
21339 + show_error("%s:send failed: %d(%s)",func, errno, strerror(errno));
21341 + return STATUS_ERROR;
21346 + show_error("%s:unexpected EOF", func);
21348 + return STATUS_ERROR;
21351 + if (send_size == buf_size)
21353 +#ifdef PRINT_DEBUG
21354 + show_debug("%s:send[%s] size[%d]",func,query,send_size);
21357 + return STATUS_OK;
21361 + return STATUS_OK;
21365 +PGRwait_answer_cascade(int sock)
21367 + ReplicateHeader header;
21368 + char * answer = NULL;
21370 + answer = PGRread_packet(sock,&header);
21371 + if (answer != NULL)
21374 + return STATUS_OK;
21376 + return STATUS_ERROR;
21379 + * fixup_socket_for_cascades checks the socket's validity.
21380 + * Returns STATUS_OK on success, or STATUS_ERROR if an error occurred.
21381 + * If target is NULL, it only closes the socket.
21383 + * originally written by tanida@sraoss.co.jp
21386 +fixup_socket_for_cascades(int *sock, ReplicateServerInfo *target)
21393 + if(target!=NULL) {
21394 + return PGR_Create_Socket_Connect(sock,target->hostName,target->portNumber);
21396 + return STATUS_OK;
21401 +notice_cascade_data(int sock)
21403 + char * func = "notice_cascade_data";
21404 + ReplicateServerInfo *cascade_data = NULL;
21405 + ReplicateHeader header;
21411 + return STATUS_ERROR;
21414 + cascade_data = get_cascade_data(&cnt, ALL_CASCADE );
21417 + show_error("%s:cascade data is wrong",func);
21418 + return STATUS_ERROR;
21420 + size = sizeof (ReplicateServerInfo) * cnt ;
21422 + memset(&header,0,sizeof(ReplicateHeader));
21423 + header.cmdSys = CMD_SYS_CASCADE ;
21424 + header.cmdSts = CMD_STS_TO_LOWER ;
21425 + header.cmdType = CMD_TYPE_UPDATE_ALL;
21426 + header.query_size = htonl(size);
21427 + PGRsend_cascade(sock, &header, (char *)cascade_data );
21428 + if (cascade_data != NULL)
21430 + free(cascade_data);
21432 + return STATUS_OK;
21436 +PGRcascade_main(int sock, ReplicateHeader * header, char * query)
21438 + switch (header->cmdSts)
21440 + case CMD_STS_TO_UPPER:
21441 + if (header->cmdType == CMD_TYPE_ADD)
21443 + /* add lower cascade data to myself */
21444 + add_cascade_data(header,(ReplicateServerInfo*)query);
21445 + /* send cascade data to upper */
21446 + /* and receive new cascade data from upper */
21447 + PGRstartup_cascade();
21448 + /* return to lower with new cascade data */
21449 + notice_cascade_data(sock);
21450 + /* notifies a cascade server's information to Cluster DBs */
21451 + notice_cascade_data_to_cluster_db();
21454 + case CMD_STS_TO_LOWER:
21456 + * use for cascading replication
21460 + return STATUS_OK;
21464 +notice_cascade_data_to_cluster_db(void)
21466 + char userName[USERNAME_MAX_LENGTH];
21467 + ReplicateServerInfo *s=NULL;
21469 + if (Cascade_Inf->lower == NULL)
21471 + Cascade_Inf->lower = PGRget_lower_cascade();
21473 + if (Cascade_Inf->lower == NULL)
21475 + return STATUS_ERROR;
21477 + s=Cascade_Inf->lower;
21478 + memset(userName,0,sizeof(userName));
21479 + strncpy(userName ,getenv("LOGNAME"),sizeof(userName)-1);
21481 + PGRnotice_replication_server(s->hostName,
21483 + s->recoveryPortNumber,
21484 + s->lifecheckPortNumber,
21487 + return STATUS_OK;
21491 +PGRwait_notice_rlog_done(void)
21493 + ReplicateHeader header;
21496 + PGRread_packet(lsock,&header);
21497 + return STATUS_OK;
21499 + return STATUS_ERROR;
21505 +PGRsend_notice_quit(void )
21507 + ReplicateHeader header;
21510 + size = strlen("QUIT_SAFELY");
21511 + memset(&header,0,sizeof(ReplicateHeader));
21512 + header.cmdSys = CMD_SYS_CALL ;
21513 + header.cmdSts = CMD_STS_RESPONSE ;
21514 + header.cmdType = CMD_TYPE_FRONTEND_CLOSED;
21515 + header.query_size = htonl(size);
21516 + PGRsend_lower_cascade(&header, "QUIT_SAFELY");
21517 + PGRwait_notice_rlog_done();
21518 + return STATUS_OK;
21522 +PGRsend_notice_rlog_done(int sock)
21524 + ReplicateHeader header;
21529 + return STATUS_ERROR;
21532 + size = strlen(PGR_QUERY_DONE_NOTICE_CMD);
21533 + memset(&header,0,sizeof(ReplicateHeader));
21534 + header.cmdSys = CMD_SYS_CASCADE ;
21535 + header.cmdSts = CMD_STS_RESPONSE ;
21536 + header.cmdType = 0;
21537 + header.query_size = htonl(size);
21538 + PGRsend_cascade(sock, &header, PGR_QUERY_DONE_NOTICE_CMD);
21539 + return STATUS_OK;
21542 +#endif /* USE_REPLICATION */
21543 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/conf.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/conf.c
21544 --- postgresql-8.2.4/src/pgcluster/pgrp/conf.c 1970-01-01 01:00:00.000000000 +0100
21545 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/conf.c 2007-02-18 22:52:17.000000000 +0100
21547 +/*--------------------------------------------------------------------
21550 + * Replication server for PostgreSQL
21553 + * This module reads and sets the configuration data.
21555 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
21556 + *--------------------------------------------------------------------
21558 +#include "postgres.h"
21560 +#include <stdio.h>
21561 +#include <string.h>
21562 +#include <unistd.h>
21563 +#include <sys/types.h>
21564 +#include <fcntl.h>
21565 +#include <ctype.h>
21566 +#include <sys/stat.h>
21567 +#include <sys/ipc.h>
21568 +#include <sys/shm.h>
21569 +#include <sys/sem.h>
21570 +#include <netdb.h>
21571 +#include <errno.h>
21572 +#include <sys/file.h>
21576 +#include "libpq-fe.h"
21577 +#include "libpq-int.h"
21578 +#include "fe-auth.h"
21580 +#include "replicate_com.h"
21581 +#include "pgreplicate.h"
21583 +/*--------------------------------------------------------------------
21585 + * PGRget_Conf_Data()
21587 + * Initialize memory and tables
21589 + * char * path: path of the setup file (I)
21592 + * NG: STATUS_ERROR
21593 + *--------------------------------------------------------------------
21596 +PGRget_Conf_Data(char * path)
21598 + char * func = "PGRget_Conf_Data()";
21599 + HostTbl host_tbl[MAX_DB_SERVER];
21600 + ConfDataType * conf = NULL;
21603 + int cascade_cnt = 0;
21605 + int lb_rec_no = 0;
21606 + int cascade_rec_no = -1;
21610 + union semun sem_arg;
21615 + if (path == NULL)
21619 + size = sizeof(LogFileInf);
21620 + LogFileData = (LogFileInf *) malloc(size);
21621 + if (LogFileData == NULL)
21623 + show_error("%s:malloc() failed. reason: %s", func,strerror(errno));
21624 + return STATUS_ERROR;
21626 + memset(LogFileData,0,size);
21628 + snprintf(fname,sizeof(fname),"%s/%s",path,PGREPLICATE_STATUS_FILE);
21629 + StatusFp = fopen(fname,"a");
21630 + if (StatusFp == NULL)
21632 + show_error("%s:fopen failed: (%s)",func,strerror(errno));
21633 + return STATUS_ERROR;
21636 + snprintf(fname,sizeof(fname),"%s/%s",path,PGREPLICATE_RID_FILE);
21637 + RidFp = fopen(fname,"r+");
21638 + if (RidFp == NULL)
21640 + RidFp = fopen(fname,"w+");
21641 + if (RidFp == NULL)
21643 + show_error("%s:fopen failed: (%s)",func,strerror(errno));
21644 + return STATUS_ERROR;
21649 + * read configuration file
21651 + if (PGR_Get_Conf_Data(path,PGREPLICATE_CONF_FILE) != STATUS_OK)
21653 + show_error("%s:PGR_Get_Conf_Data failed",func);
21654 + return STATUS_ERROR;
21656 +#ifdef PRINT_DEBUG
21657 + show_debug("PGR_Get_Conf_Data ok");
21660 + /* allocate response information table */
21661 + PGR_Response_Inf = (ResponseInf *)malloc(sizeof(ResponseInf));
21662 + if (PGR_Response_Inf == NULL)
21664 + show_error("%s:malloc() failed. reason: %s", func,strerror(errno));
21665 + return STATUS_ERROR;
21667 + PGR_Response_Inf->response_mode = PGR_NORMAL_MODE;
21668 + PGR_Response_Inf->current_cluster = 0;
21671 + * memory allocate load balance table buffer
21673 + LoadBalanceTbl = (RecoveryTbl *)malloc(sizeof(RecoveryTbl)*MAX_DB_SERVER);
21674 + if (LoadBalanceTbl == (RecoveryTbl *)NULL)
21676 + show_error("%s:malloc failed: (%s)",func,strerror(errno));
21677 + return STATUS_ERROR;
21679 +#ifdef PRINT_DEBUG
21680 + show_debug("LoadBalanceTbl allocate ok");
21684 + * memory allocate cascade server table buffer
21686 + size = sizeof(ReplicateServerInfo) * MAX_DB_SERVER;
21687 + CascadeTblShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
21688 + if (CascadeTblShmid < 0)
21690 + show_error("%s:shmget() failed. reason: %s", func,strerror(errno));
21691 + return STATUS_ERROR;
21693 +#ifdef PRINT_DEBUG
21694 + show_debug("%s:CascadeTbl shmget ok",func);
21696 + Cascade_Tbl = (ReplicateServerInfo *)shmat(CascadeTblShmid,0,0);
21697 + if (Cascade_Tbl == (ReplicateServerInfo *)-1)
21699 + show_error("%s:shmat() failed. reason: %s", func,strerror(errno));
21700 + return STATUS_ERROR;
21702 +#ifdef PRINT_DEBUG
21703 + show_debug("%s:CascadeTbl shmat ok",func);
21705 + memset(Cascade_Tbl , 0 , size );
21708 + * memory allocate cascade index
21710 + size = sizeof(CascadeInf);
21711 + CascadeInfShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
21712 + if (CascadeInfShmid < 0)
21714 + show_error("%s:shmget() failed. reason: %s", func,strerror(errno));
21715 + return STATUS_ERROR;
21717 +#ifdef PRINT_DEBUG
21718 + show_debug("%s:CascadeInf shmget ok",func);
21720 + Cascade_Inf = (CascadeInf *)shmat(CascadeInfShmid,0,0);
21721 + if (Cascade_Inf == (CascadeInf *)-1)
21723 + show_error("%s:shmat() failed. reason: %s",func, strerror(errno));
21724 + return STATUS_ERROR;
21726 +#ifdef PRINT_DEBUG
21727 + show_debug("%s:CascadeInf shmat ok",func);
21729 + memset(Cascade_Inf , 0 , size );
21732 + * memory allocate replication commit log buffer
21734 + size = sizeof(CommitLogInf) * MAX_DB_SERVER * MAX_CONNECTIONS;
21735 + CommitLogShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
21736 + if (CommitLogShmid < 0)
21738 + show_error("%s:shmget() failed. reason: %s", func, strerror(errno));
21739 + return STATUS_ERROR;
21741 +#ifdef PRINT_DEBUG
21742 + show_debug("%s:CommitLog shmget ok",func);
21744 + Commit_Log_Tbl = (CommitLogInf *)shmat(CommitLogShmid,0,0);
21745 + if (Commit_Log_Tbl == (CommitLogInf *)-1)
21747 + show_error("%s:shmat() failed. reason: %s",func, strerror(errno));
21748 + return STATUS_ERROR;
21750 +#ifdef PRINT_DEBUG
21751 + show_debug("%s:Commit_Log_Tbl shmat ok",func);
21753 + memset(Commit_Log_Tbl , 0 , size );
21754 + (Commit_Log_Tbl + (MAX_DB_SERVER * MAX_CONNECTIONS) -1)->inf.useFlag = DB_TBL_END;
21756 + /* create semaphore */
21757 + if ((SemID = semget(IPC_PRIVATE,2,IPC_CREAT | IPC_EXCL | 0600)) < 0)
21759 + show_error("%s:semget() failed. (%s)",func,strerror(errno));
21760 + return STATUS_ERROR;
21762 + for ( i = 0 ; i < 2 ; i ++)
21764 + semctl(SemID, i, GETVAL, sem_arg);
21766 + semctl(SemID, i, SETVAL, sem_arg);
21769 + /* create semaphore */
21770 + if ((CascadeSemID = semget(IPC_PRIVATE,2,IPC_CREAT | IPC_EXCL | 0600)) < 0)
21772 + show_error("%s:semget() failed. (%s)",func,strerror(errno));
21773 + return STATUS_ERROR;
21775 + for ( i = 0 ; i < 2 ; i ++)
21777 + semctl(CascadeSemID, i, GETVAL, sem_arg);
21779 + semctl(CascadeSemID, i, SETVAL, sem_arg);
21783 + if ((VacuumSemID = semget(IPC_PRIVATE,2,IPC_CREAT | IPC_EXCL | 0600)) < 0)
21785 + show_error("%s:semget() failed. (%s)",func,strerror(errno));
21786 + return STATUS_ERROR;
21788 + for ( i = 0 ; i < 2 ; i ++)
21790 + semctl(VacuumSemID, i, GETVAL, sem_arg);
21792 + semctl(VacuumSemID, i, SETVAL, sem_arg);
21794 + size = sizeof(ReplicationLogInf);
21795 + Replicateion_Log = malloc(size);
21796 + if (Replicateion_Log == NULL)
21798 + show_error("%s:malloc failed: (%s)",func,strerror(errno));
21799 + return STATUS_ERROR;
21801 + memset(Replicateion_Log , 0 , size );
21802 + Replicateion_Log->RLog_Sock_Path = NULL;
21803 +#ifdef PRINT_DEBUG
21804 + show_debug("%s:RLog Memory Allocation ok",func);
21809 + * set each datas into the tables
21811 + conf = ConfData_Top;
21812 + while (conf != (ConfDataType *)NULL)
21814 + show_debug("registering (key,value)=(%s,%s)",conf->key,conf->value);
21815 + /* get cluster db data */
21816 + if (!STRCMP(conf->table,CLUSTER_SERVER_TAG))
21818 + rec_no = conf->rec_no;
21819 + if (cnt < rec_no)
21822 + if (cnt >= MAX_DB_SERVER)
21827 + if (!STRCMP(conf->key,HOST_NAME_TAG))
21830 + strncpy(host_tbl[rec_no].hostName,conf->value,sizeof(host_tbl[rec_no].hostName));
21831 + show_debug("registering hostname %s",host_tbl[rec_no].hostName);
21832 + ip=PGRget_ip_by_name(conf->value);
21834 + sprintf(host_tbl[rec_no].resolvedName,
21837 + (ip >> 8) & 0xff ,
21838 + (ip >> 16) & 0xff ,
21839 + (ip >> 24) & 0xff );
21840 + show_debug("resolved name is %s",host_tbl[rec_no].resolvedName);
21842 + conf = (ConfDataType*)conf->next;
21845 + if (!STRCMP(conf->key,PORT_TAG))
21847 + host_tbl[rec_no].port = atoi(conf->value);
21848 + conf = (ConfDataType*)conf->next;
21851 + if (!STRCMP(conf->key,RECOVERY_PORT_TAG))
21853 + host_tbl[rec_no].recoveryPort = atoi(conf->value);
21854 + conf = (ConfDataType*)conf->next;
21858 + /* get cascade server data */
21859 + else if (!STRCMP(conf->table, REPLICATION_SERVER_INFO_TAG))
21861 + cascade_rec_no = conf->rec_no ;
21862 + if (cascade_cnt < cascade_rec_no)
21864 + cascade_cnt = cascade_rec_no;
21865 + if (cascade_cnt >= MAX_DB_SERVER)
21870 + if (!STRCMP(conf->key,HOST_NAME_TAG))
21872 + strncpy((Cascade_Tbl+cascade_rec_no)->hostName,conf->value,sizeof(Cascade_Tbl->hostName));
21873 + conf = (ConfDataType*)conf->next;
21876 + if (!STRCMP(conf->key,PORT_TAG))
21878 + if (atoi(conf->value) > 0)
21880 + (Cascade_Tbl+cascade_rec_no)->portNumber = atoi(conf->value);
21884 + (Cascade_Tbl+cascade_rec_no)->portNumber = DEFAULT_PGRP_PORT;
21886 + (Cascade_Tbl+cascade_rec_no)->sock = -1;
21888 + conf = (ConfDataType*)conf->next;
21889 + PGRset_cascade_server_status(Cascade_Tbl+cascade_rec_no,DB_TBL_USE);
21890 + if (cascade_rec_no == 0)
21892 + Cascade_Inf->top = Cascade_Tbl;
21896 + if (!STRCMP(conf->key,RECOVERY_PORT_TAG))
21898 + if (atoi(conf->value) > 0)
21900 + (Cascade_Tbl+cascade_rec_no)->recoveryPortNumber = atoi(conf->value);
21904 + (Cascade_Tbl+cascade_rec_no)->recoveryPortNumber = DEFAULT_PGRP_RECOVERY_PORT;
21906 + (Cascade_Tbl+cascade_rec_no)->rlog_sock=-1;
21907 + (Cascade_Tbl+cascade_rec_no +1)->useFlag = DB_TBL_END;
21908 + conf = (ConfDataType*)conf->next;
21912 + /* get loadbalancer table data */
21913 + else if (!STRCMP(conf->table,LOAD_BALANCE_SERVER_TAG))
21915 + lb_rec_no = conf->rec_no;
21916 + if (lb_cnt < lb_rec_no)
21918 + lb_cnt = lb_rec_no;
21919 + if (lb_cnt >= MAX_DB_SERVER)
21924 + if (!STRCMP(conf->key,HOST_NAME_TAG))
21926 + strncpy((LoadBalanceTbl + lb_rec_no)->hostName, conf->value,sizeof(LoadBalanceTbl->hostName));
21927 + conf = (ConfDataType*)conf->next;
21930 + if (!STRCMP(conf->key,RECOVERY_PORT_TAG))
21932 + (LoadBalanceTbl + lb_rec_no)->recoveryPort = atoi(conf->value);
21933 + (LoadBalanceTbl + lb_rec_no)->sock = -1;
21934 + (LoadBalanceTbl + lb_rec_no)->recovery_sock = -1;
21935 + conf = (ConfDataType*)conf->next;
21939 + /* get logging file data */
21940 + else if (!STRCMP(conf->table, LOG_INFO_TAG))
21942 + if (!STRCMP(conf->key, FILE_NAME_TAG))
21944 + strncpy(LogFileData->file_name, conf->value ,sizeof(LogFileData->file_name));
21945 + LogFileData->fp = NULL;
21946 + conf = (ConfDataType*)conf->next;
21949 + if (!STRCMP(conf->key, FILE_SIZE_TAG))
21954 + len = strlen(conf->value);
21955 + ptr = conf->value;
21956 + for (i = 0; i < len ; i ++,ptr++)
21958 + if ((! isdigit(*ptr)) && (! isspace(*ptr)))
21968 + unit = 1024*1024;
21972 + unit = 1024*1024*1024;
21979 + LogFileData->max_size = atoi(conf->value) * unit;
21980 + conf = (ConfDataType*)conf->next;
21983 + if (!STRCMP(conf->key, LOG_ROTATION_TAG))
21985 + LogFileData->rotation = atoi(conf->value);
21986 + conf = (ConfDataType*)conf->next;
21992 + if (!STRCMP(conf->key,HOST_NAME_TAG))
21995 + ip=PGRget_ip_by_name(conf->value);
21996 + if (ResolvedName == NULL)
21998 + ResolvedName = malloc(ADDRESS_LENGTH);
22000 + if (ResolvedName == NULL)
22006 + memset(ResolvedName,0,ADDRESS_LENGTH);
22009 + sprintf(ResolvedName,
22012 + (ip >> 8) & 0xff ,
22013 + (ip >> 16) & 0xff ,
22014 + (ip >> 24) & 0xff );
22015 + conf = (ConfDataType*)conf->next;
22018 + else if (!STRCMP(conf->key,REPLICATE_PORT_TAG))
22020 + Port_Number = atoi(conf->value);
22021 + conf = (ConfDataType*)conf->next;
22024 + /* get port number for recovery cluster db server */
22025 + else if (!STRCMP(conf->key,RECOVERY_PORT_TAG))
22027 + if (atoi(conf->value) > 0)
22029 + Recovery_Port_Number = atoi(conf->value);
22033 + Recovery_Port_Number =DEFAULT_PGRP_RECOVERY_PORT;
22035 + conf = (ConfDataType*)conf->next;
22038 + else if (!STRCMP(conf->key,LIFECHECK_PORT_TAG))
22040 + if (atoi(conf->value) > 0)
22042 + LifeCheck_Port_Number = atoi(conf->value);
22046 + LifeCheck_Port_Number = DEFAULT_PGRP_LIFECHECK_PORT;
22048 + conf = (ConfDataType*)conf->next;
22051 + else if (!STRCMP(conf->key,RLOG_PORT_TAG))
22053 + if (atoi(conf->value) > 0)
22055 + Replicateion_Log->RLog_Port_Number = atoi(conf->value);
22059 + Replicateion_Log->RLog_Port_Number = DEFAULT_PGRP_RLOG_PORT;
22061 + conf = (ConfDataType*)conf->next;
22064 + /* get response mode */
22065 + else if (!STRCMP(conf->key,RESPONSE_MODE_TAG))
22067 + if (!STRCMP(conf->value,RESPONSE_MODE_RELIABLE))
22069 + PGR_Response_Inf->response_mode = PGR_RELIABLE_MODE;
22071 + else if (!STRCMP(conf->value,RESPONSE_MODE_FAST))
22073 + PGR_Response_Inf->response_mode = PGR_FAST_MODE;
22077 + PGR_Response_Inf->response_mode = PGR_NORMAL_MODE;
22079 + conf = (ConfDataType*)conf->next;
22082 + /* get replication log use or not */
22083 + else if (!STRCMP(conf->key,USE_REPLICATION_LOG_TAG))
22085 + if (!STRCMP(conf->value,"yes"))
22087 + PGR_Use_Replication_Log = true;
22089 + conf = (ConfDataType*)conf->next;
22092 + /* get replication timeout */
22093 + else if (!STRCMP(conf->key,TIMEOUT_TAG))
22095 + /* get replication timeout */
22096 + PGR_Replication_Timeout = PGRget_time_value(conf->value);
22097 + if ((PGR_Replication_Timeout < 1) || (PGR_Replication_Timeout > 3600))
22099 + fprintf(stderr,"%s is out of range. It should be between 1sec-1hr.\n",TIMEOUT_TAG);
22100 + return STATUS_ERROR;
22102 + conf = (ConfDataType*)conf->next;
22105 + else if (!STRCMP(conf->key,LIFECHECK_TIMEOUT_TAG))
22107 + /* get lifecheck timeout */
22108 + PGR_Lifecheck_Timeout = PGRget_time_value(conf->value);
22109 + if ((PGR_Lifecheck_Timeout < 1) || (PGR_Lifecheck_Timeout > 3600))
22111 + show_error("%s is out of range. It should be between 1sec-1hr.\n",LIFECHECK_TIMEOUT_TAG);
22112 + return STATUS_ERROR;
22114 + conf = (ConfDataType*)conf->next;
22117 + else if (!STRCMP(conf->key,LIFECHECK_INTERVAL_TAG))
22119 + /* get lifecheck interval */
22120 + PGR_Lifecheck_Interval = PGRget_time_value(conf->value);
22121 + if ((PGR_Lifecheck_Interval < 1) || (PGR_Lifecheck_Interval > 3600))
22123 + show_error("%s is out of range. It should between 1sec-1hr.\n",LIFECHECK_INTERVAL_TAG);
22124 + return STATUS_ERROR;
22126 + conf = (ConfDataType*)conf->next;
22130 + conf = (ConfDataType*)conf->next;
22133 + /* create cluster db server table */
22134 + Host_Tbl_Begin = (HostTbl *)NULL;
22136 + size = sizeof(HostTbl) * MAX_DB_SERVER;
22137 + HostTblShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
22138 + if (HostTblShmid < 0)
22140 + show_error("%s:shmget() failed. reason: %s", func,strerror(errno));
22141 + return STATUS_ERROR;
22143 +#ifdef PRINT_DEBUG
22144 + show_debug("%s:HostTbl shmget ok",func);
22146 + Host_Tbl_Begin = (HostTbl *)shmat(HostTblShmid,0,0);
22147 + if (Host_Tbl_Begin == (HostTbl *)-1)
22149 + show_error("%s:shmat() failed. reason: %s", func, strerror(errno));
22150 + return STATUS_ERROR;
22152 +#ifdef PRINT_DEBUG
22153 + show_debug("%s:HostTbl shmat ok",func);
22155 + memset(Host_Tbl_Begin , 0 , size );
22156 + Host_Tbl_Begin -> useFlag = DB_TBL_END;
22158 + for ( i = 0 ; i <= cnt ; i ++)
22160 + PGRadd_HostTbl(&host_tbl[i],DB_TBL_INIT);
22162 + /* set load balance table */
22163 + for ( i = 0 ; i <= lb_cnt ; i ++)
22165 + (LoadBalanceTbl + i)->port = -1;
22166 + (LoadBalanceTbl + i)->sock = -1;
22168 + memset((LoadBalanceTbl + i),0,sizeof(RecoveryTbl));
22169 + PGR_Free_Conf_Data();
22171 + /* allocate result buffer of query */
22172 + PGR_Result = malloc(PGR_MESSAGE_BUFSIZE);
22173 + if (PGR_Result == NULL)
22175 + show_error("%s:malloc() failed. reason: %s", func, strerror(errno));
22176 + return STATUS_ERROR;
22178 + memset(PGR_Result,0,PGR_MESSAGE_BUFSIZE);
22180 + /* allocate log_data */
22181 + PGR_Log_Header = malloc(sizeof(ReplicateHeader));
22182 + if (PGR_Log_Header == NULL)
22184 + show_error("%s:malloc() failed. reason: %s", func, strerror(errno));
22185 + return STATUS_ERROR;
22187 + memset(PGR_Log_Header,0,sizeof(ReplicateHeader));
22189 + /* allocate send query id */
22190 + size = sizeof(unsigned int) * (MAX_DB_SERVER +1);
22191 + PGR_Send_Query_ID = malloc (size);
22192 + if (PGR_Send_Query_ID == NULL)
22194 + show_error("%s:malloc() failed. reason: %s", func, strerror(errno));
22195 + return STATUS_ERROR;
22197 + memset(PGR_Send_Query_ID, 0, size);
22198 + for ( i = 0 ; i < MAX_DB_SERVER ; i ++)
22200 + StartReplication[i] = true;
22203 + /* set self data into cascade table */
22205 + cascade_rec_no ++;
22206 + if (ResolvedName != NULL)
22208 + strncpy((Cascade_Tbl+cascade_rec_no)->hostName,ResolvedName,ADDRESS_LENGTH);
22213 + gethostname((Cascade_Tbl+cascade_rec_no)->hostName,sizeof(Cascade_Tbl->hostName));
22215 + (Cascade_Tbl+cascade_rec_no)->portNumber = Port_Number;
22216 + (Cascade_Tbl+cascade_rec_no)->recoveryPortNumber = Recovery_Port_Number;
22217 + (Cascade_Tbl+cascade_rec_no)->sock = -1;
22219 + PGRset_cascade_server_status(Cascade_Tbl+cascade_rec_no,DB_TBL_USE);
22221 + (Cascade_Tbl+(cascade_rec_no+1))->useFlag = DB_TBL_END;
22223 + Cascade_Inf->top = Cascade_Tbl;
22224 + Cascade_Inf->end = Cascade_Tbl+cascade_rec_no;
22225 + Cascade_Inf->upper = NULL;
22226 + Cascade_Inf->lower = NULL;
22227 + if (cascade_rec_no >= 1)
22229 + Cascade_Inf->upper = (Cascade_Tbl+cascade_rec_no - 1);
22231 + (Cascade_Tbl+(cascade_rec_no+1))->useFlag = DB_TBL_END;
22233 + Cascade_Inf->myself = (Cascade_Tbl+cascade_rec_no);
22234 + Cascade_Inf->useFlag = DB_TBL_USE;
22236 + PGR_Response_Inf->response_mode = PGR_NORMAL_MODE;
22238 + return STATUS_OK;
22241 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/lifecheck.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/lifecheck.c
22242 --- postgresql-8.2.4/src/pgcluster/pgrp/lifecheck.c 1970-01-01 01:00:00.000000000 +0100
22243 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/lifecheck.c 2007-03-01 16:27:15.000000000 +0100
22245 +/*--------------------------------------------------------------------
22250 + * This file contains the functions that pgreplicate calls
22251 + * to perform the lifecheck of the cluster DB servers.
22253 + * Portions Copyright (c) 2003-2007, Atsushi Mitani
22254 + *--------------------------------------------------------------------
22256 +#include "postgres.h"
22257 +#include "postgres_fe.h"
22259 +#include <pthread.h>
22260 +#include <stdio.h>
22261 +#include <stdarg.h>
22262 +#include <sys/types.h>
22263 +#include <fcntl.h>
22264 +#include <errno.h>
22265 +#include <ctype.h>
22267 +#include <sys/ipc.h>
22268 +#include <sys/shm.h>
22269 +#include <sys/sem.h>
22270 +#include <sys/msg.h>
22271 +#include <signal.h>
22274 +#include "libpq/pqsignal.h"
22275 +#include "utils/guc.h"
22276 +#include "miscadmin.h"
22277 +#include "nodes/nodes.h"
22278 +#include "nodes/parsenodes.h"
22279 +#include "access/xact.h"
22280 +#include "access/xlog.h"
22281 +#include "tcop/tcopprot.h"
22282 +#include "postmaster/postmaster.h"
22285 +#include "libpq-fe.h"
22286 +#include "libpq-int.h"
22287 +#include "fe-auth.h"
22289 +#include <sys/socket.h>
22290 +#include <unistd.h>
22291 +#include <netdb.h>
22292 +#include <arpa/inet.h>
22294 +#ifdef HAVE_NETINET_TCP_H
22295 +#include <netinet/tcp.h>
22298 +#ifdef HAVE_SYS_SELECT_H
22299 +#include <sys/select.h>
22303 +#ifdef HAVE_CRYPT_H
22304 +#include <crypt.h>
22309 +#include "mb/pg_wchar.h"
22312 +#include "access/xact.h"
22313 +#include "lib/dllist.h"
22314 +#include "libpq/pqformat.h"
22315 +#include "replicate_com.h"
22316 +#include "pgreplicate.h"
22318 +#define PING_DB "template1"
22319 +#define PING_QUERY "SELECT 1"
22321 +static HostTbl * PGR_Cluster_DB_4_Lifecheck = (HostTbl*)NULL;
22323 +/*--------------------------------------
22324 + * PROTOTYPE DECLARATION
22325 + *--------------------------------------
22327 +int PGRlifecheck_main(int fork_wait_time);
22329 +static bool is_started_replication(void);
22330 +static void set_timeout(SIGNAL_ARGS);
22331 +static int lifecheck_loop(void);
22332 +static int ping_cluster(PGconn * conn);
22333 +static void set_host_status( HostTbl * host_ptr , int status );
22336 +PGRlifecheck_main(int fork_wait_time) /* Lifecheck entry point: installs signal handlers, resolves the login user, then calls is_started_replication() and lifecheck_loop(), sleeping PGR_Lifecheck_Interval in between */
22338 + bool started = false;
22342 + pgid = getpgid(0);
22346 + return STATUS_OK;
22350 + * in child process,
22351 + * call recovery module
22355 + PGRsignal(SIGHUP, PGRexit_subprocess);
22356 + PGRsignal(SIGTERM, PGRexit_subprocess);
22357 + PGRsignal(SIGINT, PGRexit_subprocess);
22358 + PGRsignal(SIGQUIT, PGRexit_subprocess);
22359 + PGRsignal(SIGALRM, set_timeout); /* lifecheck_loop() arms alarm(); set_timeout() handles its expiry */
22361 + if (fork_wait_time > 0) { /* optional start-up delay, in seconds */
22362 + sleep(fork_wait_time);
22365 + if (PGRuserName == NULL) /* no user set yet: fall back to $LOGNAME, then $USER, then "postgres" */
22367 + PGRuserName = getenv("LOGNAME");
22368 + if (PGRuserName == NULL)
22370 + PGRuserName = getenv("USER");
22371 + if (PGRuserName == NULL)
22372 + PGRuserName = "postgres";
22378 + started = is_started_replication();
22381 + /* sleep for one lifecheck interval */
22382 + sleep(PGR_Lifecheck_Interval);
22386 + /* run the lifecheck against all cluster DBs */
22387 + lifecheck_loop();
22389 + /* sleep for one lifecheck interval */
22390 + sleep(PGR_Lifecheck_Interval);
22392 + return STATUS_OK;
22396 +is_started_replication(void) /* Scan the cluster DB host table for an entry flagged DB_TBL_USE */
22398 + HostTbl * host_ptr = (HostTbl*)NULL;
22400 + host_ptr = Host_Tbl_Begin;
22401 + while(host_ptr->useFlag != DB_TBL_END) /* the table is terminated by a DB_TBL_END entry */
22403 + if (host_ptr->useFlag == DB_TBL_USE) /* presumably returns true on the first host in use — branch body not visible here, TODO confirm */
22413 +set_timeout(SIGNAL_ARGS) /* SIGALRM handler installed by PGRlifecheck_main(): counts a timeout against the host currently being checked */
22415 + if (PGR_Cluster_DB_4_Lifecheck != NULL) /* set by lifecheck_loop() before it arms the alarm */
22417 + PGR_Cluster_DB_4_Lifecheck->retry_count ++;
22418 + if (PGR_Cluster_DB_4_Lifecheck->retry_count > PGR_CONNECT_RETRY_TIME )
22420 + set_host_status(PGR_Cluster_DB_4_Lifecheck,DB_TBL_ERROR); /* NOTE(review): set_host_status() increments retry_count again for DB_TBL_ERROR — confirm the double count is intended */
22423 + PGRsignal(SIGALRM, set_timeout); /* re-install this handler for the next alarm */
22427 +lifecheck_loop(void) /* Walk the host table and ping each host flagged DB_TBL_USE; returns STATUS_ERROR only when there is no host table */
22429 + HostTbl * host_ptr = (HostTbl*)NULL;
22431 + char * host = NULL;
22432 + PGconn * conn = NULL;
22434 + host_ptr = Host_Tbl_Begin;
22435 + if (host_ptr == NULL)
22437 + return STATUS_ERROR;
22440 + while(host_ptr->useFlag != DB_TBL_END) /* the table is terminated by a DB_TBL_END entry */
22443 + * check the status of the cluster DB
22445 + if (host_ptr->useFlag != DB_TBL_USE)
22450 + snprintf(port,sizeof(port),"%d", host_ptr->port); /* PGRcreateConn() takes the port as a string */
22451 + host = (char *)(host_ptr->resolvedName);
22452 + /* remember the host under test so set_timeout() can count a timeout against it */
22453 + PGR_Cluster_DB_4_Lifecheck = host_ptr;
22455 + /* arm SIGALRM with the lifecheck timeout */
22456 + alarm(PGR_Lifecheck_Timeout);
22459 + conn = PGRcreateConn(host,port, PING_DB ,PGRuserName,"","","");
22460 + if ((conn != NULL) &&
22461 + (ping_cluster(conn) == STATUS_OK)) /* host is alive only if both the connection and the ping query succeed */
22463 + set_host_status(host_ptr, DB_TBL_USE);
22467 + set_host_status(host_ptr, DB_TBL_ERROR);
22469 + /* reset alarm */
22477 + return STATUS_OK;
22481 +ping_cluster(PGconn * conn) /* Run PING_QUERY on conn; STATUS_ERROR when the result status is an error, STATUS_OK otherwise */
22484 + PGresult * res = (PGresult *)NULL;
22486 + res = PQexec(conn, PING_QUERY );
22488 + status = PQresultStatus(res); /* NOTE(review): libpq documents a NULL result as PGRES_FATAL_ERROR here — confirm against the libpq version in use */
22493 + if ((status == PGRES_NONFATAL_ERROR ) ||
22494 + (status == PGRES_FATAL_ERROR ))
22496 + return STATUS_ERROR;
22498 + return STATUS_OK;
22502 +set_host_status( HostTbl * host_ptr , int status ) /* Update a host's status, tolerating up to PGR_CONNECT_RETRY_TIME failures before an error is recorded */
22504 + if (host_ptr == NULL)
22506 + if (status == DB_TBL_ERROR)
22508 + host_ptr->retry_count ++; /* one more failure for this host */
22509 + if (host_ptr->retry_count > PGR_CONNECT_RETRY_TIME ) /* record the error only once the retry budget is exceeded */
22511 + PGRset_host_status(host_ptr, status);
22516 + host_ptr->retry_count = 0; /* non-error path (presumably the else branch — TODO confirm): clear the failure counter */
22517 + PGRset_host_status(host_ptr, status);
22521 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/main.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/main.c
22522 --- postgresql-8.2.4/src/pgcluster/pgrp/main.c 1970-01-01 01:00:00.000000000 +0100
22523 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/main.c 2007-02-18 22:52:17.000000000 +0100
22525 +/*--------------------------------------------------------------------
22528 + * Replication server for PostgreSQL
22531 + * This is the main module of the replication server.
22533 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
22534 + *--------------------------------------------------------------------
22536 +#include "postgres.h"
22538 +#include <stdio.h>
22539 +#include <string.h>
22540 +#include <unistd.h>
22541 +#include <sys/time.h>
22542 +#include <signal.h>
22543 +#include <sys/wait.h>
22544 +#include <ctype.h>
22545 +#include <sys/types.h>
22546 +#include <sys/stat.h>
22547 +#include <sys/socket.h>
22548 +#include <sys/ipc.h>
22549 +#include <sys/shm.h>
22550 +#include <netdb.h>
22551 +#include <netinet/in.h>
22552 +#include <errno.h>
22553 +#include <fcntl.h>
22555 +#include <sys/param.h>
22556 +#include <arpa/inet.h>
22557 +#include <sys/file.h>
22558 +#include <pthread.h>
22560 +#ifdef HAVE_NETINET_TCP_H
22561 +#include <netinet/tcp.h>
22563 +#ifdef HAVE_SYS_SELECT_H
22564 +#include <sys/select.h>
22567 +#ifdef HAVE_GETOPT_H
22568 +#include <getopt.h>
22571 +#include "miscadmin.h"
22572 +#include "nodes/nodes.h"
22574 +#include "libpq-fe.h"
22575 +#include "libpq/libpq-fs.h"
22576 +#include "libpq-int.h"
22577 +#include "fe-auth.h"
22580 +#include "access/xact.h"
22581 +#include "replicate_com.h"
22582 +#include "pgreplicate.h"
22585 +#include "win32.h"
22587 +#include <arpa/inet.h>
22588 +#ifdef HAVE_CRYPT_H
22589 +#include <crypt.h>
22593 +#include "mb/pg_wchar.h"
22596 +/*--------------------------------------
22597 + * GLOBAL VARIABLE DECLARATION
22598 + *--------------------------------------
22600 +/* for replicate_com.h */
22602 +ConfDataType * ConfData_Top = (ConfDataType *)NULL;
22603 +ConfDataType * ConfData_End = (ConfDataType *)NULL;
22605 +/* replication server data */
22606 +char * ResolvedName = NULL;
22607 +uint16_t Port_Number = 0;
22608 +uint16_t LifeCheck_Port_Number = 0;
22609 +uint16_t Recovery_Port_Number = 0;
22610 +bool PGR_Parse_Session_Started = false;
22611 +int PGR_Replication_Timeout = 60;
22612 +int PGR_Lifecheck_Timeout = 3;
22613 +int PGR_Lifecheck_Interval = 15;
22615 +/* global table data */
22616 +HostTbl *Host_Tbl_Begin = NULL; /* cluster DB host table; terminated by a DB_TBL_END entry */
22617 +Dllist * Transaction_Tbl_Begin = NULL;
22618 +TransactionTbl * Transaction_Tbl_End = NULL;
22619 +RecoveryTbl * LoadBalanceTbl = NULL; /* load balancer table */
22620 +RecoveryStatusInf * Recovery_Status_Inf = NULL;
22621 +ReplicateHeader * PGR_Log_Header = NULL;
22622 +ReplicateServerInfo * Cascade_Tbl = NULL; /* cascade (replication server) table */
22623 +CommitLogInf * Commit_Log_Tbl = NULL; /* replication commit log buffer */
22624 +QueryLogType * Query_Log_Top = NULL;
22625 +QueryLogType * Query_Log_End = NULL;
22626 +CascadeInf * Cascade_Inf = NULL; /* top/end/upper/lower/myself pointers into Cascade_Tbl */
22627 +ReplicationLogInf * Replicateion_Log = NULL; /* (sic) this spelling is the name used by the rest of the code */
22628 +/* IPC's id data */
22629 +int RecoveryShmid = 0;
22630 +int ReplicateSerializationShmid=0;
22631 +int RecoveryMsgShmid = 0;
22632 +int *RecoveryMsgid = NULL;
22633 +int HostTblShmid = 0;
22634 +int LockWaitTblShmid = 0;
22635 +int LoadBalanceTblShmid = 0;
22636 +int CascadeTblShmid = 0;
22637 +int CascadeInfShmid = 0;
22638 +int CommitLogShmid = 0;
22639 +int QueryLogMsgid = 0;
22640 +int QueryLogAnsMsgid = 0;
22641 +int PGconnMsgid = 0;
22642 +int MaxBackends = 0;
22643 +char * PGR_Result = NULL;
22645 +int RecoverySemID= 0;
22646 +int RecovErysemid = 0;
22647 +int VacuumSemID = 0;
22648 +int CascadeSemID= 0;
22649 +char * PGR_Data_Path = NULL;
22650 +char * PGR_Write_Path = NULL;
22651 +int IS_SESSION_AUTHORIZATION = 0;
22652 +ResponseInf * PGR_Response_Inf = NULL;
22653 +bool StartReplication[MAX_DB_SERVER];
22654 +bool PGR_Cascade = false;
22655 +bool PGR_Use_Replication_Log = false;
22656 +bool PGR_AutoCommit = true;
22657 +unsigned int * PGR_Send_Query_ID = NULL;
22658 +unsigned int PGR_Query_ID = 0;
22659 +volatile bool exit_processing = false;
22660 +int pgreplicate_pid = 0;
22662 +int ReplicateSock = -1;
22663 +int exit_signo = SIGTERM;
22665 +RecoveryQueueInf RecoveryQueue;
22666 +char * Backend_Socket_Dir = NULL;
22668 +unsigned int * PGR_ReplicateSerializationID = NULL;
22670 +int Log_Print = 0;
22671 +int Debug_Print = 0;
22672 +FILE * LogFp = (FILE *)NULL;
22673 +FILE * StatusFp = (FILE *)NULL;
22674 +FILE * RidFp = (FILE *)NULL;
22675 +FILE * QueueFp = (FILE *)NULL;
22677 +extern char *optarg;
22678 +char * PGRuserName = NULL;
22680 +int fork_wait_time = 0;
22681 +int Idle_Flag = IDLE_MODE;
22682 +volatile bool Exit_Request = false;
22684 +pthread_mutex_t transaction_table_mutex;
22686 +/*--------------------------------------
22687 + * PROTOTYPE DECLARATION
22688 + *--------------------------------------
22690 +static void startup_replication_server(void);
22691 +static int replicate_loop(int fd);
22692 +static void replicate_main(void);
22693 +static void quick_exit(SIGNAL_ARGS);
22694 +static void daemonize(void);
22695 +static void write_pid_file(void);
22696 +static void stop_pgreplicate(void);
22697 +static bool is_exist_pid_file(void);
22698 +static void usage(void);
22699 +static void set_exit_processing(int signo);
22701 +/*--------------------------------------------------------------------
22703 + * replicate_loop()
22705 + * replication module
22710 + * NG: STATUS_ERROR
22711 + *--------------------------------------------------------------------
22714 +replicate_loop(int fd) /* Accept a connection on fd, then read packets from it and dispatch on header.cmdSys until the peer goes away or a handler ends the loop */
22716 + char * func = "replicate_loop()";
22723 + bool exist_sys_log=false;
22724 + bool exist_replicate=false;
22725 + bool clear_connection = false;
22728 + result = PGR_Create_Acception(fd,&sock,"",Port_Number);
22729 + if (result == STATUS_ERROR)
22731 + show_error("%s: accept failed (%s)", func, strerror(errno));
22737 + pgid = getpgid(0);
22741 + show_error("%s:fork failed (%s)",func,strerror(errno));
22742 + PGRreplicate_exit(0);
22746 + int status = LOOP_CONTINUE;
22747 + bool PGR_Cascade = false; /* NOTE(review): shadows the file-scope PGR_Cascade, so the assignment under CMD_SYS_CASCADE only changes this local — confirm intended */
22748 + ReplicateHeader header;
22749 + ReplicateHeader header_save_for_recovering;
22750 + char * query = NULL;
22752 + if (fork_wait_time > 0) { /* optional start-up delay, in seconds */
22753 + sleep(fork_wait_time);
22758 + PGRsignal(SIGHUP, quick_exit);
22759 + PGRsignal(SIGINT, quick_exit);
22760 + PGRsignal(SIGQUIT, quick_exit);
22761 + PGRsignal(SIGTERM, quick_exit);
22762 + PGRsignal(SIGALRM, quick_exit);
22763 + PGRsignal(SIGPIPE, SIG_IGN);
22766 + if (PGRinit_transaction_table() != STATUS_OK)
22768 + show_error("transaction table memory allocate failed");
22769 + PGR_Close_Sock(&sock);
22773 + pthread_mutex_init(&transaction_table_mutex, NULL);
22779 + struct timeval timeout;
22781 + timeout.tv_sec = PGR_Replication_Timeout;
22782 + timeout.tv_usec = 0;
22784 + if (query != NULL)
22790 + * Wait for something to happen.
22793 + FD_SET(sock,&rmask);
22794 + rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout); /* wait up to PGR_Replication_Timeout seconds for data on sock */
22797 + if (errno == EINTR)
22801 + if (rtn && FD_ISSET(sock, &rmask))
22804 + query = PGRread_packet(sock,&header);
22805 + if ((query == NULL) || (header.cmdSts == 0)) /* no packet, or a header without a status: the peer has presumably closed */
22808 + if (exist_sys_log)
22810 + show_error("%s:upper cascade closed? , errno=%d(%s)",func,errno,strerror(errno));
22811 + memset(&header, 0, sizeof(ReplicateHeader));
22812 + header.cmdSys = CMD_SYS_CALL;
22813 + header.cmdSts = CMD_STS_QUERY_SUSPEND;
22814 + header.query_size = htonl(0);
22815 + PGRsend_rlog_to_local(&header, NULL);
22816 + exist_sys_log = false;
22820 + if (exist_replicate)
22822 + PGRclear_connections();
22823 + clear_connection = true; /* remember this so the cleanup at the end does not clear the connections twice */
22824 + header_save_for_recovering.cmdSts=CMD_TYPE_OTHER;
22825 + header_save_for_recovering.cmdType=CMD_TYPE_CONNECTION_CLOSE;
22826 + header_save_for_recovering.query_size = htonl(21); /* 21 = strlen("PGR_CLOSE_CONNECTION") + 1 */
22827 + PGRdo_replicate(sock,&header_save_for_recovering,"PGR_CLOSE_CONNECTION");
22829 + PGRsend_notice_quit();
22834 + switch (header.cmdSys)
22836 + case CMD_SYS_LIFECHECK:
22837 + PGRreturn_result(sock,"1", PGR_NOWAIT_ANSWER);
22839 + case CMD_SYS_PREREPLICATE:
22840 + if(Cascade_Inf==NULL ||
22841 + Cascade_Inf->upper == NULL) /* primary when there is no cascade info or no upper server; testing "!=NULL ||" here would always be true for a valid pointer and dereference NULL otherwise */
22843 + /* 1 means "I am primary replicate server." */
22844 + PGRreturn_result(sock,"1", PGR_NOWAIT_ANSWER);
22848 + /* 0 means "I am not primary replicate server." */
22849 + PGRreturn_result(sock,"0", PGR_NOWAIT_ANSWER);
22852 + case CMD_SYS_REPLICATE:
22853 + if (exist_replicate == false) /* keep a copy of the first replicate header; it is reused for the close-connection request above */
22855 + exist_replicate=true;
22856 + memcpy(&header_save_for_recovering,
22858 + sizeof(ReplicateHeader));
22860 + status = PGRdo_replicate(sock,&header,query);
22862 + case CMD_SYS_LOG:
22863 + exist_sys_log = true;
22864 + PGRsend_rlog_to_local(&header, query);
22865 + /* set own replicate id by rlog */
22866 + PGRset_replication_id(ntohl(header.replicate_id));
22867 + PGRsend_notice_rlog_done(sock);
22869 + case CMD_SYS_CASCADE:
22870 + PGR_Cascade = true;
22871 + PGRcascade_main(sock,&header,query);
22873 + case CMD_SYS_CALL:
22874 + if (header.cmdSts == CMD_STS_TRANSACTION_ABORT)
22876 + PGRreconfirm_commit(sock,&header);
22878 + else if (header.cmdSts == CMD_STS_NOTICE)
22882 + else if (header.cmdSts == CMD_STS_RESPONSE)
22884 + if (header.cmdType == CMD_TYPE_FRONTEND_CLOSED)
22886 + PGRsend_notice_rlog_done(sock);
22887 + status = LOOP_END;
22892 + show_error("WARNING: unknown Header->cmdSys %c",header.cmdSys);
22895 + if (status == LOOP_END)
22901 + PGR_Close_Sock(&sock);
22902 + if (query != NULL)
22907 + if (!clear_connection)
22908 + PGRclear_connections();
22909 + PGRdestroy_transaction_table();
22910 + pthread_mutex_destroy(&transaction_table_mutex);
22915 + PGR_Close_Sock(&sock);
22921 +startup_replication_server(void) /* Build a CMD_STS_NOTICE packet carrying a "SELECT <func>(...)" start-up query and pass it to replicate_packet_send_internal() */
22923 + ReplicateHeader header;
22924 + char hostName[HOSTNAME_MAX_LENGTH];
22925 + char userName[USERNAME_MAX_LENGTH];
22928 + if (PGRuserName == NULL) /* no user set yet: fall back to $LOGNAME, then $USER, then "postgres" */
22930 + PGRuserName = getenv("LOGNAME");
22931 + if (PGRuserName == NULL)
22933 + PGRuserName = getenv("USER");
22934 + if (PGRuserName == NULL)
22935 + PGRuserName = "postgres";
22938 + memset(&header,0,sizeof(ReplicateHeader));
22939 + memset(query,0,sizeof(query));
22940 + memset(hostName,0,sizeof(hostName));
22941 + memset(userName,0,sizeof(userName));
22942 + if (ResolvedName != NULL)
22944 + strncpy(hostName,ResolvedName,ADDRESS_LENGTH); /* NOTE(review): copies up to ADDRESS_LENGTH bytes — confirm ADDRESS_LENGTH <= sizeof(hostName) */
22948 + gethostname(hostName,sizeof(hostName)-1);
22950 + strncpy(userName ,PGRuserName,sizeof(userName)-1); /* buffer was zeroed above, so the copy stays NUL-terminated */
22951 + snprintf(query,sizeof(query)-1,"SELECT %s(%d,'%s',%d,%d)",
22952 + PGR_SYSTEM_COMMAND_FUNC,
22953 + PGR_STARTUP_REPLICATION_SERVER_FUNC_NO,
22956 + Recovery_Port_Number);
22957 + header.cmdSts = CMD_STS_NOTICE;
22958 + header.query_id = htonl(PGRget_next_query_id()); /* header integers are sent in network byte order */
22959 + header.query_size = htonl(strlen(query));
22960 + memcpy(header.from_host,hostName,sizeof(header.from_host)); /* NOTE(review): reads sizeof(header.from_host) bytes from hostName — confirm it is not larger than sizeof(hostName) */
22961 + memcpy(header.userName,userName,sizeof(header.userName)); /* NOTE(review): same size assumption as for from_host — confirm */
22962 + strcpy(header.dbName,"template1");
22963 + replicate_packet_send_internal( &header, query,-1,PGRget_recovery_status(),true);
22966 +/*--------------------------------------------------------------------
22968 + * replicate_main()
22970 + * Replication main module
22975 + *--------------------------------------------------------------------
22978 +replicate_main(void) /* Bind the replication socket, send the start-up notice, then wait on the socket and call replicate_loop() for incoming requests */
22980 +#ifdef PRINT_DEBUG
22981 + char * func = "replicate_main()";
22985 + show_debug ("%s:entering replicate_main",func);
22987 + /* this server has an upper cascade server: run the cascade start-up */
22988 + if (Cascade_Inf->upper != NULL)
22990 + show_debug("initialize cascade information");
22991 + PGRstartup_cascade();
22994 + status = PGR_Create_Socket_Bind(&ReplicateSock, ResolvedName, Port_Number);
22996 + if (status != STATUS_OK) /* cannot bind: stop the process recorded in the pid file and exit */
22998 + show_debug("%s %d port bind failed. quit.",func,Port_Number);
22999 + stop_pgreplicate();
23000 + PGRreplicate_exit(0);
23002 +#ifdef PRINT_DEBUG
23003 + show_debug("%s %d port bind OK",func,Port_Number);
23007 + /* send the replication start-up notice */
23008 + startup_replication_server();
23013 + struct timeval timeout;
23015 + if (exit_processing == true)
23016 + PGRreplicate_exit(0);
23018 + timeout.tv_sec = PGR_Replication_Timeout;
23019 + timeout.tv_usec = 0;
23023 + * Wait for something to happen.
23026 + FD_SET(ReplicateSock,&rmask);
23027 + rtn = select(ReplicateSock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout); /* wait up to PGR_Replication_Timeout seconds for ReplicateSock to become readable */
23031 + if (rtn && FD_ISSET(ReplicateSock, &rmask))
23034 + * get recovery status.
23036 + PGRcheck_recovered_host();
23038 + if (exit_processing == true)
23042 + * call replication module
23044 + replicate_loop(ReplicateSock); /* replicate_loop() accepts the pending connection itself */
23049 +/*--------------------------------------------------------------------
23053 + * Exit child process
23055 + * SIGNAL_ARGS: receive signal number(I)
23058 + *--------------------------------------------------------------------
23061 +quick_exit(SIGNAL_ARGS) /* signal handler that replicate_loop() installs for SIGHUP, SIGINT, SIGQUIT, SIGTERM and SIGALRM */
23063 +#ifdef PRINT_DEBUG
23064 + show_debug("quick_exit:signo = %d", postgres_signal_arg); /* debug builds log the signal number received */
23069 +/*--------------------------------------------------------------------
23073 + * Daemonize this process
23078 + *--------------------------------------------------------------------
23083 + char * func = "daemonize()";
23088 + if (pid == (pid_t) -1)
23090 + show_error("%s:fork() failed. reason: %s",func, strerror(errno));
23092 + return; /* not reached */
23094 + else if (pid > 0)
23099 +#ifdef HAVE_SETSID
23100 + if (setsid() < 0)
23102 + show_error("%s:setsid() failed. reason:%s", func,strerror(errno));
23107 + i = open("/dev/null", O_RDWR);
23114 +/*--------------------------------------------------------------------
23116 + * write_pid_file()
23118 + * The process ID is written in the file.
23119 + * This process ID is used when stopping pgreplicate.
23124 + *--------------------------------------------------------------------
23127 +write_pid_file(void) /* Write this process's pid as decimal text to <PGR_Write_Path>/<PGREPLICATE_PID_FILE> */
23129 + char * func = "write_pid_file()";
23132 + char pidbuf[128];
23134 + snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGREPLICATE_PID_FILE);
23135 + fd = fopen(fname, "w"); /* fd is a stdio stream here, not a file descriptor */
23138 + show_error("%s:could not open pid file as %s. reason: %s",
23139 + func, fname, strerror(errno));
23142 + snprintf(pidbuf, sizeof(pidbuf), "%d", getpid());
23143 + fwrite(pidbuf, strlen(pidbuf), 1, fd); /* digits only: no trailing newline is written */
23146 + show_error("%s:could not write pid file as %s. reason: %s",
23147 + func,fname, strerror(errno));
23152 +/*--------------------------------------------------------------------
23154 + * stop_pgreplicate()
23156 + * Stop the pgreplicate process
23161 + *--------------------------------------------------------------------
23164 +stop_pgreplicate(void) /* Read the pid recorded in <PGR_Write_Path>/<PGREPLICATE_PID_FILE> and send SIGTERM to that process */
23166 + char * func = "stop_pgreplicate()";
23169 + char pidbuf[128];
23172 + snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGREPLICATE_PID_FILE);
23173 + fd = fopen(fname, "r"); /* fd is a stdio stream here, not a file descriptor */
23176 + show_error("%s:could not open pid file as %s. reason: %s",
23177 + func,fname, strerror(errno));
23180 + memset(pidbuf,0,sizeof(pidbuf));
23181 + fread(pidbuf, 1, sizeof(pidbuf) - 1, fd); /* read at most sizeof(pidbuf)-1 bytes so the zeroed last byte keeps pidbuf NUL-terminated for atoi() */
23183 + pid = atoi(pidbuf);
23185 + if ((pid <= 0) || (kill (pid,SIGTERM) == -1)) /* never pass 0 or a negative pid to kill(): that would signal a whole process group; errno is not meaningful when pid <= 0 */
23187 + show_error("%s:could not stop pid: %d, reason: %s",func,pid,strerror(errno));
23192 +/*--------------------------------------------------------------------
23194 + * is_exist_pid_file()
23196 + * Check existence of pid file.
23200 + * 1: the pid file exists
23201 + * 0: the pid file does not exist
23202 + *--------------------------------------------------------------------
23205 +is_exist_pid_file(void)
23210 + snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGREPLICATE_PID_FILE);
23211 + if (stat(fname,&buf) == 0)
23213 + /* pid file exists */
23218 + /* pid file does not exist */
23223 +/*--------------------------------------------------------------------
23227 + * Wait for (reap) terminated child processes
23229 + * int signal_args: signal number (expecting the SIGCHLD)
23232 + *--------------------------------------------------------------------
23235 +child_wait(SIGNAL_ARGS)
23241 + pid = waitpid(-1,&ret,WNOHANG);
23242 + } while(pid > 0);
23245 +/*--------------------------------------------------------------------
23249 + * show usage of pgreplicate
23254 + *--------------------------------------------------------------------
23261 + path = getenv("PGDATA");
23262 + if (path == NULL)
23264 + fprintf(stderr,"PGReplicate version [%s]\n",PGREPLICATE_VERSION);
23265 + fprintf(stderr,"A replication server for cluster DB servers (based on PostgreSQL)\n\n");
23266 + fprintf(stderr,"usage: pgreplicate [-D path_of_config_file] [-W path_of_work_files] [-U login user][-l][-n][-v][-h][stop]\n");
23267 + fprintf(stderr," config file default path: %s/%s\n",path, PGREPLICATE_CONF_FILE);
23268 + fprintf(stderr," -l: print error logs in the log file.\n");
23269 + fprintf(stderr," -n: don't run in daemon mode.\n");
23270 + fprintf(stderr," -v: debug mode. need '-n' flag\n");
23271 + fprintf(stderr," -h: print this help\n");
23272 + fprintf(stderr," stop: stop pgreplicate\n");
23275 +/*--------------------------------------------------------------------
23279 + * main module of pgreplicate
23281 + * int argc: number of parameter
23282 + * char ** argv: value of parameter
23285 + *--------------------------------------------------------------------
23288 +main(int argc, char * argv[])
23290 + char * func = "main()";
23292 + char * r_path = NULL;
23293 + char * w_path = NULL;
23294 + bool detach = true;
23297 + r_path = getenv("PGDATA");
23298 + if (r_path == NULL)
23300 + while ((opt = getopt(argc, argv, "U:D:W:w:lvnh")) != -1)
23310 + PGRuserName = strdup(optarg);
23329 + fork_wait_time = atoi(optarg);
23330 + if (fork_wait_time < 0)
23331 + fork_wait_time = 0;
23351 + PGR_Data_Path = r_path;
23352 + if (w_path == NULL)
23354 + PGR_Write_Path = PGR_Data_Path;
23358 + PGR_Write_Path = w_path;
23361 + if (optind == (argc-1) && !strncasecmp(argv[optind],"stop",4))
23363 + stop_pgreplicate();
23366 + else if (optind == argc)
23368 + if (is_exist_pid_file())
23370 + fprintf(stderr,"pid file %s/%s found. is another pgreplicate running?", PGR_Write_Path, PGREPLICATE_PID_FILE);
23374 + else if (optind < argc)
23385 + PGR_Under_Replication_Server = true;
23386 + write_pid_file();
23387 + pgreplicate_pid = getpid();
23389 + PGRsignal(SIGINT, set_exit_processing);
23390 + PGRsignal(SIGQUIT, set_exit_processing);
23391 + PGRsignal(SIGTERM, set_exit_processing);
23392 + PGRsignal(SIGCHLD, child_wait);
23393 + PGRsignal(SIGPIPE, SIG_IGN);
23395 + if (PGRget_Conf_Data(PGR_Data_Path) != STATUS_OK)
23397 + show_error("%s:PGRget_Conf_Data error",func);
23398 + PGRreplicate_exit(0);
23400 + if (PGRinit_recovery() != STATUS_OK)
23402 + show_error("%s:PGRinit_recovery error",func);
23403 + PGRreplicate_exit(0);
23405 + if (PGRload_replication_id() != STATUS_OK)
23407 + show_error("%s:PGRload_replication_id error",func);
23408 + PGRreplicate_exit(0);
23411 + if ( PGR_Use_Replication_Log == true )
23413 +#ifdef PRINT_DEBUG
23414 + show_debug("Use Replication Log. Start PGR_RLog_Main()");
23416 + rlog_pid = PGR_RLog_Main();
23417 + if (rlog_pid < 0)
23419 + show_error("%s:PGR_RLog_Main failed",func);
23420 + PGRreplicate_exit(0);
23425 + * fork recovery process
23427 + PGRrecovery_main(fork_wait_time);
23430 + * fork lifecheck process
23432 + PGRlifecheck_main(fork_wait_time);
23435 + * call replicate module
23437 + Replicateion_Log->r_log_sock =-1;
23439 + if (fork_wait_time > 0) {
23440 +#ifdef PRINT_DEBUG
23441 + show_debug("replicate process: wait fork(): pid = %d", getpid());
23443 + sleep(fork_wait_time);
23446 + replicate_main();
23448 + PGRreplicate_exit(0);
23449 + return STATUS_OK;
23453 +set_exit_processing(int signo)
23455 + exit_signo = signo;
23456 + exit_processing = true;
23457 + PGRsignal(signo, SIG_IGN);
23460 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/pgreplicate.conf.sample pgcluster-1.7.0rc7/src/pgcluster/pgrp/pgreplicate.conf.sample
23461 --- postgresql-8.2.4/src/pgcluster/pgrp/pgreplicate.conf.sample 1970-01-01 01:00:00.000000000 +0100
23462 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/pgreplicate.conf.sample 2007-02-18 22:52:17.000000000 +0100
23464 +#=============================================================
23465 +# PGReplicate configuration file
23466 +#-------------------------------------------------------------
23467 +# file: pgreplicate.conf
23468 +#-------------------------------------------------------------
23469 +# This file controls:
23470 +# o which hosts & ports are cluster servers
23471 +# o which port is used for replication requests from cluster servers
23472 +#=============================================================
23474 +#-------------------------------------------------------------
23475 +# A setup of Cluster DB(s)
23477 +# o Host_Name : The host name of Cluster DB.
23478 +# Please write a host name by FQDN.
23479 +# DO NOT write IP address.
23480 +# o Port : The connection port with postmaster.
23481 +# o Recovery_Port : The connection port at the time of
23482 +# a recovery sequence.
23483 +#-------------------------------------------------------------
23484 +#<Cluster_Server_Info>
23485 +# <Host_Name> master.pgcluster.org </Host_Name>
23486 +# <Port> 5432 </Port>
23487 +# <Recovery_Port> 7001 </Recovery_Port>
23488 +#</Cluster_Server_Info>
23489 +#<Cluster_Server_Info>
23490 +# <Host_Name> clusterdb2.pgcluster.org </Host_Name>
23491 +# <Port> 5432 </Port>
23492 +# <Recovery_Port> 7001 </Recovery_Port>
23493 +#</Cluster_Server_Info>
23494 +#<Cluster_Server_Info>
23495 +# <Host_Name> cluster3.pgcluster.org </Host_Name>
23496 +# <Port> 5432 </Port>
23497 +# <Recovery_Port> 7001 </Recovery_Port>
23498 +#</Cluster_Server_Info>
23500 +#-------------------------------------------------------------
23501 +# A setup of Load Balance Server
23503 +# o Host_Name : The host name of a load balance server.
23504 +# Please write a host name by FQDN or IP address.
23505 +# o Recovery_Port : The connection port at the time of
23506 +# a recovery sequence .
23507 +#-------------------------------------------------------------
23508 +#<LoadBalance_Server_Info>
23509 +# <Host_Name> loadbalancer.pgcluster.org </Host_Name>
23510 +# <Recovery_Port> 6001 </Recovery_Port>
23511 +#</LoadBalance_Server_Info>
23513 +#------------------------------------------------------------
23514 +# A setup of the cascade connection between replication servers.
23515 +# When you do not use RLOG recovery, you can skip this setup
23517 +# o Host_Name : The host name of the upper replication server.
23518 +# Please write a host name by FQDN or IP address.
23519 +# o Port : The connection port with postmaster.
23520 +# o Recovery_Port : The connection port at the time of
23521 +# a recovery sequence .
23522 +#------------------------------------------------------------
23523 +#<Replicate_Server_Info>
23524 +# <Host_Name> upper_replicate.pgcluster.org </Host_Name>
23525 +# <Port> 8002 </Port>
23526 +# <Recovery_Port> 8102 </Recovery_Port>
23527 +#</Replicate_Server_Info>
23529 +#-------------------------------------------------------------
23530 +# A setup of a replication server
23532 +# o Host_Name : The host name of this replication server.
23533 +# Please write a host name by FQDN or IP address.
23534 +# o Replication_Port : Connection port for replication
23535 +# o Recovery_Port : Connection port for recovery
23536 +# o RLOG_Port : Connection port for replication log
23537 +# o Response_Mode : Timing at which a response is returned
23538 +# normal -- return result of DB which received the query
23539 +# reliable -- return result after waiting for response of
23540 +# all Cluster DBs.
23541 +# o Use_Replication_Log : Use replication log
23542 +# [yes/no]. default : no
23543 +# o Replication_Timeout : Timeout of each replication response
23544 +# o LifeCheck_Timeout : Timeout of the lifecheck response
23545 +# o LifeCheck_Interval : Interval time of the lifecheck
23547 +# 10s -- 10 seconds
23548 +# 10min -- 10 minutes
23550 +#-------------------------------------------------------------
23551 +<Host_Name> replicate.pgcluster.org </Host_Name>
23552 +<Replication_Port> 8001 </Replication_Port>
23553 +<Recovery_Port> 8101 </Recovery_Port>
23554 +<RLOG_Port> 8301 </RLOG_Port>
23555 +<Response_Mode> normal </Response_Mode>
23556 +<Use_Replication_Log> no </Use_Replication_Log>
23557 +<Replication_Timeout> 1min </Replication_Timeout>
23558 +<LifeCheck_Timeout> 3s </LifeCheck_Timeout>
23559 +<LifeCheck_Interval> 15s </LifeCheck_Interval>
23560 +#-------------------------------------------------------------
23561 +# A setup of log files
23563 +# o File_Name : Log file name with full path
23564 +# o File_Size : Maximum size of each log file
23565 +# Please specify in a number and unit(K or M)
23569 +# o Rotate : Rotation times
23570 +# If specified 0, old versions are removed.
23571 +#-------------------------------------------------------------
23573 + <File_Name> /tmp/pgreplicate.log </File_Name>
23574 + <File_Size> 1M </File_Size>
23575 + <Rotate> 3 </Rotate>
23577 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/pgreplicate.h pgcluster-1.7.0rc7/src/pgcluster/pgrp/pgreplicate.h
23578 --- postgresql-8.2.4/src/pgcluster/pgrp/pgreplicate.h 1970-01-01 01:00:00.000000000 +0100
23579 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/pgreplicate.h 2007-03-01 16:27:56.000000000 +0100
23581 +/*--------------------------------------------------------------------
23585 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
23586 + *--------------------------------------------------------------------
23588 +#ifndef PGREPLICATE_H
23589 +#define PGREPLICATE_H
23591 +#define PGREPLICATE_VERSION "1.7.0rc7"
23593 +#include "lib/dllist.h"
23594 +#include "lib/stringinfo.h"
23595 +#include "../libpgc/libpgc.h"
23597 +/* cascade packet id */
23598 +#define CMD_SYS_CASCADE 'C'
23599 +#define CMD_STS_TO_UPPER 'U'
23600 +#define CMD_STS_TO_LOWER 'L'
23601 +#define CMD_TYPE_ADD 'A'
23602 +#define CMD_TYPE_DELTE 'D'
23603 +#define CMD_TYPE_UPDATE_ALL 'A'
23605 +/* log packet id */
23606 +#define CMD_SYS_LOG 'L'
23607 +#define CMD_STS_DELETE_QUERY 'q'
23608 +#define CMD_STS_DELETE_TRANSACTION 't'
23609 +#define CMD_STS_UPDATE_QUERY 'r'
23610 +#define CMD_STS_UPDATE_TRANSACTION 'u'
23612 +#define INIT_TRANSACTION_TBL_NUM (12)
23613 +#define FILENAME_MAX_LENGTH (256)
23614 +#define MAX_DB_SERVER (32)
23615 +#define MAX_CONNECTIONS (128)
23616 +#define MAX_QUEUE_FILE_SIZE (0x40000000)
23617 +#define PGR_MAX_TICKETS (0x7FFFFFFF)
23618 +#define PGR_MAX_QUERY_ID (0x7FFFFFFF)
23619 +#define PGR_CONNECT_RETRY_TIME (3)
23620 +#define PGR_EXEC_RETRY_TIME (5)
23621 +#define DB_TBL_FREE (0)
23622 +#define DB_TBL_INIT (1)
23623 +#define DB_TBL_USE (2)
23624 +#define DB_TBL_ERROR (-1)
23625 +#define DB_TBL_TOP (10)
23626 +#define DB_TBL_END (11)
23627 +#define RECOVERY_FILE_MTYPE (1)
23628 +#define QUERY_LOG_MTYPE (2)
23629 +#define PGREPLICATE_CONF_FILE "pgreplicate.conf"
23630 +#define PGREPLICATE_LOG_FILE "pgreplicate.log"
23631 +#define PGREPLICATE_STATUS_FILE "pgreplicate.sts"
23632 +#define PGREPLICATE_PID_FILE "pgreplicate.pid"
23633 +#define PGREPLICATE_RID_FILE "pgreplicate.rid"
23634 +#define RECOVERY_QUEUE_FILE "pgr_recovery"
23635 +/* setup data tag of the configuration file */
23636 +#define CLUSTER_SERVER_TAG "Cluster_Server_Info"
23637 +#define LOAD_BALANCE_SERVER_TAG "LoadBalance_Server_Info"
23638 +#define REPLICATE_PORT_TAG "Replication_Port"
23639 +#define RECOVERY_PORT_TAG "Recovery_Port"
23640 +#define LIFECHECK_PORT_TAG "LifeCheck_Port"
23641 +#define RLOG_PORT_TAG "RLOG_Port"
23642 +#define RESPONSE_MODE_TAG "Response_Mode"
23643 +#define RESPONSE_MODE_FAST "fast"
23644 +#define RESPONSE_MODE_NORMAL "normal"
23645 +#define RESPONSE_MODE_RELIABLE "reliable"
23646 +#define USE_REPLICATION_LOG_TAG "Use_Replication_Log"
23647 +#define RESERVED_CONNECTIONS_TAG "Reserved_Connections"
23648 +/* semaphore number of recovery queue */
23649 +#define SEM_NUM_OF_RECOVERY (1)
23650 +#define SEM_NUM_OF_RECOVERY_QUEUE (2)
23651 +/* semaphore number of lock tickets */
23652 +#define SEM_NUM_OF_LOCK (1)
23653 +#define STATUS_LOCK_CONFLICT (2)
23654 +#define STATUS_DEADLOCK_DETECT (3)
23655 +#define STATUS_ABORTED (4)
23656 +#define STATUS_NOT_YET_REPLICATE (5)
23657 +#define STATUS_ALREADY_REPLICATED (6)
23658 +#define STATUS_SKIP_REPLICATE (7)
23659 +#define PGR_NOWAIT_ANSWER (0)
23660 +#define PGR_WAIT_ANSWER (1)
23661 +#define LOOP_CONTINUE (0)
23662 +#define LOOP_END (1)
23663 +#define LOWER_CASCADE (1)
23664 +#define UPPER_CASCADE (2)
23665 +#define ALL_CASCADE (3)
23666 +#define NOTICE_SYSTEM_CALL_TYPE (10)
23667 +#define RECOVERY_QUERY_TYPE (20)
23669 +#define PGR_TIME_OUT (60)
23670 +#define PGR_SEND_RETRY_CNT (100)
23671 +#define PGR_SEND_WAIT_MSEC (500)
23672 +#define PGR_RECV_RETRY_CNT (100)
23673 +#define PGR_RECV_WAIT_MSEC (500)
23674 +#define PGR_SEM_UNLOCK_WAIT_MSEC (100)
23675 +#define PGR_SEM_LOCK_WAIT_MSEC (500)
23676 +#define PGR_RECOVERY_RETRY_CNT (6000)
23677 +#define PGR_RECOVERY_WAIT_MSEC (500)
23678 +#define PGR_CHECK_POINT (300)
23680 +#define PGR_RECOVERY_1ST_STAGE (1)
23681 +#define PGR_RECOVERY_2ND_STAGE (2)
23683 +#define IDLE_MODE (0)
23684 +#define BUSY_MODE (1)
23687 + * connection table for transaction query
23692 + int transaction_count;
23693 + unsigned short port;
23694 + unsigned short pid;
23695 + unsigned int hostIP;
23696 + unsigned int srcHostIP;
23697 + char host[HOSTNAME_MAX_LENGTH];
23698 + char srcHost[HOSTNAME_MAX_LENGTH];
23699 + char dbName[DBNAME_MAX_LENGTH];
23701 + bool in_transaction;
23706 + * cluster server table
23710 + char hostName[HOSTNAME_MAX_LENGTH];
23711 + char resolvedName[24];
23713 + int recoveryPort;
23715 + int transaction_count;
23722 + int current_queue_no;
23723 +} RecoveryQueueInf;
23727 + * host table for recovery request
23730 + char hostName[HOSTNAME_MAX_LENGTH];
23731 + char resolvedName[24];
23733 + int recoveryPort;
23735 + int recovery_sock;
23739 + * status table for recovery
23743 + int transaction_count;
23744 + int recovery_status;
23745 + unsigned int replication_id;
23746 + HostTbl target_host;
23747 + int read_queue_no;
23748 + int write_queue_no;
23750 + unsigned int file_size;
23751 + char write_file[FILENAME_MAX_LENGTH];
23752 + char read_file[FILENAME_MAX_LENGTH];
23753 +} RecoveryStatusInf;
23758 +} RecoveryQueueFile;
23762 + unsigned int replicationId;
23764 +} RecoveryQueueQuery;
23767 + unsigned int entry_ticket;
23768 + unsigned int lock_wait_queue_length;
23773 + int response_mode;
23774 + int current_cluster;
23778 + ReplicateHeader * header;
23785 + ReplicateServerInfo * top;
23786 + ReplicateServerInfo * end;
23787 + ReplicateServerInfo * lower;
23788 + ReplicateServerInfo * upper;
23789 + ReplicateServerInfo * myself;
23797 + int commit_log_num;
23799 + ReplicateHeader header;
23804 + char * RLog_Sock_Path;
23805 + uint16_t RLog_Port_Number;
23807 + ReplicateHeader * header;
23809 +} ReplicationLogInf;
23812 + char hostName[HOSTNAME_MAX_LENGTH];
23815 + uint32_t request_id;
23819 + QueryLogID query_log_id;
23822 +} ConfirmQueryList;
23825 + ReplicateHeader * header;
23828 + int current_cluster;
23829 + int transaction_count;
23830 + HostTbl * host_ptr;
23831 + TransactionTbl *transaction_tbl;
23834 +/* replication server data */
23835 +extern char * ResolvedName;
23836 +extern uint16_t Port_Number;
23837 +extern uint16_t LifeCheck_Port_Number;
23838 +extern uint16_t Recovery_Port_Number;
23839 +extern int Reserved_Connections;
23840 +extern bool PGR_Parse_Session_Started;
23841 +extern int PGR_Replication_Timeout;
23843 +/* global tables */
23844 +extern HostTbl * Host_Tbl_Begin;
23845 +extern Dllist * Transaction_Tbl_Begin;
23846 +extern TransactionTbl * Transaction_Tbl_End;
23847 +extern RecoveryTbl * LoadBalanceTbl;
23848 +extern RecoveryStatusInf * Recovery_Status_Inf;
23849 +extern LockWaitInf * Lock_Wait_Tbl;
23850 +extern ReplicateHeader * PGR_Log_Header;
23851 +extern ReplicateServerInfo * Cascade_Tbl;
23852 +extern CascadeInf * Cascade_Inf;
23853 +extern CommitLogInf * Commit_Log_Tbl;
23854 +extern QueryLogType * Query_Log_Top;
23855 +extern QueryLogType * Query_Log_End;
23856 +extern ReplicationLogInf * Replicateion_Log;
23857 +extern int RecoveryShmid;
23858 +extern int ReplicateSerializationShmid;
23859 +extern int RecoveryMsgShmid;
23860 +extern int *RecoveryMsgid;
23861 +extern int HostTblShmid;
23862 +extern int LockWaitTblShmid;
23863 +extern int CascadeTblShmid;
23864 +extern int CascadeInfShmid;
23865 +extern int CommitLogShmid;
23866 +extern int MaxBackends;
23867 +extern char * PGR_Result;
23869 +extern int RecoverySemID;
23870 +extern int CascadeSemID;
23871 +extern int LockSemID;
23872 +extern int VacuumSemID;
23873 +extern char * PGR_Data_Path;
23874 +extern char * PGR_Write_Path;
23875 +extern FILE * LogFp;
23876 +extern FILE * StatusFp;
23877 +extern FILE * RidFp;
23878 +extern FILE * QueueFp;
23879 +extern int Log_Print;
23880 +extern int Debug_Print;
23881 +extern char * Function;
23882 +extern int IS_SESSION_AUTHORIZATION;
23883 +extern ResponseInf * PGR_Response_Inf;
23884 +extern bool StartReplication[MAX_DB_SERVER];
23885 +extern bool PGR_Cascade;
23886 +extern bool PGR_Use_Replication_Log;
23887 +extern bool PGR_AutoCommit;
23888 +extern unsigned int * PGR_ReplicateSerializationID;
23889 +extern unsigned int * PGR_Send_Query_ID;
23890 +extern unsigned int PGR_Query_ID;
23891 +extern volatile bool exit_processing;
23892 +extern RecoveryQueueInf RecoveryQueue;
23893 +extern int pgreplicate_pid;
23894 +extern char * PGRuserName;
23895 +extern int exit_signo;
23897 +extern int ReplicateSock;
23899 +/* smart shutdown */
23900 +extern int Idle_Flag;
23901 +extern volatile bool Exit_Request;
23904 + * external prototype in main.c
23906 +extern void child_wait(SIGNAL_ARGS);
23909 + * external prototype in conf.c
23911 +extern int PGRget_Conf_Data(char * path);
23914 + * external prototype in replicate.c
23916 +extern int PGRset_replication_id(uint32_t rid);
23917 +extern bool PGRis_same_host(char * host1, unsigned short port1 , char * host2, unsigned short port2);
23918 +extern HostTbl * PGRadd_HostTbl(HostTbl * conf_data, int useFlag);
23919 +extern HostTbl * PGRget_master(void);
23920 +extern void PGRset_recovery_status(int status);
23921 +extern int PGRget_recovery_status(void);
23922 +extern int PGRcheck_recovered_host(void);
23923 +extern int PGRset_recovered_host(HostTbl * target,int useFlag);
23924 +extern int PGRinit_recovery(void);
23925 +extern void PGRexit_subprocess(int signo);
23926 +extern void PGRreplicate_exit(int exit_status);
23927 +extern int PGRsend_replicate_packet_to_server( HostTbl * host_ptr, ReplicateHeader * header, char *query , char * result,unsigned int replicationId, bool recovery);
23928 +extern int PGRreplicate_packet_send_each_server( HostTbl * ptr, bool return_response, ReplicateHeader * header, char * query,int dest);
23929 +extern HostTbl * PGRget_HostTbl(char * hostName,int port);
23930 +extern int PGRset_queue(ReplicateHeader * header,char * query);
23931 +extern int PGRset_host_status(HostTbl * host_ptr,int status);
23932 +extern void PGRclear_connections(void);
23933 +extern void PGRdestroy_transaction_table(void);
23934 +extern void PGRsem_unlock( int semid, short sem_num );
23935 +extern void PGRsem_lock( int semid, short sem_num );
23936 +extern int PGRdo_replicate(int sock,ReplicateHeader *header, char * query);
23937 +extern int PGRreturn_result(int dest, char * result, int wait);
23938 +extern int PGRreplicate_packet_send( ReplicateHeader * header, char * query,int dest,int recovery_status);
23939 +extern char * PGRread_packet(int sock, ReplicateHeader *header);
23940 +extern void PGRnotice_replication_server(char * hostName, unsigned short portNumber,unsigned short recoveryPortNumber, unsigned short lifecheckPortNumber, char * userName);
23941 +extern char * PGRread_query(int sock, ReplicateHeader *header);
23942 +extern int PGRsync_oid(ReplicateHeader *header);
23943 +extern unsigned int PGRget_next_query_id(void);
23944 +extern int PGRinit_transaction_table(void);
23945 +extern int replicate_packet_send_internal(ReplicateHeader * header, char * query,int dest,int recovery_status,bool isHeldLock);
23946 +extern int PGRsync_oid(ReplicateHeader *header);
23947 +extern int PGRload_replication_id(void);
23948 +extern PGconn * PGRcreateConn( char * host, char * port,char * database, char * userName, char * password, char * md5Salt, char * cryptSalt );
23950 + * external prototype in recovery.c
23952 +extern int PGRsend_load_balance_packet(RecoveryPacket * packet);
23953 +extern void PGRrecovery_main(int fork_wait_time);
23954 +extern FILE * PGRget_recovery_queue_file_for_write(void);
23955 +extern FILE * PGRget_recovery_queue_file_for_read(int next);
23958 + * external prototype in rlog.c
23960 +extern int PGRwrite_rlog(ReplicateHeader * header, char * query);
23961 +extern ReplicateHeader * PGRget_requested_query(ReplicateHeader * header);
23962 +extern void PGRreconfirm_commit(int sock, ReplicateHeader * header);
23963 +extern void PGRset_rlog(ReplicateHeader * header, char * query);
23964 +extern void PGRunset_rlog(ReplicateHeader * header, char * query);
23965 +extern int PGRresend_rlog_to_db(void);
23966 +extern void PGRreconfirm_query(int sock, ReplicateHeader * header);
23967 +extern pid_t PGR_RLog_Main(void);
23968 +extern int PGRcreate_send_rlog_socket(void);
23969 +extern int PGRsend_rlog_packet(int sock,ReplicateHeader * header, const char * query_string);
23970 +extern int PGRrecv_rlog_result(int sock,void * result, int size);
23971 +extern int PGRsend_rlog_to_local(ReplicateHeader * header,char * query);
23972 +extern int PGRget_rlog_header(ReplicateHeader * header);
23975 + * external prototype in cascade.c
23977 +extern int PGRstartup_cascade(void);
23978 +extern int PGRsend_lower_cascade(ReplicateHeader * header, char * query);
23979 +extern int PGRsend_upper_cascade(ReplicateHeader * header, char * query);
23980 +extern int PGRwait_answer_cascade(int sock);
23981 +extern ReplicateServerInfo * PGRget_lower_cascade(void);
23982 +extern ReplicateServerInfo * PGRget_upper_cascade(void);
23983 +extern void PGRset_cascade_server_status(ReplicateServerInfo * cascade, int status);
23984 +extern ReplicateServerInfo * PGRrecv_cascade_answer(ReplicateServerInfo * cascade,ReplicateHeader * header);
23985 +extern int PGRsend_cascade(int sock , ReplicateHeader * header, char * query);
23986 +extern int PGRcascade_main(int sock, ReplicateHeader * header, char * query);
23987 +extern int PGRwait_notice_rlog_done(void);
23988 +extern int PGRsend_notice_rlog_done(int sock);
23989 +extern int PGRsend_notice_quit(void);
23992 + * external prototype in pqformat.c
23994 +extern const char * pq_getmsgstring(StringInfo msg);
23995 +extern unsigned int pq_getmsgint(StringInfo msg, int b);
23996 +extern void pq_copymsgbytes(StringInfo msg, char *buf, int datalen);
23997 +extern const char * pq_getmsgbytes(StringInfo msg, int datalen);
23998 +extern int pq_getmsgbyte(StringInfo msg);
24001 + * external prototype in lifecheck.c
24003 +extern int PGRlifecheck_main(int fork_wait_time);
24005 +#endif /* PGREPLICATE_H */
24006 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/pqformat.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/pqformat.c
24007 --- postgresql-8.2.4/src/pgcluster/pgrp/pqformat.c 1970-01-01 01:00:00.000000000 +0100
24008 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/pqformat.c 2007-02-18 22:52:17.000000000 +0100
24010 +/*-------------------------------------------------------------------------
24012 + * Routines for formatting and parsing frontend/backend messages
24014 + * These modules were copied from src/backend/libpq/pqformat.c.
24015 + * The original modules depend on shared modules and macros,
24016 + * which makes them difficult to link into the replication server directory.
24017 + * Therefore, these modules were customized
24018 + * (shared modules and macros removed).
24020 + * Original source code is under the following copyright
24022 + * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
24023 + * Portions Copyright (c) 1994, Regents of the University of California
24025 + *-------------------------------------------------------------------------
24028 + * INTERFACE ROUTINES
24029 + * Message parsing after input:
24030 + * pq_getmsgbyte - get a raw byte from a message buffer
24031 + * pq_getmsgint - get a binary integer from a message buffer
24032 + * pq_getmsgbytes - get raw data from a message buffer
24033 + * pq_copymsgbytes - copy raw data from a message buffer
24034 + * pq_getmsgstring - get a null-terminated text string (with conversion)
24037 +/* --------------------------------
24038 + * pq_getmsgstring - get a null-terminated text string (with conversion)
24040 + * May return a pointer directly into the message buffer, or a pointer
24041 + * to a palloc'd conversion result.
24042 + * --------------------------------
24045 +#include "postgres.h"
24046 +#include <errno.h>
24047 +#include <sys/types.h>
24048 +#include <sys/param.h>
24049 +#include <netinet/in.h>
24050 +#include <arpa/inet.h>
24051 +#ifdef HAVE_ENDIAN_H
24052 +#include <endian.h>
24055 +#include "mb/pg_wchar.h"
24057 +#include "libpq-fe.h"
24058 +#include "libpq-int.h"
24059 +#include "fe-auth.h"
24060 +#include "replicate_com.h"
24061 +#include "pgreplicate.h"
24063 +const char * pq_getmsgstring(StringInfo msg);
24064 +unsigned int pq_getmsgint(StringInfo msg, int b);
24065 +void pq_copymsgbytes(StringInfo msg, char *buf, int datalen);
24066 +const char * pq_getmsgbytes(StringInfo msg, int datalen);
24067 +int pq_getmsgbyte(StringInfo msg);
24070 +pq_getmsgstring(StringInfo msg)
24079 + str = &msg->data[msg->cursor];
24081 + * It's safe to use strlen() here because a StringInfo is guaranteed to
24082 + * have a trailing null byte. But check we found a null inside the
24085 + slen = strlen(str);
24086 + if (msg->cursor + slen >= msg->len)
24090 + msg->cursor += slen + 1;
24096 +/* --------------------------------
24097 + * pq_getmsgint - get a binary integer from a message buffer
24099 + * Values are treated as unsigned.
24100 + * --------------------------------
24103 +pq_getmsgint(StringInfo msg, int b)
24105 + unsigned int result;
24106 + unsigned char n8;
24113 + pq_copymsgbytes(msg, (char *) &n8, 1);
24117 + pq_copymsgbytes(msg, (char *) &n16, 2);
24118 + result = ntohs(n16);
24121 + pq_copymsgbytes(msg, (char *) &n32, 4);
24122 + result = ntohl(n32);
24125 + result = 0; /* keep compiler quiet */
24131 +/* --------------------------------
24132 + * pq_copymsgbytes - copy raw data from a message buffer
24134 + * Same as above, except data is copied to caller's buffer.
24135 + * --------------------------------
24138 +pq_copymsgbytes(StringInfo msg, char *buf, int datalen)
24140 + if (datalen < 0 || datalen > (msg->len - msg->cursor))
24144 + memcpy(buf, &msg->data[msg->cursor], datalen);
24145 + msg->cursor += datalen;
24149 +/* --------------------------------
24150 + * pq_getmsgbytes - get raw data from a message buffer
24152 + * Returns a pointer directly into the message buffer; note this
24153 + * may not have any particular alignment.
24154 + * --------------------------------
24157 +pq_getmsgbytes(StringInfo msg, int datalen)
24159 + const char *result;
24161 + if (datalen < 0 || datalen > (msg->len - msg->cursor))
24165 + result = &msg->data[msg->cursor];
24166 + msg->cursor += datalen;
24170 +/* --------------------------------
24171 + * pq_getmsgbyte - get a raw byte from a message buffer
24172 + * --------------------------------
24175 +pq_getmsgbyte(StringInfo msg)
24177 + if (msg->cursor >= msg->len)
24181 + return (unsigned char) msg->data[msg->cursor++];
24183 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/recovery.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/recovery.c
24184 --- postgresql-8.2.4/src/pgcluster/pgrp/recovery.c 1970-01-01 01:00:00.000000000 +0100
24185 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/recovery.c 2007-02-18 22:52:17.000000000 +0100
24187 +/*--------------------------------------------------------------------
24192 + * This file is composed of the functions to call with the source
24193 + * at pgreplicate for the recovery.
24195 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
24196 + *--------------------------------------------------------------------
24198 +#include "postgres.h"
24200 +#include <stdio.h>
24201 +#include <unistd.h>
24202 +#include <signal.h>
24203 +#include <sys/wait.h>
24204 +#include <sys/types.h>
24205 +#include <sys/stat.h>
24206 +#include <sys/socket.h>
24207 +#include <sys/ipc.h>
24208 +#include <sys/shm.h>
24209 +#include <sys/msg.h>
24210 +#include <netdb.h>
24211 +#include <netinet/in.h>
24212 +#include <errno.h>
24213 +#include <fcntl.h>
24215 +#include <arpa/inet.h>
24216 +#include <sys/param.h>
24217 +#include <sys/file.h>
24219 +#ifdef HAVE_SYS_SELECT_H
24220 +#include <sys/select.h>
24223 +#ifdef HAVE_CRYPT_H
24224 +#include <crypt.h>
24227 +#include "miscadmin.h"
24228 +#include "nodes/nodes.h"
24230 +#include "libpq-fe.h"
24231 +#include "libpq/libpq-fs.h"
24232 +#include "libpq-int.h"
24233 +#include "fe-auth.h"
24235 +#include "access/xact.h"
24236 +#include "replicate_com.h"
24237 +#include "pgreplicate.h"
24241 +#include "win32.h"
24243 +#ifdef HAVE_NETINET_TCP_H
24244 +#include <netinet/tcp.h>
24246 +#include <arpa/inet.h>
24249 +#ifdef HAVE_CRYPT_H
24250 +#include <crypt.h>
24254 +#include "mb/pg_wchar.h"
24256 +#include "pgreplicate.h"
24259 +/*--------------------------------------
24260 + * GLOBAL VARIABLE DECLARATION
24261 + *--------------------------------------
24263 +RecoveryPacket MasterPacketData;
24264 +RecoveryTbl Master;
24265 +RecoveryTbl Target;
24268 +/*--------------------------------------
24269 + * PROTOTYPE DECLARATION
24270 + *--------------------------------------
24272 +static int read_packet(int sock,RecoveryPacket * packet);
24273 +static int read_packet_from_master( RecoveryTbl * host, RecoveryPacket * packet );
24274 +static int send_recovery_packet(int sock, RecoveryPacket * packet);
24275 +static int send_packet(RecoveryTbl * host, RecoveryPacket * packet );
24276 +static void start_recovery_prepare(void);
24277 +static void reset_recovery_prepare(void);
24278 +static void finish_recovery(void);
24279 +static bool first_setup_recovery(int * sock, RecoveryPacket * packet);
24280 +static int wait_transaction_count_clear(void);
24281 +static bool second_setup_recovery (RecoveryPacket * packet);
24282 +static void pgrecovery_loop(int fd);
24283 +static int PGRsend_queue(RecoveryTbl * master, RecoveryTbl * target);
24284 +static int send_vacuum(HostTbl *host, char * userName, int stage);
24285 +static char * read_queue_file(FILE * fp, ReplicateHeader * header, char * query);
24287 +#ifdef PRINT_DEBUG
24288 +static void show_recovery_packet(RecoveryPacket * packet);
24291 +int PGRsend_load_balance_packet(RecoveryPacket * packet);
24292 +void PGRrecovery_main(int fork_wait_time);
24294 +/*-----------------------------------------------------------
24298 + * Read recovery packet data
24300 + * int sock : socket
24301 + * RecoveryPacket * packet : read packet buffer
24305 + *-----------------------------------------------------------
24308 +read_packet(int sock,RecoveryPacket * packet) /* receive one whole RecoveryPacket from sock into *packet */
24310 +#ifdef PRINT_DEBUG
24311 + char * func = "read_packet()"; /* NOTE(review): func is also passed to show_error() further down; confirm that still compiles when PRINT_DEBUG is undefined */
24314 + char * read_ptr = NULL; /* byte-wise view of *packet used as the recv() destination */
24315 + int read_size = 0; /* offset into the packet at which the next recv() writes */
24316 + int packet_size = 0; /* total bytes expected: sizeof(RecoveryPacket) */
24318 + if (packet == NULL) /* no buffer to read into */
24322 + read_ptr = (char*)packet;
24323 + packet_size = sizeof(RecoveryPacket);
24326 + r = recv(sock,read_ptr + read_size ,packet_size - read_size, MSG_WAITALL); /* ask for all bytes still missing; MSG_WAITALL requests the full amount in one call */
24329 + if (errno == EINTR || errno == EAGAIN) /* transient condition -- presumably retried rather than reported; the branch body is not visible here */
24333 + show_error("%s:recv failed: %d(%s)",func, errno, strerror(errno));
24340 + if (read_size == packet_size) /* complete packet received */
24342 +#ifdef PRINT_DEBUG
24343 + show_debug("%s:receive packet",func);
24344 + show_recovery_packet(packet);
24346 + return read_size; /* success: number of bytes read, equal to sizeof(RecoveryPacket) */
24349 + else /* r == 0 */
24351 + show_error("%s:unexpected EOF", func); /* recv() returned 0: the peer closed the connection before a full packet arrived */
24359 +read_packet_from_master( RecoveryTbl * host, RecoveryPacket * packet ) /* wait up to RECOVERY_TIMEOUT seconds for host->recovery_sock to become readable, then read one packet from it */
24361 + int read_size = 0;
24364 + struct timeval timeout;
24368 + timeout.tv_sec = RECOVERY_TIMEOUT; /* select() timeout, whole seconds only */
24369 + timeout.tv_usec = 0;
24372 + * Wait for something to happen.
24375 + FD_SET(host->recovery_sock,&rmask);
24376 + rtn = select(host->recovery_sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout); /* watch only the recovery socket, for readability */
24378 + if (rtn == 0) /* timeout */
24383 + if (rtn && FD_ISSET(host->recovery_sock, &rmask)) /* NOTE(review): rtn is also non-zero when select() fails (-1); confirm rmask can be trusted in that case */
24385 + read_size = read_packet(host->recovery_sock, packet);
24386 + return read_size; /* whatever read_packet() returned */
24392 +send_recovery_packet(int sock, RecoveryPacket * packet) /* write one whole RecoveryPacket to sock; STATUS_OK once every byte is sent, STATUS_ERROR on failure */
24394 + char *func = "send_recovery_packet";
24396 + int send_size= 0; /* offset into the packet from which the next send() starts */
24397 + int buf_size = 0; /* total bytes to send: sizeof(RecoveryPacket) */
24400 + send_ptr = (char *)packet;
24401 + buf_size = sizeof(RecoveryPacket);
24405 + s = send(sock, send_ptr + send_size,buf_size - send_size ,0); /* send whatever part of the packet is still outstanding */
24408 + if (errno == EINTR || errno == EAGAIN) /* transient condition -- presumably retried; the branch body is not visible here */
24411 + show_error("%s:send error: %d(%s)", func, errno, strerror(errno));
24412 + return STATUS_ERROR;
24416 + show_error("%s:unexpected EOF", func);
24417 + return STATUS_ERROR;
24421 + if (send_size == buf_size) /* whole packet written */
24422 + return STATUS_OK;
24427 +send_packet(RecoveryTbl * host, RecoveryPacket * packet ) /* send packet to host over host->recovery_sock, connecting or reconnecting as needed */
24429 + char * func = "send_packet()";
24432 + if (host->recovery_sock == -1) /* -1 marks "not connected": open a connection first */
24434 + while(PGR_Create_Socket_Connect(&(host->recovery_sock), host->hostName , host->recoveryPort) != STATUS_OK )
24436 + if (count > MAX_RETRY_TIMES ) /* connect attempts are bounded by MAX_RETRY_TIMES */
24438 + show_error("%s:host[%s] port[%d]PGR_Create_Socket_Connect failed",func,host->hostName, host->recoveryPort);
24439 + return STATUS_ERROR;
24445 + while (send_recovery_packet(host->recovery_sock,packet) != STATUS_OK) /* on a failed send: drop the socket, reconnect and try again */
24447 + close(host->recovery_sock);
24448 + host->recovery_sock = -1;
24449 + PGR_Create_Socket_Connect(&(host->recovery_sock), host->hostName , host->recoveryPort); /* result is not checked here; presumably a failed reconnect surfaces as another failed send */
24450 +#ifdef PRINT_DEBUG
24451 + show_debug("%s:PGR_Create_Socket_Connectsock[%d] host[%s] port[%d]",
24452 + func,host->recovery_sock,host->hostName,host->recoveryPort);
24454 + if (count > PGR_CONNECT_RETRY_TIME ) /* NOTE(review): this loop is bounded by PGR_CONNECT_RETRY_TIME while the connect loop above uses MAX_RETRY_TIMES -- confirm the difference is intended */
24457 + show_error("%s:send failed and PGR_Create_Socket_Connect failed",func);
24458 + return STATUS_ERROR;
24462 + return STATUS_OK;
24466 +start_recovery_prepare(void) /* set the recovery status to RECOVERY_PREPARE_START */
24468 + PGRset_recovery_status (RECOVERY_PREPARE_START);
24472 +reset_recovery_prepare(void) /* put the recovery status back to RECOVERY_INIT; called from the error paths of the setup functions */
24474 + PGRset_recovery_status (RECOVERY_INIT);
24478 +finish_recovery(void) /* recovery is over: set the recovery status to RECOVERY_INIT (same visible effect as reset_recovery_prepare()) */
24480 + PGRset_recovery_status (RECOVERY_INIT);
24484 +PGRsend_load_balance_packet(RecoveryPacket * packet) /* send packet to the entries of LoadBalanceTbl, using a fresh connection for each send */
24486 + char * func = "PGRsend_load_balance_packet()";
24487 + RecoveryTbl * lbp; /* cursor over LoadBalanceTbl */
24490 + lbp = LoadBalanceTbl;
24491 + if (lbp == (RecoveryTbl *)NULL)
24493 + show_error("%s:recovery table is NULL",func);
24494 + return STATUS_ERROR;
24496 + while (lbp->hostName[0] != 0) /* the table ends at the first entry with an empty hostName */
24498 + if (lbp->recovery_sock != -1) /* close any socket left open so send_packet() connects afresh */
24500 + close(lbp->recovery_sock);
24501 + lbp->recovery_sock = -1;
24503 +#ifdef PRINT_DEBUG
24504 + show_debug("%s:host[%s] port[%d]",func,lbp->hostName,lbp->recoveryPort);
24506 + status = send_packet(lbp,packet); /* NOTE(review): status is not examined in the visible code and STATUS_OK is returned below regardless -- confirm */
24507 + if (lbp->recovery_sock != -1) /* do not keep the connection open after the send */
24509 + close(lbp->recovery_sock);
24510 + lbp->recovery_sock = -1;
24514 + return STATUS_OK;
24518 +send_vacuum(HostTbl *host, char * userName, int stage) /* run a vacuum on host as userName against template1: "VACUUM" for PGR_RECOVERY_1ST_STAGE, presumably "VACUUM FULL" otherwise; returns the status of the send */
24520 + int rtn = STATUS_OK;
24521 + ReplicateHeader header;
24522 + char * query = NULL;
24524 + if (stage == PGR_RECOVERY_1ST_STAGE)
24526 + query = strdup("VACUUM"); /* NOTE(review): the strdup() result is not checked before strlen() below in the visible code */
24530 + query = strdup("VACUUM FULL");
24532 + memset(&header,0,sizeof(header)); /* zero-fill first: the bounded copies below rely on this for NUL termination */
24533 + header.query_size = strlen(query) + 1; /* NOTE(review): stored in host byte order, whereas PGRsend_queue() reads this field with ntohl(); confirm which order the callee expects */
24534 + strncpy(header.dbName,"template1",sizeof(header.dbName) - 1); /* size-1 keeps the final zero byte of the field intact */
24535 + strncpy(header.userName,userName,sizeof(header.userName) - 1); /* userName comes from a received packet; never let it fill the field without a terminator */
24536 + header.cmdSys = CMD_SYS_REPLICATE;
24537 + header.cmdSts = CMD_STS_QUERY;
24538 + header.cmdType = CMD_TYPE_VACUUM;
24539 + header.pid = getpid();
24540 + header.query_id = getpid(); /* the process id doubles as the query id */
24541 + header.isAutoCommit=1;
24542 + rtn = PGRsend_replicate_packet_to_server(host,&header,query,PGR_Result,0, true);
24543 + if (query !=NULL)
24549 +first_setup_recovery(int * sock, RecoveryPacket * packet)
24551 + char * func = "first_setup_recovery()";
24553 + HostTbl * master = (HostTbl *)NULL;
24554 + bool loop_end = false;
24555 + HostTbl host_tbl;
24556 + char * userName = NULL;
24559 + memset(Target.hostName,0,sizeof(Target.hostName));
24560 + strncpy(Target.hostName,packet->hostName,sizeof(Target.hostName));
24561 + ip = PGRget_ip_by_name(Target.hostName);
24562 + sprintf(Target.resolvedName,
24565 + (ip >> 8) & 0xff ,
24566 + (ip >> 16) & 0xff ,
24567 + (ip >> 24) & 0xff );
24568 + Target.port = ntohs(packet->port);
24569 + Target.recoveryPort = ntohs(packet->recoveryPort);
24570 + Target.sock = *sock;
24571 + Target.recovery_sock = *sock;
24572 +#ifdef PRINT_DEBUG
24573 + show_debug("%s:1st setup target %s",func,Target.hostName);
24574 + show_debug("%s:1st setup port %d",func,Target.port);
24577 + * check another recovery process
24579 + if (PGRget_recovery_status() != RECOVERY_INIT)
24582 + * recovery process is already running
24584 +#ifdef PRINT_DEBUG
24585 + show_debug("%s:already recovery job runing",func);
24587 + memset(packet,0,sizeof(packet));
24588 + PGRset_recovery_packet_no(packet, RECOVERY_ERROR_OCCUPIED) ;
24589 + status = send_packet(&Target,packet);
24594 + * add recovery target to host table
24596 +#ifdef PRINT_DEBUG
24597 + show_debug("%s:add recovery target to host table",func);
24599 + memcpy(host_tbl.hostName,Target.hostName,sizeof(host_tbl.hostName));
24600 + memcpy(host_tbl.resolvedName,Target.resolvedName,sizeof(host_tbl.resolvedName));
24601 + host_tbl.port = Target.port;
24602 + host_tbl.recoveryPort = Target.recoveryPort;
24603 + PGRset_recovered_host(&host_tbl,DB_TBL_INIT);
24604 + PGRadd_HostTbl(&host_tbl,DB_TBL_INIT);
24606 + * send prepare recovery to load balancer
24608 + PGRsend_load_balance_packet(packet);
24609 + userName = strdup(packet->userName);
24612 + * set RECOVERY_PGDATA_REQ packet data
24614 +#ifdef PRINT_DEBUG
24615 + show_debug("%s:set RECOVERY_PGDATA_REQ packet data",func);
24617 + memset(packet,0,sizeof(RecoveryPacket));
24618 + PGRset_recovery_packet_no(packet, RECOVERY_PGDATA_REQ );
24620 +retry_connect_master:
24621 + master = PGRget_master();
24622 + if (master == (HostTbl *)NULL)
24625 + * connection error , master may be down
24627 + show_error("%s:get master info error , master may be down",func);
24628 + PGRset_recovery_packet_no(packet, RECOVERY_ERROR_TARGET_ONLY);
24629 + status = send_packet(&Target, packet);
24630 + reset_recovery_prepare();
24632 + if (userName != NULL)
24636 + /* send vauum command to master server */
24637 + status = send_vacuum(master, userName, PGR_RECOVERY_1ST_STAGE );
24638 + if (status != STATUS_OK)
24640 + PGRset_host_status(master, DB_TBL_ERROR);
24641 + goto retry_connect_master;
24644 + memcpy(Master.hostName,master->hostName,sizeof(Master.hostName));
24645 + memcpy(Master.resolvedName,master->resolvedName,sizeof(Master.resolvedName));
24646 + Master.sock = -1;
24647 + Master.recovery_sock = -1;
24648 + Master.port = master->port;
24649 + Master.recoveryPort = master->recoveryPort;
24651 +#ifdef PRINT_DEBUG
24652 + show_debug("%s:send packet to master %s recoveryPort %d",func, Master.hostName, Master.recoveryPort);
24654 + status = send_packet(&Master, packet);
24655 + if (status != STATUS_OK)
24658 + * connection error , master may be down
24660 + show_error("%s:connection error , master may be down",func);
24661 + PGRset_host_status(master,DB_TBL_ERROR);
24662 + goto retry_connect_master ;
24666 + * start prepare of recovery
24667 + * set recovery status to "prepare start"
24668 + * start transaction count up
24670 + start_recovery_prepare();
24672 + * wait answer from master server
24674 +#ifdef PRINT_DEBUG
24675 + show_debug("%s:wait answer from master server",func);
24677 + memset(packet,0,sizeof(RecoveryPacket));
24678 + read_packet_from_master(&Master, packet);
24679 +#ifdef PRINT_DEBUG
24680 + show_debug("%s:get answer from master:no[%d]",func,ntohs(packet->packet_no));
24682 + if (ntohs(packet->packet_no) == RECOVERY_PGDATA_ANS)
24685 + * send a packet to load balancer that is stopped master's
24686 + * load balancing until all recovery process is finished
24688 + PGRsend_load_balance_packet(packet);
24689 + memcpy((char *)&MasterPacketData,packet,sizeof(RecoveryPacket));
24692 + * prepare answer from master DB
24694 + PGRset_recovery_packet_no(packet, RECOVERY_PREPARE_ANS );
24695 + memcpy(packet->hostName,Master.hostName,sizeof(packet->hostName));
24696 + status = send_packet(&Target, packet);
24697 + if (status != STATUS_OK)
24699 + show_error("%s:no[%d] send_packet to target error",func,ntohs(packet->packet_no));
24700 + PGRset_recovery_packet_no(packet, RECOVERY_ERROR_TARGET_ONLY);
24701 + status = send_packet(&Master,packet);
24702 + reset_recovery_prepare();
24706 + if (userName != NULL)
24714 +wait_transaction_count_clear(void) /* poll until the recovery status is RECOVERY_CLEARED; STATUS_OK then, STATUS_ERROR on timeout */
24716 + char * func ="wait_transaction_count_clear()";
24717 + HostTbl * master = (HostTbl *)NULL;
24719 + int recovery_status = PGRget_recovery_status();
24721 + while (recovery_status != RECOVERY_CLEARED)
24723 + master = PGRget_master(); /* re-fetched on every pass */
24724 + if (master == (HostTbl *)NULL)
24726 + show_error("%s:get master info error , master may be down",func);
24729 + if ((recovery_status == RECOVERY_PREPARE_START) && /* still in the prepare phase... */
24730 + (master->transaction_count==0)) /* ...and the master has no counted transaction left */
24732 + PGRset_recovery_status(RECOVERY_CLEARED);
24737 +#ifdef PRINT_DEBUG
24738 + show_debug("now, waiting clear every transaction for recovery");
24741 + if (cnt > RECOVERY_TIMEOUT * 60 ) /* give up once cnt exceeds RECOVERY_TIMEOUT * 60; how cnt advances and the wait between polls are not visible here */
24743 + show_error("sorry, it is timeout for waiting clear transaction");
24744 + return STATUS_ERROR;
24746 + recovery_status = PGRget_recovery_status(); /* refresh for the loop condition */
24748 + return STATUS_OK;
24752 +second_setup_recovery (RecoveryPacket * packet) /* handle RECOVERY_START_REQ: wait for transactions to drain, run the 2nd-stage vacuum on the master, ask it to fsync, then answer the target */
24754 + char * func = "second_setup_recovery()";
24755 + HostTbl * master = (HostTbl *)NULL;
24757 + bool loop_end = false;
24758 + char * userName = NULL;
24759 + int recovery_status = 0;
24761 + /* send vacuum command to master server */
24762 + while ((master = PGRget_master()) != NULL) /* repeats with whatever master PGRget_master() reports next when the vacuum fails */
24765 + * wait until all started transactions are going to finish
24767 + status = wait_transaction_count_clear();
24768 + if (status != STATUS_OK)
24770 + show_error("%s:transaction is too busy, please try again after",func);
24771 + PGRset_recovery_packet_no(packet, RECOVERY_ERROR_TARGET_ONLY);
24772 + status = send_packet(&Target,packet); /* notify both sides; this status is overwritten by the next line */
24773 + status = send_packet(&Master,packet);
24774 + reset_recovery_prepare();
24777 + userName = strdup(packet->userName); /* NOTE(review): executed once per loop pass; confirm the previous copy is released before the loop repeats */
24778 + status = send_vacuum(master, userName, PGR_RECOVERY_2ND_STAGE );
24779 + if (status != STATUS_OK)
24781 + PGRset_host_status(master, DB_TBL_ERROR); /* mark the failed master so PGRget_master() can move on */
24782 + if (userName != NULL)
24792 + if (master == NULL) /* no usable master left */
24794 + show_error("%s:vacuum error , master may be down",func);
24795 + PGRset_recovery_packet_no(packet, RECOVERY_ERROR_TARGET_ONLY);
24796 + status = send_packet(&Target,packet);
24797 + status = send_packet(&Master,packet);
24798 + reset_recovery_prepare();
24803 + recovery_status = PGRget_recovery_status();
24804 + if ((recovery_status != RECOVERY_PREPARE_START) && /* any status outside these three means the recovery state was changed meanwhile */
24805 + (recovery_status != RECOVERY_WAIT_CLEAN) &&
24806 + (recovery_status != RECOVERY_CLEARED))
24808 + show_error("%s:queue set failed. stop to recovery",func);
24809 + PGRset_recovery_packet_no(packet, RECOVERY_ERROR_CONNECTION);
24810 + status = send_packet(&Target,packet);
24811 + status = send_packet(&Master,packet);
24812 + reset_recovery_prepare();
24813 + if (userName != NULL)
24819 + * then, send fsync request to master DB
24821 + PGRset_recovery_packet_no(packet, RECOVERY_FSYNC_REQ );
24822 + status = send_packet(&Master,packet);
24823 + if (status != STATUS_OK)
24826 + * connection error , master may be down
24828 + show_error("%s:connection error , master may be down",func);
24829 + PGRset_recovery_packet_no(packet, RECOVERY_ERROR_CONNECTION);
24830 + status = send_packet(&Target,packet);
24831 + status = send_packet(&Master,packet);
24832 + reset_recovery_prepare();
24833 + if (userName != NULL)
24838 + recovery_status = PGRget_recovery_status(); /* same status check as above, repeated after the fsync request */
24839 + if ((recovery_status != RECOVERY_PREPARE_START) &&
24840 + (recovery_status != RECOVERY_WAIT_CLEAN) &&
24841 + (recovery_status != RECOVERY_CLEARED))
24843 + show_error("%s:queue set failed. stop to recovery",func);
24844 + PGRset_recovery_packet_no(packet, RECOVERY_ERROR_CONNECTION);
24845 + status = send_packet(&Target,packet);
24846 + status = send_packet(&Master,packet);
24847 + reset_recovery_prepare();
24848 + if (userName != NULL)
24854 + * wait answer from master server
24856 + memset(packet,0,sizeof(RecoveryPacket));
24857 + read_packet_from_master(&Master,packet); /* return value unused; the zeroed packet is what gets tested below if nothing was read */
24858 + if (ntohs(packet->packet_no) == RECOVERY_FSYNC_ANS )
24861 + * master DB finished fsync
24863 + PGRset_recovery_packet_no(packet, RECOVERY_START_ANS );
24864 + memcpy(packet->hostName,Master.hostName,sizeof(packet->hostName)); /* tell the target which master to use */
24865 + status = send_packet(&Target,packet);
24866 + if (status != STATUS_OK)
24868 + finish_recovery(); /* target unreachable: recovery status goes back to RECOVERY_INIT */
24874 + show_error("%s:failure answer returned",func);
24875 + PGRset_recovery_packet_no(packet, RECOVERY_ERROR_CONNECTION);
24876 + status = send_packet(&Target,packet);
24877 + status = send_packet(&Master,packet);
24878 + reset_recovery_prepare();
24881 + if (userName != NULL)
24887 +read_queue_file(FILE * fp, ReplicateHeader * header, char *query) /* read the next header + query record from fp; in the visible code the incoming query value is replaced by a fresh malloc() */
24889 + char * func = "read_queue_file()";
24896 + if (fread((char*)header,sizeof(ReplicateHeader),1,fp) < 1) /* no complete header left: end of file or read error */
24900 + size = ntohl(header->query_size); /* length is stored in network byte order; NOTE(review): it is not range-checked in the visible code */
24903 + query = malloc(size+4); /* 4 bytes more than the query; the buffer is zeroed below, so the text read into it ends in zero bytes */
24904 + if (query == NULL)
24906 + show_error("%s:malloc failed:(%s)",func,strerror(errno));
24908 + memset(query,0,size+4);
24911 + if (fread(query,size,1,fp) < 1) /* query body missing or truncated */
24922 + * send queries from queue.
24925 + * STATUS_OK - success both
24926 + * STATUS_ERROR - fail both
24929 +PGRsend_queue(RecoveryTbl * master, RecoveryTbl * target) /* replay every queued query file to the master and, when target is non-NULL, to the target as well */
24931 + char * func = "PGRsend_queue()";
24932 + HostTbl * master_ptr = NULL; /* host-table entry looked up for master */
24933 + HostTbl * target_ptr = NULL; /* host-table entry looked up for target; stays NULL when target is NULL */
24934 + RecoveryQueueFile * msg = NULL; /* message buffer holding one queue-file name */
24935 + FILE * rfp = NULL;
24936 + ReplicateHeader header;
24937 + char * query = NULL;
24940 + int query_size = 0;
24943 + if (master == (RecoveryTbl *)NULL)
24945 + show_error("%s:there is no master ",func);
24946 + return STATUS_ERROR;
24948 +#ifdef PRINT_DEBUG
24949 + show_debug("%s:master %s - %d",func,master->hostName,master->port);
24951 + master_ptr = PGRget_HostTbl(master->resolvedName,master->port);
24952 + if (master_ptr == (HostTbl *)NULL)
24954 + show_error("%s:master table is null",func);
24955 + return STATUS_ERROR;
24957 + if (target != (RecoveryTbl *)NULL) /* target is optional: the RECOVERY_ERROR_ANS path passes NULL */
24959 +#ifdef PRINT_DEBUG
24960 + show_debug("%s:target %s - %d",func,target->hostName,target->port);
24962 + target_ptr = PGRget_HostTbl(target->resolvedName,target->port);
24963 + if (target_ptr == (HostTbl *)NULL)
24965 + show_error("%s:target table is null",func);
24966 + return STATUS_ERROR;
24970 + size = sizeof(RecoveryQueueFile) + FILENAME_MAX_LENGTH; /* message struct plus room for a file name */
24971 + msg = (RecoveryQueueFile *)malloc(size+4);
24974 +#ifdef PRINT_DEBUG
24975 + show_debug("%s:malloc() failed. reason: %s",func, strerror(errno));
24977 + return STATUS_ERROR;
24979 + memset(msg,0,size+4);
24980 + status = STATUS_OK;
24981 + while (msgrcv(*RecoveryMsgid , msg, FILENAME_MAX_LENGTH, 0, IPC_NOWAIT) > 0 ) /* drain the message queue without blocking; each message names one queue file */
24983 + strncpy(Recovery_Status_Inf->read_file,(char *)(msg->mdata),FILENAME_MAX_LENGTH); /* NOTE(review): confirm read_file holds at least FILENAME_MAX_LENGTH + 1 bytes, otherwise the name may end up unterminated */
24984 + PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
24985 + if (!strncmp(Recovery_Status_Inf->write_file,Recovery_Status_Inf->read_file,sizeof(Recovery_Status_Inf->write_file))) /* about to read the file currently recorded as write_file */
24987 + memset(Recovery_Status_Inf->write_file,0,sizeof(Recovery_Status_Inf->write_file)); /* clear write_file; presumably this makes the writer start a new file -- verify against the writer side */
24989 + PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
24990 + rfp = fopen(Recovery_Status_Inf->read_file,"r");
24993 + show_error("%s:queue file [%s] can not be opened:(%s)",func,Recovery_Status_Inf->read_file,strerror(errno));
24994 + return STATUS_ERROR; /* NOTE(review): no release of msg is visible before this return -- confirm it is not leaked */
24996 + while ((query = read_queue_file(rfp, &header,query)) != NULL) /* one header + query record per pass */
24998 + query_size = ntohl(header.query_size);
24999 + if (query_size < 0)
25001 + if (query != NULL)
25008 + PGR_Response_Inf->current_cluster = 0; /* the master is sent as cluster 0 */
25009 + rtn=PGRsend_replicate_packet_to_server(master_ptr,&header,query,PGR_Result,ntohl(header.replicate_id), true);
25010 + if (target_ptr != NULL)
25012 + PGR_Response_Inf->current_cluster = 1; /* the target is sent as cluster 1 */
25013 + rtn=PGRsend_replicate_packet_to_server(target_ptr,&header,query,PGR_Result,ntohl(header.replicate_id), true); /* NOTE(review): no use of rtn is visible here; confirm send failures reach the returned status */
25016 + if (query != NULL)
25025 + unlink(Recovery_Status_Inf->read_file); /* the file has been replayed: remove it */
25026 + memset(Recovery_Status_Inf->read_file,0,sizeof(Recovery_Status_Inf->read_file));
25029 +#ifdef PRINT_DEBUG
25030 + show_debug("%s:send_queue return status %d",func,status);
25036 +pgrecovery_loop(int fd) /* accept one connection on the listening socket fd and dispatch the recovery packets read from it by packet number */
25038 + char * func = "pgrecovery_loop()";
25042 + bool loop_end = false; /* set from the setup functions' return values */
25043 + RecoveryPacket packet; /* receive buffer, also reused for the answers */
25044 + HostTbl new_host; /* scratch entry for registering the recovered target */
25045 + RecoveryTbl * lbp;
25047 + lbp = LoadBalanceTbl;
25048 + if (lbp == (RecoveryTbl *)NULL)
25050 + show_error("%s:recovery table is NULL",func);
25053 +#ifdef PRINT_DEBUG
25054 + show_debug("%s:recovery accept port %d",func, Recovery_Port_Number);
25057 + while ((status = PGR_Create_Acception(fd,&sock,"",Recovery_Port_Number)) != STATUS_OK) /* retry the accept until it succeeds or the retry limit below is passed */
25059 + show_error("%s:PGR_Create_Acception failed",func);
25060 + PGR_Close_Sock(&sock);
25062 + if ( count > PGR_CONNECT_RETRY_TIME)
25069 + show_error("can't create recovery socket.exit.");
25070 + PGRreplicate_exit(1);
25074 + int read_size = 0;
25077 + struct timeval timeout;
25079 + timeout.tv_sec = RECOVERY_TIMEOUT; /* select() timeout, whole seconds only */
25080 + timeout.tv_usec = 0;
25083 + * Wait for something to happen.
25086 + FD_SET(sock,&rmask);
25088 + * read packet from target cluster server
25090 + rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
25092 + if (rtn == 0) /* timeout */
25097 + if (rtn && FD_ISSET(sock, &rmask))
25099 + read_size = read_packet(sock, &packet);
25106 +#ifdef PRINT_DEBUG
25107 + show_debug("%s:receive packet no:%d",func,ntohs(packet.packet_no));
25110 + switch (ntohs(packet.packet_no)) /* packet_no arrives in network byte order */
25112 + case RECOVERY_PREPARE_REQ :
25114 + * start prepare of recovery
25117 +#ifdef PRINT_DEBUG
25118 + show_debug("%s:1st master %s - %d",
25119 + func,Master.hostName,Master.port);
25120 + show_debug("%s:1st target %s - %d",
25121 + func,Target.hostName,Target.port);
25124 + loop_end = first_setup_recovery(&sock, &packet);
25125 +#ifdef PRINT_DEBUG
25126 + show_debug("%s:first_setup_recovery end:%d ",func,loop_end);
25129 + case RECOVERY_START_REQ :
25131 + * now, recovery process will start
25132 + * stop the transaction count up
25133 + * start queueing and stop send all queries for master DB
25135 +#ifdef PRINT_DEBUG
25136 + show_debug("%s:2nd master %s - %d",
25137 + func, Master.hostName,Master.port);
25138 + show_debug("%s:2nd target %s - %d",
25139 + func, Target.hostName,Target.port);
25141 + loop_end = second_setup_recovery (&packet);
25142 +#ifdef PRINT_DEBUG
25143 + show_debug("%s:second_setup_recovery end :%d ",
25147 + case RECOVERY_QUEUE_DATA_REQ :
25149 + * send all queries in queue
25152 +#ifdef PRINT_DEBUG
25153 + show_debug("%s:last master %s - %d",
25154 + func, Master.hostName,Master.port);
25155 + show_debug("%s:last target %s - %d",
25156 + func, Target.hostName,Target.port);
25158 + status = PGRsend_queue(&Master,&Target); /* replay the queued queries to master and target */
25159 + if (status == STATUS_OK)
25161 + memcpy(new_host.hostName,Target.hostName,sizeof(new_host.hostName));
25162 + memcpy(new_host.resolvedName,Target.resolvedName,sizeof(new_host.resolvedName));
25163 + new_host.port = Target.port;
25164 + new_host.recoveryPort = Target.recoveryPort;
25165 + PGRset_recovered_host(&new_host,DB_TBL_USE); /* the target is now registered with DB_TBL_USE instead of DB_TBL_INIT */
25166 + PGRadd_HostTbl(&new_host,DB_TBL_USE);
25167 + PGRset_recovery_packet_no(&packet, RECOVERY_QUEUE_DATA_ANS );
25168 + status = send_packet(&Target, &packet);
25169 + if (status != STATUS_OK)
25171 + finish_recovery();
25176 + /* connection error , master or target may be down */
25177 + show_error("%s:PGRsend_queue failed",func);
25178 + PGRset_recovery_packet_no(&packet, RECOVERY_ERROR_CONNECTION);
25179 + status = send_packet(&Target,&packet);
25180 + finish_recovery();
25184 + case RECOVERY_FINISH :
25186 + * finished rsync DB datas from master to target
25189 + * stop queueing, and re-initialize recovery status
25191 + finish_recovery();
25194 + * send finish recovery to load balancer
25196 + if (Master.recovery_sock != -1)
25198 + close(Master.recovery_sock);
25199 + Master.recovery_sock = -1;
25201 + if (Target.recovery_sock != -1)
25203 + close(Target.recovery_sock); /* NOTE(review): first_setup_recovery() sets Target.recovery_sock to the accepted sock, which PGR_Close_Sock() also closes at the end of this function -- confirm the descriptor is not closed twice */
25204 + Target.recovery_sock = -1;
25206 + send_packet(&Master, &packet); /* the master socket was closed above, so send_packet() opens a fresh connection */
25207 + MasterPacketData.packet_no = packet.packet_no; /* re-send the saved master packet under the FINISH packet number */
25208 + PGRsend_load_balance_packet(&MasterPacketData);
25209 + PGRsend_load_balance_packet(&packet);
25210 + memset((char *)&MasterPacketData,0,sizeof(RecoveryPacket));
25212 + case RECOVERY_ERROR_ANS :
25213 +#ifdef PRINT_DEBUG
25214 + show_debug("%s:recovery error accept. top queueing and initiarse recovery status",func);
25216 + status = PGRsend_queue(&Master,NULL); /* target is gone: replay the queue to the master only */
25217 + memset(&packet,0,sizeof(RecoveryPacket));
25218 + PGRset_recovery_packet_no(&packet, RECOVERY_ERROR_ANS);
25219 + send_packet(&Master, &packet);
25220 + finish_recovery();
25222 + PGRset_recovery_packet_no(&MasterPacketData, RECOVERY_FINISH );
25223 + PGRsend_load_balance_packet(&MasterPacketData);
25224 + memset((char *)&MasterPacketData,0,sizeof(RecoveryPacket));
25227 + show_error("%s:unknown packet. abort to parse",func); /* the format has a %s, so func must be supplied */
25233 + if (Master.sock != -1)
25235 + close (Master.sock);
25237 + if (Master.recovery_sock != -1)
25239 + close (Master.recovery_sock);
25241 + PGR_Close_Sock(&sock);
25248 +PGRrecovery_main(int fork_wait_time) /* recovery listener: install signal handlers, bind Recovery_Port_Number and pass readable events on the bound socket to pgrecovery_loop() */
25250 + char * func = "PGRrecovery_main()";
25257 + pgid = getpgid(0); /* process group id of the calling process */
25264 + PGRsignal(SIGCHLD, SIG_DFL);
25265 + PGRsignal(SIGHUP, PGRexit_subprocess); /* HUP, INT, QUIT and TERM all go to PGRexit_subprocess */
25266 + PGRsignal(SIGINT, PGRexit_subprocess);
25267 + PGRsignal(SIGQUIT, PGRexit_subprocess);
25268 + PGRsignal(SIGTERM, PGRexit_subprocess);
25269 + PGRsignal(SIGPIPE, SIG_IGN); /* ignore SIGPIPE so that writing to a closed socket surfaces as a send() error instead */
25271 + * in child process,
25272 + * call recovery module
25276 + if (fork_wait_time > 0) { /* optional start-up delay, in seconds */
25277 +#ifdef PRINT_DEBUG
25278 + show_debug("recovery process: wait fork(): pid = %d", getpid());
25280 + sleep(fork_wait_time);
25283 +#ifdef PRINT_DEBUG
25284 + show_debug("%s:PGRrecovery_main bind port %d",func,Recovery_Port_Number);
25286 + status = PGR_Create_Socket_Bind(&fd, "", Recovery_Port_Number);
25287 + if (status != STATUS_OK)
25289 + show_error("%s:PGR_Create_Socket_Bind failed",func);
25292 + memset(&MasterPacketData,0,sizeof(RecoveryPacket)); /* start from empty recovery state */
25293 + memset(&Master,0,sizeof(RecoveryTbl));
25294 + memset(&Target,0,sizeof(RecoveryTbl));
25298 + struct timeval timeout;
25300 + timeout.tv_sec = RECOVERY_TIMEOUT;
25301 + timeout.tv_usec = 0;
25304 + * Wait for something to happen.
25307 + FD_SET(fd,&rmask);
25308 + rtn = select(fd+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
25309 + if (rtn && FD_ISSET(fd, &rmask)) /* bound socket readable: let pgrecovery_loop() accept and serve the connection */
25311 + pgrecovery_loop(fd);
25316 +#ifdef PRINT_DEBUG
25318 +show_recovery_packet(RecoveryPacket * packet) /* debug helper: log the fields of a recovery packet, converting the numeric ones with ntohs() */
25320 + show_debug("no = %d",ntohs(packet->packet_no));
25321 + show_debug("max_connect = %d",ntohs(packet->max_connect));
25322 + show_debug("port = %d",ntohs(packet->port));
25323 + show_debug("recoveryPort = %d",ntohs(packet->recoveryPort));
25324 + if (packet->hostName != NULL) /* NOTE(review): elsewhere hostName is used as an in-struct buffer (sizeof/memcpy); if it is an array this test is always true -- confirm */
25325 + show_debug("hostName = %s",packet->hostName);
25326 + if (packet->pg_data != NULL) /* NOTE(review): if pg_data is an in-struct array as well, this test is always true -- confirm */
25327 + show_debug("pg_data = %s",packet->pg_data);
25330 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/replicate.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/replicate.c
25331 --- postgresql-8.2.4/src/pgcluster/pgrp/replicate.c 1970-01-01 01:00:00.000000000 +0100
25332 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/replicate.c 2007-03-01 16:27:15.000000000 +0100
25334 +/*--------------------------------------------------------------------
25339 + * This file is composed of the functions to call with the source
25340 + * at pgreplicate for the replication.
25342 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
25343 + *--------------------------------------------------------------------
25345 +#include "postgres.h"
25346 +#include "postgres_fe.h"
25348 +#include <pthread.h>
25349 +#include <stdio.h>
25350 +#include <stdarg.h>
25351 +#include <sys/types.h>
25352 +#include <fcntl.h>
25353 +#include <errno.h>
25354 +#include <ctype.h>
25356 +#include <sys/ipc.h>
25357 +#include <sys/shm.h>
25358 +#include <sys/sem.h>
25359 +#include <sys/msg.h>
25360 +#include <signal.h>
25363 +#include "libpq-fe.h"
25364 +#include "libpq-int.h"
25365 +#include "fe-auth.h"
25367 +#include <sys/socket.h>
25368 +#include <unistd.h>
25369 +#include <netdb.h>
25370 +#include <arpa/inet.h>
25372 +#ifdef HAVE_NETINET_TCP_H
25373 +#include <netinet/tcp.h>
25376 +#ifdef HAVE_SYS_SELECT_H
25377 +#include <sys/select.h>
25381 +#ifdef HAVE_CRYPT_H
25382 +#include <crypt.h>
25387 +#include "mb/pg_wchar.h"
25390 +#include "access/xact.h"
25391 +#include "lib/dllist.h"
25392 +#include "libpq/pqformat.h"
25393 +#include "replicate_com.h"
25394 +#include "pgreplicate.h"
25397 +#define IPC_NMAXSEM (32)
25399 +/*--------------------------------------
25400 + * PROTOTYPE DECLARATION
25401 + *--------------------------------------
25403 +static TransactionTbl * setTransactionTbl(HostTbl * host_ptr, ReplicateHeader * header);
25404 +static TransactionTbl * insertTransactionTbl( HostTbl * host_ptr, TransactionTbl * datap);
25405 +static TransactionTbl * getTransactionTbl( HostTbl * host_ptr, ReplicateHeader * header);
25406 +static void deleteTransactionTbl(HostTbl * host_ptr,ReplicateHeader * header);
25408 +static HostTbl * deleteHostTbl(HostTbl * ptr);
25409 +static bool is_master_in_recovery(char * host, int port,int recovery_status);
25410 +static void sem_quit(int semid);
25411 +static int send_cluster_status_to_load_balance(HostTbl * host_ptr,int status);
25412 +static void set_transaction_status(int status);
25413 +static void check_transaction_status(ReplicateHeader * header,TransactionTbl *transaction);
25414 +static HostTbl * check_host_transaction_status(ReplicateHeader * header,HostTbl *host );
25415 +static void clearHostTbl(void);
25416 +static bool is_need_sync_time(ReplicateHeader * header);
25417 +static bool is_need_wait_answer(ReplicateHeader * header);
25418 +static void write_host_status_file(HostTbl * host_ptr);
25420 +static void delete_template(HostTbl * ptr, ReplicateHeader * header);
25421 +static char * check_copy_command(char * query);
25422 +static int read_answer(int dest);
25423 +static bool is_autocommit_off(char * query);
25424 +static bool is_autocommit_on(char * query);
25425 +static unsigned int get_host_ip_from_tbl(char * host);
25426 +static unsigned int get_srcHost_ip_from_tbl(char * srcHost);
25428 +static int next_replication_id(void);
25429 +static void check_replication_id(void);
25430 +static bool is_need_use_rlog(ReplicateHeader * header);
25431 +static bool is_need_queue_jump( ReplicateHeader * header,char * query);
25432 +static int check_delete_transaction (HostTbl * host_ptr, ReplicateHeader * header);
25434 +static bool is_executed_query_in_origin( ReplicateHeader *header );
25435 +static bool is_executed_query( PGconn *conn,ReplicateHeader *header );
25437 +static void * thread_send_source(void * arg);
25438 +static void * thread_send_cluster(void * arg);
25440 +static int send_replicate_packet_to_server( TransactionTbl * transaction_tbl, int current_cluster, HostTbl * host_ptr, ReplicateHeader * header, char *query , char * result,unsigned int replicationId, bool recovery);
25441 +static int check_result( PGresult * res );
25442 +static bool compare_results(int *results, int size, int source_id);
25444 +static int send_func(HostTbl * host_ptr,ReplicateHeader * header, char * func,char * result);
25445 +static uint32_t get_oid(HostTbl * host_ptr,ReplicateHeader * header);
25446 +static int set_oid(HostTbl * host_ptr,ReplicateHeader * header, uint32_t oid);
25447 +static int replicate_lo( PGconn * conn, ReplicateHeader * header, LOArgs * query);
25448 +static int notice_abort(HostTbl * host_ptr,ReplicateHeader * header);
25449 +static FILE * create_queue_file(void);
25450 +static int add_queue_file(char * data, int size);
25452 +static int send_p_parse (PGconn * conn, StringInfo input_message);
25453 +static int send_p_bind (PGconn * conn, StringInfo input_message);
25454 +static int send_p_describe (PGconn * conn, StringInfo input_message);
25455 +static int send_p_execute (PGconn * conn, StringInfo input_message);
25456 +static int send_p_sync (PGconn * conn, StringInfo input_message);
25457 +static int send_p_close (PGconn * conn, StringInfo input_message);
25458 +static void set_string_info(StringInfo input_message, ReplicateHeader * header, char * query);
25460 +int replicate_packet_send_internal(ReplicateHeader * header, char * query,int dest,int recovery_status,bool isHeldLock);
25461 +bool PGRis_same_host(char * host1, unsigned short port1 , char * host2, unsigned short port2);
25462 +HostTbl * PGRadd_HostTbl(HostTbl * conf_data, int useFlag);
25463 +HostTbl * PGRget_master(void);
25464 +void PGRset_recovery_status(int status);
25465 +int PGRget_recovery_status(void);
25466 +int PGRcheck_recovered_host(void);
25467 +int PGRset_recovered_host(HostTbl * target,int useFlag);
25468 +int PGRinit_recovery(void);
25469 +void PGRexit_subprocess(int signo);
25470 +void PGRreplicate_exit(int exit_status);
25471 +int PGRsend_replicate_packet_to_server( HostTbl * host_ptr, ReplicateHeader * header, char *query , char * result,unsigned int replicationId, bool recovery);
25472 +HostTbl * PGRget_HostTbl(char * resolvedName,int port);
25473 +int PGRset_queue(ReplicateHeader * header,char * query);
25474 +int PGRset_host_status(HostTbl * host_ptr,int status);
25475 +void PGRclear_transactions(void);
25476 +void PGRclear_connections();
25477 +int PGRset_replication_id(uint32_t id);
25478 +int PGRdo_replicate(int sock,ReplicateHeader *header, char * query);
25479 +int PGRreturn_result(int dest, char * result,int wait);
25480 +int PGRreplicate_packet_send( ReplicateHeader * header, char * query,int dest,int recovery_status);
25481 +char * PGRread_packet(int sock, ReplicateHeader *header);
25482 +char * PGRread_query(int sock, ReplicateHeader *header);
25483 +PGconn * PGRcreateConn( char * host, char * port,char * database, char * userName, char * password, char * md5Salt, char * cryptSalt );
25485 +unsigned int PGRget_next_query_id(void);
25486 +int PGRinit_transaction_table(void);
25487 +int PGRsync_oid(ReplicateHeader *header);
25488 +int PGRload_replication_id(void);
25489 +extern pthread_mutex_t transaction_table_mutex;
25492 +PGRis_same_host(char * host1, unsigned short port1 , char * host2, unsigned short port2) /* compare two host/port pairs by the address PGRget_ip_by_name() returns and by port */
25494 +#ifdef PRINT_DEBUG
25495 + char * func = "PGRis_same_host()";
25497 + unsigned int ip1, ip2; /* PGRget_ip_by_name() results for host1 and host2 */
25499 + if ((host1[0] == '\0' ) || (host2[0] == '\0') || /* an empty host name... */
25500 + ( port1 != port2 )) /* ...or differing ports is handled up front; the branch body is not visible here, presumably "not the same" */
25502 +#ifdef PRINT_DEBUG
25503 + show_debug("%s:target host",func);
25507 + ip1 = PGRget_ip_by_name( host1);
25508 + ip2 = PGRget_ip_by_name( host2);
25510 + if ((ip1 == ip2) && (port1 == port2)) /* same address and same port */
25518 +PGRcreateConn( char * host, char * port,char * database, char * userName, char * password, char * md5Salt, char * cryptSalt ) /* open a libpq connection, retrying while its status is CONNECTION_BAD; returns NULL once more than PGR_CONNECT_RETRY_TIME retries have failed */
25520 + char * func = "PGRcreateConn()";
25522 + PGconn * conn = NULL;
25525 + memset(pwd,0,sizeof(pwd)); /* zero-fill: the bounded copies below rely on this for NUL termination */
25526 + if (*password != '\0')
25528 + if ((strncmp(password,"md5",3) == 0) && (md5Salt != NULL)) /* md5-style password: append the four salt bytes as "(n)" groups */
25530 + snprintf(pwd,sizeof(pwd),"%s(%d)(%d)(%d)(%d)",password,
25531 + *md5Salt,*(md5Salt+1),*(md5Salt+2),*(md5Salt+3)); /* bounded by sizeof(pwd) -- assumes pwd is a local char array, as the existing sizeof(pwd) uses imply */
25535 + strncpy(pwd,password,sizeof(pwd) - 1); /* size-1 keeps the final zero byte, so pwd is always terminated */
25538 + conn = PQsetdbLogin(host, port, NULL, NULL, database, userName, pwd);
25539 + /* check to see that the backend Connection was successfully made */
25541 + while (PQstatus(conn) == CONNECTION_BAD)
25543 + if (conn != NULL)
25548 + conn = PQsetdbLogin(host, port, NULL, NULL, database, userName, pwd); /* try again with the same parameters */
25549 + if (cnt > PGR_CONNECT_RETRY_TIME ) /* retry budget used up */
25551 + if (conn != NULL)
25556 + return (PGconn *)NULL;
25559 + if(PQstatus(conn) == CONNECTION_BAD && h_errno==2) /* NOTE(review): 2 is presumably TRY_AGAIN from <netdb.h> -- confirm and prefer the symbolic name */
25561 + show_error("gethostbyname() failed. sleep and retrying...");
25562 + usleep(PGR_SEND_WAIT_MSEC);
25565 + else if(!strncasecmp(PQerrorMessage(conn),"FATAL: Sorry, too many clients already",30) || /* only the first 30 characters of the message are compared */
25566 + !strncasecmp(PQerrorMessage(conn),"FATAL: Non-superuser connection limit",30) )
25568 + usleep(PGR_SEND_WAIT_MSEC);
25569 + show_error("Connection overflow. sleep and retrying...");
25572 + else if(!strncasecmp(PQerrorMessage(conn),"FATAL: The database system is starting up",40) )
25574 +#ifdef PRINT_DEBUG
25575 + show_debug("waiting for starting up...");
25577 + usleep(PGR_SEND_WAIT_MSEC);
25581 +#ifdef PRINT_DEBUG
25582 + show_error("%s:Retry. h_errno is %d,reason is '%s'",func,h_errno,PQerrorMessage(conn));
25585 + usleep(PGR_SEND_WAIT_MSEC);
25592 +static TransactionTbl * /* Returns the transaction-table entry for (host_ptr, header): an existing entry gets a new connection, otherwise a new entry is built and inserted. NULL on failure. */
25593 +setTransactionTbl(HostTbl * host_ptr, ReplicateHeader * header)
25595 + char * func = "setTransactionTbl()";
25596 + TransactionTbl * ptr = NULL;
25597 + TransactionTbl work ;
25599 + char * hostName = NULL;
25600 + char * dbName = NULL;
25601 + char * userName = NULL;
25602 + char * password = NULL;
25603 + char * md5Salt = NULL;
25604 + char * cryptSalt = NULL;
25606 + if ((host_ptr == NULL) || (header == NULL))
25608 + return (TransactionTbl *)NULL;
25610 + dbName = (char *)header->dbName; /* connection parameters come from the replicate header, except host and port */
25611 + snprintf(port,sizeof(port),"%d", host_ptr->port); /* PGRcreateConn() takes the port as a string */
25612 + userName = (char *)(header->userName);
25613 + password = (char *)(header->password);
25614 + md5Salt = (char *)(header->md5Salt);
25615 + cryptSalt = (char *)(header->cryptSalt);
25616 + hostName = (char *)(host_ptr->resolvedName);
25618 + ptr = getTransactionTbl(host_ptr,header); /* look for an entry that already exists for this host, source, database and pid */
25621 + ptr->transaction_count = 0; /* existing entry: reset its nesting counter and reconnect */
25622 + ptr->conn = PGRcreateConn(hostName,port,dbName,userName,password,md5Salt,cryptSalt); /* NOTE(review): the previous ptr->conn is overwritten here — confirm callers have already closed it */
25623 + if (ptr->conn == NULL)
25625 + show_error("%s:Transaction is pooling but PGRcreateConn failed",func);
25626 + deleteTransactionTbl(host_ptr, header); /* drop the entry and mark the host as failed */
25627 + PGRset_host_status(host_ptr,DB_TBL_ERROR);
25633 + memset(&work,0,sizeof(work)); /* no existing entry: fill a local template that insertTransactionTbl() copies */
25634 + strncpy(work.host, hostName, sizeof(work.host));
25635 + strncpy(work.srcHost, header->from_host, sizeof(work.srcHost));
25636 + work.hostIP = PGRget_ip_by_name(hostName);
25637 + work.port = host_ptr->port;
25638 + work.srcHostIP = PGRget_ip_by_name(header->from_host);
25639 + work.pid = ntohs(header->pid); /* header->pid is converted from network byte order */
25640 + strncpy(work.dbName,header->dbName,sizeof(work.dbName));
25641 + work.conn = PGRcreateConn(hostName,port,dbName,userName,password,md5Salt,cryptSalt);
25642 + if (work.conn == NULL)
25644 +#ifdef PRINT_DEBUG
25645 + show_debug("%s: %s@%s is not ready",func,port,hostName);
25647 + return (TransactionTbl *)NULL;
25649 + work.useFlag = DB_TBL_USE ;
25650 + work.in_transaction = false;
25651 + work.transaction_count = 0;
25652 + ptr = insertTransactionTbl(host_ptr,&work);
25653 + if (ptr == (TransactionTbl *)NULL) /* NOTE(review): work.conn stays open on this failure path in the lines shown — confirm it is closed elsewhere */
25655 + show_error("%s:insertTransactionTbl failed",func);
25656 + return (TransactionTbl *)NULL;
25661 +static TransactionTbl * /* Appends a heap-allocated copy of *datap to the transaction list while holding transaction_table_mutex. */
25662 +insertTransactionTbl( HostTbl * host_ptr, TransactionTbl * datap)
25664 + char * func = "insertTransactionTbl()";
25665 + TransactionTbl * workp = NULL;
25667 + pthread_mutex_lock(&transaction_table_mutex);
25668 + if ((host_ptr == (HostTbl *)NULL) || (datap == (TransactionTbl*)NULL))
25670 + show_error("%s:host table or transaction table is NULL",func);
25671 + pthread_mutex_unlock(&transaction_table_mutex); /* every early return releases the mutex first */
25673 + return (TransactionTbl *)NULL;
25675 + if (Transaction_Tbl_Begin == NULL) /* list not created yet: initialise it on first use */
25677 + if (PGRinit_transaction_table() != STATUS_OK)
25679 + pthread_mutex_unlock(&transaction_table_mutex);
25681 + return (TransactionTbl *)NULL;
25685 + workp = (TransactionTbl *)malloc(sizeof(TransactionTbl)); /* NOTE(review): the result is passed to memset on the next line without a NULL check */
25686 + memset(workp,0,sizeof(TransactionTbl));
25687 + Transaction_Tbl_End = workp;
25688 + workp->hostIP = datap->hostIP; /* field-by-field copy of the caller's template */
25689 + workp->port = datap->port;
25690 + workp->pid = datap->pid;
25691 + workp->srcHostIP = datap->srcHostIP;
25692 + strncpy(workp->host,datap->host,sizeof(workp->host));
25693 + strncpy(workp->srcHost,datap->srcHost,sizeof(workp->srcHost));
25694 + strncpy(workp->dbName,datap->dbName,sizeof(workp->dbName));
25695 + workp->conn = datap->conn; /* the connection pointer is shared with the template, not duplicated */
25696 + workp->useFlag = DB_TBL_USE; /* always DB_TBL_USE, whatever datap->useFlag holds */
25697 + workp->lock = STATUS_OK;
25698 + workp->in_transaction =datap->in_transaction;
25699 + workp->transaction_count =datap->transaction_count;
25700 + DLAddTail(Transaction_Tbl_Begin, DLNewElem(workp));
25702 + pthread_mutex_unlock(&transaction_table_mutex);
25707 +static TransactionTbl * /* Finds the in-use transaction entry matching target host IP and port, source host IP, database name and client pid; NULL when there is none. */
25708 +getTransactionTbl( HostTbl * host_ptr, ReplicateHeader * header)
25710 + Dlelem * ptr = NULL;
25711 + unsigned int host_ip,srcHost_ip;
25712 + unsigned short pid = 0;
25714 + if (Transaction_Tbl_Begin == (Dllist *) NULL)
25716 + return (TransactionTbl * )NULL;
25718 + if ((host_ptr == (HostTbl *)NULL) ||
25719 + (header == (ReplicateHeader *)NULL))
25721 + return (TransactionTbl * )NULL;
25723 + host_ip = get_host_ip_from_tbl(host_ptr->resolvedName); /* table lookup first; a result of 0 falls back to PGRget_ip_by_name() */
25724 + if (host_ip == 0)
25726 + host_ip = PGRget_ip_by_name(host_ptr->resolvedName);
25728 + srcHost_ip = get_srcHost_ip_from_tbl(header->from_host); /* same two-step lookup for the source host */
25729 + if (srcHost_ip == 0)
25731 + srcHost_ip = PGRget_ip_by_name(header->from_host);
25733 + pid = ntohs(header->pid);
25735 + pthread_mutex_lock(&transaction_table_mutex); /* the list walk below runs under the mutex */
25737 + ptr = DLGetHead(Transaction_Tbl_Begin);
25740 + TransactionTbl *transaction = DLE_VAL(ptr);
25741 + if ((transaction->useFlag == DB_TBL_USE) &&
25742 + (transaction->hostIP == host_ip) &&
25743 + (transaction->port == host_ptr->port) &&
25744 + (transaction->srcHostIP == srcHost_ip) &&
25745 + (!strncasecmp(transaction->dbName,header->dbName,sizeof(transaction->dbName))) && /* database names compare case-insensitively */
25746 + (transaction->pid == pid))
25748 + pthread_mutex_unlock(&transaction_table_mutex); /* the entry is returned after the mutex is released; the caller uses it unlocked */
25749 + return transaction;
25751 + ptr = DLGetSucc(ptr);
25753 + pthread_mutex_unlock(&transaction_table_mutex);
25755 + return (TransactionTbl * )NULL;
25759 +deleteTransactionTbl(HostTbl * host_ptr,ReplicateHeader * header) /* Closes the connection of the entry matching (host_ptr, header) and unlinks that entry from the transaction list. */
25761 + TransactionTbl *ptr = NULL;
25764 + ptr = getTransactionTbl(host_ptr,header); /* the lookup takes and releases the mutex itself, so it is done before locking below */
25766 + pthread_mutex_lock(&transaction_table_mutex);
25771 + if (ptr->in_transaction) /* an entry removed mid-transaction also gives back one count on the host */
25773 + if (host_ptr->transaction_count > 0)
25774 + host_ptr->transaction_count--;
25778 + if (ptr->conn != NULL)
25780 + PQfinish(ptr->conn);
25782 + elem = DLGetHead(Transaction_Tbl_Begin); /* walk the list to find the element that wraps ptr */
25785 + TransactionTbl *transaction = DLE_VAL(elem);
25786 + if (transaction == ptr) {
25789 + DLFreeElem(elem);
25790 + pthread_mutex_unlock(&transaction_table_mutex);
25793 + elem = DLGetSucc(elem);
25796 + pthread_mutex_unlock(&transaction_table_mutex);
25800 +deleteHostTbl(HostTbl * ptr) /* Clears one host-table slot in place. */
25802 + if (ptr != (HostTbl*)NULL)
25804 + memset(ptr,0,sizeof(HostTbl)); /* zeroes every field of the slot, useFlag included */
25810 +PGRadd_HostTbl(HostTbl *conf_data, int useFlag) /* Registers conf_data in the host table with status useFlag; if the host is already registered only its status is updated. */
25812 + HostTbl * ptr = NULL;
25815 + ptr = PGRget_HostTbl(conf_data->resolvedName, conf_data->port);
25816 + if (ptr != (HostTbl*)NULL) /* already known: only the status is changed */
25818 + PGRset_host_status(ptr,useFlag);
25822 + ptr = Host_Tbl_Begin; /* otherwise scan from the start of the table */
25824 + while (ptr->useFlag != DB_TBL_END)
25826 + if (ptr->useFlag == DB_TBL_FREE)
25833 + if (cnt >= MAX_DB_SERVER) /* table full */
25835 + return (HostTbl*)NULL;
25837 + if (ptr->useFlag == DB_TBL_END) /* writing over the end marker: move the marker one slot further */
25839 + (ptr + 1) -> useFlag = DB_TBL_END; /* NOTE(review): relies on the table having a slot past index cnt — confirm the allocation size */
25841 + memset(ptr,0,sizeof(HostTbl));
25842 + ptr->hostNum = cnt;
25843 + memcpy(ptr->hostName,conf_data->hostName,sizeof(ptr->hostName));
25844 + memcpy(ptr->resolvedName,conf_data->resolvedName,sizeof(ptr->resolvedName));
25845 + ptr->port = conf_data->port;
25846 + ptr->recoveryPort = conf_data->recoveryPort;
25847 + ptr->transaction_count = 0;
25848 + PGRset_host_status(ptr,useFlag); /* set through the helper, which also writes the status file when the value changes */
25854 +PGRget_master(void) /* Scans the host table for an entry whose useFlag is DB_TBL_USE; NULL if the scan reaches the end marker. */
25856 + HostTbl * host_tbl = NULL;
25858 + host_tbl = Host_Tbl_Begin; /* Host_Tbl_Begin is dereferenced below without a NULL check */
25859 + while(host_tbl->useFlag != DB_TBL_END)
25861 + if (host_tbl->useFlag == DB_TBL_USE)
25867 + return (HostTbl *)NULL;
25871 +PGRset_recovery_status(int status) /* Stores status in Recovery_Status_Inf->recovery_status under the recovery semaphore. */
25873 + if (RecoverySemID <= 0) /* semaphore set not created yet */
25875 + PGRsem_lock(RecoverySemID,SEM_NUM_OF_RECOVERY);
25876 + if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
25878 + Recovery_Status_Inf->recovery_status = status;
25881 + PGRsem_unlock(RecoverySemID,SEM_NUM_OF_RECOVERY);
25885 +PGRget_recovery_status(void) /* Reads Recovery_Status_Inf->recovery_status under the recovery semaphore. */
25889 + if (RecoverySemID <= 0) /* semaphore set not created yet */
25891 + PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25892 + if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
25894 + status = Recovery_Status_Inf->recovery_status;
25896 + PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25902 +set_transaction_status(int status) /* Stores status in the shared recovery record under the recovery semaphore. */
25904 + if (RecoverySemID <= 0)
25906 + PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25907 + if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
25909 + Recovery_Status_Inf->recovery_status = status; /* NOTE(review): same field PGRset_recovery_status() writes — confirm a separate transaction-status field was not intended */
25911 + PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25916 +get_transaction_status(void) /* Reads the status value from the shared recovery record under the recovery semaphore. */
25920 + if (RecoverySemID <= 0)
25922 + PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25923 + if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
25925 + status = Recovery_Status_Inf->recovery_status; /* reads recovery_status, the field set_transaction_status() writes */
25926 + PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY); /* unlock on the path that read the value */
25929 + PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY); /* unlock on the remaining path; NOTE(review): confirm the branch above leaves the function before reaching this line */
25935 +PGRcheck_recovered_host(void) /* If the shared recovery record holds a pending host, adds it to the host table and clears the pending slot. */
25937 + char * func = "PGRcheck_recovered_host()";
25938 + HostTbl * ptr = NULL;
25939 + int rtn = STATUS_OK;
25941 + if (RecoverySemID <= 0)
25942 + return STATUS_ERROR;
25943 + PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25944 + if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
25946 + if (Recovery_Status_Inf->useFlag != DB_TBL_FREE) /* any flag other than DB_TBL_FREE means a host is waiting to be added */
25948 + ptr = PGRadd_HostTbl((HostTbl *)&(Recovery_Status_Inf->target_host),Recovery_Status_Inf->useFlag);
25949 + if (ptr == (HostTbl *) NULL)
25951 + show_error("%s:PGRadd_HostTbl failed",func);
25952 + rtn = STATUS_ERROR;
25954 + Recovery_Status_Inf->useFlag = DB_TBL_FREE; /* the slot is released and wiped */
25955 + memset((HostTbl *)&(Recovery_Status_Inf->target_host),0,sizeof(HostTbl));
25959 + PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25964 +PGRset_recovered_host(HostTbl * target, int useFlag) /* Publishes target and useFlag in the shared recovery record, where PGRcheck_recovered_host() picks them up. */
25966 + if (RecoverySemID <= 0)
25968 + PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25969 + if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
25971 + Recovery_Status_Inf->useFlag = useFlag; /* the flag is stored even when target is NULL */
25972 + if (target != (HostTbl*)NULL)
25974 + memcpy((HostTbl *)&(Recovery_Status_Inf->target_host),target,sizeof(HostTbl));
25975 + PGRset_host_status(target,useFlag); /* the caller's own table entry gets the same status */
25979 + PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25984 +is_master_in_recovery(char * host , int port,int recovery_status) /* Compares host:port with the current master entry. The recovery_status argument is not read in the lines shown; the status is fetched again below. */
25986 + HostTbl * master = NULL;
25988 + int status = PGRget_recovery_status();
25989 + if (status == RECOVERY_CLEARED)
25991 + master = PGRget_master();
25992 + if (master == (HostTbl *)NULL)
25996 + return (PGRis_same_host(host, port , master->hostName, master->port)); /* compares against master->hostName, not resolvedName; port is narrowed from int to unsigned short by the callee's signature */
26002 +PGRinit_recovery(void) /* Creates the IPC objects used for recovery: a set of 4 semaphores, three shared-memory segments and one message queue. Returns STATUS_OK, or STATUS_ERROR on the first failure. */
26004 + char * func = "PGRinit_recovery()";
26006 + union semun sem_arg;
26009 + if ((RecoverySemID = semget(IPC_PRIVATE,4,IPC_CREAT | IPC_EXCL | 0600)) < 0)
26011 + show_error("%s:semget() failed. (%s)",func,strerror(errno));
26012 + return STATUS_ERROR;
26014 + for ( i = 0 ; i < 4 ; i ++) /* each of the 4 semaphores is given its initial value */
26016 + semctl(RecoverySemID, i, GETVAL, sem_arg);
26018 + semctl(RecoverySemID, i, SETVAL, sem_arg);
26021 + size = sizeof(RecoveryStatusInf);
26022 + RecoveryShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
26023 + if (RecoveryShmid < 0)
26025 + show_error("%s:shmget() failed. (%s)",func,strerror(errno));
26026 + return STATUS_ERROR;
26028 + Recovery_Status_Inf = (RecoveryStatusInf *)shmat(RecoveryShmid,0,0);
26029 + if (Recovery_Status_Inf == (RecoveryStatusInf *)-1) /* shmat() reports failure as (void *)-1, not NULL */
26031 + show_error("%s:shmat() failed. (%s)",func,strerror(errno));
26032 + return STATUS_ERROR;
26034 + memset(Recovery_Status_Inf,0,size);
26035 + Recovery_Status_Inf->check_point = PGR_CHECK_POINT ;
26037 + size = sizeof(unsigned int);
26038 + ReplicateSerializationShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
26039 + if (ReplicateSerializationShmid < 0)
26041 + show_error("%s:shmget() failed. (%s)",func,strerror(errno));
26042 + return STATUS_ERROR;
26045 + PGR_ReplicateSerializationID = (unsigned int *)shmat(ReplicateSerializationShmid,0,0);
26046 + if( PGR_ReplicateSerializationID == (unsigned int *)-1) {
26047 + show_error("%s:shmat() failed. (%s)",func,strerror(errno));
26048 + return STATUS_ERROR;
26050 + memset(PGR_ReplicateSerializationID,0,size);
26051 + PGRset_recovery_status(RECOVERY_INIT);
26052 + PGRset_recovered_host((HostTbl *)NULL, DB_TBL_FREE);
26053 + set_transaction_status(0);
26056 + * create message queue
26058 + RecoveryMsgShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600); /* size is still sizeof(unsigned int) from above; the segment holds the queue id */
26059 + if (RecoveryMsgShmid < 0)
26061 + show_error("%s:shmget() failed. (%s)",func,strerror(errno));
26062 + return STATUS_ERROR;
26065 + RecoveryMsgid = (int *)shmat(RecoveryMsgShmid,0,0);
26066 + if( RecoveryMsgid == (int *)-1) { /* fixed: the old pointer test "< 0" never matched the (void *)-1 failure value, so a failed attach went on to the dereference below */
26067 + show_error("%s:shmat() failed. (%s)",func,strerror(errno));
26068 + return STATUS_ERROR;
26070 + *RecoveryMsgid = msgget (IPC_PRIVATE, 00666 | IPC_CREAT ); /* NOTE(review): mode 0666 here versus 0600 on the semaphores and segments above — confirm the wider permission is intended */
26071 + if (*RecoveryMsgid < 0)
26073 + show_error("%s:msgget() failed. (%s)",func,strerror(errno));
26074 + return STATUS_ERROR;
26078 + return STATUS_OK;
26082 +clearHostTbl(void) /* Passes host-table slots to deleteHostTbl() until the end marker or a NULL pointer is reached. */
26085 + HostTbl * ptr = NULL;
26087 + if (Host_Tbl_Begin == NULL)
26089 + /* normal socket close */
26090 + ptr = Host_Tbl_Begin;
26091 + while(ptr && ptr->useFlag != DB_TBL_END)
26093 + ptr = deleteHostTbl(ptr); /* the loop advances through deleteHostTbl()'s return value */
26098 +PGRexit_subprocess(int signo) /* Records the signal number and runs the common exit path with exit status 1. */
26100 + exit_signo = signo; /* read later by PGRreplicate_exit() as the signal passed to kill() */
26101 + PGRreplicate_exit(1);
26105 +PGRreplicate_exit(int exit_status) /* Common shutdown path: blocks termination signals, releases sockets, shared memory, the message queue, semaphores, log files and heap buffers, then calls exit(exit_status). Does not return. */
26111 + sigemptyset(&mask); /* block further termination signals while cleanup is running */
26112 + sigaddset(&mask, SIGTERM);
26113 + sigaddset(&mask, SIGINT);
26114 + sigaddset(&mask, SIGQUIT);
26115 + sigaddset(&mask, SIGCHLD);
26116 + sigprocmask(SIG_BLOCK, &mask, NULL);
26118 + kill (0, exit_signo); /* pid 0 addresses the caller's whole process group */
26122 + if (RidFp != NULL)
26125 + if (Recovery_Status_Inf != NULL)
26127 + PGRwrite_log_file(RidFp,"%u",Recovery_Status_Inf->replication_id); /* save the current replication id before the segment is detached */
26134 + if (ReplicateSock > 0)
26135 + close(ReplicateSock);
26137 + /* recovery status clear */
26138 + if ((RecoverySemID > 0) && (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)) /* fixed: also require the shared record, which stays unset when PGRinit_recovery() fails after semget() */
26139 + Recovery_Status_Inf->recovery_status = RECOVERY_INIT;
26141 + /* normal socket close */
26144 + if (Host_Tbl_Begin != (HostTbl *)NULL) /* each segment below is detached, then marked for removal */
26146 + rtn = shmdt((char *)Host_Tbl_Begin);
26147 + shmctl(HostTblShmid,IPC_RMID,(struct shmid_ds *)NULL);
26150 + if (Cascade_Tbl != (ReplicateServerInfo *)NULL)
26152 + rtn = shmdt((char *)Cascade_Tbl);
26153 + shmctl(CascadeTblShmid,IPC_RMID,(struct shmid_ds *)NULL);
26156 + if (Cascade_Inf != (CascadeInf *)NULL)
26158 + rtn = shmdt((char *)Cascade_Inf);
26159 + shmctl(CascadeInfShmid,IPC_RMID,(struct shmid_ds *)NULL);
26162 + if (Commit_Log_Tbl != (CommitLogInf *)NULL)
26164 + rtn = shmdt((char *)Commit_Log_Tbl);
26165 + shmctl(CommitLogShmid,IPC_RMID,(struct shmid_ds *)NULL);
26168 + if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
26170 + rtn = shmdt((char *)Recovery_Status_Inf);
26171 + shmctl(RecoveryShmid,IPC_RMID,(struct shmid_ds *)NULL);
26173 + if (PGR_ReplicateSerializationID!=NULL)
26175 + shmdt(PGR_ReplicateSerializationID);
26176 + shmctl(ReplicateSerializationShmid,IPC_RMID,(struct shmid_ds *)NULL);
26179 + if (RecoveryMsgid)
26181 + if (*RecoveryMsgid >= 0) /* the queue id lives in shared memory, so the queue is removed before that segment is detached */
26182 + msgctl(*RecoveryMsgid,IPC_RMID,(struct msqid_ds *)NULL);
26184 + shmdt(RecoveryMsgid);
26185 + shmctl(RecoveryMsgShmid, IPC_RMID, NULL);
26188 + if (StatusFp != NULL)
26190 + fflush(StatusFp);
26191 + fclose(StatusFp);
26194 + if (LogFp != NULL)
26201 + if (PGR_Result != NULL) /* heap buffers: free and reset each pointer */
26203 + free(PGR_Result);
26204 + PGR_Result = NULL;
26206 + if (PGR_Response_Inf != NULL)
26208 + free(PGR_Response_Inf);
26209 + PGR_Response_Inf = NULL;
26212 + if (LoadBalanceTbl != NULL)
26214 + free(LoadBalanceTbl);
26215 + LoadBalanceTbl = NULL;
26218 + if (PGR_Log_Header != NULL)
26220 + free(PGR_Log_Header);
26221 + PGR_Log_Header = NULL;
26224 + if (PGR_Send_Query_ID != NULL)
26226 + free(PGR_Send_Query_ID);
26227 + PGR_Send_Query_ID = NULL;
26230 + if (CascadeSemID > 0)
26232 + sem_quit(CascadeSemID);
26233 + CascadeSemID = 0;
26240 + if (RecoverySemID > 0)
26242 + sem_quit(RecoverySemID);
26243 + RecoverySemID = 0;
26245 + if (VacuumSemID > 0)
26247 + sem_quit(VacuumSemID);
26250 + snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGREPLICATE_PID_FILE); /* path of the pid file under PGR_Write_Path */
26253 + /* close socket between rlog process */
26255 + if (Replicateion_Log->r_log_sock >= 0) /* NOTE(review): Replicateion_Log is dereferenced without a visible NULL check — confirm it is always allocated before this runs */
26257 + close(Replicateion_Log->r_log_sock);
26258 + Replicateion_Log->r_log_sock = -1;
26260 + if (Replicateion_Log->RLog_Sock_Path != NULL)
26262 + unlink(Replicateion_Log->RLog_Sock_Path);
26263 + free(Replicateion_Log->RLog_Sock_Path);
26264 + Replicateion_Log->RLog_Sock_Path = NULL;
26267 + if (ResolvedName != NULL)
26269 + free(ResolvedName);
26270 + ResolvedName = NULL;
26272 + exit(exit_status);
26276 +send_cluster_status_to_load_balance(HostTbl * host_ptr,int status) /* Builds a RecoveryPacket carrying status, host name and port, and sends it with PGRsend_load_balance_packet(). */
26278 + RecoveryPacket packet;
26281 + memset(&packet,0,sizeof(RecoveryPacket));
26282 + packet.packet_no = htons(status); /* numeric fields are sent in network byte order */
26283 + strncpy(packet.hostName,host_ptr->hostName,sizeof(packet.hostName));
26284 + packet.port = htons(host_ptr->port);
26285 + rtn = PGRsend_load_balance_packet(&packet);
26290 +PGRset_host_status(HostTbl * host_ptr,int status) /* Sets host_ptr->useFlag to status. Returns STATUS_ERROR for a NULL host, otherwise STATUS_OK. */
26292 + if (host_ptr == NULL)
26294 + return STATUS_ERROR;
26296 + if (host_ptr->useFlag != status) /* side effects below happen only on an actual change */
26298 + host_ptr->useFlag = status;
26299 + if (status == DB_TBL_ERROR )
26301 + host_ptr->transaction_count = 0; /* a failed host drops its open-transaction count and the load balancer is told */
26302 + send_cluster_status_to_load_balance(host_ptr,RECOVERY_ERROR_CONNECTION);
26304 + write_host_status_file(host_ptr);
26306 + return STATUS_OK;
26310 +write_host_status_file(HostTbl * host_ptr) /* Writes one line to StatusFp describing the host's current useFlag. */
26312 + switch( host_ptr->useFlag) /* one message per status value */
26314 + case DB_TBL_FREE:
26315 + PGRwrite_log_file(StatusFp,"port(%d) host:%s free",
26317 + host_ptr->hostName);
26319 + case DB_TBL_INIT:
26320 + PGRwrite_log_file(StatusFp,"port(%d) host:%s initialize",
26322 + host_ptr->hostName);
26325 + PGRwrite_log_file(StatusFp,"port(%d) host:%s start use",
26327 + host_ptr->hostName);
26329 + case DB_TBL_ERROR:
26330 + PGRwrite_log_file(StatusFp,"port(%d) host:%s error",
26332 + host_ptr->hostName);
26335 + PGRwrite_log_file(StatusFp,"port(%d) host:%s end",
26337 + host_ptr->hostName);
26343 +check_result( PGresult * res ) /* Maps a libpq result status to STATUS_OK or STATUS_ERROR. */
26347 + status = PQresultStatus(res);
26348 + if ((status == PGRES_NONFATAL_ERROR ) || /* both error classes count as failure */
26349 + (status == PGRES_FATAL_ERROR ))
26351 + return STATUS_ERROR;
26353 + return STATUS_OK;
26357 +compare_results(int *results, int size, int source_id) /* Checks the entries of results[0..size-1], leaving out index source_id, against one another. */
26361 + for (i = 0; i < size; i++) /* first pass: find the first index other than source_id and use its value as the reference */
26363 + if (i != source_id)
26365 + prev = results[i];
26370 + for (; i < size; i++) /* second pass continues from the index where the first pass stopped */
26372 + if (i == source_id) /* the source's own slot is not compared */
26374 + if (prev != results[i]) /* a value that differs from its predecessor */
26376 + prev = results[i];
26381 +/*--------------------------------------------------
26383 + * PGRsend_replicate_packet_to_server()
26385 + * Send query data to the cluster DB and receive result data.
26387 + * HostTbl * host_ptr: the record of cluster DB table (target)
26388 + * ReplicateHeader * header: header data
26389 + * char *query: query data
26390 + * char * result: returned result data
26393 + * STATUS_ERROR: NG
26394 + * STATUS_LOCK_CONFLICT: Lock conflicted
26395 + *---------------------------------------------------
26398 +PGRsend_replicate_packet_to_server( HostTbl * host_ptr, ReplicateHeader * header, char *query , char * result,unsigned int replicationId, bool recovery) /* replicationId is passed on unchanged to send_replicate_packet_to_server(); recovery == true selects the retrying connection setup below */
26400 + char * func = "PGRsend_replicate_packet_to_server()";
26401 + TransactionTbl * transaction_tbl = NULL;
26402 + char *database = NULL;
26404 + char *userName = NULL;
26405 + char * password = NULL;
26406 + char * host = NULL;
26407 + char * md5Salt = NULL;
26408 + char * cryptSalt = NULL;
26410 + int current_cluster = 0;
26411 + int query_size = 0;
26413 + if ((query == NULL) || (header == NULL))
26415 + show_error("%s: query is broken",func);
26416 + return STATUS_ERROR;
26418 + query_size = ntohl(header->query_size); /* converted from network byte order; a negative value after the int conversion is rejected */
26419 + if (query_size < 0)
26421 + show_error("%s: query size is broken",func);
26422 + return STATUS_ERROR;
26424 + if (host_ptr == NULL)
26426 + return STATUS_ERROR;
26429 + if (PGR_Response_Inf != NULL)
26431 + current_cluster = PGR_Response_Inf->current_cluster; /* index into StartReplication[]; stays 0 when PGR_Response_Inf is NULL */
26435 + * set up the connection
26437 + database = (char *)header->dbName;
26438 + snprintf(port,sizeof(port),"%d", host_ptr->port);
26439 + userName = (char *)(header->userName);
26440 + password = (char *)(header->password);
26441 + md5Salt = (char *)(header->md5Salt);
26442 + cryptSalt = (char *)(header->cryptSalt);
26443 + host = (char *)(host_ptr->resolvedName);
26445 + * get the transaction table data
26446 + * it has the connection data with each cluster DB
26448 + transaction_tbl = getTransactionTbl(host_ptr,header);
26450 + * if the transaction process is new one,
26451 + * create connection data and add the transaction table
26453 + if (transaction_tbl == (TransactionTbl *)NULL)
26455 + if (recovery == true) /* recovery mode: keep calling setTransactionTbl() until it succeeds or cnt passes RECOVERY_TIMEOUT */
26458 + while(transaction_tbl == (TransactionTbl *)NULL)
26460 + transaction_tbl = setTransactionTbl(host_ptr, header);
26461 + if (cnt > RECOVERY_TIMEOUT)
26471 + transaction_tbl = setTransactionTbl(host_ptr, header); /* non-recovery path: a single attempt */
26473 + if (transaction_tbl == (TransactionTbl *)NULL)
26475 + show_error("%s:setTransactionTbl failed",func);
26476 + if ( header->cmdSts != CMD_STS_NOTICE ) /* a failed notice command does not mark the host as failed */
26478 + PGRset_host_status(host_ptr,DB_TBL_ERROR);
26480 + return STATUS_ERROR;
26482 + StartReplication[current_cluster] = true; /* new connection: the flag makes the callee send the sync command first */
26487 + * re-use the connection data
26489 + if ((transaction_tbl->conn != (PGconn *)NULL) &&
26490 + (transaction_tbl->conn->sock > 0)) /* reads the PGconn sock field directly to judge whether the connection is usable */
26492 + StartReplication[current_cluster] = false;
26496 + if (transaction_tbl->conn != (PGconn *)NULL) /* unusable connection: close it and connect again */
26498 + PQfinish(transaction_tbl->conn);
26499 + transaction_tbl->conn = NULL;
26501 + transaction_tbl->conn = PGRcreateConn(host,port,database,userName,password,md5Salt,cryptSalt);
26502 + StartReplication[current_cluster] = true;
26505 + if(header->cmdSts==CMD_STS_OTHER && /* connection-close request: only table cleanup, nothing is sent */
26506 + header->cmdType==CMD_TYPE_CONNECTION_CLOSE)
26508 + check_delete_transaction(host_ptr, header);
26509 + return STATUS_OK;
26511 +#ifdef PRINT_DEBUG
26512 + show_debug("%s:connect db:%s port:%s user:%s host:%s query:%s",
26513 + func, database,port,userName,host,query);
26515 + rtn = send_replicate_packet_to_server( transaction_tbl, current_cluster, host_ptr, header, query ,result ,replicationId, recovery);
26520 +send_replicate_packet_to_server( TransactionTbl * transaction_tbl, int current_cluster, HostTbl * host_ptr, ReplicateHeader * header, char *query , char * result,unsigned int replicationId, bool recovery) /* Sends one replicated command over transaction_tbl->conn and writes a reply string into result. Returns STATUS_OK, STATUS_ERROR, STATUS_LOCK_CONFLICT or STATUS_DEADLOCK_DETECT. */
26522 + char * func = "send_replicate_packet_to_server()";
26523 + PGconn * conn = (PGconn *)NULL;
26524 + PGresult * res = (PGresult *)NULL;
26525 + char sync_command[256];
26526 + bool sync_command_flg = false;
26527 + char * str = NULL;
26529 + int query_size = 0;
26531 + StringInfoData input_message;
26533 + if (( transaction_tbl == (TransactionTbl *)NULL) ||
26534 + ( host_ptr == (HostTbl *) NULL) ||
26535 + (header == (ReplicateHeader *) NULL) ||
26536 + (query == NULL) ||
26537 + ( result == NULL))
26539 + show_error("%s:unexpected NULL variable",func);
26540 + return STATUS_ERROR;
26543 + query_size = ntohl(header->query_size);
26544 + if (query_size < 0)
26546 + show_error("%s: query size is broken",func);
26547 + return STATUS_ERROR;
26551 + if(header->cmdSts == CMD_STS_OTHER && /* connection-close request: only table cleanup */
26552 + header->cmdType == CMD_TYPE_CONNECTION_CLOSE)
26554 + check_delete_transaction(host_ptr,header);
26555 + return STATUS_OK;
26558 + conn = transaction_tbl->conn;
26559 + if (conn == NULL)
26561 + show_error("%s:[%d@%s] may be down",func,host_ptr->port,host_ptr->hostName);
26562 + if ( header->cmdSts != CMD_STS_NOTICE )
26564 + PGRset_host_status(host_ptr,DB_TBL_ERROR);
26566 + return STATUS_ERROR;
26568 + hostNum = host_ptr->hostNum; /* index into PGR_Send_Query_ID further down */
26571 + * When the query is transaction query...
26573 + if (is_need_sync_time(header) == true) /* decide whether the sync command has to precede this query */
26575 + if (transaction_tbl->transaction_count >1 )
26577 + sync_command_flg = false;
26581 + sync_command_flg = true;
26584 + if ((header->cmdSts == CMD_STS_TRANSACTION ) ||
26585 + (header->cmdSts == CMD_STS_SET_SESSION_AUTHORIZATION ))
26587 + if ((header->cmdSts == CMD_STS_TRANSACTION ) && /* transaction commands other than BEGIN, or a BEGIN when the count is already above 1, need no sync */
26588 + ((header->cmdType != CMD_TYPE_BEGIN) ||
26589 + (transaction_tbl->transaction_count >1 )))
26591 + sync_command_flg = false;
26599 + if (header->rlog > 0 ) /* replication log in use: a query the server has already executed is not sent again */
26602 + if (is_executed_query( conn, header) == true)
26604 + return STATUS_OK;
26608 +#ifdef PRINT_DEBUG
26609 + show_debug("%s:check replication log issue , id=%d,rlog=%d,query=%s status=not_replicated",func,ntohl(header->replicate_id),header->rlog,query);
26613 + if (( header->cmdSts != CMD_STS_NOTICE ) &&
26614 + ( header->cmdSts != CMD_STS_PREPARE ) &&
26615 + ((sync_command_flg == true) ||
26616 + (StartReplication[current_cluster] == true)))
26618 + snprintf(sync_command,sizeof(sync_command), /* sync command carrying timestamp, replicate id, response mode and serialization id; NOTE(review): PGR_Log_Header and PGR_Response_Inf are dereferenced without a visible NULL check */
26619 + "SELECT %s(%d,%u,%u,%u,%d,%u) ",
26620 + PGR_SYSTEM_COMMAND_FUNC,
26621 + PGR_SET_CURRENT_TIME_FUNC_NO,
26622 + (unsigned int)ntohl(header->tv.tv_sec),
26623 + (unsigned int)ntohl(header->tv.tv_usec),
26624 + (unsigned int)ntohl(PGR_Log_Header->replicate_id),
26625 + PGR_Response_Inf->response_mode,
26626 + *PGR_ReplicateSerializationID);
26627 +#ifdef PRINT_DEBUG
26628 + show_debug("%s:sync_command(%s)",func,sync_command);
26630 + res = PQexec(conn, sync_command);
26633 + StartReplication[current_cluster] = false;
26637 + if ((header->cmdType == CMD_TYPE_COPY_DATA) ||
26638 + (header->cmdType == CMD_TYPE_COPY_DATA_END))
26640 + /* copy data replication */
26641 + rtn =PQputnbytes(conn, query,query_size); /* COPY payload is streamed raw, not executed as SQL */
26642 + if (header->cmdType == CMD_TYPE_COPY_DATA_END)
26644 + rtn = PQendcopy(conn);
26645 + if (rtn == 1) /* failed */
26647 + if (transaction_tbl->conn != NULL)
26649 + PQfinish(transaction_tbl->conn);
26650 + transaction_tbl->conn = (PGconn *)NULL;
26651 + StartReplication[current_cluster] = true;
26655 + *(PGR_Send_Query_ID + hostNum ) = ntohl(header->query_id);
26656 + return STATUS_OK;
26658 + else if (header->cmdSts == CMD_STS_LARGE_OBJECT) /* large-object commands: query is reinterpreted as LOArgs */
26660 + replicate_lo(conn, header,(LOArgs *)query);
26661 + return STATUS_OK;
26664 + else if (header->cmdSts == CMD_STS_PREPARE) /* extended-protocol messages (parse/bind/describe/execute/sync/close) */
26667 + if ( !PGR_Parse_Session_Started)
26669 + snprintf(sync_command,sizeof(sync_command),
26670 + "SELECT %s(%d,%u,%u,%u,%d,%u) ",
26671 + PGR_SYSTEM_COMMAND_FUNC,
26672 + PGR_SET_CURRENT_TIME_FUNC_NO,
26673 + (unsigned int)ntohl(header->tv.tv_sec),
26674 + (unsigned int)ntohl(header->tv.tv_usec),
26675 + (unsigned int)ntohl(PGR_Log_Header->replicate_id),
26676 + PGR_Response_Inf->response_mode,
26677 + *PGR_ReplicateSerializationID);
26678 + res = PQexec(conn, sync_command);
26684 + while ((res = PQgetResult(conn)) != NULL) /* drain pending results before sending the next protocol message */
26686 + if (res->resultStatus == PGRES_COPY_IN)
26689 + return STATUS_ERROR;
26691 + else if (res->resultStatus == PGRES_COPY_OUT)
26693 + conn->asyncStatus = PGASYNC_BUSY; /* writes a libpq-internal field directly */
26695 + else if (conn->status == CONNECTION_BAD)
26698 + return STATUS_ERROR;
26703 + set_string_info(&input_message,header,query);
26704 + switch (header->cmdType) /* each case sends one message type; a send failure ends the parse session */
26706 + case CMD_TYPE_P_PARSE :
26707 + if (send_p_parse(conn, &input_message) != STATUS_OK)
26709 + pqHandleSendFailure(conn);
26710 + PGR_Parse_Session_Started = false;
26711 + return STATUS_ERROR;
26714 + case CMD_TYPE_P_BIND :
26715 + if (send_p_bind(conn, &input_message) != STATUS_OK)
26717 + pqHandleSendFailure(conn);
26718 + PGR_Parse_Session_Started = false;
26719 + return STATUS_ERROR;
26722 + case CMD_TYPE_P_DESCRIBE :
26723 + if (send_p_describe(conn, &input_message) != STATUS_OK)
26725 + pqHandleSendFailure(conn);
26726 + PGR_Parse_Session_Started = false;
26727 + return STATUS_ERROR;
26730 + case CMD_TYPE_P_EXECUTE :
26731 + if (send_p_execute(conn,&input_message) != STATUS_OK)
26733 + pqHandleSendFailure(conn);
26734 + PGR_Parse_Session_Started = false;
26735 + return STATUS_ERROR;
26738 + case CMD_TYPE_P_SYNC :
26739 + if (send_p_sync(conn, &input_message) != STATUS_OK)
26741 + pqHandleSendFailure(conn);
26742 + PGR_Parse_Session_Started = false;
26743 + return STATUS_ERROR;
26746 + case CMD_TYPE_P_CLOSE :
26747 + if (send_p_close(conn, &input_message) != STATUS_OK)
26749 + pqHandleSendFailure(conn);
26750 + PGR_Parse_Session_Started = false;
26751 + return STATUS_ERROR;
26757 + return STATUS_OK;
26761 + if (transaction_tbl->lock != STATUS_OK) /* the previous attempt on this entry ended in a lock conflict or deadlock */
26763 +#ifdef PRINT_DEBUG
26764 + show_debug("%s:[%d]transaction_tbl->lock is [%d]",func,current_cluster,transaction_tbl->lock );
26766 + transaction_tbl->lock = STATUS_OK;
26768 + snprintf(sync_command,sizeof(sync_command),
26769 + "SELECT %s(%d,%u,%u,%d) ",
26770 + PGR_SYSTEM_COMMAND_FUNC,
26771 + PGR_SET_CURRENT_REPLICATION_QUERY_ID_NO,
26774 + PGR_Response_Inf->response_mode);
26775 + res = PQexec(conn, sync_command);
26781 + res = PQexec(conn, query); /* ordinary SQL: execute it and classify the outcome */
26782 + rtn = check_result(res);
26783 +#ifdef PRINT_DEBUG
26784 + show_debug("%s:PQexec send :%s",func,query);
26791 + StartReplication[current_cluster] = true;
26792 + return STATUS_ERROR;
26795 + str = PQcmdStatus(res);
26796 +#ifdef PRINT_DEBUG
26797 + show_debug("%s:PQexec returns :%s",func,str);
26799 + if ((str == NULL) || (*str == '\0')) /* no command status: report the error message, or a bare "E", to the caller */
26801 + if ((result != NULL) && (res != NULL) && (res->errMsg != NULL))
26803 + snprintf(result,PGR_MESSAGE_BUFSIZE,"E%s",res->errMsg);
26807 + strcpy(result,"E");
26809 + StartReplication[current_cluster] = true;
26813 + if (!strncasecmp(str,PGR_LOCK_CONFLICT_NOTICE_CMD,strlen(PGR_LOCK_CONFLICT_NOTICE_CMD))) /* the server signals a lock conflict or deadlock through the command-status string */
26815 +#ifdef PRINT_DEBUG
26816 + show_debug("%s:LOCK CONFLICT from PQexec",func);
26821 + transaction_tbl->lock = STATUS_LOCK_CONFLICT;
26822 + return STATUS_LOCK_CONFLICT;
26824 + else if (!strncasecmp(str,PGR_DEADLOCK_DETECT_NOTICE_CMD,strlen(PGR_DEADLOCK_DETECT_NOTICE_CMD)))
26826 +#ifdef PRINT_DEBUG
26827 + show_debug("%s:DEADLOCK DETECTED from PQexec",func);
26831 + transaction_tbl->lock = STATUS_DEADLOCK_DETECT;
26832 + return STATUS_DEADLOCK_DETECT;
26834 + snprintf(result,PGR_MESSAGE_BUFSIZE,"C%s",str); /* success reply: "C" followed by the command status */
26839 + /* set send query id */
26840 + *(PGR_Send_Query_ID + hostNum ) = ntohl(header->query_id);
26843 + * if the query is end transaction process...
26845 + check_delete_transaction(host_ptr,header);
26847 + return STATUS_OK;
26851 +check_delete_transaction (HostTbl * host_ptr, ReplicateHeader * header) /* On a connection-close command, removes the transaction entry for (host_ptr, header). Returns STATUS_ERROR for NULL arguments. */
26853 + char *database = NULL;
26855 + if ((host_ptr == NULL) || (header == NULL))
26857 + return STATUS_ERROR;
26859 + database = (char *)header->dbName;
26860 + if(header->cmdSts == CMD_STS_OTHER &&
26861 + header->cmdType == CMD_TYPE_CONNECTION_CLOSE)
26863 + notice_abort(host_ptr, header); /* called before the entry and its connection are removed */
26864 + deleteTransactionTbl(host_ptr,header);
26867 + delete_template(host_ptr, header);
26868 + return STATUS_OK;
26872 +check_transaction_status(ReplicateHeader * header, /* Updates the entry's transaction nesting count, in_transaction flag and exec_copy flag from the command in header. */
26873 + TransactionTbl *transaction)
26875 + if (header == (ReplicateHeader *)NULL)
26879 + if (header->cmdSts == CMD_STS_TRANSACTION )
26881 + if (header->cmdType == CMD_TYPE_BEGIN ) /* BEGIN: one level deeper */
26883 + if (transaction != NULL)
26885 + transaction->in_transaction = true;
26886 + transaction->transaction_count ++;
26889 + else if ((header->cmdType == CMD_TYPE_COMMIT) || /* COMMIT or ROLLBACK: one level up, never below zero */
26890 + (header->cmdType == CMD_TYPE_ROLLBACK))
26892 + if (transaction != NULL)
26894 + if (transaction->transaction_count > 0)
26896 + transaction->transaction_count --;
26898 + if (transaction->transaction_count == 0) /* the flag is cleared only when the outermost level closes */
26900 + transaction->in_transaction = false;
26907 + if ( header->cmdType == CMD_TYPE_COPY ) /* exec_copy is true from COPY until COPY_DATA_END */
26909 + if (transaction != NULL)
26911 + transaction->exec_copy = true;
26914 + else if (header->cmdType == CMD_TYPE_COPY_DATA_END)
26916 + if (transaction != NULL)
26918 + transaction->exec_copy = false;
26925 +check_host_transaction_status(ReplicateHeader * header, /* Maintain the per-host open-transaction counter from BEGIN/COMMIT/ROLLBACK packets and advance the global recovery status based on that counter. */
26928 + int recovery_status = 0;
26930 + if ((header == (ReplicateHeader *)NULL) || (host == (HostTbl *)NULL))
26934 + if (header->cmdType == CMD_TYPE_BEGIN )
26936 + host->transaction_count++;
26938 + else if ((header->cmdType == CMD_TYPE_COMMIT) ||
26939 + (header->cmdType == CMD_TYPE_ROLLBACK))
26941 + if (host->transaction_count > 0) /* do not underflow on an unmatched COMMIT/ROLLBACK */
26942 + host->transaction_count--;
26945 + recovery_status = PGRget_recovery_status();
26946 + if ((recovery_status == RECOVERY_PREPARE_START) &&
26947 + (host->transaction_count > 0)) /* transactions still open on this host: move to WAIT_CLEAN */
26949 + PGRset_recovery_status(RECOVERY_WAIT_CLEAN);
26951 + else if ((recovery_status == RECOVERY_PREPARE_START) &&
26952 + (host->transaction_count==0)) /* nothing open: go straight to CLEARED */
26954 + PGRset_recovery_status(RECOVERY_CLEARED);
26956 + else if ((recovery_status == RECOVERY_WAIT_CLEAN) &&
26957 + (host->transaction_count==0)) /* last open transaction finished while waiting */
26959 + PGRset_recovery_status(RECOVERY_CLEARED);
26965 +create_queue_file(void) /* Create a new, uniquely named recovery queue file, announce its name on the *RecoveryMsgid message queue and record it in Recovery_Status_Inf->write_file. Returns NULL on every failure path visible here. */
26967 + char * func = "create_queue_file()";
26968 + FILE * fp = NULL;
26969 + struct timeval tv;
26970 + char fname[FILENAME_MAX_LENGTH];
26973 + RecoveryQueueFile * msg = NULL;
26975 + if (*RecoveryMsgid < 0) /* no usable message queue id */
26977 + return (FILE *)NULL;
26979 + /* create unique file name from the current time (seconds and microseconds) */
26980 + gettimeofday(&tv,NULL);
26981 + memset(fname,0,sizeof(fname));
26982 + snprintf(fname,sizeof(fname),"%s/%s_%u.%u",
26984 + RECOVERY_QUEUE_FILE,
26985 + (uint32_t)tv.tv_sec,
26986 + (uint32_t)tv.tv_usec);
26988 + size = sizeof(fname) + sizeof(RecoveryQueueFile); /* message struct plus room for the whole file-name buffer */
26989 + msg = (RecoveryQueueFile *)malloc(size);
26992 + show_error("%s:malloc() failed. reason: %s", func, strerror(errno));
26993 + return (FILE *)NULL;
26995 + memset(msg,0,size);
26996 + msg->mtype = RECOVERY_FILE_MTYPE;
26997 + strncpy(msg->mdata,fname,sizeof(fname));
26999 + fp = fopen(fname,"a");
27002 + show_error("%s:fopen failed: (%s)",func,strerror(errno));
27003 + return (FILE *)NULL; /* NOTE(review): 'msg' allocated above does not appear to be freed on this path - confirm */
27006 + rtn = msgsnd(*RecoveryMsgid, msg, sizeof(fname), IPC_NOWAIT); /* IPC_NOWAIT: fail instead of blocking when the queue is full */
27009 + show_error("%s:msgsnd failed. reason: %s", func, strerror(errno));
27011 + msgctl(*RecoveryMsgid, IPC_RMID, NULL); /* on send failure the message queue is removed ... */
27012 + *RecoveryMsgid = msgget (IPC_PRIVATE, 00666 | IPC_CREAT ); /* ... and a fresh private queue is created in its place */
27013 + return (FILE *)NULL;
27016 + strncpy(Recovery_Status_Inf->write_file,fname,sizeof(Recovery_Status_Inf->write_file)); /* publish the current queue file name */
27021 +add_queue_file(char * data,int size) /* Append 'size' bytes from 'data' to the open queue file QueueFp and add 'size' to Recovery_Status_Inf->file_size. Returns STATUS_OK on success, STATUS_ERROR on bad arguments or once the retry limit is exceeded. */
27025 + if ((QueueFp == NULL) || (data == NULL) || (size < 0))
27027 + return STATUS_ERROR;
27029 + /*fseek(QueueFp,0,SEEK_END);*/
27030 + while (fwrite(data, size,1,QueueFp) <= 0) /* writes one item of 'size' bytes; NOTE(review): size == 0 passes the check above but fwrite then returns 0 - confirm callers never pass 0 */
27034 + if (cnt > MAX_RETRY_TIMES)
27036 + return STATUS_ERROR;
27038 + QueueFp = create_queue_file(); /* retry against a newly created queue file */
27041 + Recovery_Status_Inf->file_size += size;
27042 + return STATUS_OK;
27046 + * set query in queue
27049 +PGRset_queue(ReplicateHeader * header,char * query) /* Append one replication packet (header, then the query bytes if query_size > 0) to the recovery queue file while holding the recovery-queue semaphore. Returns STATUS_OK or STATUS_ERROR. */
27051 + char * func = "PGRset_queue()";
27052 + int header_size = 0;
27053 + int query_size = 0;
27055 + if ((Recovery_Status_Inf == NULL) || (header == NULL))
27057 + show_error("%s:header is null",func); /* note: this message is also emitted when Recovery_Status_Inf is NULL */
27058 + return STATUS_ERROR;
27061 + query_size = ntohl(header->query_size); /* header fields are in network byte order */
27062 + if (query_size < 0)
27064 + show_error("%s:query size less than 0",func);
27065 + return STATUS_ERROR;
27067 + header_size = sizeof(ReplicateHeader);
27069 + if (RecoverySemID <= 0)
27071 + show_error("%s:RecoverySemID is not initialized",func);
27072 + return STATUS_ERROR;
27074 + PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE); /* every return below this point releases the semaphore first */
27075 + /* check existence of queue file */
27076 + if (Recovery_Status_Inf->write_file[0] == '\0')
27078 + /* create new queue file */
27079 + Recovery_Status_Inf->file_size = 0;
27080 + QueueFp = create_queue_file();
27084 + /* check size of queue file */
27085 + if (Recovery_Status_Inf->file_size + header_size + query_size > MAX_QUEUE_FILE_SIZE)
27087 + /* if the file size is over the limit, create new queue file */
27088 + memset(Recovery_Status_Inf->write_file,0,sizeof(Recovery_Status_Inf->write_file));
27090 + Recovery_Status_Inf->file_size = 0;
27091 + QueueFp = create_queue_file();
27095 + QueueFp= fopen(Recovery_Status_Inf->write_file,"a"); /* reopen the recorded queue file in append mode */
27098 + if (QueueFp == (FILE *)NULL)
27100 + PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
27101 + show_error("%s:QueueFp open failed. error is %s",func,strerror(errno));
27102 + return STATUS_ERROR;
27104 + header->replicate_id = htonl(*PGR_ReplicateSerializationID); /* stamp the caller's header with the current serialization id before it is written */
27105 + if (add_queue_file((char *)header,header_size) != STATUS_OK)
27107 + PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
27108 + show_error("%s:header add failed into queue file",func);
27109 + return STATUS_ERROR;
27111 + if (query_size > 0)
27113 + if (add_queue_file((char *)query,query_size) != STATUS_OK)
27115 + PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
27116 + show_error("%s:queue add failed into queue file",func);
27117 + return STATUS_ERROR;
27122 + PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
27124 + return STATUS_OK;
27128 +PGRget_HostTbl(char * resolvedName, int port) /* Look up a host table entry by resolved name and port, scanning from Host_Tbl_Begin up to the DB_TBL_END marker. Returns NULL when no entry matches. */
27130 + HostTbl * ptr = NULL;
27133 + if (Host_Tbl_Begin == NULL)
27137 + len = strlen(resolvedName);
27138 + ptr = Host_Tbl_Begin;
27139 + if (len > sizeof(ptr->resolvedName)) /* clamp the compare length to the size of the stored field */
27141 + len = sizeof(ptr->resolvedName);
27143 + while(ptr->useFlag != DB_TBL_END)
27145 + if ((! memcmp(ptr->resolvedName,resolvedName,len)) && /* NOTE(review): only the first 'len' bytes are compared (terminator excluded), so a stored name that merely starts with resolvedName also matches - confirm intended */
27146 + (ptr->port == port))
27152 + return (HostTbl*)NULL;
27156 +sem_quit(int semid) /* Remove the System V semaphore set identified by 'semid'. */
27158 + semctl(semid, 0, IPC_RMID);
27162 +PGRclear_connections(void) /* Walk the transaction table list and close (PQfinish) every connection that is still set, holding transaction_table_mutex throughout. */
27164 + Dlelem *ptr = NULL;
27166 + pthread_mutex_lock(&transaction_table_mutex);
27167 + ptr = DLGetHead(Transaction_Tbl_Begin);
27170 + TransactionTbl *transaction = DLE_VAL(ptr);
27171 + if (transaction->conn != NULL)
27173 + PQfinish(transaction->conn);
27174 + transaction->conn = NULL; /* entry stays in the list; only its connection is dropped */
27176 + ptr = DLGetSucc(ptr);
27178 + pthread_mutex_unlock(&transaction_table_mutex);
27182 +PGRdestroy_transaction_table(void) /* Free the transaction table list and reset Transaction_Tbl_Begin to NULL, holding transaction_table_mutex. The per-element work inside the walk is not visible in this excerpt. */
27184 + Dlelem *ptr = NULL, *next;
27185 + pthread_mutex_lock(&transaction_table_mutex);
27186 + ptr = DLGetHead(Transaction_Tbl_Begin);
27189 + next = DLGetSucc(ptr); /* successor is fetched before the current element is processed */
27194 + DLFreeList(Transaction_Tbl_Begin);
27195 + Transaction_Tbl_Begin = NULL;
27196 + pthread_mutex_unlock(&transaction_table_mutex);
27200 +is_need_sync_time(ReplicateHeader * header) /* Classify the packet by cmdSts/cmdType and return a bool. Callers use the result to decide whether the packet timestamp is sent to the cluster DB; which branches set the result to true is not visible in this excerpt - TODO confirm. */
27202 + bool rtn = false;
27204 + if (header->cmdSts == CMD_STS_PREPARE) /* prepared-statement protocol packets */
27208 + else if ((header->cmdType == CMD_TYPE_COPY) ||
27209 + (header->cmdType == CMD_TYPE_COPY_DATA) ||
27210 + (header->cmdType == CMD_TYPE_COPY_DATA_END)) /* any stage of a COPY */
27214 + if ((header->cmdSts == CMD_STS_QUERY ) &&
27215 + ((header->cmdType == CMD_TYPE_INSERT) ||
27216 + (header->cmdType == CMD_TYPE_UPDATE) ||
27217 + (header->cmdType == CMD_TYPE_DELETE) ||
27218 + (header->cmdType == CMD_TYPE_SET) ||
27219 + (header->cmdType == CMD_TYPE_EXECUTE))) /* data-modifying or session-changing plain queries */
27225 + if ((header->cmdType == CMD_TYPE_COPY) ||
27226 + (header->cmdType == CMD_TYPE_SELECT) ||
27227 + (header->cmdType == CMD_TYPE_VACUUM) ||
27228 + (header->cmdType == CMD_TYPE_ANALYZE) ||
27229 + (header->cmdType == CMD_TYPE_BEGIN))
27233 + if ((header->cmdSts == CMD_STS_TRANSACTION ) &&
27234 + (header->cmdType != CMD_TYPE_BEGIN)) /* transaction control other than BEGIN */
27243 +is_need_wait_answer(ReplicateHeader * header) /* Classify the packet by cmdSts/cmdType and return a bool. Callers use the result to choose between PGR_WAIT_ANSWER and PGR_NOWAIT_ANSWER; which branches set the result to true is not visible in this excerpt - TODO confirm. */
27245 + bool rtn = false;
27247 + if (header->cmdSts == CMD_STS_PREPARE) /* prepared-statement protocol packets */
27251 + else if ((header->cmdType == CMD_TYPE_COPY) ||
27252 + (header->cmdType == CMD_TYPE_COPY_DATA) ||
27253 + (header->cmdType == CMD_TYPE_COPY_DATA_END)) /* any stage of a COPY */
27257 + else if ((header->cmdSts == CMD_STS_QUERY ) &&
27258 + ((header->cmdType == CMD_TYPE_INSERT) ||
27259 + (header->cmdType == CMD_TYPE_UPDATE) ||
27260 + (header->cmdType == CMD_TYPE_DELETE) ||
27261 + (header->cmdType == CMD_TYPE_VACUUM) ||
27262 + (header->cmdType == CMD_TYPE_ANALYZE) ||
27263 + (header->cmdType == CMD_TYPE_EXECUTE))) /* plain queries that modify data or run maintenance */
27267 + else if ((header->cmdSts == CMD_STS_TRANSACTION ) ||
27268 + (header->cmdSts == CMD_STS_SET_SESSION_AUTHORIZATION ) ||
27269 + (header->cmdSts == CMD_STS_TEMP_TABLE ) ||
27270 + (header->cmdType == CMD_TYPE_SELECT)) /* session-scoped commands and SELECT */
27279 +delete_template(HostTbl * ptr, ReplicateHeader * header) /* For packets whose database name starts with "template1" or "template0", delete the transaction table entry unless the command is a transaction, SET SESSION AUTHORIZATION or temp-table command. */
27281 + if ((ptr == (HostTbl *)NULL ) ||
27282 + (header == (ReplicateHeader *)NULL) )
27287 + if ((! strncmp(header->dbName,"template1",9)) || /* compares 9 characters only, so this is a prefix match */
27288 + (! strncmp(header->dbName,"template0",9)))
27290 + if ((header->cmdSts != CMD_STS_TRANSACTION ) &&
27291 + ( header->cmdSts != CMD_STS_SET_SESSION_AUTHORIZATION ) &&
27292 + ( header->cmdSts != CMD_STS_TEMP_TABLE )) /* these three command classes keep their entry */
27294 + deleteTransactionTbl(ptr,header);
27299 +/*--------------------------------------------------------------------
27301 + * check_copy_command()
27303 + * check whether the query is a COPY command
27304 + * when the query is 'copy from', set 'stdin' after 'from'
27306 + * char * query: query strings(I)
27308 + * copy command : changed copy command
27309 + * other command : NULL
27310 + *--------------------------------------------------------------------
27313 +check_copy_command(char * query) /* Build a malloc'ed rewrite of a COPY ... FROM query with " stdin " inserted via the "%s stdin %s" format below. The caller in PGRdo_replicate() frees the result. How p1 and p2 are derived is not visible in this excerpt. */
27316 + char * p1, *p2, *wp;
27320 + if (query == NULL)
27322 + size = strlen(query) + strlen(" stdin "); /* upper bound for the rewritten query */
27324 + wp = strstr(p,"FROM");
27326 + wp = strstr(p,"from"); /* NOTE(review): only all-upper and all-lower spellings are searched in the visible lines; mixed case such as "From" would not match - confirm */
27330 + p = wp + strlen("FROM");
27333 + while ((isspace(*p)) && (*p != '\0')) p++; /* skip blanks after FROM ... */
27334 + while ((!isspace(*p)) && (*p != '\0')) p++; /* ... then skip the token that follows it */
27336 + buf = malloc(size);
27341 + snprintf(buf,size,"%s stdin %s",p1,p2);
27348 +next_replication_id(void) /* Increment the shared replication id, decrement the check-point countdown and return the new replication id. */
27350 + char * func = "next_replication_id()";
27352 + if (Recovery_Status_Inf == (RecoveryStatusInf *)NULL)
27354 + show_error("%s: Recovery_Status_Inf is NULL",func);
27357 + Recovery_Status_Inf->replication_id ++;
27358 + Recovery_Status_Inf->check_point --; /* countdown that check_replication_id() tests against zero */
27359 + return (Recovery_Status_Inf->replication_id);
27363 +check_replication_id(void) /* Once the check-point countdown has dropped below zero, reset it to PGR_CHECK_POINT. Also writes replication_id + PGR_CHECK_POINT to RidFp; whether that write sits inside the same branch is not visible in this excerpt. */
27365 + char * func = "check_replication_id()";
27367 + if (Recovery_Status_Inf == (RecoveryStatusInf *)NULL)
27369 + show_error("%s: Recovery_Status_Inf is NULL",func);
27372 + if (Recovery_Status_Inf->check_point < 0)
27374 + Recovery_Status_Inf->check_point = PGR_CHECK_POINT ;
27376 + PGRwrite_log_file(RidFp,"%u",Recovery_Status_Inf->replication_id + PGR_CHECK_POINT ); /* logs the id one full check-point interval ahead of the current one */
27381 +PGRset_replication_id(uint32_t id) /* Overwrite the shared replication id with 'id' and return the stored value. NOTE(review): unlike next_replication_id(), no NULL check of Recovery_Status_Inf is visible here - confirm. */
27383 + Recovery_Status_Inf->replication_id = id;
27384 + return (Recovery_Status_Inf->replication_id);
27388 +PGRdo_replicate(int sock,ReplicateHeader *header, char * query) /* Prepare one received packet for replication (COPY rewrite, autocommit tracking, timestamp, query id, log-header snapshot) and pass it to PGRreplicate_packet_send(). */
27391 + char * func = "PGRdo_replicate()";
27393 + struct timeval tv;
27394 + int status = STATUS_OK;
27395 + int recovery_status = 0;
27396 + char * query_string = NULL;
27398 + if (header->cmdType == CMD_TYPE_COPY)
27400 + query_string = check_copy_command(query); /* allocated rewrite; freed further down after sending */
27401 + if (query_string == NULL)
27403 + return LOOP_CONTINUE;
27408 + query_string = query;
27409 + if (header->cmdType == CMD_TYPE_SET) /* track SET autocommit on/off in PGR_AutoCommit */
27411 + if (is_autocommit_off(query_string) == true)
27413 + PGR_AutoCommit = false;
27415 + else if (is_autocommit_on(query_string) == true)
27417 + PGR_AutoCommit = true;
27421 + header->isAutoCommit=PGR_AutoCommit ? 1 : 0;
27422 + gettimeofday(&tv,NULL);
27423 + header->tv.tv_sec = htonl(tv.tv_sec); /* timestamp travels in network byte order */
27424 + header->tv.tv_usec = htonl(tv.tv_usec);
27425 +#ifdef PRINT_DEBUG
27426 + show_debug("%s:query :: %s",func,query_string);
27429 + /* set query id */
27430 + header->query_id = htonl(PGRget_next_query_id());
27432 + /* save header for logging */
27433 + if (is_need_sync_time(header) == true)
27435 + if (PGR_Log_Header != NULL)
27437 + memcpy(PGR_Log_Header,header,sizeof(ReplicateHeader));
27438 + if (header->rlog == 0)
27440 + PGR_Log_Header->replicate_id = htonl(next_replication_id()); /* a new replication id is taken only when rlog is 0 */
27445 + if (header->rlog == CONNECTION_SUSPENDED_TYPE )
27447 + if (PGRget_rlog_header(header) == STATUS_OK)
27449 + header->rlog = CONNECTION_SUSPENDED_TYPE; /* set the flag again after PGRget_rlog_header() has filled 'header' */
27454 + /* check recovery mode */
27456 + recovery_status = PGRget_recovery_status();
27457 + PGRcheck_recovered_host();
27459 + /* send replication packet */
27460 + status = PGRreplicate_packet_send( header,query_string,sock,recovery_status);
27462 + if ((header->cmdType == CMD_TYPE_COPY) &&
27463 + (query_string != NULL)) /* only the COPY rewrite is owned by this function */
27465 + free(query_string);
27466 + query_string = NULL;
27469 + if (status == STATUS_ABORTED )
27471 +#ifdef PRINT_DEBUG
27472 + show_debug("%s:status is STATUS_ABORTED",func);
27476 + if (status == STATUS_DEADLOCK_DETECT)
27478 +#ifdef PRINT_DEBUG
27479 + show_debug("%s:status is STATUS_DEADLOCK_DETECT",func);
27483 + return LOOP_CONTINUE;
27486 +/*--------------------------------------------------------------------
27488 + * PGRreturn_result()
27490 + * Return result of execution
27492 + * int dest: socket of destination server (I)
27493 + * char *result: result data(I)
27494 + * int wait: wait flag (I)
27497 + * NG: STATUS_ERROR
27498 + * NG: STATUS_LOCK_CONFLICT
27499 + * NG: STATUS_DEADLOCK_DETECT
27500 + *--------------------------------------------------------------------
27503 +PGRreturn_result(int dest, char * result, int wait) /* Send the result buffer to socket 'dest' and, when wait == PGR_WAIT_ANSWER, read the peer's reply through read_answer(). */
27505 + char * func = "PGRreturn_result()";
27507 + struct timeval timeout;
27509 + char * send_ptr = NULL;
27510 + int send_size= 0;
27511 + int buf_size = 0;
27516 + if (result == NULL)
27518 + show_error("%s:result is not initialize",func);
27519 + return STATUS_ERROR;
27523 + return STATUS_ERROR;
27525 + send_ptr = result;
27526 + buf_size = PGR_MESSAGE_BUFSIZE; /* the whole fixed-size buffer is sent, not just the string it holds */
27527 + if (buf_size < 1)
27531 + * Wait for something to happen.
27533 +#ifdef MSG_DONTWAIT
27534 + flag |= MSG_DONTWAIT;
27536 +#ifdef MSG_NOSIGNAL
27537 + flag |= MSG_NOSIGNAL;
27542 + timeout.tv_sec = PGR_Replication_Timeout; /* select() timeout, in seconds */
27543 + timeout.tv_usec = 0;
27546 + FD_SET(dest,&wmask);
27548 + rtn = select(dest+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
27551 + if (errno == EINTR || errno == EAGAIN)
27554 + show_error("%s:select failed ,errno is %s",func , strerror(errno));
27555 + return STATUS_ERROR;
27557 + else if (rtn && FD_ISSET(dest, &wmask))
27559 + s = send(dest,send_ptr + send_size,buf_size - send_size ,flag); /* resume after the bytes already sent */
27562 + if (errno == EINTR || errno == EAGAIN)
27566 + show_error("%s:send error: %d(%s)", func, errno, strerror(errno));
27567 + memset(send_ptr, 0, PGR_MESSAGE_BUFSIZE); /* caller's buffer is cleared on failure */
27568 + return STATUS_ERROR;
27574 + if (send_size == buf_size) /* complete buffer delivered */
27577 + status = STATUS_OK;
27578 + if (wait == PGR_WAIT_ANSWER)
27580 + status = read_answer(dest);
27585 + else /* s == 0 */
27587 + show_error("%s:unexpected EOF", func);
27588 + memset(send_ptr, 0, PGR_MESSAGE_BUFSIZE);
27589 + return STATUS_ERROR;
27593 + memset(send_ptr, 0, PGR_MESSAGE_BUFSIZE);
27594 + return STATUS_ERROR;
27597 +/*--------------------------------------------------------------------
27601 + * Receive answer packet
27603 + * int dest: socket of destination server (I)
27606 + * NG: STATUS_ERROR
27607 + * NG: STATUS_LOCK_CONFLICT
27608 + * NG: STATUS_DEADLOCK_DETECT
27609 + *--------------------------------------------------------------------
27612 +read_answer(int dest) /* Wait on 'dest' (select timeout PGR_Replication_Timeout seconds) for a response or notice packet and map its text to a STATUS_* code. */
27614 + char * func = "read_answer()";
27616 + struct timeval timeout;
27618 + ReplicateHeader header;
27619 + char * answer = NULL;
27620 + int status = STATUS_ERROR;
27624 + if (answer != NULL)
27629 + timeout.tv_sec = PGR_Replication_Timeout;
27630 + timeout.tv_usec = 0;
27632 + FD_SET(dest,&rmask);
27633 + rtn = select(dest+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
27636 + if (errno == EINTR || errno == EAGAIN)
27639 + show_error("%s:select failed ,errno is %s",func , strerror(errno));
27640 + return STATUS_ERROR;
27642 + else if (rtn && FD_ISSET(dest, &rmask))
27644 + memset(&header,0,sizeof(ReplicateHeader));
27645 + answer = PGRread_packet(dest,&header);
27646 + if (answer == NULL)
27648 + status = STATUS_ERROR;
27651 + if ((header.cmdSts != CMD_STS_RESPONSE) &&
27652 + (header.cmdSts != CMD_STS_NOTICE)) /* any other packet type is rejected */
27654 + show_error("%s:none response packet received",func);
27657 + status = STATUS_ERROR;
27660 +#ifdef PRINT_DEBUG
27661 + show_debug("%s:answer[%s]",func,answer);
27663 + if (answer != NULL)
27665 + if (!strncasecmp(answer,PGR_QUERY_DONE_NOTICE_CMD,strlen(PGR_QUERY_DONE_NOTICE_CMD))) /* case-insensitive prefix match against each known notice command */
27667 +#ifdef PRINT_DEBUG
27668 + show_debug("%s:QUERY DONE",func);
27670 + status = STATUS_OK;
27672 + else if (!strncasecmp(answer,PGR_QUERY_ABORTED_NOTICE_CMD,strlen(PGR_QUERY_ABORTED_NOTICE_CMD)))
27674 +#ifdef PRINT_DEBUG
27675 + show_debug("%s:QUERY ABORTED",func);
27677 + status = STATUS_ABORTED;
27679 + else if (!strncasecmp(answer,PGR_LOCK_CONFLICT_NOTICE_CMD,strlen(PGR_LOCK_CONFLICT_NOTICE_CMD)))
27681 +#ifdef PRINT_DEBUG
27682 + show_debug("%s:LOCK CONFLICT !!",func);
27684 + status = STATUS_LOCK_CONFLICT;
27686 + else if (!strncasecmp(answer,PGR_DEADLOCK_DETECT_NOTICE_CMD,strlen(PGR_DEADLOCK_DETECT_NOTICE_CMD)))
27688 +#ifdef PRINT_DEBUG
27689 + show_debug("%s:DEADLOCK DETECT !!",func);
27691 + status = STATUS_DEADLOCK_DETECT;
27702 +/*--------------------------------------------------
27704 + * PGRreplicate_packet_send()
27706 + * Send query to each cluster DB servers and return result.
27708 + * ReplicateHeader * header : packet header (I)
27709 + * char * query : query for replication (I)
27710 + * int dest : destination socket for return result (I)
27713 + * NG : STATUS_ERROR
27714 + * DEADLOCK : STATUS_DEADLOCK_DETECT
27715 + *---------------------------------------------------
27718 +PGRreplicate_packet_send( ReplicateHeader * header, char * query,int dest,int recovery_status) { /* Thin wrapper: forwards to replicate_packet_send_internal() with isHeldLock = false. */
27719 + return replicate_packet_send_internal(header,query,dest,recovery_status,false);
27724 +replicate_packet_send_internal(ReplicateHeader * header, char * query,int dest,int recovery_status,bool isHeldLock) /* Fan one packet out to every usable cluster DB (one thread_send_cluster worker per host), collect and compare the workers' results, then report to the source host through thread_send_source(). When isHeldLock is false the serialization semaphore is taken and released here. */
27726 + char * func = "replicate_packet_send_internal()";
27727 + HostTbl * host_ptr = (HostTbl*)NULL;
27728 + HostTbl * source_host_ptr = (HostTbl*)NULL;
27729 + int status = STATUS_OK;
27732 + char *database = NULL;
27734 + char *userName = NULL;
27735 + char *password = NULL;
27736 + char * md5Salt = NULL;
27737 + char * cryptSalt = NULL;
27738 + char * host = NULL;
27739 + char result[PGR_MESSAGE_BUFSIZE];
27741 + pthread_attr_t attr;
27745 + int source_t_cnt = -1; /* index of the source host in thread_arg[]; stays -1 when no host matches the sender */
27746 + int transaction_count = 0;
27747 + int *results_from_thread;
27748 + bool reliable_mode = true;
27750 + pthread_t thread[MAX_DB_SERVER];
27751 + ThreadArgInf thread_arg[MAX_DB_SERVER];
27754 +#ifdef PRINT_DEBUG
27755 + show_debug("cmdSts=%c",header->cmdSts);
27756 + if(header->cmdType!='\0')
27757 + show_debug("cmdType=%c",header->cmdType);
27758 + show_debug("rlog=%d",header->rlog);
27759 + show_debug("port=%d",ntohs(header->port));
27760 + show_debug("pid=%d",ntohs(header->pid));
27761 + show_debug("from_host=%s",header->from_host);
27762 + show_debug("dbName=%s",header->dbName);
27763 + show_debug("userName=%s",header->userName);
27764 + show_debug("recieve sec=%u",ntohl(header->tv.tv_sec));
27765 + show_debug("recieve usec=%u",ntohl(header->tv.tv_usec));
27766 + show_debug("query_size=%d",ntohl(header->query_size));
27767 + show_debug("request_id=%d",ntohl(header->request_id));
27768 + show_debug("replicate_id=%d",ntohl(header->replicate_id));
27769 + show_debug("recovery_status=%d",recovery_status);
27770 + if (header->cmdSts != CMD_STS_PREPARE)
27771 + show_debug("query=%s",query);
27775 + /* check rlog type */
27776 + if (header->rlog == FROM_R_LOG_TYPE)
27778 + if (is_executed_query_in_origin(header) == false)
27780 +#ifdef PRINT_DEBUG
27781 + show_debug("this query is not yet done in source cluster db. so it wait for receive re-replicate request");
27783 + /* wait re-replicate request */
27784 + return STATUS_SKIP_REPLICATE; /* returned before any semaphore is taken */
27788 + * loop while registrated cluster DB exist
27790 + if (Host_Tbl_Begin == NULL)
27792 + return STATUS_ERROR;
27794 + host_ptr = Host_Tbl_Begin;
27795 + PGR_Response_Inf->current_cluster = 0;
27796 + memset(result,0,sizeof(result));
27799 + if (is_need_queue_jump(header,query) == false)
27805 + sem_id = VacuumSemID;
27807 + if(!isHeldLock) { /* take the semaphore unless the caller says it already holds it */
27808 +#ifdef PRINT_DEBUG
27809 + show_debug("sem_lock [%d] req",sem_cnt);
27812 + PGRsem_lock(sem_id,sem_cnt);
27813 +#ifdef PRINT_DEBUG
27814 + show_debug("sem_lock [%d] got it",sem_cnt);
27817 + ++*PGR_ReplicateSerializationID; /* advance the shared serialization counter for this packet */
27819 + /* set replication log */
27820 + if (is_need_use_rlog(header) == true)
27822 + PGRset_rlog(header,query);
27825 + pthread_attr_init(&attr);
27826 + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); /* workers are joined below to collect their status */
27827 + PGR_Response_Inf->current_cluster = 0;
27829 + while(host_ptr->useFlag != DB_TBL_END)
27832 + * check the status of the cluster DB
27834 + if ((host_ptr->useFlag != DB_TBL_USE) &&
27835 + (host_ptr->useFlag != DB_TBL_INIT))
27841 + * skip loop during recover and the host name is master DB
27843 + if (is_master_in_recovery(host_ptr->hostName, host_ptr->port,recovery_status) == true)
27845 + if (PGRset_queue(header,query) != STATUS_OK) /* the packet is queued for the master that is busy with recovery */
27847 + show_error("%s:failed to put query to queue.abort to recovery",func);
27848 + PGRset_recovery_status(RECOVERY_INIT);
27850 +#ifdef PRINT_DEBUG
27851 + show_debug("%s master is using for recovery",func);
27856 + host_ptr = check_host_transaction_status(header, host_ptr);
27858 + * compare with the host name and the exceptional host name
27860 + thread_arg[t_cnt].header = header;
27861 + thread_arg[t_cnt].query = query;
27862 + thread_arg[t_cnt].dest = dest;
27863 + thread_arg[t_cnt].host_ptr = host_ptr;
27864 + thread_arg[t_cnt].current_cluster = t_cnt;
27865 + thread_arg[t_cnt].transaction_tbl = (TransactionTbl *)NULL;
27867 + if (PGRis_same_host(header->from_host,ntohs(header->port),host_ptr->resolvedName, host_ptr->port) == true)
27869 +#ifdef PRINT_DEBUG
27870 + show_debug("source host");
27872 + /* replication to source cluster db */
27873 + source_host_ptr = host_ptr;
27874 + source_t_cnt = t_cnt; /* remember which slot belongs to the sender */
27876 + if (header->rlog == FROM_R_LOG_TYPE )
27878 +#ifdef PRINT_DEBUG
27879 + show_debug("%s: This simple query was suspended. Therefore this query is not re-replicated to source cluster db.",func);
27882 + check_transaction_status(header, thread_arg[t_cnt].transaction_tbl);
27885 + /* replication to other cluster db */
27888 + if ((header->rlog == CONNECTION_SUSPENDED_TYPE ) &&
27889 + (header->cmdSts == CMD_STS_TRANSACTION) )
27891 +#ifdef PRINT_DEBUG
27892 + show_debug("%s: This transaction query was suspended. Therefore this query is not replicated to other cluster dbs.",func);
27898 + * get the transaction table data
27899 + * it has the connection data with each cluster DB
27901 + thread_arg[t_cnt].transaction_tbl = getTransactionTbl(host_ptr,header);
27903 + * if the transaction process is new one,
27904 + * create connection data and add the transaction table
27906 + if (thread_arg[t_cnt].transaction_tbl == (TransactionTbl *)NULL)
27908 + thread_arg[t_cnt].transaction_tbl = setTransactionTbl(host_ptr, header);
27909 + if (thread_arg[t_cnt].transaction_tbl == (TransactionTbl *)NULL)
27911 + show_error("%s:setTransactionTbl failed",func);
27912 + if ( header->cmdSts != CMD_STS_NOTICE )
27914 + PGRset_host_status(host_ptr,DB_TBL_ERROR);
27919 + StartReplication[t_cnt] = true;
27924 + * re-use the connection data
27926 + if ((thread_arg[t_cnt].transaction_tbl->conn != (PGconn *)NULL) &&
27927 + (thread_arg[t_cnt].transaction_tbl->conn->sock > 0))
27930 + memset(thread_arg[t_cnt].transaction_tbl->conn->inBuffer,0,thread_arg[t_cnt].transaction_tbl->conn->inBufSize);
27931 + memset(thread_arg[t_cnt].transaction_tbl->conn->outBuffer,0,thread_arg[t_cnt].transaction_tbl->conn->outBufSize);
27933 + StartReplication[t_cnt] = false; /* existing connection reused */
27937 + if (thread_arg[t_cnt].transaction_tbl->conn != (PGconn *)NULL)
27939 + PQfinish(thread_arg[t_cnt].transaction_tbl->conn);
27940 + thread_arg[t_cnt].transaction_tbl->conn = NULL;
27943 + database = (char *)(header->dbName);
27944 + snprintf(port,sizeof(port),"%d", host_ptr->port);
27945 + userName = (char *)(header->userName);
27946 + password = (char *)(header->password);
27947 + md5Salt = (char *)(header->md5Salt);
27948 + cryptSalt = (char *)(header->cryptSalt);
27949 + host = (char *)(host_ptr->hostName);
27951 + thread_arg[t_cnt].transaction_tbl->conn = PGRcreateConn(host,port,database,userName,password,md5Salt,cryptSalt);
27952 + StartReplication[t_cnt] = true; /* fresh connection created */
27953 +#ifdef PRINT_DEBUG
27954 + show_debug("%s:connect db:%s port:%s user:%s host:%s query:%s",
27955 + func, database,port,userName,host,query);
27959 + check_transaction_status(header, thread_arg[t_cnt].transaction_tbl);
27960 + transaction_count = thread_arg[t_cnt].transaction_tbl->transaction_count;
27961 + rc = pthread_create(&thread[t_cnt], &attr, thread_send_cluster, (void*)&thread_arg[t_cnt]); /* worker runs thread_send_cluster() for this host */
27965 + show_error("pthread_create error");
27971 + * send replication query to each cluster server
27973 + if (host_ptr->useFlag != DB_TBL_USE)
27975 + PGRset_host_status(host_ptr,DB_TBL_USE);
27979 + PGR_Response_Inf->current_cluster ++;
27980 + status = STATUS_OK;
27983 + /* When the query is SELECT, source cluster would not need to wait other cluster's result */
27984 + if ((header->cmdType == CMD_TYPE_SELECT) && (header->cmdSts != CMD_STS_PREPARE))
27986 + thread_send_source( (void*)&thread_arg[source_t_cnt]); /* NOTE(review): no source_t_cnt >= 0 check is visible before this call, unlike the later one - confirm the index cannot be -1 here */
27987 + reliable_mode = false; /* source already answered; skip the later call */
27990 + pthread_attr_destroy(&attr);
27992 + results_from_thread = malloc(t_cnt * sizeof(int)); /* NOTE(review): no NULL check is visible before the array is written below - confirm */
27993 + for ( t = 0 ; t < t_cnt; )
27996 + if (t == source_t_cnt) /* the source slot has no worker thread to join */
28001 + rc = pthread_join(thread[t], (void **)&result);
28002 + if ((rc != 0) && (errno == EINTR)) /* NOTE(review): pthread_join reports failure through its return value, not errno - confirm this test does what is intended */
28007 + results_from_thread[t] = (int)result;
28008 + pthread_detach(thread[t]); /* NOTE(review): called after pthread_join on the same thread - confirm it is needed */
28012 + if (compare_results(results_from_thread, t_cnt, source_t_cnt) == false)
28013 + show_error("query results discrepancy between cluster servers: %s", query);
28014 + free(results_from_thread);
28016 + thread_arg[source_t_cnt].transaction_count = transaction_count; /* NOTE(review): no source_t_cnt >= 0 check is visible before this write - confirm */
28018 + * send replication query to source cluster server.
28020 + if ((source_t_cnt >= 0) && ( reliable_mode == true ))
28022 + thread_send_source( (void*)&thread_arg[source_t_cnt]);
28024 + /* unset replication log */
28025 + if (is_need_use_rlog(header) == true)
28027 + PGRunset_rlog(header,query);
28030 + check_replication_id();
28031 + if (header->cmdSts == CMD_STS_PREPARE)
28033 + if (header->cmdType != CMD_TYPE_P_SYNC)
28035 + if (PGR_Parse_Session_Started == false)
28037 + PGR_Parse_Session_Started = true;
28043 + PGR_Parse_Session_Started = false;
28046 + if(!isHeldLock) { /* release only what was taken above */
28047 +#ifdef PRINT_DEBUG
28048 + show_debug("sem_unlock[%d]",sem_cnt);
28050 + PGRsem_unlock(sem_id,sem_cnt);
28057 +thread_send_source(void * arg) /* Build the reply message for the originating (source) cluster DB and send it over thread_arg->dest with PGRreturn_result(). 'arg' is a ThreadArgInf pointer. */
28059 + char * func = "thread_send_source()";
28060 + ThreadArgInf * thread_arg = NULL;
28061 + ReplicateHeader * header = (ReplicateHeader*)NULL;
28062 + char * query = NULL;
28064 + HostTbl * host_ptr = (HostTbl*)NULL;
28065 + int status = STATUS_OK;
28066 + int transaction_count = 0;
28067 + char result[PGR_MESSAGE_BUFSIZE];
28068 + bool sync_command_flg = false;
28072 + show_error("%s:arg is NULL",func);
28073 + status = STATUS_ERROR;
28074 + pthread_exit((void *) status); /* NOTE(review): the callers visible in this file section invoke this function directly rather than via pthread_create, so this would end the calling thread - confirm */
28076 + thread_arg = (ThreadArgInf *)arg;
28077 + header = thread_arg->header;
28078 + query = thread_arg->query;
28079 + dest = thread_arg->dest;
28080 + host_ptr = thread_arg->host_ptr;
28081 + transaction_count = thread_arg->transaction_count;
28083 + if(header->cmdSts==CMD_STS_OTHER &&
28084 + header->cmdType==CMD_TYPE_CONNECTION_CLOSE) /* nothing is sent back for a connection close */
28086 + return (void *)0;
28089 + if (header->rlog == FROM_R_LOG_TYPE )
28091 + /* It is not necessary to return rlog to source DB. */
28092 +#ifdef PRINT_DEBUG
28093 + show_debug("%s: It is not necessary to return rlog to source DB",func);
28095 + status = STATUS_OK;
28096 + return (void *)status;
28101 + * We can use PGR_ReplicateSerializationID here , because
28102 + * all queries from cluster server isn't recovery query.
28105 + if (is_need_sync_time(header) == true)
28107 + if (transaction_count >1 ) /* counter above 1: no time-sync message */
28109 + sync_command_flg = false;
28113 + sync_command_flg = true;
28116 + if (sync_command_flg == true) /* reply carries timestamp, replicate id, response mode and serialization id */
28118 + snprintf(result,PGR_MESSAGE_BUFSIZE,
28119 + "%d,%u,%u,%u,%d,%u",
28120 + PGR_SET_CURRENT_TIME_FUNC_NO,
28121 + (unsigned int)ntohl(header->tv.tv_sec),
28122 + (unsigned int)ntohl(header->tv.tv_usec),
28123 + (unsigned int)ntohl(PGR_Log_Header->replicate_id),
28124 + PGR_Response_Inf->response_mode,
28125 + *PGR_ReplicateSerializationID);
28129 + snprintf(result,PGR_MESSAGE_BUFSIZE,
28131 + PGR_SET_CURRENT_REPLICATION_QUERY_ID_NO,
28132 + *PGR_ReplicateSerializationID,
28134 + PGR_Response_Inf->response_mode);
28136 + /* execute query in the exceptional host */
28137 + /* it is not use replication */
28138 + if (is_need_wait_answer(header) == true)
28140 + status = PGRreturn_result(dest,result, PGR_WAIT_ANSWER);
28144 + status = PGRreturn_result(dest, result, PGR_NOWAIT_ANSWER);
28148 + if (status == STATUS_ERROR ) /* a failed reply marks the source host as being in error */
28150 + show_error("%s: %s[%d] should be down ",func,host_ptr->hostName,host_ptr->port);
28151 + PGRset_host_status(host_ptr,DB_TBL_ERROR);
28155 + /* delete server table when query use template db */
28156 + if (PGR_Response_Inf->response_mode != PGR_RELIABLE_MODE)
28158 + delete_template(host_ptr,header);
28160 +#ifdef PRINT_DEBUG
28161 + show_debug("end thread_send_source()");
28163 + return (void *)0;
28167 +thread_send_cluster(void * arg) /* pthread start routine (created by replicate_packet_send_internal): replicate one packet to a single cluster DB through send_replicate_packet_to_server() and exit with its status. 'arg' is a ThreadArgInf pointer. */
28169 + char * func = "thread_send_cluster()";
28170 + ThreadArgInf * thread_arg = NULL;
28171 + ReplicateHeader * header = (ReplicateHeader*)NULL;
28172 + char * query = NULL;
28174 + HostTbl * host_ptr = (HostTbl*)NULL;
28176 + int status = STATUS_OK;
28177 + TransactionTbl * transaction_tbl = (TransactionTbl *)NULL;
28178 + int current_cluster = 0;
28179 + char result[PGR_MESSAGE_BUFSIZE];
28181 +#ifdef PRINT_DEBUG
28182 + show_debug("start thread_send_cluster()");
28186 + show_error("%s:arg is NULL",func);
28187 + status = STATUS_ERROR;
28188 + pthread_exit((void *) status);
28191 + thread_arg = (ThreadArgInf *)arg;
28192 + header = thread_arg->header;
28193 + query = thread_arg->query;
28194 + dest = thread_arg->dest;
28195 + host_ptr = thread_arg->host_ptr;
28196 + transaction_tbl = thread_arg->transaction_tbl;
28197 + current_cluster = thread_arg->current_cluster;
28200 + if(header->cmdSts==CMD_STS_OTHER &&
28201 + header->cmdType==CMD_TYPE_CONNECTION_CLOSE) /* connection close: only clean up, nothing is sent to the server */
28203 + check_delete_transaction(host_ptr, header);
28204 + return (void *)0;
28207 + rtn = send_replicate_packet_to_server( transaction_tbl, current_cluster, host_ptr, header, query , result,*PGR_ReplicateSerializationID, false);
28209 +#ifdef PRINT_DEBUG
28210 + show_debug("%s:return value from send_replicate_packet_to_server() is %d",func,rtn);
28212 + if (rtn == STATUS_ABORTED) /* tell the source about the abort, without waiting for an answer */
28214 + snprintf(result,PGR_MESSAGE_BUFSIZE,"%d", PGR_NOTICE_ABORT_FUNC_NO);
28215 + status = PGRreturn_result(dest, result, PGR_NOWAIT_ANSWER);
28216 + status = STATUS_ABORTED; /* the send status from the previous line is overwritten here */
28217 + pthread_exit((void *) status);
28219 + /* delete server table when query use template db */
28220 + delete_template(host_ptr,header);
28221 +#ifdef PRINT_DEBUG
28222 + show_debug("%s:pthread_exit[%d]",func,current_cluster );
28225 + pthread_exit((void *) rtn); /* status is collected by pthread_join in the creator */
28228 +/*--------------------------------------------------
28230 + * PGRreplicate_packet_send_each_server()
28232 + * Send query to a cluster DB server and return result.
28234 + * HostTbl * ptr : cluster server info table (I)
28235 + * bool return_response : flag for return result(I)
28236 + * ReplicateHeader * header: header data (I)
28237 + * char * query : query data (I)
28238 + * int dest : socket of destination server(I)
28241 + * NG : STATUS_ERROR
28242 + *---------------------------------------------------
28245 +PGRreplicate_packet_send_each_server( HostTbl * ptr, bool return_response, ReplicateHeader * header, char * query,int dest) /* Send one packet to the single cluster DB 'ptr' via PGRsend_replicate_packet_to_server(), using the shared PGR_Result buffer. NOTE(review): 'return_response' is not referenced in the visible lines; a literal false is passed below - confirm. */
28247 + char * func = "PGRreplicate_packet_send_each_server()";
28251 + host = ptr->hostName;
28253 + * send query to cluster DB
28255 + if (PGR_Result == NULL)
28257 + show_error("%s:PGR_Result is not initialize",func);
28258 + return STATUS_ERROR;
28261 + rtn = PGRsend_replicate_packet_to_server( ptr, header,query,PGR_Result, dest, false);
28266 +/*--------------------------------------------------
28268 + * PGRread_packet()
28270 + * Read packet data and send the query to each cluster DB.
28271 + * The packet data has header data and query data.
28273 + * int sock : socket (I)
28274 + * ReplicateHeader *header : header data (O)
28276 + * OK: pointer of read query
28278 + *---------------------------------------------------
28281 +PGRread_packet(int sock, ReplicateHeader *header) /* Reads one ReplicateHeader from sock into *header, then calls PGRread_query() to read the query that follows it. */
28283 + char * func = "PGRread_packet()";
28286 + char * read_ptr = NULL;
28287 + int read_size = 0;
28288 + int header_size = 0;
28289 + char * query = NULL;
28291 + struct timeval timeout;
28294 + if (header == NULL)
28298 + memset(header,0,sizeof(ReplicateHeader));
28299 + read_ptr = (char*)header; /* the header struct itself is the receive buffer */
28300 + header_size = sizeof(ReplicateHeader);
28305 + * read header data
28308 + timeout.tv_sec = 1; /* each select() call below waits at most one second */
28309 + timeout.tv_usec = 0;
28312 + * Wait for something to happen.
28315 + FD_SET(sock,&rmask);
28316 + rtn = select(sock+1, &rmask, (fd_set *)NULL,(fd_set *)NULL, &timeout);
28320 + if (errno == EINTR || errno == EAGAIN)
28323 + show_error("%s:select failed ,errno is %s",func , strerror(errno));
28327 + if (rtn && FD_ISSET(sock, &rmask))
28329 + r = recv(sock,read_ptr + read_size ,header_size - read_size, MSG_WAITALL); /* presumably one of two alternatives chosen by a preprocessor conditional that is not visible here */
28331 + r = recv(sock,read_ptr + read_size ,header_size - read_size, 0); /* resumes at read_size, so a partial header can be completed by a later call */
28335 + show_error("%s:recv failed: (%s)",func,strerror(errno));
28336 + if (errno == EINTR || errno == EAGAIN) /* NOTE(review): errno is tested after show_error() has run; if show_error() can change errno this test is unreliable -- confirm */
28340 + show_error("%s:recv failed: (%s)",func,strerror(errno));
28347 + if ( read_size == header_size) /* whole header received */
28349 + query = PGRread_query(sock,header);
28363 +PGRread_query(int sock, ReplicateHeader *header) /* Reads ntohl(header->query_size) bytes of query text from sock into a newly malloc'ed, zero-filled buffer. */
28365 + char * func = "PGRread_query()";
28369 + int read_size = 0;
28370 + int query_size = 0;
28371 + char * query = NULL;
28373 + query_size = ntohl(header->query_size); /* size arrives in network byte order */
28374 + if (query_size < 0)
28376 + show_error("%s:receive size less than 0",func);
28379 + query = malloc(query_size+4); /* NOTE(review): no upper bound on query_size is visible before this allocation; a peer-supplied value near INT_MAX would overflow query_size+4 -- confirm */
28380 + if (query == NULL)
28383 + * buffer allocation failed
28385 + show_error("%s:malloc failed: (%s)",func,strerror(errno));
28388 + memset(query,0,query_size+4); /* the 4 spare bytes stay zero, so the received text is NUL-terminated */
28389 + if (query_size == 0)
28395 + read_ptr = (char *)query;
28399 + * read query data
28402 + /*r = recv(sock,read_ptr + read_size ,query_size - read_size, MSG_WAITALL); */
28403 + r = recv(sock,read_ptr + read_size ,query_size - read_size, 0); /* may deliver fewer bytes than requested; read_size marks the resume offset */
28406 + if (errno == EINTR || errno == EAGAIN)
28410 + show_error("%s:recv failed: (%s)",func,strerror(errno));
28419 + if ( read_size == query_size) /* whole query received */
28424 + else /* r == 0 */
28426 + show_error("%s:unexpected EOF", func);
28438 +is_autocommit_off(char * query) /* Upper-cases query into a local buffer and searches it for AUTOCOMMIT and then OFF. */
28444 + if (query == NULL)
28446 + memset(buf,0,sizeof(buf)); /* zero fill keeps buf NUL-terminated after the bounded copy */
28449 + while ( *p != '\0' )
28451 + buf[i++] = toupper(*p); /* NOTE(review): *p is a plain char; toupper() on a negative value is undefined -- consider casting to unsigned char */
28453 + if (i >= (sizeof(buf) -2)) /* stop copying before buf is full */
28456 + p = strstr(buf,"AUTOCOMMIT");
28461 + p = strstr(buf,"OFF"); /* searches from the start of buf, not from the AUTOCOMMIT match */
28470 +is_autocommit_on(char * query) /* Upper-cases query into a local buffer and searches it for AUTOCOMMIT and then ON. */
28476 + if (query == NULL)
28478 + memset(buf,0,sizeof(buf)); /* zero fill keeps buf NUL-terminated after the bounded copy */
28481 + while ( *p != '\0' )
28483 + buf[i++] = toupper(*p); /* NOTE(review): *p is a plain char; toupper() on a negative value is undefined -- consider casting to unsigned char */
28485 + if (i >= (sizeof(buf) -2)) /* stop copying before buf is full */
28488 + p = strstr(buf,"AUTOCOMMIT");
28493 + p = strstr(buf,"ON"); /* searches from the start of buf, so ON inside any other word also matches */
28501 +static unsigned int
28502 +get_host_ip_from_tbl(char * host) /* Scans the transaction table for an entry whose host field matches host (case-insensitive) and returns that entry's hostIP. */
28504 + Dlelem * ptr = NULL;
28506 + pthread_mutex_lock(&transaction_table_mutex); /* the list is walked under the table mutex */
28507 + if (Transaction_Tbl_Begin == NULL)
28509 + pthread_mutex_unlock(&transaction_table_mutex);
28512 + ptr = DLGetHead(Transaction_Tbl_Begin);
28515 + TransactionTbl *transaction = DLE_VAL(ptr);
28516 + if (!strncasecmp(transaction->host,host,sizeof(transaction->host))) /* comparison is bounded by the size of the host field */
28518 + pthread_mutex_unlock(&transaction_table_mutex);
28519 + return transaction->hostIP; /* NOTE(review): the entry is read after the mutex was released -- confirm it cannot be freed concurrently */
28521 + ptr = DLGetSucc(ptr);
28523 + pthread_mutex_unlock(&transaction_table_mutex); /* no match found */
28528 +static unsigned int
28529 +get_srcHost_ip_from_tbl(char * srcHost) /* Scans the transaction table for an entry whose srcHost field matches srcHost (case-insensitive) and returns that entry's srcHostIP. */
28531 + Dlelem * ptr = NULL;
28533 + pthread_mutex_lock(&transaction_table_mutex); /* the list is walked under the table mutex */
28535 + if (Transaction_Tbl_Begin == NULL)
28537 + pthread_mutex_unlock(&transaction_table_mutex);
28541 + ptr = DLGetHead(Transaction_Tbl_Begin);
28544 + TransactionTbl *transaction = DLE_VAL(ptr);
28545 + if (!strncasecmp(transaction->srcHost,srcHost,sizeof(transaction->srcHost))) /* comparison is bounded by the size of the srcHost field */
28547 + pthread_mutex_unlock(&transaction_table_mutex);
28549 + return transaction->srcHostIP; /* NOTE(review): the entry is read after the mutex was released -- confirm it cannot be freed concurrently */
28551 + ptr = DLGetSucc(ptr);
28553 + pthread_mutex_unlock(&transaction_table_mutex); /* no match found */
28559 +PGRget_next_query_id(void) /* Returns the global PGR_Query_ID, resetting it to 0 once it reaches PGR_MAX_QUERY_ID. */
28561 + if (PGR_Query_ID >= PGR_MAX_QUERY_ID)
28563 + PGR_Query_ID = 0; /* wrap around */
28566 + return PGR_Query_ID; /* NOTE(review): the increment is presumably on a line not visible here, and no locking is visible -- confirm */
28571 +PGRnotice_replication_server(char * hostName, unsigned short portNumber,unsigned short recoveryPortNumber, unsigned short lifecheckPortNumber, char * userName) /* Builds a PGR_STARTUP_REPLICATION_SERVER_FUNC_NO system-call query and hands it to PGRreplicate_packet_send() against the template1 database. */
28573 + char * func ="PGRnotice_replication_server()";
28574 + ReplicateHeader header;
28575 + char query[PGR_MESSAGE_BUFSIZE];
28577 + if (((hostName == NULL) || (*hostName == 0)) ||
28578 + ((userName == NULL) || (*userName == 0)) ||
28579 + ((portNumber == 0) || (recoveryPortNumber == 0))) /* host, user and both ports are mandatory; lifecheckPortNumber is not checked */
28581 +#ifdef PRINT_DEBUG
28582 + show_debug("%s: can not connect server[%s][%s][%d][%d]",func,hostName,userName,portNumber,recoveryPortNumber); /* NOTE(review): hostName or userName may be NULL on this path and are passed to %s -- confirm */
28586 + memset(&header,0,sizeof(ReplicateHeader));
28587 + memset(query,0,sizeof(query));
28588 + snprintf(query,sizeof(query)-1,"SELECT %s(%d,'%s',%d,%d,%d)", /* NOTE(review): the %s inside single quotes is inserted unescaped -- confirm the value can never contain a quote */
28589 + PGR_SYSTEM_COMMAND_FUNC,
28590 + PGR_STARTUP_REPLICATION_SERVER_FUNC_NO,
28593 + recoveryPortNumber,
28594 + lifecheckPortNumber);
28595 + header.cmdSys = CMD_SYS_CALL;
28596 + header.cmdSts = CMD_STS_NOTICE;
28597 + header.query_size = htonl(strlen(query)); /* sizes and ids travel in network byte order */
28598 + header.query_id = htonl(PGRget_next_query_id());
28599 + strncpy(header.from_host,hostName,sizeof(header.from_host)); /* NOTE(review): a name as long as the field leaves it without a terminating NUL */
28600 + strncpy(header.userName,userName,sizeof(header.userName)); /* same termination caveat as from_host */
28601 + strcpy(header.dbName,"template1");
28602 + PGRreplicate_packet_send( &header, query, NOTICE_SYSTEM_CALL_TYPE ,RECOVERY_INIT);
28606 +is_need_use_rlog(ReplicateHeader * header) /* Decides from the cascade state, the PGR_Use_Replication_Log setting and the packet's status/type whether the replication log is needed. */
28608 + bool rtn = false;
28609 + if ((Cascade_Inf->useFlag != DB_TBL_USE) ||
28610 + (PGR_Use_Replication_Log != true) ||
28611 + (header->rlog > 0)) /* action for this branch is not visible here; presumably rtn stays false -- confirm */
28615 + else if ((header->cmdSts == CMD_STS_QUERY ) &&
28616 + ((header->cmdType == CMD_TYPE_INSERT) ||
28617 + (header->cmdType == CMD_TYPE_UPDATE) ||
28618 + (header->cmdType == CMD_TYPE_DELETE) ||
28619 + (header->cmdType == CMD_TYPE_EXECUTE))) /* INSERT, UPDATE, DELETE and EXECUTE queries */
28625 + if ((header->cmdSts == CMD_STS_TRANSACTION ) &&
28626 + (header->cmdType == CMD_TYPE_COMMIT)) /* COMMIT of a transaction */
28635 +PGRinit_transaction_table(void) /* Replaces the global transaction list with a new empty list, freeing any previous one. */
28637 + if (Transaction_Tbl_Begin != NULL)
28639 + DLFreeList(Transaction_Tbl_Begin);
28642 + Transaction_Tbl_Begin = DLNewList(); /* NOTE(review): no check of the DLNewList() result is visible here */
28644 + return STATUS_OK;
28648 +is_need_queue_jump( ReplicateHeader * header,char *query) /* Tests whether the packet is a VACUUM or ANALYZE query whose text contains neither full nor FULL. */
28650 + if (header == NULL)
28655 + if (header->cmdSts == CMD_STS_QUERY)
28657 + if ((header->cmdType == CMD_TYPE_VACUUM ) ||
28658 + (header->cmdType == CMD_TYPE_ANALYZE ))
28660 + if ((strstr(query,"full") == NULL) && /* NOTE(review): query is not NULL-checked in the visible lines, and mixed-case spellings such as Full are not detected */
28661 + (strstr(query,"FULL") == NULL))
28672 +is_executed_query_in_origin( ReplicateHeader *header ) /* Finds the in-use cluster DB that PGRis_same_host() matches against the packet's from_host/port, connects to it and calls is_executed_query() on that connection. */
28674 + char *database = NULL;
28676 + char *userName = NULL;
28677 + char *password = NULL;
28678 + char * md5Salt = NULL;
28679 + char * cryptSalt = NULL;
28680 + char * host = NULL;
28681 + HostTbl * host_ptr = (HostTbl*)NULL;
28682 + TransactionTbl * transaction_tbl = (TransactionTbl*)NULL;
28683 + PGconn * conn = (PGconn *)NULL;
28684 + bool result = false;
28686 + if (Host_Tbl_Begin == NULL)
28688 + return STATUS_ERROR; /* NOTE(review): this path returns STATUS_ERROR while result is a bool -- confirm callers handle both */
28690 + host_ptr = Host_Tbl_Begin;
28691 + while(host_ptr->useFlag != DB_TBL_END) /* the host table ends at a DB_TBL_END sentinel entry */
28694 + * check the status of the cluster DB
28696 + if (host_ptr->useFlag != DB_TBL_USE)
28701 + if (PGRis_same_host(header->from_host,ntohs(header->port),host_ptr->hostName, host_ptr->port) == true)
28707 + if (host_ptr->useFlag == DB_TBL_END) /* the scan reached the sentinel entry */
28712 + * set up the connection
28714 + transaction_tbl = getTransactionTbl(host_ptr,header);
28715 + if (transaction_tbl == (TransactionTbl *)NULL)
28717 + transaction_tbl = setTransactionTbl(host_ptr, header); /* no entry yet: create one */
28718 + if (transaction_tbl == (TransactionTbl *)NULL)
28725 + if ((transaction_tbl->conn == (PGconn *)NULL) ||
28726 + (transaction_tbl->conn->sock <= 0)) /* no usable connection: open a new one with the credentials carried in the header */
28728 + database = (char *)(header->dbName);
28729 + snprintf(port,sizeof(port),"%d", host_ptr->port);
28730 + userName = (char *)(header->userName);
28731 + password = (char *)(header->password);
28732 + md5Salt = (char *)(header->md5Salt);
28733 + cryptSalt = (char *)(header->cryptSalt);
28734 + host = (char *)(host_ptr->hostName);
28735 + transaction_tbl->conn = PGRcreateConn(host,port,database,userName,password,md5Salt,cryptSalt);
28738 + conn = transaction_tbl->conn;
28739 + if (conn == NULL)
28744 + result = is_executed_query( conn, header);
28745 + deleteTransactionTbl(host_ptr,header); /* the table entry is removed once the check has run */
28750 +is_executed_query( PGconn *conn, ReplicateHeader * header) /* Sends a PGR_QUERY_CONFIRM_ANSWER_FUNC_NO system call on conn and compares the command status with PGR_ALREADY_REPLICATED_NOTICE_CMD. */
28752 + static PGresult * res = (PGresult *)NULL; /* NOTE(review): static storage is shared by all callers; confirm this is not reached from several threads, and that res is released with PQclear() on lines not visible here */
28753 + char sync_command[PGR_MESSAGE_BUFSIZE];
28754 + char * str = NULL;
28756 + snprintf(sync_command,sizeof(sync_command),
28757 + "SELECT %s(%d,%u,%u,%u,%d) ",
28758 + PGR_SYSTEM_COMMAND_FUNC,
28759 + PGR_QUERY_CONFIRM_ANSWER_FUNC_NO,
28760 + (unsigned int)ntohl(header->tv.tv_sec), /* header fields are converted from network byte order */
28761 + (unsigned int)ntohl(header->tv.tv_usec),
28762 + (unsigned int)ntohl(header->replicate_id),
28763 + PGR_Response_Inf->response_mode); /* NOTE(review): PGR_Response_Inf is dereferenced without a visible NULL check */
28765 + res = PQexec(conn, sync_command);
28768 + str = PQcmdStatus(res);
28769 + if ((str != NULL) &&
28770 + (!strncasecmp(str,PGR_ALREADY_REPLICATED_NOTICE_CMD,strlen(PGR_ALREADY_REPLICATED_NOTICE_CMD)))) /* case-insensitive prefix match */
28782 +replicate_lo( PGconn * conn, ReplicateHeader * header, LOArgs * query) /* Replays one large-object operation, selected by header->cmdType, on conn with the arguments carried in query. */
28784 + int status = STATUS_OK;
28788 + char * buf = NULL;
28789 + char * filename = NULL;
28794 + if ((conn == (PGconn *)NULL) || (query == (LOArgs *)NULL) || (header == (ReplicateHeader *)NULL))
28796 + return STATUS_ERROR;
28798 + switch (header->cmdType)
28800 + case CMD_TYPE_LO_IMPORT :
28801 + filename = query->buf; /* for an import, buf holds the file name */
28802 + if (lo_import(conn, filename) > 0 )
28804 + status = STATUS_OK;
28808 + status = STATUS_ERROR;
28811 + case CMD_TYPE_LO_CREATE :
28812 + mode = (int)ntohl(query->arg1); /* numeric arguments arrive in network byte order */
28813 + if (lo_creat(conn, mode) > 0)
28815 + status = STATUS_OK;
28819 + status = STATUS_ERROR;
28822 + case CMD_TYPE_LO_OPEN :
28823 + lobjId = (Oid)ntohl(query->arg1);
28824 + mode = (int)ntohl(query->arg2);
28825 + if (lo_open(conn, lobjId, mode) > 0) /* NOTE(review): libpq documents -1 as the lo_open failure value, so a descriptor of 0 would be treated as an error here -- confirm whether >= 0 was intended */
28827 + status = STATUS_OK;
28831 + status = STATUS_ERROR;
28834 + case CMD_TYPE_LO_WRITE :
28835 + fd = (int)ntohl(query->arg1);
28836 + len = (int)ntohl(query->arg2);
28837 + buf = query->buf; /* for a write, buf holds the data */
28838 + if (lo_write(conn, fd, buf, len) == len ) /* a short write counts as failure */
28840 + status = STATUS_OK;
28844 + status = STATUS_ERROR;
28847 + case CMD_TYPE_LO_LSEEK :
28848 + fd = (int)ntohl(query->arg1);
28849 + offset = (int)ntohl(query->arg2);
28850 + whence = (int)ntohl(query->arg3);
28851 + if (lo_lseek(conn, fd, offset, whence) >= 0)
28853 + status = STATUS_OK;
28857 + status = STATUS_ERROR;
28860 + case CMD_TYPE_LO_CLOSE :
28861 + fd = (int)ntohl(query->arg1);
28862 + if (lo_close(conn, fd) == 0)
28864 + status = STATUS_OK;
28868 + status = STATUS_ERROR;
28871 + case CMD_TYPE_LO_UNLINK :
28872 + lobjId = (Oid)ntohl(query->arg1);
28873 + if (lo_unlink(conn,lobjId) >= 0)
28875 + status = STATUS_OK;
28879 + status = STATUS_ERROR;
28889 +send_func(HostTbl * host_ptr,ReplicateHeader * header, char * func,char * result) /* Executes the SQL text in func on the cluster DB described by host_ptr, reusing or creating the connection kept in the transaction table, and copies the command status into result. */
28891 + char * f ="send_func()"; /* log prefix; the parameter named func holds the SQL text */
28892 + char *database = NULL;
28894 + char *userName = NULL;
28895 + char *password = NULL;
28896 + char * md5Salt = NULL;
28897 + char * cryptSalt = NULL;
28898 + char * host = NULL;
28899 + char * str = NULL;
28900 + TransactionTbl * transaction_tbl = (TransactionTbl *)NULL;
28901 + PGresult * res = (PGresult *)NULL;
28902 + PGconn * conn = (PGconn *)NULL;
28904 + int current_cluster = 0;
28906 + if ((host_ptr == (HostTbl *)NULL) ||
28907 + (header == (ReplicateHeader *)NULL) ||
28908 + (func == NULL) ||
28909 + (result == NULL))
28911 + return STATUS_ERROR;
28914 + * set up the connection
28916 + database = (char *)header->dbName;
28917 + snprintf(port,sizeof(port),"%d", host_ptr->port);
28918 + userName = (char *)(header->userName);
28919 + password = (char *)(header->password);
28920 + md5Salt = (char *)(header->md5Salt);
28921 + cryptSalt = (char *)(header->cryptSalt);
28922 + host = (char *)(host_ptr->hostName);
28923 + if (PGR_Response_Inf != NULL)
28925 + current_cluster = PGR_Response_Inf->current_cluster; /* index into StartReplication[]; stays 0 when PGR_Response_Inf is NULL */
28929 + * get the transaction table data
28930 + * it has the connection data with each cluster DB
28932 + transaction_tbl = getTransactionTbl(host_ptr,header);
28934 + * if the transaction process is new one,
28935 + * create connection data and add the transaction table
28937 + if (transaction_tbl == (TransactionTbl *)NULL)
28939 + transaction_tbl = setTransactionTbl(host_ptr, header);
28940 + if (transaction_tbl == (TransactionTbl *)NULL)
28942 + StartReplication[current_cluster] = true;
28943 + show_error("%s:setTransactionTbl failed",f);
28944 + if ( header->cmdSts != CMD_STS_NOTICE ) /* for notice packets the host is not marked as failed */
28946 + PGRset_host_status(host_ptr,DB_TBL_ERROR);
28948 + return STATUS_ERROR;
28954 + * re-use the connection data
28956 + if ((transaction_tbl->conn != (PGconn *)NULL) &&
28957 + (transaction_tbl->conn->sock > 0))
28959 + StartReplication[current_cluster] = false; /* an existing live connection is reused */
28963 + if (transaction_tbl->conn != (PGconn *)NULL)
28965 + PQfinish(transaction_tbl->conn); /* drop the stale connection before reconnecting */
28967 + transaction_tbl->conn = PGRcreateConn(host,port,database,userName,password,md5Salt,cryptSalt);
28968 + StartReplication[current_cluster] = true;
28971 + conn = transaction_tbl->conn;
28973 + if (conn == NULL)
28975 + show_error("%s:[%d@%s] may be down",f,host_ptr->port,host_ptr->hostName);
28976 + if ( header->cmdSts != CMD_STS_NOTICE )
28978 + StartReplication[current_cluster] = true;
28979 + PGRset_host_status(host_ptr,DB_TBL_ERROR);
28981 + return STATUS_ERROR;
28983 + res = PQexec(conn, func); /* NOTE(review): confirm res is released with PQclear() on every path; no such call is visible here */
28986 + StartReplication[current_cluster] = true;
28987 + return STATUS_ERROR;
28989 + str = PQcmdStatus(res);
28990 + if ((str == NULL) || (*str == '\0')) /* an empty command status is treated as failure */
28992 + rtn = STATUS_ERROR;
28996 + snprintf(result, PGR_MESSAGE_BUFSIZE, "%s",str); /* result must provide at least PGR_MESSAGE_BUFSIZE bytes */
29005 +get_oid(HostTbl * host_ptr,ReplicateHeader * header) /* Runs the PGR_GET_OID_FUNC_NO system call on one cluster DB through send_func() and parses the reply as an unsigned decimal number. */
29007 + char sync_command[PGR_MESSAGE_BUFSIZE];
29008 + char result[PGR_MESSAGE_BUFSIZE];
29010 + memset(result,0,sizeof(result));
29011 + snprintf(sync_command,sizeof(sync_command),
29013 + PGR_SYSTEM_COMMAND_FUNC, PGR_GET_OID_FUNC_NO);
29014 + if (send_func(host_ptr, header, sync_command, result) == STATUS_OK)
29016 + return (strtoul(result, NULL, 10)); /* strtoul yields 0 for non-numeric text; conversion errors are not checked */
29022 +set_oid(HostTbl * host_ptr,ReplicateHeader * header, uint32_t oid) /* Runs the PGR_SET_OID_FUNC_NO system call on one cluster DB through send_func() and returns send_func()'s status. */
29024 + char sync_command[PGR_MESSAGE_BUFSIZE];
29025 + char result[PGR_MESSAGE_BUFSIZE];
29027 + memset(result,0,sizeof(result));
29028 + snprintf(sync_command,sizeof(sync_command),
29029 + "SELECT %s(%d,%u)",
29030 + PGR_SYSTEM_COMMAND_FUNC,
29031 + PGR_SET_OID_FUNC_NO,
29033 + return ( send_func(host_ptr, header, sync_command, result) ); /* the reply text in result is discarded */
29037 + * synchronize the OID across the cluster DBs
29040 +PGRsync_oid(ReplicateHeader *header) /* Two passes over the host table: the first collects the largest OID reported by the in-use cluster DBs, the second writes that value to each of them. */
29042 + HostTbl * host_ptr = (HostTbl*)NULL;
29043 + uint32_t max_oid = 0;
29044 + uint32_t oid = 0;
29045 + int recovery_status = 0;
29047 + /* get current oid of all cluster db's */
29048 + host_ptr = Host_Tbl_Begin;
29049 + if (host_ptr == (HostTbl *)NULL)
29051 + return STATUS_ERROR;
29053 + recovery_status = PGRget_recovery_status(); /* sampled once and used for both passes */
29054 + while(host_ptr->useFlag != DB_TBL_END)
29057 + * check the status of the cluster DB
29059 + if (host_ptr->useFlag != DB_TBL_USE)
29065 + * skip loop during recover and the host name is master DB
29067 + if (is_master_in_recovery(host_ptr->hostName, host_ptr->port,recovery_status) == true)
29072 + oid = get_oid(host_ptr,header);
29073 + if (max_oid < oid )
29079 + if (max_oid <= 0) /* max_oid is unsigned, so this is the same as max_oid == 0: no host reported an OID */
29080 + return STATUS_ERROR;
29082 + /* set oid in cluster db */
29083 + host_ptr = Host_Tbl_Begin;
29084 + while(host_ptr->useFlag != DB_TBL_END)
29087 + * check the status of the cluster DB
29089 + if (host_ptr->useFlag != DB_TBL_USE)
29095 + * skip loop during recover and the host name is master DB
29097 + if (is_master_in_recovery(host_ptr->hostName, host_ptr->port,recovery_status) == true)
29102 + set_oid(host_ptr,header,max_oid); /* the set_oid() status is ignored */
29106 + return STATUS_OK;
29110 +PGRload_replication_id(void) /* Reads one line from the open replication id file RidFp and stores the number after its last space in Recovery_Status_Inf->replication_id, or 0 as a fallback. */
29112 + char * func = "PGRload_replication_id()";
29116 + if (Recovery_Status_Inf == (RecoveryStatusInf *)NULL)
29118 + show_error("%s: Recovery_Status_Inf is NULL",func);
29119 + return STATUS_ERROR;
29121 + if (RidFp == (FILE *)NULL)
29123 + show_error("%s: replication id file is not open",func);
29124 + return STATUS_ERROR;
29127 + if (fgets(buf,sizeof(buf),RidFp) == NULL)
29129 + Recovery_Status_Inf->replication_id = 0; /* nothing could be read */
29133 + p = strrchr(buf,' '); /* the id is taken from the text after the last space */
29137 + Recovery_Status_Inf->replication_id = (uint32_t) atol(p); /* atol reports no errors; malformed text yields 0 */
29141 + Recovery_Status_Inf->replication_id = 0;
29144 + return STATUS_OK;
29148 +notice_abort(HostTbl * host_ptr,ReplicateHeader * header) /* Runs the PGR_NOTICE_ABORT_FUNC_NO system call on one cluster DB through send_func() and returns send_func()'s status. */
29150 + char sync_command[PGR_MESSAGE_BUFSIZE];
29151 + char result[PGR_MESSAGE_BUFSIZE];
29153 + memset(result,0,sizeof(result));
29154 + snprintf(sync_command,sizeof(sync_command),
29156 + PGR_SYSTEM_COMMAND_FUNC,
29157 + PGR_NOTICE_ABORT_FUNC_NO);
29158 + return ( send_func(host_ptr, header, sync_command, result) ); /* the reply text in result is discarded */
29162 +send_p_parse (PGconn * conn, StringInfo input_message) /* Decodes a Parse message from input_message and re-emits it field by field on conn as a 'P' message. */
29164 + const char *stmt_name;
29165 + const char *query_string;
29169 + /* get name,query */
29170 + stmt_name = pq_getmsgstring(input_message);
29171 + query_string = pq_getmsgstring(input_message);
29172 + /* send name,query */
29173 + if (pqPutMsgStart('P', false, conn) < 0 ||
29174 + pqPuts(stmt_name, conn) < 0 ||
29175 + pqPuts(query_string, conn) < 0)
29177 + return STATUS_ERROR;
29179 + /* get number of parameter */
29180 + numParams = pq_getmsgint(input_message, 2); /* 2-byte count */
29181 + /* send number of parameter */
29182 + if (pqPutInt(numParams, 2, conn) < 0)
29184 + return STATUS_ERROR;
29186 + if (numParams > 0)
29189 + for (i = 0; i < numParams; i++)
29191 + paramTypes = pq_getmsgint(input_message, 4); /* one 4-byte value per parameter, forwarded unchanged */
29192 + if (pqPutInt(paramTypes, 4, conn) < 0)
29194 + return STATUS_ERROR;
29198 + if (pqPutMsgEnd(conn) < 0)
29200 + return STATUS_ERROR;
29202 + return STATUS_OK; /* the message is only queued here; no flush is visible in this function */
29206 +send_p_bind (PGconn * conn, StringInfo input_message) /* Decodes a Bind message from input_message and re-emits it field by field on conn as a 'B' message. */
29208 + const char *portal_name;
29209 + const char *stmt_name;
29217 + /* Get&Send the fixed part of the message */
29218 + portal_name = pq_getmsgstring(input_message);
29219 + stmt_name = pq_getmsgstring(input_message);
29220 + if (pqPutMsgStart('B', false, conn) < 0 ||
29221 + pqPuts(portal_name, conn) < 0 ||
29222 + pqPuts(stmt_name, conn) < 0)
29224 + return STATUS_ERROR;
29227 + /* Get&Send the parameter format codes */
29228 + numPFormats = pq_getmsgint(input_message, 2);
29229 + if (pqPutInt(numPFormats, 2, conn) < 0)
29231 + return STATUS_ERROR;
29233 + if (numPFormats > 0)
29235 + for (i = 0; i < numPFormats; i++)
29237 + pformats = pq_getmsgint(input_message, 2); /* each code is forwarded as soon as it is read */
29238 + if (pqPutInt(pformats, 2, conn) < 0)
29240 + return STATUS_ERROR;
29245 + /* Get&Send the parameter value count */
29246 + numParams = pq_getmsgint(input_message, 2);
29247 + if (pqPutInt(numParams, 2, conn) < 0)
29249 + return STATUS_ERROR;
29251 + if (numParams > 0)
29254 + for (i = 0 ; i < numParams ; i ++)
29256 + plength = pq_getmsgint(input_message, 4);
29257 + if (plength != -1) /* a length of -1 carries no value bytes; only the length is forwarded in the other branch */
29259 + const char *pvalue = pq_getmsgbytes(input_message, plength);
29260 + if (pqPutInt(plength, 4, conn) < 0 ||
29261 + pqPutnchar(pvalue, plength, conn) < 0)
29263 + return STATUS_ERROR;
29268 + if (pqPutInt(plength, 4, conn) < 0)
29270 + return STATUS_ERROR;
29276 + /* Get&Send the result format codes */
29277 + numRFormats = pq_getmsgint(input_message, 2);
29278 + if (pqPutInt(numRFormats, 2, conn) < 0 )
29280 + return STATUS_ERROR;
29282 + if (numRFormats > 0)
29284 + for (i = 0; i < numRFormats; i++)
29286 + rformats = pq_getmsgint(input_message, 2);
29287 + if (pqPutInt(rformats, 2, conn) < 0)
29289 + return STATUS_ERROR;
29293 + if (pqPutMsgEnd(conn) < 0)
29295 + return STATUS_ERROR;
29297 + return STATUS_OK;
29301 +send_p_describe (PGconn * conn, StringInfo input_message) /* Decodes a Describe message (type byte plus target name) from input_message and re-emits it on conn as a 'D' message. */
29304 + int describe_type;
29305 + const char *describe_target;
29307 + describe_type = pq_getmsgbyte(input_message);
29308 + describe_target = pq_getmsgstring(input_message);
29310 + /* construct the Describe Portal message */
29311 + if (pqPutMsgStart('D', false, conn) < 0 ||
29312 + pqPutc(describe_type, conn) < 0 || /* the type byte is forwarded unchanged */
29313 + pqPuts(describe_target, conn) < 0 ||
29314 + pqPutMsgEnd(conn) < 0)
29316 + return STATUS_ERROR;
29318 + return STATUS_OK;
29322 +send_p_execute (PGconn * conn, StringInfo input_message) /* Decodes an Execute message (portal name plus 4-byte row limit) from input_message and re-emits it on conn as an 'E' message. */
29324 + const char *portal_name;
29327 + portal_name = pq_getmsgstring(input_message);
29328 + max_rows = pq_getmsgint(input_message, 4);
29329 + /* construct the Execute message */
29330 + if (pqPutMsgStart('E', false, conn) < 0 ||
29331 + pqPuts(portal_name, conn) < 0 ||
29332 + pqPutInt(max_rows, 4, conn) < 0 || /* the row limit is forwarded unchanged */
29333 + pqPutMsgEnd(conn) < 0)
29335 + return STATUS_ERROR;
29337 + return STATUS_OK;
29341 +send_p_sync (PGconn * conn, StringInfo input_message) /* Sends a Sync ('S') message on conn, flushes it, and reads results with PQgetResult() until none remain; input_message is not read in the visible lines. */
29343 + PGresult *result;
29344 + PGresult *lastResult;
29346 + /* construct the Sync message */
29347 + if (pqPutMsgStart('S', false, conn) < 0 ||
29348 + pqPutMsgEnd(conn) < 0)
29350 + return STATUS_ERROR;
29352 + /* remember we are using extended query protocol */
29353 + conn->queryclass = PGQUERY_EXTENDED;
29356 + * Give the data a push. In nonblock mode, don't complain if we're unable
29357 + * to send it all; PQgetResult() will do any additional flushing needed.
29359 + if (pqFlush(conn) < 0)
29361 + return STATUS_ERROR;
29364 + /* OK, it's launched! */
29365 + conn->asyncStatus = PGASYNC_BUSY; /* libpq's internal async state is set directly so that PQgetResult() will read the reply */
29367 + lastResult = NULL;
29368 + while ((result = PQgetResult(conn)) != NULL)
29372 + if (lastResult->resultStatus == PGRES_FATAL_ERROR && /* presumably guarded by a lastResult NULL check on a line not visible here -- confirm */
29373 + result->resultStatus == PGRES_FATAL_ERROR)
29376 + result = lastResult;
29379 + PQclear(lastResult);
29381 + lastResult = result;
29382 + if (result->resultStatus == PGRES_COPY_IN ||
29383 + result->resultStatus == PGRES_COPY_OUT ||
29384 + conn->status == CONNECTION_BAD)
29387 + if (lastResult != NULL)
29389 + PQclear(lastResult); /* the final result is released without being inspected */
29391 + return STATUS_OK;
29395 +send_p_close (PGconn * conn, StringInfo input_message) /* Decodes a Close message (type byte plus target name) from input_message and re-emits it on conn as a 'C' message. */
29399 + const char *close_target;
29401 + close_type = pq_getmsgbyte(input_message);
29402 + close_target = pq_getmsgstring(input_message);
29403 + if (pqPutMsgStart('C', false, conn) < 0 ||
29404 + pqPutc(close_type, conn) < 0 || /* the type byte is forwarded unchanged */
29405 + pqPuts(close_target, conn) < 0 ||
29406 + pqPutMsgEnd(conn) < 0)
29408 + return STATUS_ERROR;
29410 + return STATUS_OK;
29413 +set_string_info(StringInfo input_message, ReplicateHeader * header, char * query) /* Points input_message at the caller's query buffer so it can be parsed with the pq_getmsg* readers; no copy is made. */
29416 + len = ntohl(header->query_size); /* size arrives in network byte order */
29417 + input_message->data = query; /* the buffer is borrowed, not owned, by input_message */
29418 + input_message->maxlen = len;
29419 + input_message->len = len -1; /* NOTE(review): a query_size of 0 gives len - 1 == -1 here -- confirm callers never pass an empty query */
29420 + input_message->cursor = 0;
29422 diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/rlog.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/rlog.c
29423 --- postgresql-8.2.4/src/pgcluster/pgrp/rlog.c 1970-01-01 01:00:00.000000000 +0100
29424 +++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/rlog.c 2007-02-18 22:52:17.000000000 +0100
29426 +/*--------------------------------------------------------------------
29431 + * This file contains the functions that the pgreplicate sources
29432 + * call to maintain the replicate-ahead log.
29434 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
29435 + *--------------------------------------------------------------------
29437 +#ifdef USE_REPLICATION
29439 +#include "postgres.h"
29440 +#include "postgres_fe.h"
29442 +#include <stdio.h>
29443 +#include <sys/types.h>
29444 +#include <sys/stat.h>
29445 +#include <sys/un.h>
29446 +#include <unistd.h>
29447 +#ifdef HAVE_FCNTL_H
29448 +#include <fcntl.h>
29450 +#include <errno.h>
29451 +#include <ctype.h>
29453 +#include <sys/ipc.h>
29454 +#include <sys/shm.h>
29455 +#include <sys/sem.h>
29456 +#include <sys/msg.h>
29457 +#include <signal.h>
29458 +#include <sys/socket.h>
29459 +#include <netdb.h>
29460 +#ifdef HAVE_NETINET_TCP_H
29461 +#include <netinet/tcp.h>
29463 +#include <dirent.h>
29464 +#include <arpa/inet.h>
29466 +#ifdef HAVE_CRYPT_H
29467 +#include <crypt.h>
29471 +#include "mb/pg_wchar.h"
29474 +#include "libpq-fe.h"
29475 +#include "libpq-int.h"
29476 +#include "fe-auth.h"
29477 +#include "access/xact.h"
29478 +#include "replicate_com.h"
29479 +#include "pgreplicate.h"
29481 +static int RLog_Recv_Sock = -1;
29482 +/*--------------------------------------
29483 + * PROTOTYPE DECLARATION
29484 + *--------------------------------------
29486 +static int set_query_log(ReplicateHeader * header, char * query);
29487 +static QueryLogType * get_query_log_by_header(ReplicateHeader * header);
29488 +static QueryLogType * get_query_log(ReplicateHeader * header);
29489 +static void delete_query_log(ReplicateHeader * header);
29490 +static int set_commit_log(ReplicateHeader * header);
29491 +static CommitLogInf * get_commit_log(ReplicateHeader * header);
29492 +static void delete_commit_log(ReplicateHeader * header);
29493 +static bool was_committed_transaction(ReplicateHeader * header);
29494 +static int create_recv_rlog_socket(void);
29495 +static int do_rlog(int fd);
29496 +static int recv_message(int sock,char * buf, int len);
29497 +static int send_message(int sock, char * msg, int len);
29498 +static void exit_rlog(int sig);
29499 +static int reconfirm_commit(ReplicateHeader * header);
29501 +static int PGRget_sync_data(ReplicateHeader *header);
29502 +static int PGRdelete_sync_data(ReplicateHeader *header);
29503 +#endif /* #if 0 */
29507 +PGRwrite_rlog(ReplicateHeader * header, char * query) /* Dispatches on header->cmdSts to add or delete a query-log or commit-log entry. */
29509 + char * func = "PGRwrite_rlog()";
29511 + if (header == NULL)
29513 + show_error("%s:header is null",func);
29514 + return STATUS_ERROR;
29516 + switch (header->cmdSts)
29518 + case CMD_STS_QUERY:
29519 +#ifdef PRINT_DEBUG
29520 + show_debug("%s:set_query_log",func);
29522 + set_query_log(header,query); /* NOTE(review): the status returned by set_query_log() is ignored */
29524 + case CMD_STS_DELETE_QUERY:
29525 +#ifdef PRINT_DEBUG
29526 + show_debug("%s:delete_query_log",func);
29528 + delete_query_log(header);
29530 + case CMD_STS_TRANSACTION:
29531 + if (header->cmdType == CMD_TYPE_COMMIT) /* only COMMIT is recorded in the commit log */
29533 +#ifdef PRINT_DEBUG
29534 + show_debug("%s:set_commit_log call",func);
29536 + set_commit_log(header); /* NOTE(review): the status returned by set_commit_log() is ignored */
29539 + case CMD_STS_DELETE_TRANSACTION:
29540 + if (header->cmdType == CMD_TYPE_COMMIT)
29542 +#ifdef PRINT_DEBUG
29543 + show_debug("%s:delete_commit_log call",func);
29545 + delete_commit_log(header);
29549 + show_error("%s:unknown status %c",func,header->cmdSts);
29552 + return STATUS_OK; /* the final return is STATUS_OK regardless of what the helper calls reported */
29556 +PGRget_requested_query(ReplicateHeader * header) /* Walks the query log from Query_Log_Top and returns the stored header whose request_id, pid, port and from_host equal those of header, or NULL. */
29558 + QueryLogType * query_log = NULL;
29560 + if (Query_Log_Top == NULL)
29564 + query_log = Query_Log_Top;
29565 + while(query_log != (QueryLogType *)NULL)
29567 + if ((query_log->header->request_id == header->request_id) && /* NOTE(review): header is dereferenced without a visible NULL check */
29568 + (query_log->header->pid == header->pid) &&
29569 + (query_log->header->port == header->port) &&
29570 + (!strncmp(query_log->header->from_host,header->from_host,sizeof(header->from_host)))) /* host names are compared case-sensitively */
29572 + return query_log->header; /* pointer into the log entry, not a copy */
29574 + query_log = (QueryLogType *)(query_log->next); /* next is stored as char * and cast back */
29576 + return (ReplicateHeader *)NULL;
29580 +set_query_log(ReplicateHeader * header, char * query) /* Appends a new entry to the doubly linked query log, holding heap copies of header and of the query text. */
29582 + char * func = "set_query_log()";
29584 + QueryLogType * query_log = NULL;
29586 + if (Query_Log_Top == NULL) /* empty log: the new node becomes both top and end */
29588 + Query_Log_Top = (QueryLogType *)malloc(sizeof(QueryLogType));
29589 + if (Query_Log_Top == (QueryLogType *)NULL)
29591 + show_error("%s:malloc failed: (%s)",func,strerror(errno));
29592 + return STATUS_ERROR;
29594 + Query_Log_Top->next = NULL;
29595 + Query_Log_Top->last = NULL;
29596 + Query_Log_End = Query_Log_Top;
29597 + Query_Log_End->next = NULL;
29598 + Query_Log_End->last = NULL;
29599 + query_log = Query_Log_Top;
29603 + query_log = (QueryLogType *)malloc(sizeof(QueryLogType));
29604 + if (query_log == (QueryLogType *)NULL)
29606 + show_error("%s:malloc failed: (%s)",func,strerror(errno));
29607 + return STATUS_ERROR;
29609 + Query_Log_End->next = (char *)query_log; /* links are stored as char * and cast back by the readers */
29610 + query_log->last = (char *)Query_Log_End;
29611 + query_log->next = NULL;
29612 + Query_Log_End = query_log;
29614 + query_log->header = (ReplicateHeader *)malloc(sizeof(ReplicateHeader)); /* NOTE(review): the node is already linked into the list; if this or the next malloc fails, the list keeps a node with unset header/query pointers that later lookups dereference -- confirm and unlink on failure */
29615 + if (query_log->header == (ReplicateHeader *)NULL)
29617 + show_error("%s:malloc failed: (%s)",func,strerror(errno));
29618 + return STATUS_ERROR;
29620 + size = ntohl(header->query_size);
29622 + query_log->query = (char *)malloc(size+4); /* 4 spare bytes, zeroed below, keep the copy NUL-terminated */
29623 + if (query_log->query == (char *)NULL)
29625 + show_error("%s:malloc failed: (%s)",func,strerror(errno));
29626 + return STATUS_ERROR;
29628 + memset(query_log->query,0,size+4);
29629 + memcpy(query_log->header,header,sizeof(ReplicateHeader));
29630 + query_log->header->rlog = FROM_R_LOG_TYPE ; /* the stored header copy is tagged as coming from the replication log */
29631 + memcpy(query_log->query,query,size); /* NOTE(review): query is not NULL-checked in the visible lines */
29633 + return STATUS_OK;
29636 +static QueryLogType *
29637 +get_query_log_by_header(ReplicateHeader * header) /* Walks the query log and returns the entry whose stored request_id, pid, port and from_host equal those of header, or NULL. */
29639 + QueryLogType * query_log = NULL;
29641 + if (Query_Log_Top == NULL)
29643 + return (QueryLogType *)NULL;
29645 + query_log = Query_Log_Top;
29646 + show_debug("get_query_log_by_header:header is %d,%d,%d,%s", /* NOTE(review): no PRINT_DEBUG guard is visible around this call, unlike other show_debug() uses in this file -- confirm */
29647 + header->request_id,
29650 + header->from_host);
29652 + while(query_log != (QueryLogType *)NULL)
29654 + show_debug("get_query_log_by_header:comparing to %d,%d,%d,%s", /* logs every entry visited */
29655 + query_log->header->request_id,
29656 + query_log->header->pid,
29657 + query_log->header->port,
29658 + query_log->header->from_host);
29660 + if ((query_log->header->request_id == header->request_id) &&
29661 + (query_log->header->pid == header->pid) &&
29662 + (query_log->header->port == header->port) &&
29663 + (!strncmp(query_log->header->from_host,header->from_host,sizeof(header->from_host)))) /* host names are compared case-sensitively */
29665 + return query_log;
29667 + query_log = (QueryLogType *)(query_log->next);
29669 + return (QueryLogType *)NULL;
29672 +static QueryLogType *
29673 +get_query_log(ReplicateHeader * header) /* Walks the query log and returns the first entry whose stored replicate_id equals header->replicate_id, or NULL. */
29675 + QueryLogType * query_log = NULL;
29677 + if (Query_Log_Top == NULL)
29681 + query_log = Query_Log_Top;
29682 + while(query_log != (QueryLogType *)NULL)
29684 + show_debug("get_qurey_log: comparing in log is %d,header is %d",query_log->header->replicate_id,header->replicate_id);
29685 + if (query_log->header->replicate_id == header->replicate_id) /* both values are compared as stored, without byte-order conversion */
29687 + return query_log;
29689 + query_log = (QueryLogType *)(query_log->next);
29691 + return (QueryLogType*)NULL;
29695 +delete_query_log(ReplicateHeader * header) /* Unlinks the entry found by get_query_log() from the doubly linked query log and frees its header and query copies. */
29697 + QueryLogType * query_log = NULL;
29698 + QueryLogType * last = NULL;
29699 + QueryLogType * next = NULL;
29701 + query_log = get_query_log(header);
29703 + if (query_log == NULL)
29707 + last = (QueryLogType *)query_log->last; /* last is the previous node */
29708 + next = (QueryLogType *)query_log->next;
29710 + /* change link */
29711 + if (last != (QueryLogType *)NULL)
29713 + last->next = (char *)next;
29717 + Query_Log_Top = next; /* the removed node was the first one */
29719 + if (next != (QueryLogType *)NULL)
29721 + next->last = (char *)last;
29725 + Query_Log_End = last; /* the removed node was the final one */
29728 + /* delete contents */
29729 + if (query_log->header != NULL)
29731 + free(query_log->header);
29733 + if (query_log->query != NULL)
29735 + free(query_log->query); /* NOTE(review): freeing the node itself is not visible in these lines -- confirm it happens */
29741 +set_commit_log(ReplicateHeader * header) /* Copies header into the first commit-log slot that is not in use and increments the commit_log_num counter kept in entry 0. */
29744 + CommitLogInf * commit_log = NULL;
29745 + ReplicateHeader * c_header;
29747 + if (Commit_Log_Tbl == NULL)
29749 + return STATUS_ERROR;
29751 + commit_log = Commit_Log_Tbl + 1; /* entry 0 holds the counter; data slots start at index 1 */
29752 + while ( commit_log->inf.useFlag != DB_TBL_END )
29754 + if (commit_log->inf.useFlag != DB_TBL_USE) /* free slot found */
29756 + commit_log->inf.useFlag = DB_TBL_USE;
29757 + c_header = &(commit_log->header);
29758 + memcpy(c_header,header,sizeof(ReplicateHeader));
29759 + Commit_Log_Tbl->inf.commit_log_num ++;
29764 + return STATUS_OK; /* NOTE(review): no distinct result for a full table is visible here -- confirm */
29767 +static CommitLogInf *
29768 +get_commit_log(ReplicateHeader * header) /* Scans the in-use commit-log slots for one whose stored replicate_id equals header->replicate_id; returns it or NULL. */
29770 + CommitLogInf * commit_log = NULL;
29771 + ReplicateHeader * c_header;
29774 + if (Commit_Log_Tbl == NULL)
29776 + return (CommitLogInf *)NULL;
29778 + commit_log = Commit_Log_Tbl + 1; /* data slots start at index 1 */
29779 + while ( commit_log->inf.useFlag != DB_TBL_END )
29781 + if (commit_log->inf.useFlag == DB_TBL_USE)
29784 + c_header = &(commit_log->header);
29785 + if (c_header == NULL) /* c_header is the address of a struct member, so this test cannot be true */
29790 + if (c_header->replicate_id == header->replicate_id)
29792 + return commit_log;
29798 + if (cnt >= Commit_Log_Tbl->inf.commit_log_num) /* presumably stops once every in-use entry has been examined -- the cnt update is not visible here */
29804 + return (CommitLogInf *)NULL;
29808 +delete_commit_log(ReplicateHeader * header)
29810 + CommitLogInf * commit_log = NULL;
29812 + commit_log = get_commit_log(header);
29813 + if (commit_log != NULL)
29815 + memset(&(commit_log->header),0,sizeof(commit_log->header));
29816 + commit_log->inf.useFlag = DB_TBL_INIT;
29817 + Commit_Log_Tbl->inf.commit_log_num --;
29822 +was_committed_transaction(ReplicateHeader * header)
29824 + CommitLogInf * commit_log = NULL;
29826 + commit_log = get_commit_log(header);
29827 + if (commit_log != NULL)
29835 +PGRreconfirm_commit(int sock, ReplicateHeader * header)
29837 + int result = PGR_NOT_YET_COMMIT;
29839 + if (Replicateion_Log == NULL)
29844 + if (Replicateion_Log->r_log_sock > 0)
29846 + close(Replicateion_Log->r_log_sock );
29847 + Replicateion_Log->r_log_sock = -1;
29849 + Replicateion_Log->r_log_sock = PGRcreate_send_rlog_socket();
29850 + if (Replicateion_Log->r_log_sock == -1)
29853 + header->query_size = 0;
29854 + PGRsend_rlog_packet(Replicateion_Log->r_log_sock,header,"");
29855 + PGRrecv_rlog_result(Replicateion_Log->r_log_sock,&result, sizeof(result));
29858 + close(Replicateion_Log->r_log_sock );
29859 + Replicateion_Log->r_log_sock = -1;
29861 + snprintf(PGR_Result,PGR_MESSAGE_BUFSIZE,"%d,%d", PGR_TRANSACTION_CONFIRM_ANSWER_FUNC_NO,result);
29863 + PGRreturn_result(sock, PGR_Result,PGR_NOWAIT_ANSWER);
29867 +reconfirm_commit(ReplicateHeader * header)
29869 + char * func = "reconfirm_commit()";
29870 + int result = PGR_NOT_YET_COMMIT;
29872 + /* check the transaction was committed */
29873 + if (was_committed_transaction(header) == true)
29875 + result = PGR_ALREADY_COMMITTED;
29876 +#ifdef PRINT_DEBUG
29877 + show_debug("%s:PGR_ALREADY_COMMITTED",func);
29882 +#ifdef PRINT_DEBUG
29883 + show_debug("%s:PGR_NOT_YET_COMMIT",func);
29890 +PGRset_rlog(ReplicateHeader * header, char * query)
29892 + char * func = "PGRset_rlog()";
29893 + int status = STATUS_OK;
29894 + bool send_flag = false;
29896 + if (PGR_Log_Header == NULL)
29900 + switch (header->cmdSts)
29902 + case CMD_STS_QUERY:
29903 + send_flag = true;
29905 + case CMD_STS_TRANSACTION:
29906 + if (header->cmdType == CMD_TYPE_COMMIT)
29908 + send_flag = true;
29909 + PGR_Log_Header->cmdType = header->cmdType;
29910 + PGR_Log_Header->query_size = htonl(strlen(query));
29914 + if (send_flag != true)
29916 + show_error("%s:send_flag is false",func);
29919 + PGR_Log_Header->cmdSys = CMD_SYS_LOG;
29920 + if (Cascade_Inf->useFlag == DB_TBL_USE)
29922 + /* save log data in remote server */
29923 + show_debug("%s:set rlog %s",func,query);
29924 + status = PGRsend_lower_cascade(PGR_Log_Header, query);
29925 + if (status == STATUS_OK) {
29926 + status=PGRwait_notice_rlog_done();
29928 + if (status != STATUS_OK)
29930 +#ifdef PRINT_DEBUG
29931 + show_debug("%s:PGRsend_lower_cascade failed",func);
29933 + PGRwrite_rlog(PGR_Log_Header, query);
29938 + /* save log data in local server */
29939 + PGRwrite_rlog(PGR_Log_Header, query);
29944 +PGRunset_rlog(ReplicateHeader * header, char * query)
29946 + int status = STATUS_OK;
29947 + bool send_flag = false;
29949 + if (PGR_Log_Header == NULL)
29953 + switch (header->cmdSts)
29955 + case CMD_STS_QUERY:
29956 + send_flag = true;
29957 + PGR_Log_Header->cmdSts = CMD_STS_DELETE_QUERY;
29959 + case CMD_STS_TRANSACTION:
29960 + if (PGR_Log_Header->cmdType == CMD_TYPE_COMMIT)
29962 + PGR_Log_Header->cmdSts = CMD_STS_DELETE_TRANSACTION;
29963 + PGR_Log_Header->query_size = htonl(strlen(query));
29964 + send_flag = true;
29968 + if (send_flag != true)
29972 + PGR_Log_Header->cmdSys = CMD_SYS_LOG;
29973 + if (Cascade_Inf->useFlag == DB_TBL_USE)
29975 + /* save log data in remote server */
29976 + show_debug("unset rlog %s",query);
29978 + status = PGRsend_lower_cascade(PGR_Log_Header, query);
29979 + if (status == STATUS_OK)
29981 + status=PGRwait_notice_rlog_done();
29983 + if (status != STATUS_OK)
29985 +#ifdef PRINT_DEBUG
29986 + show_debug("PGRsend_lower_cascade recv failed");
29988 + PGRwrite_rlog(PGR_Log_Header, query);
29993 + /* save log data in local server */
29994 + PGRwrite_rlog(PGR_Log_Header, query);
29999 +PGRresend_rlog_to_db(void)
30001 + char *func="PGRresend_rlog_to_db";
30002 + QueryLogType * query_log = NULL;
30003 + QueryLogType * next = NULL;
30004 + int status = STATUS_OK;
30007 + show_debug("%s:enter.",func);
30009 + query_log = Query_Log_Top;
30011 + while (query_log != NULL)
30015 + show_debug("%s:processing qlog,query=%s",func,query_log->query);
30016 + if (query_log->header->rlog != FROM_R_LOG_TYPE )
30018 + query_log = (QueryLogType *)query_log->next;
30021 + status = replicate_packet_send_internal(query_log->header,query_log->query, dest,RECOVERY_INIT,false);
30022 + show_debug("%s:status=%d",func,status);
30024 + if (status == STATUS_SKIP_REPLICATE )
30026 + Query_Log_Top = query_log;
30027 + query_log = (QueryLogType *)query_log->next;
30031 + if (query_log->header != NULL)
30033 + free(query_log->header );
30035 + if (query_log->query != NULL)
30037 + free(query_log->query );
30039 + next = (QueryLogType *)query_log->next;
30041 + query_log = next;
30042 + Query_Log_Top = query_log;
30044 + if (query_log != NULL)
30046 + Query_Log_End = (QueryLogType *)query_log->next;
30050 + Query_Log_End = (QueryLogType *)NULL;
30054 + show_debug("%s:exit.",func);
30056 + return STATUS_OK;
30060 +PGR_RLog_Main(void)
30062 + char * func = "PGR_RLog_Main()";
30065 + struct sockaddr addr;
30066 + socklen_t addrlen;
30070 + extern int fork_wait_time;
30072 + if (Replicateion_Log == NULL)
30074 + show_error("%s:Replicateion_Log is NULL",func);
30077 + pgid = getpgid(0);
30078 + if ((pid = fork()) != 0 )
30082 + PGRsignal(SIGTERM,exit_rlog);
30083 + PGRsignal(SIGINT,exit_rlog);
30084 + PGRsignal(SIGQUIT,exit_rlog);
30085 + PGRsignal(SIGPIPE,SIG_IGN);
30087 + if (PGRinit_transaction_table() != STATUS_OK)
30089 + show_error("RLog process transaction table memory allocate failed");
30094 + RLog_Recv_Sock = create_recv_rlog_socket();
30095 + if(RLog_Recv_Sock == -1)
30097 + show_error("rlog socket creation failure.quit all process.");
30098 + kill(pgreplicate_pid, SIGINT);
30102 + if (fork_wait_time > 0) {
30103 +#ifdef PRINT_DEBUG
30104 + show_debug("rlog process: wait fork(): pid = %d", getpid());
30106 + sleep(fork_wait_time);
30112 + struct timeval timeout;
30114 + timeout.tv_sec = PGR_Replication_Timeout;
30115 + timeout.tv_usec = 0;
30117 + Idle_Flag = IDLE_MODE ;
30118 + if (Exit_Request)
30123 + * Wait for something to happen.
30126 + FD_SET(RLog_Recv_Sock,&rmask);
30127 + rtn = select(RLog_Recv_Sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
30130 + if (errno == EINTR || errno == EAGAIN)
30133 + else if (rtn && FD_ISSET(RLog_Recv_Sock, &rmask))
30135 + Idle_Flag = BUSY_MODE ;
30136 + addrlen = sizeof(addr);
30137 + afd = accept(RLog_Recv_Sock, &addr, &addrlen);
30153 +create_recv_rlog_socket(void)
30155 + char * func = "create_recv_socket()";
30156 + struct sockaddr_un addr;
30161 + /* set unix domain socket path */
30162 + fd = socket(AF_UNIX, SOCK_STREAM, 0);
30165 + show_error("%s:Failed to create UNIX domain socket. reason: %s",func, strerror(errno));
30168 + memset((char *) &addr, 0, sizeof(addr));
30169 + ((struct sockaddr *)&addr)->sa_family = AF_UNIX;
30170 + snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/.s.PGRLOG.%d",
30172 + Replicateion_Log->RLog_Port_Number);
30173 +fprintf(stderr,"addr.sun_path[%s]\n",addr.sun_path);
30174 + if (Replicateion_Log->RLog_Sock_Path == NULL)
30176 + Replicateion_Log->RLog_Sock_Path = strdup(addr.sun_path);
30177 +fprintf(stderr,"Replicateion_Log->RLog_Sock_Path[%s]\n",Replicateion_Log->RLog_Sock_Path);
30179 + len = sizeof(struct sockaddr_un);
30180 + status = bind(fd, (struct sockaddr *)&addr, len);
30181 + if (status == -1)
30183 + show_error("%s: bind() failed. reason: %s", func, strerror(errno));
30187 + if (chmod(addr.sun_path, 0770) == -1)
30189 + show_error("%s: chmod() failed. reason: %s", func, strerror(errno));
30193 + status = listen(fd, 1000000);
30196 + show_error("%s: listen() failed. reason: %s", func, strerror(errno));
30203 +PGRcreate_send_rlog_socket(void)
30205 + char * func = "create_recv_socket()";
30206 + struct sockaddr_un addr;
30210 + /* set unix domain socket path */
30211 + fd = socket(AF_UNIX, SOCK_STREAM, 0);
30214 + show_error("%s:Failed to create UNIX domain socket. reason: %s",func, strerror(errno));
30217 + memset((char *) &addr, 0, sizeof(addr));
30218 + ((struct sockaddr *)&addr)->sa_family = AF_UNIX;
30219 + snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/.s.PGRLOG.%d",
30221 + Replicateion_Log->RLog_Port_Number);
30222 + if (Replicateion_Log->RLog_Sock_Path == NULL)
30224 + Replicateion_Log->RLog_Sock_Path = strdup(addr.sun_path);
30226 + len = sizeof(struct sockaddr_un);
30227 + if (connect(fd, (struct sockaddr *)&addr, len) < 0)
30238 + char * func = "do_rlog()";
30239 + QueryLogType * query_log = NULL;
30240 + ReplicateHeader header;
30241 + char * query = NULL;
30242 + int status = STATUS_OK;
30244 + memset(&header,0,sizeof(header));
30245 + query = PGRread_packet(fd, &header);
30246 + show_debug("%s:got result:cmdSys='%c'",func,header.cmdSys);
30247 + if (header.cmdSys == 0)
30249 + return STATUS_ERROR;
30251 + switch (header.cmdSys)
30253 + case CMD_SYS_REPLICATE:
30254 + if (header.cmdSts != CMD_STS_DELETE_QUERY)
30256 + query_log = get_query_log_by_header(&header);
30257 + if (query_log != (QueryLogType*)NULL)
30259 + memcpy(&header,query_log->header,sizeof(ReplicateHeader));
30261 + send_message(fd,(char *)&header,sizeof(ReplicateHeader));
30262 + header.cmdSts = CMD_STS_DELETE_QUERY;
30263 + PGRwrite_rlog(&header, NULL);
30267 + status = PGRwrite_rlog((ReplicateHeader*)&header,(char *)NULL);
30268 + send_message(fd,(char *)&status,sizeof(status));
30271 + case CMD_SYS_LOG:
30272 + status = PGRwrite_rlog((ReplicateHeader*)&header, query);
30273 + send_message(fd,(char *)&status,sizeof(status));
30275 + case CMD_SYS_CALL:
30276 + if (header.cmdSts == CMD_STS_TRANSACTION_ABORT)
30278 +#ifdef PRINT_DEBUG
30279 + show_debug("%s: CMD_STS_TRANSACTION_ABORT",func);
30281 + status = reconfirm_commit(&header);
30283 + else if (header.cmdSts == CMD_STS_QUERY_SUSPEND)
30285 +#ifdef PRINT_DEBUG
30286 + show_debug("%s: CMD_STS_QUERY_SUSPEND",func);
30288 + // status = PGRresend_rlog_to_db();
30290 +#ifdef PRINT_DEBUG
30291 + show_debug("%s: SYS_CALL process done",func);
30293 + send_message(fd,(char *)&status,sizeof(status));
30296 + show_debug("%s:process result done:cmdSys='%c'",func,header.cmdSys);
30297 + return STATUS_OK;
30302 +PGRget_sync_data(ReplicateHeader *header)
30304 + ReplicateHeader rlog_header;
30306 + if (header == NULL)
30308 + return STATUS_ERROR;
30312 + if (Replicateion_Log->r_log_sock > 0)
30314 + close(Replicateion_Log->r_log_sock );
30315 + Replicateion_Log->r_log_sock = -1;
30317 + Replicateion_Log->r_log_sock = PGRcreate_send_rlog_socket();
30318 + if (Replicateion_Log->r_log_sock == -1)
30319 + return STATUS_ERROR;
30321 + memset(&rlog_header,0,sizeof(ReplicateHeader));
30322 + send_message( Replicateion_Log->r_log_sock, (char *)header,sizeof(ReplicateHeader));
30323 + recv_message( Replicateion_Log->r_log_sock, (char *)&rlog_header,sizeof(ReplicateHeader));
30324 + if (rlog_header.cmdSts != 0)
30326 + close(Replicateion_Log->r_log_sock );
30327 + Replicateion_Log->r_log_sock = -1;
30328 + memcpy(header,&rlog_header, sizeof(ReplicateHeader));
30329 + return STATUS_OK;
30332 + close(Replicateion_Log->r_log_sock );
30333 + Replicateion_Log->r_log_sock = -1;
30335 + return STATUS_ERROR;
30340 +PGRdelete_sync_data(ReplicateHeader *header)
30345 + cmdSts = header->cmdSts;
30346 + header->cmdSts = CMD_STS_DELETE_QUERY;
30347 + if (header == NULL)
30349 + return STATUS_ERROR;
30352 + if (Replicateion_Log->r_log_sock > 0)
30354 + close(Replicateion_Log->r_log_sock );
30355 + Replicateion_Log->r_log_sock = -1;
30357 + Replicateion_Log->r_log_sock = PGRcreate_send_rlog_socket();
30358 + if (Replicateion_Log->r_log_sock == -1)
30359 + return STATUS_ERROR;
30361 + send_message( Replicateion_Log->r_log_sock, (char *)header,sizeof(ReplicateHeader));
30362 + recv_message( Replicateion_Log->r_log_sock, (char *)&status,sizeof(status));
30363 + header->cmdSts = cmdSts;
30365 + close(Replicateion_Log->r_log_sock );
30366 + Replicateion_Log->r_log_sock = -1;
30371 +#endif /* #if 0 */
30374 +PGRsend_rlog_packet(int sock,ReplicateHeader * header, const char * query_string)
30376 + char * buf = NULL;
30377 + int buf_size = 0;
30378 + int header_size = 0;
30379 + int query_size = 0;
30382 + /* check parameter */
30383 + if ((sock <= 0) || (header == NULL))
30385 + return STATUS_ERROR;
30387 + if (query_string != NULL)
30389 + query_size = ntohl(header->query_size);
30391 + header_size = sizeof(ReplicateHeader);
30392 + buf_size = header_size + query_size + 4;
30393 + buf = (char *)malloc(buf_size);
30394 + if (buf == (char *)NULL)
30396 + return STATUS_ERROR;
30398 + memset(buf,0,buf_size);
30400 + memcpy(buf,header,header_size);
30401 + if (query_size > 0)
30403 + memcpy((char *)(buf+header_size),query_string,query_size+1);
30405 + rtn = send_message(sock,buf,buf_size);
30411 +PGRrecv_rlog_result(int sock,void * result, int size)
30413 + char *func = "PGRrecv_rlog_result";
30415 + struct timeval timeout;
30418 + if ((result == (void *)NULL) || (size <= 0))
30424 + * Wait for something to happen.
30429 + timeout.tv_sec = PGR_Replication_Timeout;
30430 + timeout.tv_usec = 0;
30433 + FD_SET(sock,&rmask);
30434 + rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
30437 + if (errno != EINTR && errno != EAGAIN) /* report only when errno is neither EINTR nor EAGAIN; with || this test was always true */
30439 + show_error("%s: select() failed (%s)",func,strerror(errno));
30443 + else if (rtn && FD_ISSET(sock, &rmask))
30445 + return (recv_message(sock, (char*)result, size));
30453 +recv_message(int sock,char * buf, int len)
30455 + char *func = "recv_message";
30459 + int read_size = 0;
30465 + r = recv(sock,read_ptr + read_size ,len - read_size, 0);
30468 + if (errno == EINTR || errno == EAGAIN)
30472 + show_error("%s:recv failed: %d(%s)",func, errno, strerror(errno));
30479 + if (read_size == len)
30480 + return read_size;
30482 + else /* r == 0 */
30484 + show_error("%s:unexpected EOF", func);
30492 +PGRsend_rlog_to_local(ReplicateHeader * header,char * query)
30494 + int status = STATUS_OK;
30496 + if (Replicateion_Log == NULL)
30498 + return STATUS_ERROR;
30501 + if (Replicateion_Log->r_log_sock > 0)
30503 + close(Replicateion_Log->r_log_sock );
30504 + Replicateion_Log->r_log_sock = -1;
30507 + Replicateion_Log->r_log_sock = PGRcreate_send_rlog_socket();
30508 + if (Replicateion_Log->r_log_sock == -1)
30509 + return STATUS_ERROR;
30511 + show_debug("send_to_local %s",query);
30512 + status = PGRsend_rlog_packet(Replicateion_Log->r_log_sock,header,query);
30513 + show_debug("send_to_local result is %d,errno=%d(%s)",status,errno ,strerror(errno));
30515 + if (status != STATUS_ERROR)
30517 + PGRrecv_rlog_result(Replicateion_Log->r_log_sock,&status, sizeof(status));
30520 + close(Replicateion_Log->r_log_sock );
30521 + Replicateion_Log->r_log_sock = -1;
30527 +PGRget_rlog_header(ReplicateHeader * header)
30529 + int status = STATUS_OK;
30530 + ReplicateHeader rlog_header;
30532 + if ((Replicateion_Log == NULL) ||
30533 + (header == NULL))
30535 + return STATUS_ERROR;
30538 + if (Replicateion_Log->r_log_sock > 0)
30540 + close(Replicateion_Log->r_log_sock );
30541 + Replicateion_Log->r_log_sock = -1;
30543 + Replicateion_Log->r_log_sock = PGRcreate_send_rlog_socket();
30544 + if (Replicateion_Log->r_log_sock == -1)
30545 + return STATUS_ERROR;
30547 + memcpy(&rlog_header,header,sizeof(ReplicateHeader));
30548 + rlog_header.cmdSys = CMD_SYS_REPLICATE;
30549 + rlog_header.query_size = 0;
30550 + status =PGRsend_rlog_packet(Replicateion_Log->r_log_sock,&rlog_header,"");
30551 + if (status != STATUS_ERROR)
30553 + status = PGRrecv_rlog_result(Replicateion_Log->r_log_sock,&rlog_header, sizeof(ReplicateHeader));
30556 + memcpy(header,&rlog_header,sizeof(ReplicateHeader));
30557 + status = STATUS_OK;
30561 + status = STATUS_ERROR;
30565 + close(Replicateion_Log->r_log_sock );
30566 + Replicateion_Log->r_log_sock = -1;
30572 +send_message(int sock, char * msg, int len)
30574 + char * func = "send_message()";
30576 + struct timeval timeout;
30578 + char * send_ptr = NULL;
30579 + int send_size= 0;
30580 + int buf_size = 0;
30584 + if ((msg == NULL) || (len <= 0) || (sock <= 0))
30586 + return STATUS_ERROR;
30592 + * Wait for something to happen.
30594 +#ifdef MSG_DONTWAIT
30595 + flag |= MSG_DONTWAIT;
30597 +#ifdef MSG_NOSIGNAL
30598 + flag |= MSG_NOSIGNAL;
30603 + timeout.tv_sec = PGR_Replication_Timeout;
30604 + timeout.tv_usec = 0;
30607 + FD_SET(sock,&wmask);
30608 + rtn = select(sock+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
30612 + if (errno == EAGAIN || errno == EINTR)
30615 + show_error("%s:send-select error: %d(%s)",func,errno,strerror(errno));
30616 + return STATUS_ERROR;
30618 + else if (rtn && FD_ISSET(sock, &wmask)) /* logical AND: FD_ISSET is only guaranteed to be non-zero, not 1 */
30620 + s = send(sock,send_ptr + send_size,buf_size - send_size ,flag);
30623 + if (errno == EINTR || errno == EAGAIN)
30627 + show_error("%s:send error: %d(%s)",func,errno,strerror(errno));
30628 + memset(send_ptr, 0, len);
30629 + return STATUS_ERROR;
30634 + show_error("%s:unexpected EOF", func); /* pass the argument consumed by %s */
30635 + memset(send_ptr, 0, len);
30636 + return STATUS_ERROR;
30641 + if (send_size == buf_size)
30643 + return STATUS_OK;
30648 + show_error("%s:send-select unknown error: %d(%s)",
30649 + func,errno,strerror(errno));
30650 + return STATUS_ERROR;
30654 +exit_rlog(int sig)
30658 + Exit_Request = true;
30659 + if (sig == SIGTERM)
30661 + if (Idle_Flag == BUSY_MODE)
30667 + sigemptyset(&mask);
30668 + sigaddset(&mask, SIGINT);
30669 + sigaddset(&mask, SIGTERM);
30670 + sigaddset(&mask, SIGQUIT);
30671 + sigprocmask(SIG_BLOCK, &mask, NULL);
30673 + if (RLog_Recv_Sock >= 0)
30675 + close(RLog_Recv_Sock);
30676 + RLog_Recv_Sock = -1;
30678 + if (Replicateion_Log->RLog_Sock_Path != NULL)
30680 + unlink(Replicateion_Log->RLog_Sock_Path);
30681 + free(Replicateion_Log->RLog_Sock_Path);
30685 +#endif /* USE_REPLICATION */
30686 diff -aruN postgresql-8.2.4/src/pgcluster/tool/Makefile pgcluster-1.7.0rc7/src/pgcluster/tool/Makefile
30687 --- postgresql-8.2.4/src/pgcluster/tool/Makefile 1970-01-01 01:00:00.000000000 +0100
30688 +++ pgcluster-1.7.0rc7/src/pgcluster/tool/Makefile 2007-02-18 22:52:17.000000000 +0100
30690 +#-------------------------------------------------------------------------
30692 +# Makefile for src/pgcluster/tool
30694 +#-------------------------------------------------------------------------
30696 +subdir = src/pgcluster/tool
30697 +top_builddir = ../../..
30698 +include $(top_builddir)/src/Makefile.global
30702 +# EXTRA_OBJS = $(top_builddir)/src/backend/libpq/replicate_com.o
30704 +override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) -DBINDIR=\"$(bindir)\"
30708 +pgcbench: $(OBJS) $(libpq_builddir)/libpq.a
30709 + $(CC) $(CFLAGS) $(OBJS) $(EXTRA_OBJS) $(libpq) $(LDFLAGS) $(LIBS) -o $@
30711 +install: all installdirs
30712 + $(INSTALL_PROGRAM) pgcbench$(X) $(DESTDIR)$(bindir)/pgcbench$(X)
30715 + $(mkinstalldirs) $(DESTDIR)$(bindir)
30718 + rm -f $(addprefix $(DESTDIR)$(bindir)/, pgcbench$(X))
30720 +clean distclean maintainer-clean:
30721 + rm -f pgcbench$(X) $(OBJS)
30722 diff -aruN postgresql-8.2.4/src/pgcluster/tool/README.jp pgcluster-1.7.0rc7/src/pgcluster/tool/README.jp
30723 --- postgresql-8.2.4/src/pgcluster/tool/README.jp 1970-01-01 01:00:00.000000000 +0100
30724 +++ pgcluster-1.7.0rc7/src/pgcluster/tool/README.jp 2007-02-18 22:52:17.000000000 +0100
30728 +
\e$B"#
\e(B pgcbench
\e$B$H$O
\e(B
30730 +pgcbench
\e$B$O
\e(B PGCluster
\e$B$N%Y%s%A%^!<%/%F%9%H$r9T$&%W%m%0%i%`$G$9!#$b$A$m
\e(B
30731 +
\e$B$s!"
\e(BPGCluster
\e$B$@$1$G$O$J$/
\e(B PostgreSQL
\e$B$N%Y%s%A%^!<%/$r<B;\$9$k$3$H$b$G
\e(B
30734 +pgcbench
\e$B$O
\e(B SELECT
\e$B!"
\e(BUPDATE
\e$B!"
\e(BINSERT
\e$B$r4^$`%H%i%s%6%/%7%g%s$r<B9T$7!"A4
\e(B
30735 +
\e$BBN$N<B9T;~4V$H<B:]$K40N;$7$?$G$"$m$&%H%i%s%6%/%7%g%s$N?t$+$i
\e(B 1
\e$BIC4V$K
\e(B
30736 +
\e$B<B9T$G$-$?%H%i%s%6%/%7%g%s?t
\e(B (tps)
\e$B$rI=<($7$^$9!#=hM}$NBP>]$H$J$k%F!<
\e(B
30737 +
\e$B%V%k$O%G%U%)%k%H$G$O
\e(B 10
\e$BK|9T$N%G!<%?$r4^$_$^$9!#
\e(B
30739 +
\e$B<B:]$KI=<($O0J2<$N$h$&$K$J$j$^$9!#
\e(B
30741 + number of clients: 4
30742 + number of transactions actually processed: 100
30743 + run time (sec) = 4.416423
30744 + tps = 22.642759 (including connections establishing)
30746 +
\e$B"(
\e(B
\e$BCm0U
\e(B
30748 + pgcbench
\e$B$G$O!"%H%i%s%6%/%7%g%s$,<B:]$K40N;$7$?$+$I$&$+$K4X78$J$/!"
\e(B
30749 +
\e$B:G=i$K;XDj$5$l$?%H%i%s%6%/%7%g%s$N?t$r$b$H$K
\e(B tps
\e$B$r5a$a$F$$$k$?$a!"
\e(B
30750 +
\e$B%Y%s%A%^!<%/$,ESCf$G=*N;$7$F$7$^$C$?>l9g!"I=<($5$l$k
\e(B tps
\e$B$,?.MQ$G$-
\e(B
30751 +
\e$B$J$$2DG=@-$,$"$k$3$H$KCm0U$7$F$/$@$5$$!#
\e(B
30753 +
\e$B$J$*!"
\e(Bpgcbench
\e$B$O
\e(B pgbench
\e$B$H$$$&
\e(B PostgreSQL
\e$BMQ$K=q$+$l$?%Y%s%A%^!<%/%F
\e(B
30754 +
\e$B%9%H$r9T$J$&%W%m%0%i%`$r$b$H$K:n@.$5$l$^$7$?!#
\e(B
30757 +
\e$B"#
\e(B pgbench
\e$B$H$N0c$$
\e(B
30759 +
\e$B!&
\e(B
\e$B%^%k%A%f!<%64D6-$N<B8=J}K!
\e(B
30761 + pgbench
\e$B$,
\e(B libpq
\e$B$NHsF14|=hM}5!G=$r;H$C$?%7%s%0%k%W%m%;%9$K$h$C$F%7
\e(B
30762 +
\e$B%_%e%l!<%H$7$F$$$k$N$KBP$7$F!"
\e(Bpgcbench
\e$B$G$O
\e(B fork
\e$B$r;H$C$?%^%k%A%W%m
\e(B
30763 +
\e$B%;%9$K$h$C$F<B8=$7$F$$$^$9!#$3$l$O!"%7%s%0%k%W%m%;%9$G$O%m%C%/$,H/
\e(B
30764 +
\e$B@8$9$k$H!"
\e(BPGCluster
\e$B$,;_$^$C$F$7$^$&$3$H$rHr$1$k$?$a$G$9!#
\e(B
30766 +
\e$B!&
\e(B
\e$B%*%W%7%g%s$NJQ99
\e(B
30768 + pgcbench
\e$B$K$O!"
\e(BPGCluster
\e$B$N%Y%s%A%^!<%/%F%9%H$r9T$J$&$N$KJXMx$J!"%H
\e(B
30769 +
\e$B%i%s%6%/%7%g%s$NFbMF$r;XDj$9$k%*%W%7%g%s$,!"$$$/$D$+DI2C$5$l$F$$$^
\e(B
30773 +
\e$B"#
\e(B pgcbench
\e$B$N%$%s%9%H!<%k
\e(B
30775 +1. PGCluster
\e$B$r
\e(B configure
\e$B!"
\e(Bmake
\e$B$7$^$9!#
\e(B
30777 + pgcbench
\e$B$N%$%s%9%H!<%k$@$1$,L\E*$G$"$l$P!"
\e(BPGCluster
\e$B$N$9$Y$F$r%3%s
\e(B
30778 +
\e$B%Q%$%k$9$kI,MW$O$"$j$^$;$s!#
\e(BPGCluster
\e$B%=!<%9$N%H%C%W%G%#%l%/%H%j$G
\e(B
30779 + configure
\e$B$r$7$?8e!"
\e(Bsrc/interface/libpq
\e$B$G
\e(B make all
\e$B$r<B9T$9$l$P=`
\e(B
30780 +
\e$BHw40N;$G$9!#
\e(B
30782 +2.
\e$B$3$N%G%#%l%/%H%j
\e(B (src/pgcluster/tool)
\e$B$G
\e(B make
\e$B$r<B9T$7$^$9!#
\e(B
30784 +
\e$B$=$&$9$k$H!"
\e(Bpgcbench
\e$B$H$$$&<B9T%W%m%0%i%`$,$G$-$^$9!#$=$N$^$^<B9T$7
\e(B
30785 +
\e$B$F$b9=$$$^$;$s$7!"
\e(Bmake install
\e$B$r<B9T$7$F
\e(B PGCluster
\e$B$NI8=`<B9T%W%m
\e(B
30786 +
\e$B%0%i%`%G%#%l%/%H%j
\e(B (
\e$B%G%U%)%k%H$G$O
\e(B /usr/local/pgsql/bin)
\e$B$K%$%s%9%H!<
\e(B
30787 +
\e$B%k$9$k$3$H$b$G$-$^$9!#
\e(B
30790 +
\e$B"#
\e(B pgcbench
\e$B$N;H$$J}
\e(B
30792 + pgcbench [
\e$B%*%W%7%g%s
\e(B] [
\e$B%G!<%?%Y!<%9L>
\e(B]
30794 +
\e$B%G!<%?%Y!<%9L>$r>JN,$9$k$H!"%m%0%$%sL>$HF1$8%G!<%?%Y!<%9L>$r;XDj$7$?$b
\e(B
30795 +
\e$B$N$H8+$J$7$^$9!#$J$*!"
\e(Bpgcbench
\e$B$G%G%U%)%k%H$N%Y%s%A%^!<%/$r<B;\$9$k$?
\e(B
30796 +
\e$B$a$K$O!"8e=R$N
\e(B -i
\e$B%*%W%7%g%s$r;HMQ$7$F%G!<%?%Y!<%9$r$"$i$+$8$a=i4|2=$7
\e(B
30797 +
\e$B$F$*$/I,MW$,$"$j$^$9!#
\e(B
30799 +pgcbench
\e$B$K$O$$$m$$$m$J%*%W%7%g%s$,$"$j$^$9!#
\e(B
30801 +-h
\e$B%[%9%HL>
\e(B
30803 + PostgreSQL
\e$B$N
\e(B
\e$B%G!<%?%Y!<%9%G!<%b%s
\e(B postmaster
\e$B$NF0:n$7$F$$$k%[%9%HL>
\e(B
30804 +
\e$B$r;XDj$7$^$9!#>JN,$9$k$H4D6-JQ?t
\e(B PGHOST
\e$B$K@_Dj$7$?%[%9%HL>$,;XDj$5
\e(B
30805 +
\e$B$l$^$9!#
\e(BPGHOST
\e$B$b;XDj$5$l$F$$$J$$$H<+%[%9%H$K
\e(B Unix
\e$B%I%a%$%s%=%1%C%H
\e(B
30806 +
\e$B$G@\B3$7$^$9!#
\e(B
30808 +-p
\e$B%]!<%HHV9f
\e(B
30810 + postmaster
\e$B$N;HMQ$9$k%]!<%HHV9f$r;XDj$7$^$9!#>JN,$9$k$H4D6-JQ?t
\e(B
30811 + PGPORT
\e$B$K@_Dj$7$?%]!<%HHV9f$,;XDj$5$l$^$9!#
\e(BPGPORT
\e$B$b;XDj$5$l$F$$$J
\e(B
30812 +
\e$B$$$H
\e(B 5432
\e$B$,;XDj$5$l$?$b$N$H8+$J$7$^$9!#
\e(B
30814 +-c
\e$B%/%i%$%"%s%H?t
\e(B
30816 +
\e$BF1;~<B9T%/%i%$%"%s%H?t$r;XDj$7$^$9!#>JN,;~$O
\e(B 1
\e$B$H$J$j$^$9!#
\e(Bpgcbench
30817 +
\e$B$OF1;~<B9T%/%i%$%"%s%H$4$H$K%U%!%$%k%G%#%9%/%j%W%?$r;HMQ$9$k$N$G!"
\e(B
30818 +
\e$B;HMQ2DG=%U%!%$%k%G%#%9%/%j%W%??t$r1[$($k%/%i%$%"%s%H?t$O;XDj$G$-$^
\e(B
30819 +
\e$B$;$s!#;HMQ2DG=%U%!%$%k%G%#%9%/%j%W%??t$O
\e(B limit
\e$B$d
\e(B ulimit
\e$B%3%^%s%I$G
\e(B
30820 +
\e$B3NG'$9$k$3$H$,$G$-$^$9!#
\e(B
30822 +-t
\e$B%H%i%s%6%/%7%g%s?t
\e(B
30824 +
\e$B%H%i%s%6%/%7%g%s?t$r;XDj$7$^$9!#3F%/%i%$%"%s%H$,<B9T$9$k%H%i%s%6%/
\e(B
30825 +
\e$B%7%g%s?t$O$3$l$r%/%i%$%"%s%H?t$G3d$C$??t$H$J$j$^$9!#>JN,;~$O
\e(B 10
\e$B$H
\e(B
30826 +
\e$B$J$j$^$9!#
\e(B
30828 +-s
\e$B%9%1!<%j%s%0%U%!%/%?!<
\e(B
30830 + -i
\e$B%*%W%7%g%s$H$H$b$K;HMQ$7$^$9!#%9%1!<%j%s%0%U%!%/%?!<$O
\e(B 1
\e$B0J>e$N
\e(B
30831 +
\e$B@0?t$G$9!#%9%1!<%j%s%0%U%!%/%?!<$rJQ$($k$3$H$K$h$j!"%F%9%H$NBP>]$H
\e(B
30832 +
\e$B$J$k%F!<%V%k$NBg$-$5$,
\e(B 10
\e$BK|
\e(B
\e$B!_
\e(B
\e$B%9%1!<%j%s%0%U%!%/%?!<$K$J$j$^$9!#
\e(B
30833 +
\e$B%G%U%)%k%H$N%9%1!<%j%s%0%U%!%/%?!<$O
\e(B 1
\e$B$G$9!#
\e(B
30835 +-u
\e$B%m%0%$%sL>
\e(B
30837 + DB
\e$B%f!<%6$N%m%0%$%sL>$r;XDj$7$^$9!#>JN,$9$k$H4D6-JQ?t
\e(B PGUSER
\e$B$K@_Dj
\e(B
30838 +
\e$B$7$?%m%0%$%sL>$,;XDj$5$l$^$9!#
\e(B
30840 +-P
\e$B%Q%9%o!<%I
\e(B
30842 +
\e$B%Q%9%o!<%I$r;XDj$7$^$9!#$J$*!"$3$N%*%W%7%g%s$r;H$&$H!"%Q%9%o!<%I$r
\e(B
30843 + ps
\e$B%3%^%s%I$G8+$i$l$k$J$I!"%;%-%e%j%F%#%[!<%k$K$J$k2DG=@-$,$"$k$N$G!"
\e(B
30844 +
\e$B%F%9%HMQ$K$N$_$*;H$$2<$5$$!#
\e(B
30848 +
\e$B$3$N%*%W%7%g%s$r;XDj$9$k$H!"%Y%s%A%^!<%/3+;OA0$K
\e(B VACUUM
\e$B$H
\e(B history
30849 +
\e$B%F!<%V%k$N%/%j%"$r<B9T$7$^$;$s!#
\e(B
30853 +
\e$B$3$N%*%W%7%g%s$r;XDj$9$k$H!"%Y%s%A%^!<%/3+;OA0$K
\e(B VACUUM
\e$B$H
\e(B history
30854 +
\e$B%F!<%V%k$N%/%j%"$r<B9T$7$^$9!#
\e(B-v
\e$B$H
\e(B -n
\e$B$r>JN,$9$k$H!":G>.8B$N
\e(B
30855 + VACUUM
\e$B$J$I$r<B9T$7$^$9!#$9$J$o$A!"
\e(Bhistory
\e$B%F!<%V%k$N%/%j%"$H!"
\e(B
30856 + branches
\e$B!"
\e(Btellers
\e$B!"
\e(Bhistory
\e$B%F!<%V%k$N
\e(B VACUUM
\e$B$r<B9T$7$^$9!#$3$l$O!"
\e(B
30857 + VACUUM
\e$B$N;~4V$r:G>.8B$K$7$J$,$i!"%Q%U%)!<%^%s%9$K1F6A$9$k%4%_A]=|$r
\e(B
30858 +
\e$B8z2LE*$K<B9T$7$^$9!#DL>o$O
\e(B -v
\e$B$H
\e(B -n
\e$B$r>JN,$9$k$3$H$r?d>)$7$^$9!#
\e(B
30862 +
\e$BA^F~$N$_$N%H%i%s%6%/%7%g%s$r<B9T$7$^$9!#A^F~%9%T!<%I$rB,Dj$7$?$$$H
\e(B
30863 +
\e$B$-$K;H$$$^$9!#
\e(B
30867 +
\e$B99?7$N$_$N%H%i%s%6%/%7%g%s$r<B9T$7$^$9!#99?7%9%T!<%I$rB,Dj$7$?$$$H
\e(B
30868 +
\e$B$-$K;H$$$^$9!#
\e(B
30872 +
\e$B8!:w$N$_$N%H%i%s%6%/%7%g%s$r<B9T$7$^$9!#8!:w%9%T!<%I$rB,Dj$7$?$$$H
\e(B
30873 +
\e$B$-$K;H$$$^$9!#
\e(B
30875 +-f
\e$B%U%!%$%kL>
\e(B
30877 +
\e$B%H%i%s%6%/%7%g%s$NFbMF$,5-=R$5$l$?%U%!%$%kL>$r;XDj$7$^$9!#$3$N%*%W
\e(B
30878 +
\e$B%7%g%s$r;XDj$9$k$H!"%U%!%$%k$K5-=R$5$l$?FbMF$N%H%i%s%6%/%7%g%s$r<B
\e(B
30879 +
\e$B9T$7$^$9!#%Y%s%A%^!<%/$G;HMQ$9$k%F!<%V%k$O$"$i$+$8$a=i4|2=$7$F$*$/
\e(B
30880 +
\e$BI,MW$,$"$j$^$9!#F~NO%U%!%$%k$N%U%)!<%^%C%H$O8e=R$7$^$9!#
\e(B
30884 + BEGIN
\e$B$H
\e(B END
\e$B$G0O$^$l$?%H%i%s%6%/%7%g%s%V%m%C%/Fb$G%H%i%s%6%/%7%g%s
\e(B
30885 +
\e$B$r<B9T$7$^$9!#
\e(B
30889 +
\e$B$3$N%*%W%7%g%s$r;XDj$9$k$H!":G=i$K3NN)$7$?%3%M%/%7%g%s$r;H$$2s$9$N
\e(B
30890 +
\e$B$G$O$J$/!"3F%H%i%s%6%/%7%g%s$4$H$K
\e(B DB
\e$B$X$N@\B3$r9T$$$^$9!#%3%M%/%7%g
\e(B
30891 +
\e$B%s$N%*!<%P!<$X%C%I$rB,Dj$9$k$N$KM-8z$G$9!#
\e(B
30895 +
\e$B8D!9$N%H%i%s%6%/%7%g%s$N<B9T;~4V$r5-O?$7$^$9!#5-O?@h$O%+%l%s%H%G%#
\e(B
30896 +
\e$B%l%/%H%j0J2<$N
\e(B pgbench_log.xxx
\e$B$H$$$&%U%!%$%k$G$9!#%U%!%$%k$N%U%)!<
\e(B
30897 +
\e$B%^%C%H$O0J2<$N$h$&$K$J$j$^$9!#;~4V$O%^%$%/%mICC10L$G$9!#
\e(B
30899 + <
\e$B%/%i%$%"%s%H
\e(B ID> <
\e$B%H%i%s%6%/%7%g%sHV9f
\e(B> <
\e$B;~4V
\e(B>
30903 +
\e$B%G%P%C%0%*%W%7%g%s!#MM!9$J>pJs$,I=<($5$l$^$9!#
\e(B
30906 +
\e$B"#
\e(B
\e$B%G!<%?%Y!<%9$N=i4|2=
\e(B
30908 +pgcbench
\e$B$G%G%U%)%k%H$N%Y%s%A%^!<%/%F%9%H$r<B;\$9$k$?$a$K$O!"$"$i$+$8
\e(B
30909 +
\e$B$a%G!<%?%Y!<%9$r=i4|2=$7!"%F%9%H%G!<%?$r:n$kI,MW$,$"$j$^$9!#
\e(B
30911 + pgcbench -i [
\e$B%G!<%?%Y!<%9L>
\e(B]
30913 +
\e$B$3$l$K$h$j0J2<$N%F!<%V%k$,:n@.$5$l$^$9
\e(B (
\e$B%9%1!<%j%s%0%U%!%/%?!<$,
\e(B 1
\e$B$N
\e(B
30914 +
\e$B>l9g
\e(B)
\e$B!#
\e(B
30916 +
\e$B%F!<%V%kL>
\e(B |
\e$B9T?t
\e(B
30917 + ------------+--------
30920 + accounts | 100000
30923 +
\e$B"(
\e(B
\e$BCm0U
\e(B
30925 +
\e$BF1$8L>A0$N%F!<%V%k$,$"$k$H:o=|$5$l$F$7$^$&$N$G$4Cm0U2<$5$$!#
\e(B
30927 +
\e$B%9%1!<%j%s%0%U%!%/%?!<$r
\e(B 10
\e$B!"
\e(B100
\e$B!"
\e(B1000
\e$B$J$I$KJQ99$9$k$H!">e5-9T?t$O$=
\e(B
30928 +
\e$B$l$K1~$8$F
\e(B 10
\e$BG\!"
\e(B100
\e$BG\!"
\e(B1000
\e$BG\$K$J$j$^$9!#Nc$($P!"%9%1!<%j%s%0%U%!
\e(B
30929 +
\e$B%/%?!<$r
\e(B 100
\e$B$H$9$k$H0J2<$N$h$&$K$J$j$^$9!#
\e(B
30931 +
\e$B%F!<%V%kL>
\e(B |
\e$B9T?t
\e(B
30932 + ------------+----------
30935 + accounts | 10000000
30939 +
\e$B"#
\e(B
\e$BF~NO%U%!%$%k$N%U%)!<%^%C%H
\e(B
30941 +pgcbench
\e$B$G$O!"
\e(B-f
\e$B%*%W%7%g%s$r;XDj$7$F%H%i%s%6%/%7%g%s$K4^$^$l$k
\e(B SQL
30942 +
\e$B%3%^%s%I$NFbMF$r5-=R$7$?%U%!%$%k$rFI$_9~$`$3$H$,$G$-$^$9!#F~NO%U%!%$%k
\e(B
30943 +
\e$B$K$O
\e(B 1
\e$B9T$K$D$-
\e(B 1
\e$B$D$N%3%^%s%I$r5-=R$7$^$9!#6u9T$OL5;k$5$l!"Fs=E%O%$%U
\e(B
30944 +
\e$B%s$G;O$^$k9T$O%3%a%s%H$r0UL#$7$^$9!#
\e(B
30946 +
\e$B%3%^%s%I$K$O!"
\e(BSQL
\e$B%3%^%s%I$K2C$(!"%P%C%/%9%i%C%7%e$G;O$^$k%a%?%3%^%s%I
\e(B
30947 +
\e$B$r5-=R$9$k$3$H$,$G$-$^$9!#%a%?%3%^%s%I$O
\e(B pgcbench
\e$B<+?H$K$h$C$F<B9T$5$l
\e(B
30948 +
\e$B$^$9!#%a%?%3%^%s%I$N7A<0$O%P%C%/%9%i%C%7%e!"$=$ND>8e$K%3%^%s%I$NF0;l!"
\e(B
30949 +
\e$B$=$N<!$K0z?t$,B3$-$^$9!#F0;l%3%^%s%I$H0z?t!"$^$?$=$l$>$l$N0z?t$O6uGrJ8
\e(B
30950 +
\e$B;z$K$h$C$F6hJL$5$l$^$9!#
\e(B
30952 +
\e$B:#$N$H$3$m!"0J2<$N%a%?%3%^%s%I$,Dj5A$5$l$F$$$^$9!#
\e(B
30954 +\setrandom name min max
30956 +
\e$B:G>.CM
\e(B min
\e$B$H:GBgCM
\e(B max
\e$B$N4V$NCM$r<h$kMp?t$r!"
\e(Bname
\e$BJQ?t$K@_Dj$7$^$9!#
\e(B
30958 +
\e$BJQ?t$KMp?t$r@_Dj$9$k$K$O!"
\e(B\setrandom
\e$B%a%?%3%^%s%I$r;HMQ$7$F0J2<$N$h$&
\e(B
30959 +
\e$B$K5-=R$7$^$9!#
\e(B
30961 + \setrandom aid 1 100000
30963 +
\e$B$3$l$O!"JQ?t
\e(B aid
\e$B$K
\e(B 1
\e$B$+$i
\e(B 100000
\e$B$N4V$NMp?t$r@_Dj$7$^$9!#$^$?!"JQ?t$N
\e(B
30964 +
\e$BCM$r
\e(B SQL
\e$B%3%^%s%I$KKd$a9~$`$K$O!"0J2<$N$h$&$K$=$NL>A0$NA0$K%3%m%s$rIU
\e(B
30967 + SELECT abalance FROM accounts WHERE aid = :aid
30969 +
\e$BNc$($P!"
\e(BTPC-B
\e$B$K;w$?%Y%s%A%^!<%/$r9T$&$K$O!"0J2<$N$h$&$K%H%i%s%6%/%7%g
\e(B
30970 +
\e$B%s$NFbMF$r%U%!%$%k$K5-=R$7!"
\e(B-f
\e$B%*%W%7%g%s$K$h$C$F$=$N%U%!%$%k$r;XDj$7
\e(B
30971 +
\e$B$F
\e(B pgcbench
\e$B$r<B9T$7$^$9!#
\e(B
30973 + \setrandom aid 1 100000
30974 + \setrandom bid 1 1
30975 + \setrandom tid 1 10
30976 + \setrandom delta 1 1000
30978 + UPDATE accounts SET abalance = abalance + :delta WHERE aid = :aid
30979 + SELECT abalance FROM accounts WHERE aid = :aid
30980 + UPDATE tellers SET tbalance = tbalance + :delta WHERE tid = :tid
30981 + UPDATE branches SET bbalance = bbalance + :delta WHERE bid = :bid
30982 + INSERT INTO history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, 'now')
30984 +
\e$B$J$*!"$3$NNc$G$O!"
\e(B-i
\e$B%*%W%7%g%s$r;H$C$F=i4|2=$7$?%G!<%?%Y!<%9
\e(B (
\e$B%9%1!<
\e(B
30985 +
\e$B%j%s%0%U%!%/%?!<$,
\e(B 1
\e$B$N>l9g
\e(B)
\e$B$KBP$7$F%Y%s%A%^!<%/$r9T$&$3$H$r2>Dj$7$F
\e(B
30989 +
\e$B"#
\e(B
\e$B%H%i%s%6%/%7%g%s$NDj5A
\e(B
30991 +pgcbench
\e$B$N%G%U%)%k%H$N%Y%s%A%^!<%/$G$O!"0J2<$N
\e(B SQL
\e$B%3%^%s%I$rA4It40N;
\e(B
30992 +
\e$B$7$F
\e(B 1
\e$B%H%i%s%6%/%7%g%s$H?t$($F$$$^$9!#
\e(B
30994 +1. SELECT abalance FROM accounts WHERE aid = :aid
30996 + :aid
\e$B$O
\e(B 1
\e$B$+$i%9%1!<%j%s%0%U%!%/%?!<
\e(B
\e$B!_
\e(B 10
\e$BK|$^$G$NCM$r<h$kMp?t$G$9!#
\e(B
30997 +
\e$B$3$3$G$O
\e(B 1
\e$B7o$@$18!:w$5$l$^$9!#0J2<!"Mp?t$NCM$O$=$l$>$l$3$N%H%i%s%6
\e(B
30998 +
\e$B%/%7%g%s$NCf$G$OF1$8CM$r;H$$$^$9!#
\e(B
31000 +2. UPDATE accounts SET abalance = abalance + :delta WHERE aid = :aid
31002 + :delta
\e$B$O
\e(B 1
\e$B$+$i
\e(B 1000
\e$B$^$G$NCM$r<h$kMp?t$G$9!#
\e(B
31004 +3. SELECT abalance FROM accounts WHERE aid = :aid
31006 +4. INSERT INTO history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, 'now')
31008 + :tid
\e$B$O
\e(B 1
\e$B$+$i%9%1!<%j%s%0%U%!%/%?!<
\e(B
\e$B!_
\e(B 10
\e$B$^$G$NCM$r<h$kMp?t!"
\e(B:bid
31009 +
\e$B$O
\e(B 1
\e$B$+$i%9%1!<%j%s%0%U%!%/%?!<$^$G$NCM$r<h$kMp?t$G$9!#
\e(B
31011 +5. INSERT INTO history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, 'now')
31013 +6. INSERT INTO history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, 'now')
31015 +7. SELECT abalance FROM accounts WHERE aid = :aid
31017 +-T
\e$B%*%W%7%g%s$r;XDj$7$F%H%i%s%6%/%7%g%s%V%m%C%/Fb$G%H%i%s%6%/%7%g%s$r
\e(B
31018 +
\e$B<B9T$9$k>l9g!"
\e(B1
\e$B$r
\e(B BEGIN
\e$B$K!"
\e(B7
\e$B$r
\e(B END
\e$B$KCV$-49$($?
\e(B SQL
\e$B%3%^%s%I$,<B9T$5
\e(B
31019 +
\e$B$l$^$9!#$^$?!"%H%i%s%6%/%7%g%s$H$7$F<B9T$5$l$k
\e(B SQL
\e$B%3%^%s%I$O!"
\e(B-I
\e$B%*%W
\e(B
31020 +
\e$B%7%g%s
\e(B (
\e$BA^F~$N$_
\e(B)
\e$B$G$"$l$P
\e(B 4
\e$B!"
\e(B-U (
\e$B99?7$N$_
\e(B)
\e$B$G$"$l$P
\e(B 2
\e$B!"
\e(B-S (
\e$B8!:w$N$_
\e(B)
31021 +
\e$B$G$"$l$P
\e(B 1
\e$B$H$J$j$^$9!#
\e(B
31022 diff -aruN postgresql-8.2.4/src/pgcluster/tool/pgcbench.c pgcluster-1.7.0rc7/src/pgcluster/tool/pgcbench.c
31023 --- postgresql-8.2.4/src/pgcluster/tool/pgcbench.c 1970-01-01 01:00:00.000000000 +0100
31024 +++ pgcluster-1.7.0rc7/src/pgcluster/tool/pgcbench.c 2007-02-18 22:52:17.000000000 +0100
31027 + * pgcbench: a simple benchmark program for PGCluster
31028 + * This program was written based on pgbench by Tatsuo Ishii.
31030 + * Portions Copyright (c) 2003-2006, Atsushi Mitani
31031 + * Portions Copyright (c) 2000-2006, Tatsuo Ishii
31033 + * Permission to use, copy, modify, and distribute this software and
31034 + * its documentation for any purpose and without fee is hereby
31035 + * granted, provided that the above copyright notice appear in all
31036 + * copies and that both that copyright notice and this permission
31037 + * notice appear in supporting documentation, and that the name of the
31038 + * author not be used in advertising or publicity pertaining to
31039 + * distribution of the software without specific, written prior
31040 + * permission. The author makes no representations about the
31041 + * suitability of this software for any purpose. It is provided "as
31042 + * is" without express or implied warranty.
31044 +#include "postgres_fe.h"
31046 +#include "libpq-fe.h"
31048 +#include <errno.h>
31051 +#include "win32.h"
31053 +#include <sys/time.h>
31054 +#include <unistd.h>
31056 +#ifdef HAVE_GETOPT_H
31057 +#include <getopt.h>
31060 +#ifdef HAVE_SYS_SELECT_H
31061 +#include <sys/select.h>
31064 +/* for getrlimit */
31065 +#include <sys/resource.h>
31066 +#endif /* ! WIN32 */
31068 +#include <sys/types.h>
31069 +#include <sys/wait.h>
31071 +#include <ctype.h>
31072 +#include <search.h>
31074 +extern char *optarg;
31075 +extern int optind;
31082 +/********************************************************************
31083 + * some configurable parameters */
31085 +#define MAXCLIENTS 4096 /* max number of clients allowed */
31087 +int nclients = 1; /* default number of simulated clients */
31088 +int nxacts = 10; /* default number of transactions per
31092 + * scaling factor. for example, tps = 10 will make 1000000 tuples of
31093 + * accounts table.
31098 + * end of configurable parameters
31099 + *********************************************************************/
31101 +#define nbranches 1
31102 +#define ntellers 10
31103 +#define naccounts 100000
31105 +#define SELECT_ONLY (1)
31106 +#define INSERT_ONLY (2)
31107 +#define UPDATE_ONLY (3)
31108 +#define WITH_TRANSACTION (4)
31109 +#define TPC_B_LIKE (5)
31110 +#define CUSTOM_QUERY (6)
31112 +#define SQL_COMMAND 1
31113 +#define META_COMMAND 2
31115 +FILE *LOGFILE = NULL;
31117 +bool use_log = false; /* log transaction latencies to a file */
31119 +int remains; /* number of remaining clients */
31121 +int is_connect; /* establish connection for each
31124 +char *pghost = "";
31125 +char *pgport = NULL;
31126 +char *pgoptions = NULL;
31127 +char *pgtty = NULL;
31128 +char *login = NULL;
31140 + PGconn *con; /* connection handle to DB */
31141 + int id; /* client No. */
31142 + int state; /* state No. */
31143 + int cnt; /* xacts count */
31144 + int ecnt; /* error count */
31146 + int listen; /* 0 indicates that an async query has
31148 + int aid; /* account id for this transaction */
31149 + int bid; /* branch id for this transaction */
31150 + int tid; /* teller id for this transaction */
31154 + struct timeval txn_begin; /* used for measuring latencies */
31164 +Command **commands = NULL;
31169 + fprintf(stderr, "usage: pgcbench [-h hostname][-p port][-c nclients][-t ntransactions][-s scaling_factor][-I(insert only)][-U(update only)][-S(select only)][-f filename][-u login][-P password][-d(debug)][dbname]\n");
31170 + fprintf(stderr, "(initialize mode): pgcbench -i [-h hostname][-p port][-s scaling_factor][-u login][-P password][-d(debug)][dbname]\n");
31173 +/* random number generator */
31175 +getrand(int min, int max )
31178 + return (min + (int) (max * 1.0 * rand() / (RAND_MAX + 1.0)));
31181 +/* set up a connection to the backend */
31188 + con = PQsetdbLogin(pghost, pgport, pgoptions, pgtty, dbName,
31192 + fprintf(stderr, "Connection to database '%s' failed.\n", dbName);
31193 + fprintf(stderr, "Memory allocatin problem?\n");
31197 + if (PQstatus(con) == CONNECTION_BAD)
31199 + fprintf(stderr, "Connection to database '%s' failed.\n", dbName);
31201 + if (PQerrorMessage(con))
31202 + fprintf(stderr, "%s", PQerrorMessage(con));
31204 + fprintf(stderr, "No explanation from the backend\n");
31209 + res = PQexec(con, "SET search_path = public");
31210 + if (PQresultStatus(res) != PGRES_COMMAND_OK)
31212 + fprintf(stderr, "%s", PQerrorMessage(con));
31220 +/* throw away response from backend */
31222 +discard_response(CState * state)
31228 + res = PQgetResult(state->con);
31234 +/* check to see if the SQL result was good */
31236 +check(CState * st, PGresult *res, int good)
31238 + if (res && PQresultStatus(res) != good)
31240 + fprintf(stderr, "aborted in state %d: %s", st->state, PQerrorMessage(st->con));
31241 + PQfinish(st->con);
31245 + return (0); /* OK */
31249 +compareVariables(const void *v1, const void *v2)
31251 + return strcmp(((Variable *)v1)->name, ((Variable *)v2)->name);
31255 +getVariable(CState * st, char *name)
31257 + Variable key = { name }, *var;
31259 + var = tfind(&key, &st->variables, compareVariables);
31261 + return (*(Variable **)var)->value;
31267 +putVariable(CState * st, char *name, char *value)
31269 + Variable key = { name }, *var;
31271 + var = tfind(&key, &st->variables, compareVariables);
31274 + if ((var = malloc(sizeof(Variable))) == NULL)
31277 + var->name = NULL;
31278 + var->value = NULL;
31280 + if ((var->name = strdup(name)) == NULL
31281 + || (var->value = strdup(value)) == NULL
31282 + || tsearch(var, &st->variables, compareVariables) == NULL)
31285 + free(var->value);
31292 + free((*(Variable **)var)->value);
31293 + if (((*(Variable **)var)->value = strdup(value)) == NULL)
31301 +assignVariables(CState * st, char *sql)
31304 + char *p, *name, *val;
31308 + while ((p = strchr(&sql[i], ':')) != NULL)
31313 + while (isalnum(sql[i]) != 0 || sql[i] == '_');
31317 + name = malloc(i - j);
31318 + if (name == NULL)
31320 + memcpy(name, &sql[j + 1], i - (j + 1));
31321 + name[i - (j + 1)] = '\0';
31322 + val = getVariable(st, name);
31327 + if (strlen(val) > i - j)
31329 + tmp = realloc(sql, strlen(sql) - (i - j) + strlen(val) + 1);
31338 + if (strlen(val) != i - j)
31339 + memmove(&sql[j + strlen(val)], &sql[i], strlen(&sql[i]) + 1);
31341 + strncpy(&sql[j], val, strlen(val));
31343 + if (strlen(val) < i - j)
31345 + tmp = realloc(sql, strlen(sql) + 1);
31354 + i = j + strlen(val);
31360 +/* process a transaction */
31362 +doMix(CState * st, int debug, int ttype)
31368 + { /* are we receiver? */
31370 + fprintf(stderr, "client receiving\n");
31371 + if (!PQconsumeInput(st->con))
31372 + { /* there's something wrong */
31373 + fprintf(stderr, "Client aborted in state %d. Probably the backend died while processing.\n", st->state);
31374 + PQfinish(st->con);
31378 + if (PQisBusy(st->con))
31379 + return; /* don't have the whole result yet */
31381 + switch (st->state)
31383 + case 0: /* response to "begin" */
31384 + res = PQgetResult(st->con);
31385 + if (ttype == WITH_TRANSACTION)
31387 + if (check(st, res, PGRES_COMMAND_OK))
31392 + if (check(st, res, PGRES_TUPLES_OK))
31396 + discard_response(st);
31398 + case 1: /* response to "update accounts..." */
31399 + res = PQgetResult(st->con);
31400 + if (check(st, res, PGRES_COMMAND_OK))
31403 + discard_response(st);
31405 + case 2: /* response to "select abalance ..." */
31406 + res = PQgetResult(st->con);
31407 + if (check(st, res, PGRES_TUPLES_OK))
31410 + discard_response(st);
31412 + case 3: /* response to "update tellers ..." */
31413 + res = PQgetResult(st->con);
31414 + if (check(st, res, PGRES_COMMAND_OK))
31417 + discard_response(st);
31419 + case 4: /* response to "update branches ..." */
31420 + res = PQgetResult(st->con);
31421 + if (check(st, res, PGRES_COMMAND_OK))
31424 + discard_response(st);
31426 + case 5: /* response to "insert into history ..." */
31427 + res = PQgetResult(st->con);
31428 + if (check(st, res, PGRES_COMMAND_OK))
31431 + discard_response(st);
31433 + case 6: /* response to "end" */
31436 + * transaction finished: record the time it took in the
31442 + struct timeval now;
31444 + gettimeofday(&now, NULL);
31445 + diff = (int) (now.tv_sec - st->txn_begin.tv_sec) * 1000000.0 +
31446 + (int) (now.tv_usec - st->txn_begin.tv_usec);
31448 + fprintf(LOGFILE, "%d %d %.0f\n", st->id, st->cnt, diff);
31451 + res = PQgetResult(st->con);
31452 + if (ttype == WITH_TRANSACTION)
31454 + if (check(st, res, PGRES_COMMAND_OK))
31459 + if (check(st, res, PGRES_TUPLES_OK))
31463 + discard_response(st);
31467 + PQfinish(st->con);
31470 + if (++st->cnt >= st->maxAct)
31472 + remains--; /* I've done */
31473 + if (st->con != NULL)
31475 + PQfinish(st->con);
31483 + /* increment state counter */
31485 + if (st->state > 6)
31488 + remains--; /* I've done */
31492 + if (st->con == NULL)
31494 + if ((st->con = doConnect()) == NULL)
31496 + fprintf(stderr, "Client aborted in establishing connection.\n");
31497 + remains--; /* I've aborted */
31498 + PQfinish(st->con);
31504 + switch (st->state)
31506 + case 0: /* about to start */
31507 + if (ttype == WITH_TRANSACTION)
31509 + strcpy(sql, "begin");
31513 + st->aid = getrand(1, naccounts * tps);
31514 + snprintf(sql, 256, "select abalance from accounts where aid = %d", st->aid);
31516 + st->aid = getrand(1, naccounts * tps);
31517 + st->bid = getrand(1, nbranches * tps);
31518 + st->tid = getrand(1, ntellers * tps);
31519 + st->delta = getrand(1, 1000);
31521 + gettimeofday(&(st->txn_begin), NULL);
31524 + snprintf(sql, 256, "update accounts set abalance = abalance + %d where aid = %d\n", st->delta, st->aid);
31527 + snprintf(sql, 256, "select abalance from accounts where aid = %d", st->aid);
31532 + snprintf(sql, 256, "update tellers set tbalance = tbalance + %d where tid = %d\n",
31533 + st->delta, st->tid);
31539 + snprintf(sql, 256, "update branches set bbalance = bbalance + %d where bid = %d", st->delta, st->bid);
31543 + snprintf(sql, 256, "insert into history(tid,bid,aid,delta,mtime) values(%d,%d,%d,%d,'now')",
31544 + st->tid, st->bid, st->aid, st->delta);
31547 + if (ttype == WITH_TRANSACTION)
31549 + strcpy(sql, "end");
31553 + st->aid = getrand(1, naccounts * tps);
31554 + snprintf(sql, 256, "select abalance from accounts where aid = %d", st->aid);
31560 + fprintf(stderr, "client sending %s\n", sql);
31562 + if (PQsendQuery(st->con, sql) == 0)
31565 + fprintf(stderr, "PQsendQuery(%s)failed\n", sql);
31570 + st->listen++; /* flags that should be listened */
31574 +/* process a select only transaction */
31576 +doOne(CState * st, int debug, int ttype )
31582 + { /* are we receiver? */
31584 + fprintf(stderr, "client receiving\n");
31585 + if (!PQconsumeInput(st->con))
31586 + { /* there's something wrong */
31587 + fprintf(stderr, "Client aborted in state %d. Probably the backend died while processing.\n", st->state);
31588 + remains--; /* I've aborted */
31589 + PQfinish(st->con);
31593 + if (PQisBusy(st->con))
31594 + return; /* don't have the whole result yet */
31596 + switch (st->state)
31598 + case 0: /* response to "select abalance ..." */
31599 + res = PQgetResult(st->con);
31600 + if (ttype == SELECT_ONLY)
31602 + if (check(st, res, PGRES_TUPLES_OK))
31607 + if (check(st, res, PGRES_COMMAND_OK))
31611 + discard_response(st);
31615 + PQfinish(st->con);
31619 + if (++st->cnt >= st->maxAct)
31621 + remains--; /* I've done */
31622 + if (st->con != NULL)
31624 + PQfinish(st->con);
31632 + /* increment state counter */
31634 + if (st->state > 0)
31637 + remains--; /* I've done */
31641 + if (st->con == NULL)
31643 + if ((st->con = doConnect()) == NULL)
31645 + fprintf(stderr, "Client aborted in establishing connection.\n");
31646 + PQfinish(st->con);
31652 + switch (st->state)
31655 + st->aid = getrand(1, naccounts * tps);
31656 + st->bid = getrand(1, nbranches * tps);
31657 + st->tid = getrand(1, ntellers * tps);
31658 + st->delta = getrand(1, 1000);
31659 + if ( ttype == SELECT_ONLY)
31661 + snprintf(sql, 256, "select abalance from accounts where aid = %d", st->aid);
31663 + if ( ttype == UPDATE_ONLY)
31665 + snprintf(sql, 256, "update accounts set abalance = abalance + %d where aid = %d\n", st->delta, st->aid);
31667 + if ( ttype == INSERT_ONLY)
31669 + snprintf(sql, 256, "insert into history(tid,bid,aid,delta,mtime) values(%d,%d,%d,%d,'now')",
31670 + st->tid, st->bid, st->aid, st->delta);
31676 + fprintf(stderr, "client sending %s\n", sql);
31678 + if (PQsendQuery(st->con, sql) == 0)
31681 + fprintf(stderr, "PQsendQuery(%s)failed\n", sql);
31686 + st->listen++; /* flags that should be listened */
31691 +doCustom(CState * st, int debug, int ttype )
31696 + { /* are we receiver? */
31697 + if (commands[st->state]->type == SQL_COMMAND)
31700 + fprintf(stderr, "client receiving\n");
31701 + if (!PQconsumeInput(st->con))
31702 + { /* there's something wrong */
31703 + fprintf(stderr, "Client aborted in state %d. Probably the backend died while processing.\n", st->state);
31704 + PQfinish(st->con);
31708 + if (PQisBusy(st->con))
31709 + return; /* don't have the whole result yet */
31713 + * transaction finished: record the time it took in the
31716 + if (use_log && commands[st->state + 1] == NULL)
31719 + struct timeval now;
31721 + gettimeofday(&now, NULL);
31722 + diff = (int) (now.tv_sec - st->txn_begin.tv_sec) * 1000000.0 +
31723 + (int) (now.tv_usec - st->txn_begin.tv_usec);
31725 + fprintf(LOGFILE, "%d %d %.0f\n", st->id, st->cnt, diff);
31728 + if (commands[st->state]->type == SQL_COMMAND)
31730 + res = PQgetResult(st->con);
31731 + if (strncasecmp(commands[st->state]->argv[0], "select", 6) != 0)
31733 + if (check(st, res, PGRES_COMMAND_OK))
31738 + if (check(st, res, PGRES_TUPLES_OK))
31742 + discard_response(st);
31745 + if (commands[st->state + 1] == NULL)
31749 + PQfinish(st->con);
31752 + if (++st->cnt >= st->maxAct)
31754 + remains--; /* I've done */
31755 + if (st->con != NULL)
31757 + PQfinish(st->con);
31764 + /* increment state counter */
31766 + if (commands[st->state] == NULL)
31769 + remains--; /* I've done */
31773 + if (st->con == NULL)
31775 + if ((st->con = doConnect()) == NULL)
31777 + fprintf(stderr, "Client aborted in establishing connection.\n");
31778 + remains--; /* I've aborted */
31779 + PQfinish(st->con);
31785 + if (use_log && st->state == 0)
31786 + gettimeofday(&(st->txn_begin), NULL);
31788 + if (commands[st->state]->type == SQL_COMMAND)
31792 + if ((sql = strdup(commands[st->state]->argv[0])) == NULL
31793 + || (sql = assignVariables(st, sql)) == NULL)
31795 + fprintf(stderr, "out of memory\n");
31801 + fprintf(stderr, "client sending %s\n", sql);
31803 + if (PQsendQuery(st->con, sql) == 0)
31806 + fprintf(stderr, "PQsendQuery(%s)failed\n", sql);
31811 + st->listen++; /* flags that should be listened */
31816 + else if (commands[st->state]->type == META_COMMAND)
31818 + int argc = commands[st->state]->argc, i;
31819 + char **argv = commands[st->state]->argv;
31823 + fprintf(stderr, "client executing \\%s", argv[0]);
31824 + for (i = 1; i < argc; i++)
31825 + fprintf(stderr, " %s", argv[i]);
31826 + fprintf(stderr, "\n");
31829 + if (strcasecmp(argv[0], "setrandom") == 0)
31833 + if ((val = malloc(strlen(argv[3]) + 1)) == NULL)
31835 + fprintf(stderr, "%s: out of memory\n", argv[0]);
31840 + sprintf(val, "%d", getrand(atoi(argv[2]), atoi(argv[3])));
31842 + if (putVariable(st, argv[1], val) == false)
31844 + fprintf(stderr, "%s: out of memory\n", argv[0]);
31856 +/* discard connections */
31858 +disconnect_all(CState * state)
31861 + PQfinish(state->con);
31864 +/* create tables and setup data */
31870 + static char *DDLs[] = {
31871 + "drop table branches",
31872 + "create table branches(bid int not null,bbalance int,filler char(88))",
31873 + "drop table tellers",
31874 + "create table tellers(tid int not null,bid int,tbalance int,filler char(84))",
31875 + "drop table accounts",
31876 + "create table accounts(aid int not null,bid int,abalance int,filler char(84))",
31877 + "drop table history",
31878 + "create table history(tid int,bid int,aid int,delta int,mtime timestamp,filler char(22))"};
31879 + static char *DDLAFTERs[] = {
31880 + "alter table branches add primary key (bid)",
31881 + "alter table tellers add primary key (tid)",
31882 + "alter table accounts add primary key (aid)"};
31889 + if ((con = doConnect()) == NULL)
31892 + for (i = 0; i < (sizeof(DDLs) / sizeof(char *)); i++)
31894 + res = PQexec(con, DDLs[i]);
31895 + if (strncmp(DDLs[i], "drop", 4) && PQresultStatus(res) != PGRES_COMMAND_OK)
31897 + fprintf(stderr, "%s", PQerrorMessage(con));
31903 + res = PQexec(con, "begin");
31904 + if (PQresultStatus(res) != PGRES_COMMAND_OK)
31906 + fprintf(stderr, "%s", PQerrorMessage(con));
31911 + for (i = 0; i < nbranches * tps; i++)
31913 + snprintf(sql, 256, "insert into branches(bid,bbalance) values(%d,0)", i + 1);
31914 + res = PQexec(con, sql);
31915 + if (PQresultStatus(res) != PGRES_COMMAND_OK)
31917 + fprintf(stderr, "%s", PQerrorMessage(con));
31923 + for (i = 0; i < ntellers * tps; i++)
31925 + snprintf(sql, 256, "insert into tellers(tid,bid,tbalance) values (%d,%d,0)"
31926 + ,i + 1, i / ntellers + 1);
31927 + res = PQexec(con, sql);
31928 + if (PQresultStatus(res) != PGRES_COMMAND_OK)
31930 + fprintf(stderr, "%s", PQerrorMessage(con));
31936 + res = PQexec(con, "end");
31937 + if (PQresultStatus(res) != PGRES_COMMAND_OK)
31939 + fprintf(stderr, "%s", PQerrorMessage(con));
31945 + * occupy accounts table with some data
31947 + fprintf(stderr, "creating tables...\n");
31948 + for (i = 0; i < naccounts * tps; i++)
31952 + if (j % 10000 == 1)
31954 + res = PQexec(con, "copy accounts from stdin");
31955 + if (PQresultStatus(res) != PGRES_COPY_IN)
31957 + fprintf(stderr, "%s", PQerrorMessage(con));
31963 + snprintf(sql, 256, "%d\t%d\t%d\t\n", j, i / naccounts + 1, 0);
31964 + if (PQputline(con, sql))
31966 + fprintf(stderr, "PQputline failed\n");
31970 + if (j % 10000 == 0)
31973 + * every 10000 tuples, we commit the copy command. this should
31974 + * avoid generating too many WAL logs
31976 + fprintf(stderr, "%d tuples done.\n", j);
31977 + if (PQputline(con, "\\.\n"))
31979 + fprintf(stderr, "very last PQputline failed\n");
31983 + if (PQendcopy(con))
31985 + fprintf(stderr, "PQendcopy failed\n");
31992 + * do a checkpoint to purge the old WAL logs
31994 + res = PQexec(con, "checkpoint");
31995 + if (PQresultStatus(res) != PGRES_COMMAND_OK)
31997 + fprintf(stderr, "%s", PQerrorMessage(con));
32001 +#endif /* NOT_USED */
32004 + fprintf(stderr, "set primary key...\n");
32005 + for (i = 0; i < (sizeof(DDLAFTERs) / sizeof(char *)); i++)
32007 + res = PQexec(con, DDLAFTERs[i]);
32008 + if (PQresultStatus(res) != PGRES_COMMAND_OK)
32010 + fprintf(stderr, "%s", PQerrorMessage(con));
32017 + fprintf(stderr, "vacuum...");
32018 + res = PQexec(con, "vacuum analyze");
32019 + if (PQresultStatus(res) != PGRES_COMMAND_OK)
32021 + fprintf(stderr, "%s", PQerrorMessage(con));
32025 + fprintf(stderr, "done.\n");
32031 +process_file(char *filename)
32033 + const char delim[] = " \f\n\r\t\v";
32036 + int lineno, i, j;
32037 + char buf[BUFSIZ], *p, *tok;
32040 + if (strcmp(filename, "-") == 0)
32042 + else if ((fd = fopen(filename, "r")) == NULL)
32044 + fprintf(stderr, "%s: %s\n", strerror(errno), filename);
32048 + fprintf(stderr, "processing file...\n");
32052 + while (fgets(buf, sizeof(buf), fd) != NULL)
32054 + if ((p = strchr(buf, '\n')) != NULL)
32057 + while (isspace(*p))
32059 + if (*p == '\0' || strncmp(p, "--", 2) == 0)
32065 + if ((tmp = realloc(commands, sizeof(Command *) * (i + 1))) == NULL)
32072 + if ((commands[i] = malloc(sizeof(Command))) == NULL)
32075 + commands[i]->argv = NULL;
32076 + commands[i]->argc = 0;
32080 + commands[i]->type = META_COMMAND;
32083 + tok = strtok(++p, delim);
32084 + while (tok != NULL)
32086 + tmp = realloc(commands[i]->argv, sizeof(char *) * (j + 1));
32089 + commands[i]->argv = tmp;
32091 + if ((commands[i]->argv[j] = strdup(tok)) == NULL)
32094 + commands[i]->argc++;
32097 + tok = strtok(NULL, delim);
32100 + if (strcasecmp(commands[i]->argv[0], "setrandom") == 0)
32104 + if (commands[i]->argc < 4)
32106 + fprintf(stderr, "%s: %d: \\%s: missing argument\n", filename, lineno, commands[i]->argv[0]);
32110 + for (j = 4; j < commands[i]->argc; j++)
32111 + fprintf(stderr, "%s: %d: \\%s: extra argument \"%s\" ignored\n", filename, lineno, commands[i]->argv[0], commands[i]->argv[j]);
32113 + if ((min = atoi(commands[i]->argv[2])) < 0)
32115 + fprintf(stderr, "%s: %d: \\%s: invalid minimum number %s\n", filename, lineno, commands[i]->argv[0], commands[i]->argv[2]);
32119 + if ((max = atoi(commands[i]->argv[3])) < min || max > RAND_MAX)
32121 + fprintf(stderr, "%s: %d: \\%s: invalid maximum number %s\n", filename, lineno, commands[i]->argv[0], commands[i]->argv[3]);
32127 + fprintf(stderr, "%s: %d: invalid command \\%s\n", filename, lineno, commands[i]->argv[0]);
32133 + commands[i]->type = SQL_COMMAND;
32135 + if ((commands[i]->argv = malloc(sizeof(char *))) == NULL)
32138 + if ((commands[i]->argv[0] = strdup(p)) == NULL)
32141 + commands[i]->argc++;
32149 + if ((tmp = realloc(commands, sizeof(Command *) * (i + 1))) == NULL)
32153 + commands[i] = NULL;
32158 + if (errno == ENOMEM)
32159 + fprintf(stderr, "%s: %d: out of memory\n", filename, lineno);
32163 + if (commands == NULL)
32168 + if (commands[i] != NULL)
32170 + for (j = 0; j < commands[i]->argc; j++)
32171 + free(commands[i]->argv[j]);
32173 + free(commands[i]->argv);
32174 + free(commands[i]);
32184 +/* print out results */
32187 + int ttype, int normal_xacts,
32188 + struct timeval * tv1, struct timeval * tv2,
32189 + struct timeval * tv3)
32195 + t1 = (tv3->tv_sec - tv1->tv_sec) * 1000000.0 + (tv3->tv_usec - tv1->tv_usec);
32196 + t1 = t1 / 1000000.0 ;
32198 + t2 = (tv3->tv_sec - tv1->tv_sec) * 1000000.0 + (tv3->tv_usec - tv1->tv_usec);
32199 + t2 = normal_xacts * 1000000.0 / t2;
32201 +#define SELECT_ONLY (1)
32202 +#define INSERT_ONLY (2)
32203 +#define UPDATE_ONLY (3)
32204 +#define WITH_TRANSACTION (4)
32208 + s = "TPC-B (sort of)";
32210 + case SELECT_ONLY :
32211 + s = "SELECT only";
32213 + case INSERT_ONLY :
32214 + s = "INSERT only";
32216 + case UPDATE_ONLY :
32217 + s = "UPDATE only";
32219 + case CUSTOM_QUERY :
32220 + s = "Custom query";
32228 + printf("transaction type: %s\n", s);
32229 + printf("scaling factor: %d\n", tps);
32230 + printf("number of clients: %d\n", nclients);
32231 + printf("number of transactions actually processed: %d\n", normal_xacts );
32232 + printf("run time (sec) = %f \n", t1);
32233 + printf("tps = %f (including connections establishing)\n", t2);
32237 +doChild(int clientId, int min, int max, int debug, int ttype)
32239 + CState state; /* status of clients */
32241 + struct timeval tv1; /* start up time */
32242 + fd_set input_mask;
32243 + int nsocks = 0; /* return from select(2) */
32246 + gettimeofday(&tv1, NULL);
32247 + srand((unsigned int) tv1.tv_usec + clientId );
32249 + memset((char *)&state,0,sizeof(CState));
32250 + /* make connections to the database */
32251 + state.id = clientId;
32252 + if ((state.con = doConnect()) == NULL)
32255 + state.maxAct = max - min + 1;
32256 + /* send start up queries in async manner */
32259 + case WITH_TRANSACTION :
32260 + case TPC_B_LIKE :
32261 + doMix(&state, debug, ttype);
32263 + case CUSTOM_QUERY :
32264 + doCustom(&state, debug, ttype);
32267 + doOne(&state, debug, ttype);
32274 + if (remains < min || !state.con)
32279 + FD_ZERO(&input_mask);
32281 + if (ttype != CUSTOM_QUERY || commands[state.state]->type != META_COMMAND)
32283 + if (state.con == NULL)
32285 + if ((state.con = doConnect()) == NULL)
32290 + sock = PQsocket(state.con);
32294 + fprintf(stderr, "Client %d: PQsocket failed\n", clientId);
32295 + disconnect_all(&state);
32298 + FD_SET(sock, &input_mask);
32300 + if ((nsocks = select(sock + 1, &input_mask, (fd_set *) NULL,
32301 + (fd_set *) NULL, (struct timeval *) NULL)) < 0)
32303 + if (errno == EINTR)
32305 + /* must be something wrong */
32306 + disconnect_all(&state);
32307 + fprintf(stderr, "select failed: %s\n", strerror(errno));
32310 + else if (nsocks == 0)
32312 + fprintf(stderr, "select timeout\n");
32313 + fprintf(stderr, "client %d:state %d cnt %d ecnt %d listen %d\n",
32314 + clientId, state.state, state.cnt, state.ecnt, state.listen);
32319 + /* ok, backend returns reply */
32320 + if (state.con && (FD_ISSET(PQsocket(state.con), &input_mask)
32321 + || (ttype == CUSTOM_QUERY
32322 + && commands[state.state]->type == META_COMMAND)))
32326 + case WITH_TRANSACTION :
32327 + case TPC_B_LIKE :
32328 + doMix(&state, debug, ttype);
32330 + case CUSTOM_QUERY :
32331 + doCustom(&state, debug, ttype);
32334 + doOne(&state, debug, ttype);
32339 + disconnect_all(&state);
32344 +doClient(int debug, int ttype)
32351 + base = nxacts / nclients;
32352 + mo = nxacts % nclients;
32354 + for ( i = 0 ; i < nclients ; i ++)
32366 + doChild(i, min, max, debug, ttype);
32370 + while ( wait(NULL) > 0)
32376 +main(int argc, char **argv)
32379 + int is_init_mode = 0; /* initialize mode? */
32380 + int is_no_vacuum = 0; /* no vacuum at all before
32382 + int is_full_vacuum = 0; /* do full vacuum before testing? */
32383 + int debug = 0; /* debug flag */
32384 + int ttype = TPC_B_LIKE; /* transaction type */
32385 + char *filename = NULL;
32387 + struct timeval tv1; /* start up time */
32388 + struct timeval tv2; /* after establishing all connections to
32390 + struct timeval tv3; /* end time */
32392 +#if !(defined(__CYGWIN__) || defined(__MINGW32__))
32393 + struct rlimit rlim;
32400 + if ((env = getenv("PGHOST")) != NULL && *env != '\0')
32402 + if ((env = getenv("PGPORT")) != NULL && *env != '\0')
32404 + else if ((env = getenv("PGUSER")) != NULL && *env != '\0')
32407 + while ((c = getopt(argc, argv, "ih:nvp:dc:t:s:u:P:CNSlTUIf:")) != -1)
32421 + is_full_vacuum++;
32430 + ttype = SELECT_ONLY;
32433 + ttype = INSERT_ONLY;
32436 + ttype = UPDATE_ONLY;
32439 + ttype = WITH_TRANSACTION;
32442 + nclients = atoi(optarg);
32443 + if (nclients <= 0 || nclients > MAXCLIENTS)
32445 + fprintf(stderr, "invalid number of clients: %d\n", nclients);
32448 +#if !(defined(__CYGWIN__) || defined(__MINGW32__))
32449 +#ifdef RLIMIT_NOFILE /* most platform uses RLIMIT_NOFILE */
32450 + if (getrlimit(RLIMIT_NOFILE, &rlim) == -1)
32452 +#else /* but BSD doesn't ... */
32453 + if (getrlimit(RLIMIT_OFILE, &rlim) == -1)
32455 +#endif /* HAVE_RLIMIT_NOFILE */
32456 + fprintf(stderr, "getrlimit failed. reason: %s\n", strerror(errno));
32459 + if (rlim.rlim_cur <= (nclients + 2))
32461 + fprintf(stderr, "You need at least %d open files resource but you are only allowed to use %ld.\n", nclients + 2, (long) rlim.rlim_cur);
32462 + fprintf(stderr, "Use limit/ulimt to increase the limit before using pgbench.\n");
32465 +#endif /* #if !(defined(__CYGWIN__) || defined(__MINGW32__)) */
32471 + tps = atoi(optarg);
32474 + fprintf(stderr, "invalid scaling factor: %d\n", tps);
32479 + nxacts = atoi(optarg);
32482 + fprintf(stderr, "invalid number of transactions: %d\n", nxacts);
32496 + ttype = CUSTOM_QUERY;
32497 + filename = optarg;
32506 + if (argc > optind)
32507 + dbName = argv[optind];
32510 + if ((env = getenv("PGDATABASE")) != NULL && *env != '\0')
32512 + else if (login != NULL && *login != '\0')
32518 + if (is_init_mode)
32526 + char logpath[64];
32528 + snprintf(logpath, 64, "pgbench_log.%d", getpid());
32529 + LOGFILE = fopen(logpath, "w");
32531 + if (LOGFILE == NULL)
32533 + fprintf(stderr, "Couldn't open logfile \"%s\": %s", logpath, strerror(errno));
32540 + printf("pghost: %s pgport: %s nclients: %d nxacts: %d dbName: %s\n",
32541 + pghost, pgport, nclients, nxacts, dbName);
32544 + /* opening connection... */
32545 + con = doConnect();
32549 + if (PQstatus(con) == CONNECTION_BAD)
32551 + fprintf(stderr, "Connection to database '%s' failed.\n", dbName);
32552 + fprintf(stderr, "%s", PQerrorMessage(con));
32556 + if (ttype == CUSTOM_QUERY)
32559 + if (process_file(filename) == false)
32565 + * get the scaling factor that should be same as count(*) from
32568 + res = PQexec(con, "select count(*) from branches");
32569 + if (PQresultStatus(res) != PGRES_TUPLES_OK)
32571 + fprintf(stderr, "%s", PQerrorMessage(con));
32574 + tps = atoi(PQgetvalue(res, 0, 0));
32577 + fprintf(stderr, "count(*) from branches invalid (%d)\n", tps);
32582 + if (!is_no_vacuum)
32584 + fprintf(stderr, "starting vacuum...");
32585 + res = PQexec(con, "vacuum branches");
32586 + if (PQresultStatus(res) != PGRES_COMMAND_OK)
32588 + fprintf(stderr, "%s", PQerrorMessage(con));
32593 + res = PQexec(con, "vacuum tellers");
32594 + if (PQresultStatus(res) != PGRES_COMMAND_OK)
32596 + fprintf(stderr, "%s", PQerrorMessage(con));
32601 + res = PQexec(con, "delete from history");
32602 + if (PQresultStatus(res) != PGRES_COMMAND_OK)
32604 + fprintf(stderr, "%s", PQerrorMessage(con));
32608 + res = PQexec(con, "vacuum history");
32609 + if (PQresultStatus(res) != PGRES_COMMAND_OK)
32611 + fprintf(stderr, "%s", PQerrorMessage(con));
32616 + fprintf(stderr, "end.\n");
32618 + if (is_full_vacuum)
32620 + fprintf(stderr, "starting full vacuum...");
32621 + res = PQexec(con, "vacuum analyze accounts");
32622 + if (PQresultStatus(res) != PGRES_COMMAND_OK)
32624 + fprintf(stderr, "%s", PQerrorMessage(con));
32628 + fprintf(stderr, "end.\n");
32634 + /* set random seed */
32635 + gettimeofday(&tv1, NULL);
32636 + srand((unsigned int) tv1.tv_usec);
32637 + /* get start up time */
32638 + gettimeofday(&tv1, NULL);
32639 + /* time after connections set up */
32640 + gettimeofday(&tv2, NULL);
32642 + doClient(debug, ttype);
32644 + /* get end time */
32645 + gettimeofday(&tv3, NULL);
32646 + printResults(ttype, nxacts, &tv1, &tv2, &tv3);
32651 diff -aruN postgresql-8.2.4/src/pgcluster/tool/pgcbench.sh pgcluster-1.7.0rc7/src/pgcluster/tool/pgcbench.sh
32652 --- postgresql-8.2.4/src/pgcluster/tool/pgcbench.sh 1970-01-01 01:00:00.000000000 +0100
32653 +++ pgcluster-1.7.0rc7/src/pgcluster/tool/pgcbench.sh 2007-02-18 22:52:17.000000000 +0100
32659 +while getopts ih:nvp:dc:t:s:u:P:CNSlTUIf: opt; do
32665 + opts=(${opts[@]} -$opt $OPTARG)
32669 +shift $(($OPTIND - 1))
32672 +tps=$(psql -At -c "SELECT count(*) FROM branches" $dbname)
32674 +vacuumdb -t branches $dbname
32675 +vacuumdb -t tellers $dbname
32676 +psql -c "DELETE FROM history" $dbname
32677 +vacuumdb -t history $dbname
32679 +if [ -z $filename ]; then
32680 + pgcbench ${opts[@]} $@
32682 + perl -pe "BEGIN { \$tps = $tps } s/\`([^\`]+)\`/eval \$1/eg" $filename \
32683 + | pgcbench ${opts[@]} -f - $@
32685 diff -aruN postgresql-8.2.4/src/pgcluster/tool/tpc-b_like.sql pgcluster-1.7.0rc7/src/pgcluster/tool/tpc-b_like.sql
32686 --- postgresql-8.2.4/src/pgcluster/tool/tpc-b_like.sql 1970-01-01 01:00:00.000000000 +0100
32687 +++ pgcluster-1.7.0rc7/src/pgcluster/tool/tpc-b_like.sql 2007-02-18 22:52:17.000000000 +0100
32689 +\setrandom aid 1 `100000 * $tps`
32690 +\setrandom bid 1 `1 * $tps`
32691 +\setrandom tid 1 `10 * $tps`
32692 +\setrandom delta 1 1000
32694 +UPDATE accounts SET abalance = abalance + :delta WHERE aid = :aid
32695 +SELECT abalance FROM accounts WHERE aid = :aid
32696 +UPDATE tellers SET tbalance = tbalance + :delta WHERE tid = :tid
32697 +UPDATE branches SET bbalance = bbalance + :delta WHERE bid = :bid
32698 +INSERT INTO history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, current_timestamp)