]> git.pld-linux.org Git - packages/postgresql.git/blame - postgresql-pgcluster-1.7.0rc7.patch
- version 8.3.0 (merge from POSTGRESQL_8_3)
[packages/postgresql.git] / postgresql-pgcluster-1.7.0rc7.patch
CommitLineData
b5c35c7b
PG
1diff -aruN postgresql-8.2.4/GNUmakefile.in pgcluster-1.7.0rc7/GNUmakefile.in
2--- postgresql-8.2.4/GNUmakefile.in 2006-08-18 21:58:05.000000000 +0200
3+++ pgcluster-1.7.0rc7/GNUmakefile.in 2007-02-18 22:52:16.000000000 +0100
4@@ -63,13 +63,13 @@
5
6 ##########################################################################
7
8-distdir := postgresql-$(VERSION)
9+distdir := pgcluster-$(PGCLUSTER_VERSION)
10 dummy := =install=
11-garbage := =* "#"* ."#"* *~* *.orig *.rej core postgresql-*
12+garbage := =* "#"* ."#"* *~* *.orig *.rej core pgcluster-*
13
14 dist: $(distdir).tar.gz
15 ifeq ($(split-dist), yes)
16-dist: postgresql-base-$(VERSION).tar.gz postgresql-docs-$(VERSION).tar.gz postgresql-opt-$(VERSION).tar.gz postgresql-test-$(VERSION).tar.gz
17+dist: pgcluster-base-$(PGCLUSTER_VERSION).tar.gz pgcluster-docs-$(PGCLUSTER_VERSION).tar.gz pgcluster-opt-$(PGCLUSTER_VERSION).tar.gz pgcluster-test-$(PGCLUSTER_VERSION).tar.gz
18 endif
19 dist:
20 -rm -rf $(distdir)
21@@ -81,19 +81,19 @@
22 src/tools src/tutorial \
23 $(addprefix src/pl/, plperl plpython tcl)
24
25-docs_files := doc/postgres.tar.gz doc/src doc/TODO.detail
26+docs_files := doc/pgcluster.tar.gz doc/src doc/TODO.detail
27
28-postgresql-base-$(VERSION).tar: distdir
29+pgcluster-base-$(PGCLUSTER_VERSION).tar: distdir
30 $(TAR) -c $(addprefix --exclude $(distdir)/, $(docs_files) $(opt_files) src/test) \
31 -f $@ $(distdir)
32
33-postgresql-docs-$(VERSION).tar: distdir
34+pgcluster-docs-$(PGCLUSTER_VERSION).tar: distdir
35 $(TAR) cf $@ $(addprefix $(distdir)/, $(docs_files))
36
37-postgresql-opt-$(VERSION).tar: distdir
38+pgcluster-opt-$(PGCLUSTER_VERSION).tar: distdir
39 $(TAR) cf $@ $(addprefix $(distdir)/, $(opt_files))
40
41-postgresql-test-$(VERSION).tar: distdir
42+pgcluster-test-$(PGCLUSTER_VERSION).tar: distdir
43 $(TAR) cf $@ $(distdir)/src/test
44
45 distdir:
46diff -aruN postgresql-8.2.4/INSTALL_PGCLUSTER pgcluster-1.7.0rc7/INSTALL_PGCLUSTER
47--- postgresql-8.2.4/INSTALL_PGCLUSTER 1970-01-01 01:00:00.000000000 +0100
48+++ pgcluster-1.7.0rc7/INSTALL_PGCLUSTER 2007-02-19 00:59:13.000000000 +0100
49@@ -0,0 +1,392 @@
50+PGCluster Installation Instructions
51+
52+=============================================================
53+1. Installation
54+=============================================================
55+
56+1-1. Install Cluster DB Server, Replication Server & Load Balancer
57+----------------------------------------------------------------
58+$ cd $source_dir
59+$ ./configure
60+$ gmake
61+$ su
62+# gmake install
63+# chown -R postgres /usr/local/pgsql
64+----------------------------------------------------------------
65+
66+=============================================================
67+2. Initialize DB
68+=============================================================
69+$ su
70+# adduser postgres
71+# mkdir /usr/local/pgsql/data
72+# chown postgres /usr/local/pgsql/data
73+# su - postgres
74+$ /usr/local/pgsql/bin/initdb -D /usr/local/pgsql/data
75+
76+
77+=============================================================
78+3. Configuration
79+=============================================================
80+(EX.System Composition)
81+
82+ |
83+ ((Load Balance Server))
84+ ( hostname: lb.pgcluster.org)
85+ ( receive port:5432 )
86+ ( recovery port:6001 )
87+ |
88+----------+-------------+------------+----------
89+ | |
90+ (( Cluster DB 1 )) (( Cluster DB 2 ))
91+ ( hostname:c1.pgcluster.org) ( hostname:c2.pgcluster.org)
92+ ( receive port: 5432 ) ( receive port:5432 )
93+ ( recovery port:7001 ) ( recovery port:7002 )
94+ | |
95+----------+-------------+------------+----------
96+ |
97+ ((Replication Server))
98+ ( hostname:pgr.pgcluster.org)
99+ ( receive port:8001 )
100+ ( recovery port:8101 )
101+
102+
103+3-1. Load Balance Server
104+
105+The setup file of the load balance server is created by copying the sample file and editing it.
106+(the sample file is installed in '/usr/local/pgsql/share' by default)
107+----------------------------------------------------------------
108+$cd /usr/local/pgsql/share
109+$cp pglb.conf.sample pglb.conf
110+----------------------------------------------------------------
111+
112+In the case of the above system composition example,
113+the setup example of pglb.conf file is as the following
114+
115+#============================================================
116+# Load Balance Server configuration file
117+#-------------------------------------------------------------
118+# file: pglb.conf
119+#-------------------------------------------------------------
120+# This file controls:
121+# o which hosts are db cluster server
122+# o which port use connect to db cluster server
123+# o how many connections are allowed on each DB server
124+#============================================================
125+#-------------------------------------------------------------
126+# set cluster DB server information
127+# o Host_Name : hostname
128+# o Port : Connection for postmaster
129+# o Max_Connect : Maximum number of connections to postmaster
130+#-------------------------------------------------------------
131+<Cluster_Server_Info>
132+ <Host_Name> c1.pgcluster.org </Host_Name>
133+ <Port> 5432 </Port>
134+ <Max_Connect> 32 </Max_Connect>
135+</Cluster_Server_Info>
136+<Cluster_Server_Info>
137+ <Host_Name> c2.pgcluster.org </Host_Name>
138+ <Port> 5432 </Port>
139+ <Max_Connect> 32 </Max_Connect>
140+</Cluster_Server_Info>
141+#-------------------------------------------------------------
142+# set Load Balance server information
143+# o Host_Name : The host name of this load balance server.
144+# -- please write a host name by FQDN or IP address.
145+# o Backend_Socket_Dir : Unix domain socket path for the backend
146+# o Receive_Port : Connection from client
147+# o Recovery_Port : Connection for recovery process
148+# o Max_Cluster_Num : Maximum number of cluster DB servers
149+# o Use_Connection_Pooling : Use connection pool [yes/no]
150+# o Lifecheck_Timeout : Timeout of the lifecheck response
151+# o Lifecheck_Interval : Interval time of the lifecheck
152+# (range 1s - 1h)
153+# 10s -- 10 seconds
154+# 10min -- 10 minutes
155+# 1h -- 1 hours
156+#-------------------------------------------------------------
157+<Host_Name> lb.pgcluster.org </Host_Name>
158+<Backend_Socket_Dir> /tmp </Backend_Socket_Dir>
159+<Receive_Port> 5432 </Receive_Port>
160+<Recovery_Port> 6001 </Recovery_Port>
161+<Max_Cluster_Num> 128 </Max_Cluster_Num>
162+<Use_Connection_Pooling> no </Use_Connection_Pooling>
163+<LifeCheck_Timeout> 3s </LifeCheck_Timeout>
164+<LifeCheck_Interval> 15s </LifeCheck_Interval>
165+#-------------------------------------------------------------
166+# A setup of a log files
167+#
168+# o File_Name : Log file name with full path
169+# o File_Size : Maximum size of each log files
170+# Please specify in a number and unit(K or M)
171+# 10 -- 10 Byte
172+# 10K -- 10 KByte
173+# 10M -- 10 MByte
174+# o Rotate : Rotation times
175+# If specified 0, old versions are removed.
176+#-------------------------------------------------------------
177+<Log_File_Info>
178+ <File_Name> /tmp/pglb.log </File_Name>
179+ <File_Size> 1M </File_Size>
180+ <Rotate> 3 </Rotate>
181+</Log_File_Info>
182+
183+3-2. Cluster DB Server
184+
185+The Cluster DB server needs two configuration files to be edited
186+('pg_hba.conf' and 'cluster.conf').
187+These files are created under the $PG_DATA directory after 'initdb'.
188+
189+A. pg_hba.conf
190+Permission to connect to the DB via IP connections is needed for this system.
191+
192+B. cluster.conf
193+In the case of the above system composition example,
194+the setup example of cluster.conf file is as the following
195+
196+#============================================================
197+# Cluster DB Server configuration file
198+#-------------------------------------------------------------
199+# file: cluster.conf
200+#-------------------------------------------------------------
201+# This file controls:
202+# o which hosts & port are replication server
203+# o which port use for replication request to replication server
204+# o which command use for recovery function
205+#
206+#============================================================
207+#-------------------------------------------------------------
208+# set cluster DB server information
209+# o Host_Name : hostname
210+# o Port : Connection port for postmaster
211+# o Recovery_Port : Connection for recovery process
212+#-------------------------------------------------------------
213+<Replicate_Server_Info>
214+ <Host_Name> pgr.pgcluster.org </Host_Name>
215+ <Port> 8001 </Port>
216+ <Recovery_Port> 8101 </Recovery_Port>
217+</Replicate_Server_Info>
218+#-------------------------------------------------------------
219+# set Cluster DB Server information
220+# o Host_Name : Host name which connect with replication server
221+# o Recovery_Port : Connection port for recovery
222+# o Rsync_Path : Path of rsync command
223+# o Rsync_Option : File transfer option for rsync
224+# o Rsync_Compress : Use compression option for rsync
225+# [yes/no]. default : yes
226+# o Pg_Dump_Path : path of pg_dump
227+# o When_Stand_Alone : When all replication servers are down,
228+# you can choose one of two kinds of permission,
229+# "read_only" or "read_write".
230+# o Replication_Timeout : Timeout of each replication request
231+# o Lifecheck_Timeout : Timeout of the lifecheck response
232+# o Lifecheck_Interval : Interval time of the lifecheck
233+# (range 1s - 1h)
234+# 10s -- 10 seconds
235+# 10min -- 10 minutes
236+# 1h -- 1 hours
237+#-------------------------------------------------------------
238+<Host_Name> c1.pgcluster.org </Host_Name>
239+<Recovery_Port> 7001 </Recovery_Port>
240+<Rsync_Path> /usr/bin/rsync </Rsync_Path>
241+<Rsync_Option> ssh -1 </Rsync_Option>
242+<Rsync_Compress> yes </Rsync_Compress>
243+<Pg_Dump_Path> /usr/local/pgsql/bin/pg_dump </Pg_Dump_Path>
244+<When_Stand_Alone> read_only </When_Stand_Alone>
245+<Replication_Timeout> 1min </Replication_Timeout>
246+<LifeCheck_Timeout> 3s </LifeCheck_Timeout>
247+<LifeCheck_Interval> 11s </LifeCheck_Interval>
248+#-------------------------------------------------------------
249+# set partial replication control information
250+# set DB name and Table name to exclude from replication
251+# o DB_Name : DB name
252+# o Table_Name : Table name
253+#-------------------------------------------------------------
254+#<Not_Replicate_Info>
255+# <DB_Name> test_db </DB_Name>
256+# <Table_Name> log_table </Table_Name>
257+#</Not_Replicate_Info>
258+
259+3-3. Replication Server
260+
261+The setup file of the replication server is created by copying the sample file and editing it.
262+(the sample file is installed in '/usr/local/pgsql/share' by default)
263+----------------------------------------------------------------
264+$cd /usr/local/pgsql/share
265+$cp pgreplicate.conf.sample pgreplicate.conf
266+----------------------------------------------------------------
267+In the case of the above system composition example,
268+the setup example of pgreplicate.conf file is as the following
269+
270+#============================================================
271+# PGReplicate configuration file
272+#-------------------------------------------------------------
273+# file: pgreplicate.conf
274+#-------------------------------------------------------------
275+# This file controls:
276+# o which hosts & port are cluster server
277+# o which port use for replication request from cluster server
278+#============================================================
279+#-------------------------------------------------------------
280+# set cluster DB server information
281+# o Host_Name : hostname
282+# o Port : Connection port for postmaster
283+# o Recovery_Port : Connection port for recovery
284+#-------------------------------------------------------------
285+<Cluster_Server_Info>
286+ <Host_Name> c1.pgcluster.org </Host_Name>
287+ <Port> 5432 </Port>
288+ <Recovery_Port> 7001 </Recovery_Port>
289+</Cluster_Server_Info>
290+<Cluster_Server_Info>
291+ <Host_Name> c2.pgcluster.org </Host_Name>
292+ <Port> 5432 </Port>
293+ <Recovery_Port> 7002 </Recovery_Port>
294+</Cluster_Server_Info>
295+#-------------------------------------------------------------
296+# set Load Balance server information
297+# o Host_Name : hostname
298+# o Recovery_Port : Connection port for recovery
299+#-------------------------------------------------------------
300+<LoadBalance_Server_Info>
301+ <Host_Name> lb.pgcluster.org </Host_Name>
302+ <Recovery_Port> 6001 </Recovery_Port>
303+</LoadBalance_Server_Info>
304+#------------------------------------------------------------
305+# A setup of the cascade connection between replication servers.
306+# When you do not use RLOG recovery, you can skip this setup
307+#
308+# o Host_Name : The host name of the upper replication server.
309+# Please write a host name by FQDN or IP address.
310+# o Port : The connection port with postmaster.
311+# o Recovery_Port : The connection port at the time of
312+# a recovery sequence .
313+#------------------------------------------------------------
314+#<Replicate_Server_Info>
315+# <Host_Name> upper_replicate.pgcluster.org </Host_Name>
316+# <Port> 8002 </Port>
317+# <Recovery_Port> 8102 </Recovery_Port>
318+#</Replicate_Server_Info>
319+#
320+#-------------------------------------------------------------
321+# A setup of a replication server
322+#
323+# o Host_Name : The host name of this replication server.
324+# Please write a host name by FQDN or IP address.
325+# o Replication_Port : Connection port for replication
326+# o Recovery_Port : Connection port for recovery
327+# o RLOG_Port : Connection port for replication log
328+# o Response_Mode : Timing which returns a response
329+# - normal -- return result of DB which received the query
330+# - reliable -- return result after waiting for response of
331+# all Cluster DBs.
332+# o Use_Replication_Log : Use replication log
333+# [yes/no]. default : no
334+# o Replication_Timeout : Timeout of each replication response
335+# o Lifecheck_Timeout : Timeout of the lifecheck response
336+# o Lifecheck_Interval : Interval time of the lifecheck
337+# (range 1s - 1h)
338+# 10s -- 10 seconds
339+# 10min -- 10 minutes
340+# 1h -- 1 hours
341+#-------------------------------------------------------------
342+<Host_Name> pgr.pgcluster.org </Host_Name>
343+<Replication_Port> 8001 </Replication_Port>
344+<Recovery_Port> 8101 </Recovery_Port>
345+<RLOG_Port> 8301 </RLOG_Port>
346+<Response_Mode> normal </Response_Mode>
347+<Use_Replication_Log> no </Use_Replication_Log>
348+<Replication_Timeout> 1min </Replication_Timeout>
349+<LifeCheck_Timeout> 3s </LifeCheck_Timeout>
350+<LifeCheck_Interval> 15s </LifeCheck_Interval>
351+#-------------------------------------------------------------
352+# A setup of a log files
353+#
354+# o File_Name : Log file name with full path
355+# o File_Size : maximum size of each log files
356+# Please specify in a number and unit(K or M)
357+# 10 -- 10 Byte
358+# 10K -- 10 KByte
359+# 10M -- 10 MByte
360+# o Rotate : Rotation times
361+# If specified 0, old versions are removed.
362+#-------------------------------------------------------------
363+<Log_File_Info>
364+ <File_Name> /tmp/pgreplicate.log </File_Name>
365+ <File_Size> 1M </File_Size>
366+ <Rotate> 3 </Rotate>
367+</Log_File_Info>
368+
369+=============================================================
370+4. Start Up / Stop
371+=============================================================
372+
373+4-1. replication server
374+
375+A. Start replication server
376+----------------------------------------------------------------
377+$ /usr/local/pgsql/bin/pgreplicate -D /usr/local/pgsql/share
378+----------------------------------------------------------------
379+
380+B. Stop replication server
381+----------------------------------------------------------------
382+$ /usr/local/pgsql/bin/pgreplicate -D /usr/local/pgsql/share stop
383+----------------------------------------------------------------
384+
385+usage: pgreplicate [-D path_of_config_file] [-W path_of_work_files]
386+[-w wait time before fork process][-U login user][-l][-n][-v][-h][stop]
387+ -l: print error logs in the log file.
388+ -n: don't run in daemon mode.
389+ -v: debug mode. need '-n' flag
390+ -h: print this help
391+ stop: stop pgreplicate
392+(config file default path: ./pgreplicate.conf)
393+
394+4-2. cluster DB server
395+$PG_HOME = /usr/local/pgsql
396+$PG_DATA = /usr/local/pgsql/data
397+
398+A. Start cluster DB server
399+----------------------------------------------------------------
400+$ /usr/local/pgsql/bin/pg_ctl start -D /usr/local/pgsql/data
401+----------------------------------------------------------------
402+
403+B. Stop cluster DB server
404+----------------------------------------------------------------
405+$ /usr/local/pgsql/bin/pg_ctl stop -D /usr/local/pgsql/data
406+----------------------------------------------------------------
407+
408+C-1. RE start (recovery) cluster DB server with backup
409+----------------------------------------------------------------
410+$ /usr/local/pgsql/bin/pg_ctl start -D /usr/local/pgsql/data -o "-R"
411+----------------------------------------------------------------
412+
413+C-2. RE start (recovery) cluster DB server without backup
414+----------------------------------------------------------------
415+$ /usr/local/pgsql/bin/pg_ctl start -D /usr/local/pgsql/data -o "-r"
416+----------------------------------------------------------------
417+
418+D. Upgrade cluster DB server with pg_dump
419+----------------------------------------------------------------
420+$ /usr/local/pgsql/bin/pg_ctl start -D /usr/local/pgsql/data -o "-U"
421+----------------------------------------------------------------
422+
423+4-3. load balance server
424+
425+A. Start load balance server
426+----------------------------------------------------------------
427+$ /usr/local/pgsql/bin/pglb -D /usr/local/pgsql/share
428+----------------------------------------------------------------
429+
430+B. Stop load balance server
431+----------------------------------------------------------------
432+$ /usr/local/pgsql/bin/pglb -D /usr/local/pgsql/share stop
433+----------------------------------------------------------------
434+
435+usage: pglb [-D path_of_config_file] [-W path_of_work_files] [-n][-v][-h][stop]
436+ -l: print error logs in the log file.
437+ -n: don't run in daemon mode.
438+ -v: debug mode. need '-n' flag
439+ -h: print this help
440+ stop: stop pglb
441+ (config file default path: ./pglb.conf)
442diff -aruN postgresql-8.2.4/README_PGCLUSTER pgcluster-1.7.0rc7/README_PGCLUSTER
443--- postgresql-8.2.4/README_PGCLUSTER 1970-01-01 01:00:00.000000000 +0100
444+++ pgcluster-1.7.0rc7/README_PGCLUSTER 2007-02-19 01:00:40.000000000 +0100
445@@ -0,0 +1,118 @@
446+PGCluster: Multi-Master Synchronous Replication System for PostgreSQL
447+===========================================================
448+
449+PGCluster is a multi-master and synchronous replication system that supports load balancing of PostgreSQL.
450+
451+Changed:
452+ $INSTALL_DIR/GNUmakefile.in
453+ $INSTALL_DIR/INSTALL_PGCLUSTER
454+ $INSTALL_DIR/README_PGCLUSTER
455+ $INSTALL_DIR/configure
456+ $INSTALL_DIR/configure.in
457+ $INSTALL_DIR/pgcluster.sh.tmpl
458+ $INSTALL_DIR/src/Makefile
459+ $INSTALL_DIR/src/Makefile.global.in
460+ $INSTALL_DIR/src/backend/Makefile
461+ $INSTALL_DIR/src/backend/access/transam/clog.c
462+ $INSTALL_DIR/src/backend/access/transam/xact.c
463+ $INSTALL_DIR/src/backend/catalog/catalog.c
464+ $INSTALL_DIR/src/backend/commands/analyze.c
465+ $INSTALL_DIR/src/backend/commands/copy.c
466+ $INSTALL_DIR/src/backend/commands/sequence.c
467+ $INSTALL_DIR/src/backend/executor/functions.c
468+ $INSTALL_DIR/src/backend/libpq/Makefile
469+ $INSTALL_DIR/src/backend/libpq/be-fsstubs.c
470+ $INSTALL_DIR/src/backend/libpq/cluster.conf.sample
471+ $INSTALL_DIR/src/backend/libpq/recovery.c
472+ $INSTALL_DIR/src/backend/libpq/lifecheck.c
473+ $INSTALL_DIR/src/backend/libpq/replicate.c
474+ $INSTALL_DIR/src/backend/libpq/replicate_com.c
475+ $INSTALL_DIR/src/backend/main/main.c
476+ $INSTALL_DIR/src/backend/parser/gram.y
477+ $INSTALL_DIR/src/backend/parser/keywords.c
478+ $INSTALL_DIR/src/backend/parser/parse_clause.c
479+ $INSTALL_DIR/src/backend/parser/parse_relation.c
480+ $INSTALL_DIR/src/backend/postmaster/postmaster.c
481+ $INSTALL_DIR/src/backend/storage/large_object/inv_api.c
482+ $INSTALL_DIR/src/backend/storage/lmgr/deadlock.c
483+ $INSTALL_DIR/src/backend/storage/lmgr/lmgr.c
484+ $INSTALL_DIR/src/backend/storage/lmgr/lock.c
485+ $INSTALL_DIR/src/backend/storage/lmgr/proc.c
486+ $INSTALL_DIR/src/backend/tcop/postgres.c
487+ $INSTALL_DIR/src/backend/tcop/pquery.c
488+ $INSTALL_DIR/src/backend/tcop/utility.c
489+ $INSTALL_DIR/src/backend/utils/adt/float.c
490+ $INSTALL_DIR/src/backend/utils/adt/nabstime.c
491+ $INSTALL_DIR/src/backend/utils/adt/ri_triggers.c
492+ $INSTALL_DIR/src/backend/utils/adt/timestamp.c
493+ $INSTALL_DIR/src/backend/utils/error/assert.c
494+ $INSTALL_DIR/src/backend/utils/error/elog.c
495+ $INSTALL_DIR/src/backend/utils/fmgr/fmgr.c
496+ $INSTALL_DIR/src/backend/utils/mb/mbutils.c
497+ $INSTALL_DIR/src/backend/utils/misc/guc.c
498+ $INSTALL_DIR/src/backend/utils/misc/postgresql.conf.sample
499+ $INSTALL_DIR/src/bin/initdb/initdb.c
500+ $INSTALL_DIR/src/bin/pg_dump/pg_dump.c
501+ $INSTALL_DIR/src/bin/pg_dump/pg_dumpall.c
502+ $INSTALL_DIR/src/include/pg_config.h.in
503+ $INSTALL_DIR/src/include/replicate.h
504+ $INSTALL_DIR/src/include/replicate_com.h
505+ $INSTALL_DIR/src/include/storage/lmgr.h
506+ $INSTALL_DIR/src/include/storage/proc.h
507+ $INSTALL_DIR/src/interfaces/libpq/Makefile
508+ $INSTALL_DIR/src/makefiles/Makefile.aix
509+ $INSTALL_DIR/src/makefiles/Makefile.freebsd
510+ $INSTALL_DIR/src/makefiles/Makefile.hpux
511+ $INSTALL_DIR/src/makefiles/Makefile.linux
512+ $INSTALL_DIR/src/makefiles/Makefile.netbsd
513+ $INSTALL_DIR/src/makefiles/Makefile.openbsd
514+ $INSTALL_DIR/src/makefiles/Makefile.solaris
515+ $INSTALL_DIR/src/makefiles/Makefile.sunos4
516+Added:
517+ $INSTALL_DIR/src/pgcluster/Makefile
518+ $INSTALL_DIR/src/pgcluster/libpgc/Makefile
519+ $INSTALL_DIR/src/pgcluster/libpgc/libpgc.h
520+ $INSTALL_DIR/src/pgcluster/libpgc/sem.c
521+ $INSTALL_DIR/src/pgcluster/libpgc/show.c
522+ $INSTALL_DIR/src/pgcluster/libpgc/signal.c
523+ $INSTALL_DIR/src/pgcluster/pglb/AUTHORS
524+ $INSTALL_DIR/src/pgcluster/pglb/COPYING
525+ $INSTALL_DIR/src/pgcluster/pglb/Makefile
526+ $INSTALL_DIR/src/pgcluster/pglb/child.c
527+ $INSTALL_DIR/src/pgcluster/pglb/cluster_table.c
528+ $INSTALL_DIR/src/pgcluster/pglb/lifecheck.c
529+ $INSTALL_DIR/src/pgcluster/pglb/load_balance.c
530+ $INSTALL_DIR/src/pgcluster/pglb/main.c
531+ $INSTALL_DIR/src/pgcluster/pglb/pglb.conf.sample
532+ $INSTALL_DIR/src/pgcluster/pglb/pglb.h
533+ $INSTALL_DIR/src/pgcluster/pglb/pool_auth.c
534+ $INSTALL_DIR/src/pgcluster/pglb/pool_connection_pool.c
535+ $INSTALL_DIR/src/pgcluster/pglb/pool_params.c
536+ $INSTALL_DIR/src/pgcluster/pglb/pool_process_query.c
537+ $INSTALL_DIR/src/pgcluster/pglb/pool_stream.c
538+ $INSTALL_DIR/src/pgcluster/pglb/recovery.c
539+ $INSTALL_DIR/src/pgcluster/pglb/socket.c
540+ $INSTALL_DIR/src/pgcluster/pgrp/AUTHORS
541+ $INSTALL_DIR/src/pgcluster/pgrp/COPYING
542+ $INSTALL_DIR/src/pgcluster/pgrp/Makefile
543+ $INSTALL_DIR/src/pgcluster/pgrp/cascade.c
544+ $INSTALL_DIR/src/pgcluster/pgrp/conf.c
545+ $INSTALL_DIR/src/pgcluster/pgrp/lifecheck.c
546+ $INSTALL_DIR/src/pgcluster/pgrp/main.c
547+ $INSTALL_DIR/src/pgcluster/pgrp/pgreplicate.conf.sample
548+ $INSTALL_DIR/src/pgcluster/pgrp/pgreplicate.h
549+ $INSTALL_DIR/src/pgcluster/pgrp/pqformat.c
550+ $INSTALL_DIR/src/pgcluster/pgrp/recovery.c
551+ $INSTALL_DIR/src/pgcluster/pgrp/replicate.c
552+ $INSTALL_DIR/src/pgcluster/pgrp/rlog.c
553+ $INSTALL_DIR/src/pgcluster/tool/Makefile
554+ $INSTALL_DIR/src/pgcluster/tool/README.jp
555+ $INSTALL_DIR/src/pgcluster/tool/pgcbench.c
556+ $INSTALL_DIR/src/pgcluster/tool/pgcbench.sh
557+ $INSTALL_DIR/src/pgcluster/tool/tpc-b_like.sql
558+
559+The latest version of this software may be obtained at
560+http://pgfoundry.org/projects/pgcluster/
561+
562+For more information look at pgFoundry web site located at
563+http://pgcluster.projects.postgresql.org/
564diff -aruN postgresql-8.2.4/configure pgcluster-1.7.0rc7/configure
565--- postgresql-8.2.4/configure 2007-02-07 04:48:58.000000000 +0100
566+++ pgcluster-1.7.0rc7/configure 2007-03-01 16:27:35.000000000 +0100
567@@ -275,6 +275,8 @@
568 PACKAGE_STRING='PostgreSQL 8.2.4'
569 PACKAGE_BUGREPORT='pgsql-bugs@postgresql.org'
570
571+PGCLUSTER_VERSION='1.7.0rc7'
572+
573 ac_unique_file="src/backend/access/common/heaptuple.c"
574 ac_default_prefix=/usr/local/pgsql
575 # Factoring default headers for most tests.
576@@ -314,7 +316,7 @@
577 # include <unistd.h>
578 #endif"
579
580-ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS configure_args build build_cpu build_vendor build_os host host_cpu host_vendor host_os PORTNAME docdir enable_nls WANTED_LANGUAGES default_port enable_shared enable_rpath enable_debug DTRACE DTRACEFLAGS enable_dtrace CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP GCC TAS autodepend INCLUDES enable_thread_safety with_tcl with_perl with_python with_krb5 krb_srvtab with_pam with_ldap with_bonjour with_openssl with_zlib EGREP ELF_SYS LDFLAGS_SL AWK FLEX FLEXFLAGS LN_S LD with_gnu_ld ld_R_works RANLIB ac_ct_RANLIB TAR STRIP ac_ct_STRIP STRIP_STATIC_LIB STRIP_SHARED_LIB YACC YFLAGS PERL perl_archlibexp perl_privlibexp perl_useshrplib perl_embed_ldflags PYTHON python_version python_configdir python_includespec python_libdir python_libspec python_additional_libs HAVE_IPV6 LIBOBJS acx_pthread_config PTHREAD_CC PTHREAD_LIBS PTHREAD_CFLAGS LDAP_LIBS_FE LDAP_LIBS_BE HAVE_POSIX_SIGNALS MSGFMT MSGMERGE XGETTEXT localedir TCLSH TCL_CONFIG_SH TCL_INCLUDE_SPEC TCL_LIB_FILE TCL_LIBS TCL_LIB_SPEC TCL_SHARED_BUILD TCL_SHLIB_LD_LIBS NSGMLS JADE have_docbook DOCBOOKSTYLE COLLATEINDEX SGMLSPL vpath_build LTLIBOBJS'
581+ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS configure_args build build_cpu build_vendor build_os host host_cpu host_vendor host_os PORTNAME docdir enable_nls WANTED_LANGUAGES default_port enable_shared enable_rpath enable_debug DTRACE DTRACEFLAGS enable_dtrace CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP GCC TAS autodepend INCLUDES enable_thread_safety with_tcl with_perl with_python with_krb5 krb_srvtab with_pam with_ldap with_bonjour with_openssl with_zlib EGREP ELF_SYS LDFLAGS_SL AWK FLEX FLEXFLAGS LN_S LD with_gnu_ld ld_R_works RANLIB ac_ct_RANLIB TAR STRIP ac_ct_STRIP STRIP_STATIC_LIB STRIP_SHARED_LIB YACC YFLAGS PERL perl_archlibexp perl_privlibexp perl_useshrplib perl_embed_ldflags PYTHON python_version python_configdir python_includespec python_libdir python_libspec python_additional_libs HAVE_IPV6 LIBOBJS acx_pthread_config PTHREAD_CC PTHREAD_LIBS PTHREAD_CFLAGS LDAP_LIBS_FE LDAP_LIBS_BE HAVE_POSIX_SIGNALS MSGFMT MSGMERGE XGETTEXT localedir TCLSH TCL_CONFIG_SH TCL_INCLUDE_SPEC TCL_LIB_FILE TCL_LIBS TCL_LIB_SPEC TCL_SHARED_BUILD TCL_SHLIB_LD_LIBS NSGMLS JADE have_docbook DOCBOOKSTYLE COLLATEINDEX SGMLSPL vpath_build LTLIBOBJS PGCLUSTER_VERSION'
582 ac_subst_files=''
583
584 # Initialize some variables set by options.
585@@ -1241,6 +1243,10 @@
586 #define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
587 _ACEOF
588
589+cat >>confdefs.h <<_ACEOF
590+#define PGCLUSTER_VERSION "$PGCLUSTER_VERSION"
591+_ACEOF
592+
593
594 # Let the site file select an alternate cache file if it wants to.
595 # Prefer explicitly selected file to automatically selected ones.
596@@ -23555,6 +23561,7 @@
597 s,@host_os@,$host_os,;t t
598 s,@PORTNAME@,$PORTNAME,;t t
599 s,@docdir@,$docdir,;t t
600+s,@PGCLUSTER_VERSION@,$PGCLUSTER_VERSION,;t t
601 s,@enable_nls@,$enable_nls,;t t
602 s,@WANTED_LANGUAGES@,$WANTED_LANGUAGES,;t t
603 s,@default_port@,$default_port,;t t
604diff -aruN postgresql-8.2.4/configure.in pgcluster-1.7.0rc7/configure.in
605--- postgresql-8.2.4/configure.in 2007-02-07 04:48:58.000000000 +0100
606+++ pgcluster-1.7.0rc7/configure.in 2007-02-18 22:52:16.000000000 +0100
607@@ -27,6 +27,7 @@
608 AC_SUBST(configure_args, [$ac_configure_args])
609
610 AC_DEFINE_UNQUOTED(PG_VERSION, "$PACKAGE_VERSION", [PostgreSQL version as a string])
611+AC_DEFINE_UNQUOTED(PGCLUSTER_VERSION, "$PGCLUSTER_VERSION", [PGCluster version])
612
613 AC_CANONICAL_HOST
614
615diff -aruN postgresql-8.2.4/pgcluster.sh.tmpl pgcluster-1.7.0rc7/pgcluster.sh.tmpl
616--- postgresql-8.2.4/pgcluster.sh.tmpl 1970-01-01 01:00:00.000000000 +0100
617+++ pgcluster-1.7.0rc7/pgcluster.sh.tmpl 2007-02-18 22:52:16.000000000 +0100
618@@ -0,0 +1,56 @@
619+#!/bin/sh
620+#
621+# $FreeBSD: ports/databases/pgcluster/files/pgcluster.sh.tmpl,v 1.1 2004/01/26 09:02:45 kuriyama Exp $
622+#
623+# PROVIDE: pgcluster
624+# REQUIRE: DAEMON
625+# BEFORE: pgreplicate
626+# KEYWORD: FreeBSD
627+#
628+# Add the following line to /etc/rc.conf to enable pgcluster:
629+#
630+# pgcluster_enable="YES"
631+# # optional
632+# pgcluster_data="/home/pgsql/data"
633+# pgcluster_flags="-w -s"
634+#
635+
636+pgcluster_enable="NO"
637+pgcluster_data="%%PREFIX%%/pgsql/data"
638+pgcluster_flags="-w -s"
639+
640+. %%RC_SUBR%%
641+
642+load_rc_config pgcluster
643+
644+name=pgcluster
645+command=%%PREFIX%%/bin/pg_ctl
646+pgcluster_user=pgsql
647+extra_commands="initdb recover"
648+initdb_cmd="pgcluster_initdb"
649+recover_cmd="pgcluster_recover"
650+start_cmd="pgcluster_start"
651+stop_cmd="pgcluster_stop"
652+
653+pgcluster_flags="${pgcluster_flags} -D ${pgcluster_data}"
654+pidfile="${pgcluster_data}/postmaster.pid"
655+
656+pgcluster_start()
657+{
658+ su -m ${pgcluster_user} -c "exec ${command} start ${pgcluster_flags} -o '-i'"
659+}
660+pgcluster_stop()
661+{
662+ su -m ${pgcluster_user} -c "exec ${command} stop ${pgcluster_flags} -m i"
663+}
664+pgcluster_recover()
665+{
666+ su -m ${pgcluster_user} -c "exec ${command} start ${pgcluster_flags} -o '-i -R'"
667+}
668+pgcluster_initdb()
669+{
670+ su -m ${pgcluster_user} -c "exec %%PREFIX%%/bin/initdb -D ${pgcluster_data}"
671+}
672+
673+load_rc_config $name
674+run_rc_command "$1"
675diff -aruN postgresql-8.2.4/src/Makefile pgcluster-1.7.0rc7/src/Makefile
676--- postgresql-8.2.4/src/Makefile 2006-06-23 01:50:35.000000000 +0200
677+++ pgcluster-1.7.0rc7/src/Makefile 2007-02-18 22:52:16.000000000 +0100
678@@ -16,14 +16,15 @@
679 all install installdirs uninstall dep depend distprep:
680 $(MAKE) -C port $@
681 $(MAKE) -C timezone $@
682+ $(MAKE) -C interfaces $@
683 $(MAKE) -C backend $@
684 $(MAKE) -C backend/utils/mb/conversion_procs $@
685 $(MAKE) -C include $@
686- $(MAKE) -C interfaces $@
687 $(MAKE) -C bin $@
688 $(MAKE) -C pl $@
689 $(MAKE) -C makefiles $@
690 $(MAKE) -C test/regress $@
691+ $(MAKE) -C pgcluster $@
692
693 install: install-local
694
695@@ -44,6 +45,7 @@
696 rm -f $(addprefix '$(DESTDIR)$(pgxsdir)/$(subdir)'/, Makefile.global Makefile.port Makefile.shlib nls-global.mk)
697
698 clean:
699+ $(MAKE) -C pgcluster $@
700 $(MAKE) -C port $@
701 $(MAKE) -C timezone $@
702 $(MAKE) -C backend $@
703@@ -57,6 +59,7 @@
704 $(MAKE) -C test/thread $@
705
706 distclean maintainer-clean:
707+ -$(MAKE) -C pgcluster $@
708 -$(MAKE) -C port $@
709 -$(MAKE) -C timezone $@
710 -$(MAKE) -C backend $@
711diff -aruN postgresql-8.2.4/src/Makefile.global.in pgcluster-1.7.0rc7/src/Makefile.global.in
712--- postgresql-8.2.4/src/Makefile.global.in 2006-10-08 19:15:33.000000000 +0200
713+++ pgcluster-1.7.0rc7/src/Makefile.global.in 2007-02-18 22:52:16.000000000 +0100
714@@ -31,6 +31,9 @@
715 # PostgreSQL version number
716 VERSION = @PACKAGE_VERSION@
717
718+# PGCluster version number
719+PGCLUSTER_VERSION = @PGCLUSTER_VERSION@
720+
721 # Support for VPATH builds
722 vpath_build = @vpath_build@
723 abs_top_srcdir = @abs_top_srcdir@
724@@ -207,6 +210,7 @@
725 GCC = @GCC@
726 CFLAGS = @CFLAGS@
727
728+CFLAGS += -DUSE_REPLICATION -DPRINT_DEBUG
729 # Kind-of compilers
730
731 YACC = @YACC@
732diff -aruN postgresql-8.2.4/src/backend/Makefile pgcluster-1.7.0rc7/src/backend/Makefile
733--- postgresql-8.2.4/src/backend/Makefile 2006-10-08 19:15:33.000000000 +0200
734+++ pgcluster-1.7.0rc7/src/backend/Makefile 2007-02-18 22:52:16.000000000 +0100
735@@ -39,7 +39,7 @@
736 ifneq ($(PORTNAME), win32)
737 ifneq ($(PORTNAME), aix)
738
739-postgres: $(OBJS)
740+postgres: $(OBJS) $(libpq_srcdir)/libpq.a
741 $(CC) $(CFLAGS) $(LDFLAGS) $(export_dynamic) $^ $(LIBS) -o $@
742
743 endif
744@@ -169,6 +169,7 @@
745 $(INSTALL_DATA) $(srcdir)/libpq/pg_ident.conf.sample '$(DESTDIR)$(datadir)/pg_ident.conf.sample'
746 $(INSTALL_DATA) $(srcdir)/utils/misc/postgresql.conf.sample '$(DESTDIR)$(datadir)/postgresql.conf.sample'
747 $(INSTALL_DATA) $(srcdir)/access/transam/recovery.conf.sample '$(DESTDIR)$(datadir)/recovery.conf.sample'
748+ $(INSTALL_DATA) $(srcdir)/libpq/cluster.conf.sample '$(DESTDIR)$(datadir)/cluster.conf.sample'
749
750 install-bin: postgres $(POSTGRES_IMP) installdirs
751 $(INSTALL_PROGRAM) postgres$(X) '$(DESTDIR)$(bindir)/postgres$(X)'
752@@ -221,8 +222,9 @@
753 $(MAKE) -C catalog uninstall-data
754 rm -f '$(DESTDIR)$(datadir)/pg_hba.conf.sample' \
755 '$(DESTDIR)$(datadir)/pg_ident.conf.sample' \
756- '$(DESTDIR)$(datadir)/postgresql.conf.sample' \
757- '$(DESTDIR)$(datadir)/recovery.conf.sample'
758+ '$(DESTDIR)$(datadir)/postgresql.conf.sample' \
759+ '$(DESTDIR)$(datadir)/recovery.conf.sample' \
760+ '$(DESTDIR)$(datadir)/cluster.conf.sample'
761
762
763 ##########################################################################
764diff -aruN postgresql-8.2.4/src/backend/access/transam/clog.c pgcluster-1.7.0rc7/src/backend/access/transam/clog.c
765--- postgresql-8.2.4/src/backend/access/transam/clog.c 2006-11-05 23:42:07.000000000 +0100
766+++ pgcluster-1.7.0rc7/src/backend/access/transam/clog.c 2007-02-18 22:52:16.000000000 +0100
767@@ -57,6 +57,9 @@
768 #define TransactionIdToByte(xid) (TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE)
769 #define TransactionIdToBIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_BYTE)
770
771+#ifdef USE_REPLICATION
772+#include "replicate.h"
773+#endif /* USE_REPLICATION */
774
775 /*
776 * Link to shared-memory data structures for CLOG control
777@@ -335,7 +338,16 @@
778
779 /* Check to see if there's any files that could be removed */
780 if (!SlruScanDirectory(ClogCtl, cutoffPage, false))
781+#ifdef USE_REPLICATION
782+ {
783+ /* Perform a forced CHECKPOINT */
784+ /* CreateCheckPoint(false, true); */
785+ RequestCheckpoint(true, false);
786+#endif /* USE_REPLICATION */
787 return; /* nothing to remove */
788+#ifdef USE_REPLICATION
789+ }
790+#endif /* USE_REPLICATION */
791
792 /* Write XLOG record and flush XLOG to disk */
793 WriteTruncateXlogRec(cutoffPage);
794diff -aruN postgresql-8.2.4/src/backend/access/transam/xact.c pgcluster-1.7.0rc7/src/backend/access/transam/xact.c
795--- postgresql-8.2.4/src/backend/access/transam/xact.c 2006-11-23 02:14:59.000000000 +0100
796+++ pgcluster-1.7.0rc7/src/backend/access/transam/xact.c 2007-02-18 22:52:16.000000000 +0100
797@@ -44,6 +44,9 @@
798 #include "utils/relcache.h"
799 #include "utils/guc.h"
800
801+#ifdef USE_REPLICATION
802+#include "replicate.h"
803+#endif /* USE_REPLICATION */
804
805 /*
806 * User-tweakable parameters
807@@ -4335,3 +4338,11 @@
808 else
809 appendStringInfo(buf, "UNKNOWN");
810 }
811+/* PGR_Reload_Start_Time: overwrite xactStartTimestamp with GetCurrentTimestamp(). */
812+#ifdef USE_REPLICATION
813+void
814+PGR_Reload_Start_Time(void)
815+{
816+ xactStartTimestamp = GetCurrentTimestamp();
817+}
818+#endif /* USE_REPLICATION */
819diff -aruN postgresql-8.2.4/src/backend/catalog/catalog.c pgcluster-1.7.0rc7/src/backend/catalog/catalog.c
820--- postgresql-8.2.4/src/backend/catalog/catalog.c 2006-10-04 02:29:50.000000000 +0200
821+++ pgcluster-1.7.0rc7/src/backend/catalog/catalog.c 2007-02-18 22:52:16.000000000 +0100
822@@ -38,6 +38,9 @@
823 #include "utils/fmgroids.h"
824 #include "utils/relcache.h"
825
826+#ifdef USE_REPLICATION
827+#include "replicate.h"
828+#endif /* USE_REPLICATION */
829
830 #define OIDCHARS 10 /* max chars printed by %u */
831
832@@ -360,7 +363,7 @@
833 Oid
834 GetNewOidWithIndex(Relation relation, Relation indexrel)
835 {
836- Oid newOid;
837+ Oid newOid = 0;
838 IndexScanDesc scan;
839 ScanKeyData key;
840 bool collides;
841@@ -368,8 +371,18 @@
842 /* Generate new OIDs until we find one not in the table */
843 do
844 {
845+#ifdef USE_REPLICATION
846+ if (PGR_Is_Sync_OID == true)
847+ {
848+ newOid = PGRGetNewObjectId(newOid);
849+ }
850+ else
851+ {
852+ newOid = GetNewObjectId();
853+ }
854+#else
855 newOid = GetNewObjectId();
856-
857+#endif /* USE_REPLICATION */
858 ScanKeyInit(&key,
859 (AttrNumber) 1,
860 BTEqualStrategyNumber, F_OIDEQ,
861@@ -454,3 +467,4 @@
862
863 return rnode.relNode;
864 }
865+
866diff -aruN postgresql-8.2.4/src/backend/commands/analyze.c pgcluster-1.7.0rc7/src/backend/commands/analyze.c
867--- postgresql-8.2.4/src/backend/commands/analyze.c 2006-11-05 23:42:08.000000000 +0100
868+++ pgcluster-1.7.0rc7/src/backend/commands/analyze.c 2007-02-18 22:52:16.000000000 +0100
869@@ -36,6 +36,9 @@
870 #include "utils/syscache.h"
871 #include "utils/tuplesort.h"
872
873+#ifdef USE_REPLICATION
874+#include "replicate.h"
875+#endif /* USE_REPLICATION */
876
877 /* Data structure for Algorithm S from Knuth 3.4.2 */
878 typedef struct
879@@ -934,7 +937,11 @@
880 static double
881 random_fract(void)
882 {
883+#ifdef USE_REPLICATION
884+ return ((double) PGR_Random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
885+#else
886 return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
887+#endif /* USE_REPLICATION */
888 }
889
890 /*
891diff -aruN postgresql-8.2.4/src/backend/commands/copy.c pgcluster-1.7.0rc7/src/backend/commands/copy.c
892--- postgresql-8.2.4/src/backend/commands/copy.c 2006-10-06 19:13:58.000000000 +0200
893+++ pgcluster-1.7.0rc7/src/backend/commands/copy.c 2007-02-18 22:52:16.000000000 +0100
894@@ -41,6 +41,9 @@
895 #include "utils/lsyscache.h"
896 #include "utils/memutils.h"
897
898+#ifdef USE_REPLICATION
899+#include "replicate.h"
900+#endif /* USE_REPLICATION */
901
902 #define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
903 #define OCTVALUE(c) ((c) - '0')
904@@ -488,6 +491,9 @@
905 CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
906 {
907 int bytesread = 0;
908+#ifdef USE_REPLICATION
909+ char * ptr = (char *)databuf;
910+#endif
911
912 switch (cstate->copy_dest)
913 {
914@@ -578,6 +584,9 @@
915 }
916 break;
917 }
918+#ifdef USE_REPLICATION
919+ PGR_Set_Copy_Data(PGRCopyData,ptr,bytesread,0);
920+#endif /* USE_REPLICATION */
921
922 return bytesread;
923 }
924@@ -2093,6 +2102,13 @@
925 }
926 }
927
928+#ifdef USE_REPLICATION
929+ if (done)
930+ {
931+ PGR_Set_Copy_Data(PGRCopyData,(char *)NULL,0,1);
932+ }
933+#endif /* USE_REPLICATION */
934+
935 /* Done, clean up */
936 error_context_stack = errcontext.previous;
937
938@@ -2201,6 +2217,11 @@
939 break;
940 }
941 }
942+#ifdef USE_REPLICATION
943+ /*
944+ PGR_Set_Copy_Data(PGRCopyData,cstate->line_buf.data,cstate->line_buf.len,0);
945+ */
946+#endif
947
948 /* Done reading the line. Convert it to server encoding. */
949 if (cstate->need_transcoding)
950diff -aruN postgresql-8.2.4/src/backend/commands/prepare.c pgcluster-1.7.0rc7/src/backend/commands/prepare.c
951--- postgresql-8.2.4/src/backend/commands/prepare.c 2006-10-04 02:29:51.000000000 +0200
952+++ pgcluster-1.7.0rc7/src/backend/commands/prepare.c 2007-02-18 22:52:16.000000000 +0100
953@@ -29,6 +29,9 @@
954 #include "utils/builtins.h"
955 #include "utils/memutils.h"
956
957+#ifdef USE_REPLICATION
958+#include "replicate.h"
959+#endif /* USE_REPLICATION */
960
961 /*
962 * The hash table in which prepared queries are stored. This is
963@@ -793,3 +796,27 @@
964 result = construct_array(tmp_ary, len, REGTYPEOID, 4, true, 'i');
965 return PointerGetDatum(result);
966 }
967+
968+
969+#ifdef USE_REPLICATION
970+bool
971+PGR_is_select_prepared_statement(PrepareStmt *stmt)
972+{
973+	/* True only when stmt names a stored statement whose tag is "SELECT". */
974+	PreparedStatement *entry;
975+
976+	if (stmt == NULL || stmt->name == NULL)
977+		return false;
978+
979+	/*
980+	 * NOTE(review): the second argument stays true as before; presumably
981+	 * that makes a missing name raise an error -- confirm in prepare.c.
982+	 */
983+	entry = FetchPreparedStatement(stmt->name, true);
984+	if (entry == NULL || entry->commandTag == NULL)
985+		return false;	/* a NULL tag must not reach strcmp() */
986+
987+	return strcmp(entry->commandTag, "SELECT") == 0;
988+}
989+#endif /* USE_REPLICATION */
990+
991diff -aruN postgresql-8.2.4/src/backend/commands/sequence.c pgcluster-1.7.0rc7/src/backend/commands/sequence.c
992--- postgresql-8.2.4/src/backend/commands/sequence.c 2006-10-06 19:13:58.000000000 +0200
993+++ pgcluster-1.7.0rc7/src/backend/commands/sequence.c 2007-02-18 22:52:16.000000000 +0100
994@@ -31,6 +31,9 @@
995 #include "utils/resowner.h"
996 #include "utils/syscache.h"
997
998+#ifdef USE_REPLICATION
999+#include "replicate.h"
1000+#endif /* USE_REPLICATION */
1001
1002 /*
1003 * We don't want to log each fetching of a value from a sequence,
1004@@ -396,6 +399,9 @@
1005 RangeVar *sequence;
1006 Oid relid;
1007
1008+#ifdef USE_REPLICATION
1009+ Xlog_Check_Replicate(CMD_UTILITY);
1010+#endif /* USE_REPLICATION */
1011 sequence = makeRangeVarFromNameList(textToQualifiedNameList(seqin));
1012 relid = RangeVarGetRelid(sequence, false);
1013
1014@@ -622,6 +628,10 @@
1015 SeqTable elm;
1016 Relation seqrel;
1017
1018+#ifdef USE_REPLICATION
1019+ Xlog_Check_Replicate(CMD_UTILITY);
1020+#endif /* USE_REPLICATION */
1021+
1022 /* open and AccessShareLock sequence */
1023 init_sequence(relid, &elm, &seqrel);
1024
1025diff -aruN postgresql-8.2.4/src/backend/executor/functions.c pgcluster-1.7.0rc7/src/backend/executor/functions.c
1026--- postgresql-8.2.4/src/backend/executor/functions.c 2007-02-02 01:03:17.000000000 +0100
1027+++ pgcluster-1.7.0rc7/src/backend/executor/functions.c 2007-02-18 22:52:16.000000000 +0100
1028@@ -30,6 +30,9 @@
1029 #include "utils/syscache.h"
1030 #include "utils/typcache.h"
1031
1032+#ifdef USE_REPLICATION
1033+#include "replicate.h"
1034+#endif /* USE_REPLICATION */
1035
1036 /*
1037 * We have an execution_state record for each query in a function. Each
1038@@ -454,6 +457,13 @@
1039 Datum value;
1040 MemoryContext oldcontext;
1041
1042+#ifdef USE_REPLICATION
1043+ if ((es != NULL) && (es->qd != NULL))
1044+ {
1045+ Xlog_Check_Replicate(es->qd->operation);
1046+ }
1047+#endif /* USE_REPLICATION */
1048+
1049 if (es->status == F_EXEC_START)
1050 postquel_start(es, fcache);
1051
1052diff -aruN postgresql-8.2.4/src/backend/libpq/Makefile pgcluster-1.7.0rc7/src/backend/libpq/Makefile
1053--- postgresql-8.2.4/src/backend/libpq/Makefile 2003-11-29 20:51:49.000000000 +0100
1054+++ pgcluster-1.7.0rc7/src/backend/libpq/Makefile 2007-02-18 22:52:16.000000000 +0100
1055@@ -15,7 +15,8 @@
1056 # be-fsstubs is here for historical reasons, probably belongs elsewhere
1057
1058 OBJS = be-fsstubs.o be-secure.o auth.o crypt.o hba.o ip.o md5.o pqcomm.o \
1059- pqformat.o pqsignal.o
1060+ pqformat.o pqsignal.o \
1061+ replicate.o replicate_com.o recovery.o lifecheck.o
1062
1063
1064 all: SUBSYS.o
1065diff -aruN postgresql-8.2.4/src/backend/libpq/auth.c pgcluster-1.7.0rc7/src/backend/libpq/auth.c
1066--- postgresql-8.2.4/src/backend/libpq/auth.c 2006-11-06 02:27:52.000000000 +0100
1067+++ pgcluster-1.7.0rc7/src/backend/libpq/auth.c 2007-02-18 22:52:16.000000000 +0100
1068@@ -31,6 +31,9 @@
1069 #include "libpq/pqformat.h"
1070 #include "storage/ipc.h"
1071
1072+#ifdef USE_REPLICATION
1073+#include "replicate.h"
1074+#endif /* USE_REPLICATION */
1075
1076 static void sendAuthRequest(Port *port, AuthRequest areq);
1077 static void auth_failed(Port *port, int status);
1078@@ -888,6 +891,12 @@
1079 {
1080 StringInfoData buf;
1081
1082+#ifdef USE_REPLICATION
1083+ if (PGR_password == NULL)
1084+ {
1085+ return NULL;
1086+ }
1087+#endif /* USE_REPLICATION */
1088 if (PG_PROTOCOL_MAJOR(port->proto) >= 3)
1089 {
1090 /* Expect 'p' message type */
1091@@ -939,6 +948,19 @@
1092 ereport(DEBUG5,
1093 (errmsg("received password packet")));
1094
1095+#ifdef USE_REPLICATION
1096+ if (strncmp(buf.data,"md5",3) == 0)
1097+ {
1098+ char * ptr = NULL;
1099+ ptr = strchr(buf.data,'(');
1100+ if (ptr != NULL)
1101+ {
1102+ PGR_get_md5salt(PGR_password->md5Salt,ptr);
1103+ *ptr='\0';
1104+ }
1105+ }
1106+ strncpy(PGR_password->password,buf.data, PASSWORD_MAX_LENGTH );
1107+#endif /* USE_REPLICATION */
1108 /*
1109 * Return the received string. Note we do not attempt to do any
1110 * character-set conversion on it; since we don't yet know the client's
1111diff -aruN postgresql-8.2.4/src/backend/libpq/be-fsstubs.c pgcluster-1.7.0rc7/src/backend/libpq/be-fsstubs.c
1112--- postgresql-8.2.4/src/backend/libpq/be-fsstubs.c 2006-09-07 17:37:25.000000000 +0200
1113+++ pgcluster-1.7.0rc7/src/backend/libpq/be-fsstubs.c 2007-02-18 22:52:16.000000000 +0100
1114@@ -49,6 +49,9 @@
1115 #include "storage/large_object.h"
1116 #include "utils/memutils.h"
1117
1118+#ifdef USE_REPLICATION
1119+#include "replicate.h"
1120+#endif /* USE_REPLICATION */
1121
1122 /*#define FSDB 1*/
1123 #define BUFSIZE 8192
1124@@ -93,6 +96,19 @@
1125 LargeObjectDesc *lobjDesc;
1126 int fd;
1127
1128+#ifdef USE_REPLICATION
1129+ if ((PGR_Stand_Alone != NULL) &&
1130+ (PGR_lo_open(lobjId,mode) != STATUS_OK))
1131+ {
1132+ if ((mode & INV_WRITE) &&
1133+ (PGR_Is_Stand_Alone() == true) &&
1134+ (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
1135+ {
1136+ elog(WARNING, "This query is not permitted when all replication servers fell down ");
1137+ PG_RETURN_INT32(-1);
1138+ }
1139+ }
1140+#endif /* USE_REPLICATION */
1141 #if FSDB
1142 elog(DEBUG4, "lo_open(%u,%d)", lobjId, mode);
1143 #endif
1144@@ -126,6 +142,9 @@
1145 errmsg("invalid large-object descriptor: %d", fd)));
1146 PG_RETURN_INT32(-1);
1147 }
1148+#ifdef USE_REPLICATION
1149+ PGR_lo_close(fd);
1150+#endif
1151 #if FSDB
1152 elog(DEBUG4, "lo_close(%d)", fd);
1153 #endif
1154@@ -183,6 +202,18 @@
1155 errmsg("large object descriptor %d was not opened for writing",
1156 fd)));
1157
1158+#ifdef USE_REPLICATION
1159+ if ((PGR_Stand_Alone != NULL) &&
1160+ (PGR_lo_write(fd, buf, len) != STATUS_OK))
1161+ {
1162+ if ((PGR_Is_Stand_Alone() == true) &&
1163+ (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
1164+ {
1165+ elog(WARNING, "This query is not permitted when all replication servers fell down ");
1166+ return -1;
1167+ }
1168+ }
1169+#endif
1170 status = inv_write(cookies[fd], buf, len);
1171
1172 return status;
1173@@ -205,6 +236,10 @@
1174 PG_RETURN_INT32(-1);
1175 }
1176
1177+#ifdef USE_REPLICATION
1178+ PGR_lo_lseek(fd, offset, whence);
1179+#endif /* USE_REPLICATION */
1180+
1181 status = inv_seek(cookies[fd], offset, whence);
1182
1183 PG_RETURN_INT32(status);
1184@@ -221,6 +256,18 @@
1185 */
1186 CreateFSContext();
1187
1188+#ifdef USE_REPLICATION
1189+ if ((PGR_Stand_Alone != NULL) &&
1190+ (PGR_lo_create(InvalidOid) != STATUS_OK))
1191+ {
1192+ if ((PGR_Is_Stand_Alone() == true) &&
1193+ (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
1194+ {
1195+ elog(WARNING, "This query is not permitted when all replication servers fell down ");
1196+ PG_RETURN_INT32(-1);
1197+ }
1198+ }
1199+#endif /* USE_REPLICATION */
1200 lobjId = inv_create(InvalidOid);
1201
1202 PG_RETURN_OID(lobjId);
1203@@ -231,6 +278,18 @@
1204 {
1205 Oid lobjId = PG_GETARG_OID(0);
1206
1207+#ifdef USE_REPLICATION
1208+ if ((PGR_Stand_Alone != NULL) &&
1209+ (PGR_lo_create(lobjId) != STATUS_OK))
1210+ {
1211+ if ((PGR_Is_Stand_Alone() == true) &&
1212+ (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
1213+ {
1214+ elog(WARNING, "This query is not permitted when all replication servers fell down ");
1215+ PG_RETURN_INT32(-1);
1216+ }
1217+ }
1218+#endif /* USE_REPLICATION */
1219 /*
1220 * We don't actually need to store into fscxt, but create it anyway to
1221 * ensure that AtEOXact_LargeObject knows there is state to clean up
1222@@ -263,6 +322,18 @@
1223 {
1224 Oid lobjId = PG_GETARG_OID(0);
1225
1226+#ifdef USE_REPLICATION
1227+ if ((PGR_Stand_Alone != NULL) &&
1228+ (PGR_lo_unlink(lobjId) != STATUS_OK))
1229+ {
1230+ if ((PGR_Is_Stand_Alone() == true) &&
1231+ (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
1232+ {
1233+ elog(WARNING, "This query is not permitted when all replication servers fell down ");
1234+ return -1;
1235+ }
1236+ }
1237+#endif /* USE_REPLICATION */
1238 /*
1239 * If there are any open LO FDs referencing that ID, close 'em.
1240 */
1241@@ -360,6 +431,19 @@
1242 nbytes = MAXPGPATH - 1;
1243 memcpy(fnamebuf, VARDATA(filename), nbytes);
1244 fnamebuf[nbytes] = '\0';
1245+
1246+#ifdef USE_REPLICATION
1247+ if ((PGR_Stand_Alone != NULL) &&
1248+ (PGR_lo_import((char*)fnamebuf) != STATUS_OK))
1249+ {
1250+ if ((PGR_Is_Stand_Alone() == true) &&
1251+ (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
1252+ {
1253+ elog(WARNING, "This query is not permitted when all replication servers fell down ");
1254+ return -1;
1255+ }
1256+ }
1257+#endif
1258 fd = PathNameOpenFile(fnamebuf, O_RDONLY | PG_BINARY, 0666);
1259 if (fd < 0)
1260 ereport(ERROR,
1261@@ -372,6 +456,7 @@
1262 */
1263 lobjOid = inv_create(InvalidOid);
1264
1265+
1266 /*
1267 * read in from the filesystem and write to the inversion object
1268 */
1269diff -aruN postgresql-8.2.4/src/backend/libpq/cluster.conf.sample pgcluster-1.7.0rc7/src/backend/libpq/cluster.conf.sample
1270--- postgresql-8.2.4/src/backend/libpq/cluster.conf.sample 1970-01-01 01:00:00.000000000 +0100
1271+++ pgcluster-1.7.0rc7/src/backend/libpq/cluster.conf.sample 2007-02-18 22:52:16.000000000 +0100
1272@@ -0,0 +1,71 @@
1273+#============================================================
1274+# Cluster DB Server configuration file
1275+#------------------------------------------------------------
1276+# file: cluster.conf
1277+#------------------------------------------------------------
1278+# This file controls:
1279+# o which hosts and ports are the replication servers
1280+# o which port is used to send replication requests to the replication server
1281+# o which commands are used by the recovery function
1282+#============================================================
1283+#------------------------------------------------------------
1284+# set Replication Server information
1285+# o Host_Name : hostname
1286+# o Port : Connection port for postmaster
1287+# o Recovery_Port : Connection port for recovery process
1288+#------------------------------------------------------------
1289+<Replicate_Server_Info>
1290+ <Host_Name> replicate1.pgcluster.org </Host_Name>
1291+ <Port> 8001 </Port>
1292+ <Recovery_Port> 8101 </Recovery_Port>
1293+</Replicate_Server_Info>
1294+#<Replicate_Server_Info>
1295+# <Host_Name> replicate2.pgcluster.org </Host_Name>
1296+# <Port> 8002 </Port>
1297+# <Recovery_Port> 8102 </Recovery_Port>
1298+#</Replicate_Server_Info>
1299+#<Replicate_Server_Info>
1300+# <Host_Name> replicate3.pgcluster.org </Host_Name>
1301+# <Port> 8003 </Port>
1302+# <Recovery_Port> 8103 </Recovery_Port>
1303+#</Replicate_Server_Info>
1304+#-------------------------------------------------------------
1305+# set Cluster DB Server information
1306+# o Host_Name : Host name which connect with replication server
1307+# o Recovery_Port : Connection port for recovery
1308+# o Rsync_Path : Path of rsync command
1309+# o Rsync_Option : File transfer option for rsync
1310+# o Rsync_Compress : Use compression option for rsync
1311+# [yes/no]. default : yes
1312+# o Pg_Dump_Path : Path of pg_dump
1313+# o When_Stand_Alone : Permission applied when all replication
1314+# servers are down; one of two values,
1315+# "read_only" or "read_write".
1316+# o Replication_Timeout : Timeout of each replication request
1317+# o LifeCheck_Timeout : Timeout of the lifecheck response
1318+# o LifeCheck_Interval : Interval time of the lifecheck
1319+# (range 1s - 1h)
1320+# 10s -- 10 seconds
1321+# 10min -- 10 minutes
1322+# 1h -- 1 hour
1323+#-------------------------------------------------------------
1324+<Host_Name> cluster1.pgcluster.org </Host_Name>
1325+<Recovery_Port> 7001 </Recovery_Port>
1326+<Rsync_Path> /usr/bin/rsync </Rsync_Path>
1327+<Rsync_Option> ssh -1 </Rsync_Option>
1328+<Rsync_Compress> yes </Rsync_Compress>
1329+<Pg_Dump_Path> /usr/local/pgsql/bin/pg_dump </Pg_Dump_Path>
1330+<When_Stand_Alone> read_only </When_Stand_Alone>
1331+<Replication_Timeout> 1 min </Replication_Timeout>
1332+<LifeCheck_Timeout> 3s </LifeCheck_Timeout>
1333+<LifeCheck_Interval> 11s </LifeCheck_Interval>
1334+#-------------------------------------------------------------
1335+# set partial replication control information
1336+# set the DB name and table name to exclude from replication
1337+# o DB_Name : DB name
1338+# o Table_Name : Table name
1339+#-------------------------------------------------------------
1340+#<Not_Replicate_Info>
1341+# <DB_Name> test_db </DB_Name>
1342+# <Table_Name> log_table </Table_Name>
1343+#</Not_Replicate_Info>
1344diff -aruN postgresql-8.2.4/src/backend/libpq/crypt.c pgcluster-1.7.0rc7/src/backend/libpq/crypt.c
1345--- postgresql-8.2.4/src/backend/libpq/crypt.c 2006-07-14 16:52:19.000000000 +0200
1346+++ pgcluster-1.7.0rc7/src/backend/libpq/crypt.c 2007-02-18 22:52:16.000000000 +0100
1347@@ -23,6 +23,9 @@
1348 #include "libpq/crypt.h"
1349 #include "libpq/md5.h"
1350
1351+#ifdef USE_REPLICATION
1352+#include "replicate.h"
1353+#endif /* USE_REPLICATION */
1354
1355 int
1356 md5_crypt_verify(const Port *port, const char *role, char *client_pass)
1357@@ -72,13 +75,34 @@
1358 if (isMD5(shadow_pass))
1359 {
1360 /* stored password already encrypted, only do salt */
1361- if (!pg_md5_encrypt(shadow_pass + strlen("md5"),
1362- (char *) port->md5Salt,
1363+#ifdef USE_REPLICATION
1364+ if ((PGR_password != NULL) &&
1365+ ((PGR_password->md5Salt[0] |
1366+ PGR_password->md5Salt[1] |
1367+ PGR_password->md5Salt[2] |
1368+ PGR_password->md5Salt[3]) != 0 ))
1369+ {
1370+ if (!pg_md5_encrypt(shadow_pass + strlen("md5"),
1371+ (char *) PGR_password->md5Salt,
1372 sizeof(port->md5Salt), crypt_pwd))
1373+ {
1374+ pfree(crypt_pwd);
1375+ return STATUS_ERROR;
1376+ }
1377+ }
1378+ else
1379 {
1380- pfree(crypt_pwd);
1381- return STATUS_ERROR;
1382+#endif /* USE_REPLICATION */
1383+ if (!pg_md5_encrypt(shadow_pass + strlen("md5"),
1384+ (char *) port->md5Salt,
1385+ sizeof(port->md5Salt), crypt_pwd))
1386+ {
1387+ pfree(crypt_pwd);
1388+ return STATUS_ERROR;
1389+ }
1390+#ifdef USE_REPLICATION
1391 }
1392+#endif /* USE_REPLICATION */
1393 }
1394 else
1395 {
1396@@ -134,6 +158,16 @@
1397
1398 if (strcmp(crypt_client_pass, crypt_pwd) == 0)
1399 {
1400+#ifdef USE_REPLICATION
1401+ /*
1402+ if (*(PGR_password->password) != '\0')
1403+ {
1404+ memset(PGR_password->password,0,PASSWORD_MAX_LENGTH);
1405+ memset(PGR_password->md5Salt,0,sizeof(PGR_password->md5Salt));
1406+ memset(PGR_password->cryptSalt,0,sizeof(PGR_password->cryptSalt));
1407+ }
1408+ */
1409+#endif /* USE_REPLICATION */
1410 /*
1411 * Password OK, now check to be sure we are not past valuntil
1412 */
1413diff -aruN postgresql-8.2.4/src/backend/libpq/lifecheck.c pgcluster-1.7.0rc7/src/backend/libpq/lifecheck.c
1414--- postgresql-8.2.4/src/backend/libpq/lifecheck.c 1970-01-01 01:00:00.000000000 +0100
1415+++ pgcluster-1.7.0rc7/src/backend/libpq/lifecheck.c 2007-03-01 16:27:15.000000000 +0100
1416@@ -0,0 +1,281 @@
1417+/*--------------------------------------------------------------------
1418+ * FILE:
1419+ * lifecheck.c
1420+ *
1421+ * NOTE:
1422+ * This file contains the functions the backend calls to perform
1423+ * the lifecheck.
1424+ * The low-level I/O functions used by these functions are
1425+ * contained in 'replicate_com.c'.
1426+ *
1427+ *--------------------------------------------------------------------
1428+ */
1429+
1430+#ifdef USE_REPLICATION
1431+
1432+#include "postgres.h"
1433+
1434+#include <stdio.h>
1435+#include <unistd.h>
1436+#include <signal.h>
1437+#include <sys/wait.h>
1438+#include <ctype.h>
1439+#include <time.h>
1440+#include <pwd.h>
1441+#include <sys/time.h>
1442+#include <sys/types.h>
1443+#include <sys/stat.h>
1444+#include <sys/socket.h>
1445+#include <sys/ipc.h>
1446+#include <sys/shm.h>
1447+#include <netdb.h>
1448+#include <netinet/in.h>
1449+#include <errno.h>
1450+#include <fcntl.h>
1451+#include <time.h>
1452+#include <sys/param.h>
1453+#include <sys/select.h>
1454+#include <netinet/tcp.h>
1455+#include <arpa/inet.h>
1456+#include <sys/file.h>
1457+#include <dirent.h>
1458+
1459+#include "libpq/pqsignal.h"
1460+#include "utils/guc.h"
1461+#include "miscadmin.h"
1462+#include "nodes/nodes.h"
1463+#include "nodes/parsenodes.h"
1464+#include "access/xact.h"
1465+#include "access/xlog.h"
1466+#include "tcop/tcopprot.h"
1467+#include "postmaster/postmaster.h"
1468+
1469+#include "replicate.h"
1470+
1471+#ifdef WIN32
1472+#include "win32.h"
1473+#else
1474+#ifdef HAVE_NETINET_TCP_H
1475+#include <netinet/tcp.h>
1476+#endif
1477+#include <arpa/inet.h>
1478+#endif
1479+
1480+#ifndef HAVE_STRDUP
1481+#include "strdup.h"
1482+#endif
1483+#ifdef HAVE_CRYPT_H
1484+#include <crypt.h>
1485+#endif
1486+
1487+#ifdef MULTIBYTE
1488+#include "mb/pg_wchar.h"
1489+#endif
1490+
1491+static void set_replication_server_status(int status);
1492+static int send_lifecheck(int sock);
1493+static int recv_lifecheck(int sock);
1494+static void set_timeout(SIGNAL_ARGS);
1495+static void exit_lifecheck(SIGNAL_ARGS);
1496+
1497+ReplicateServerInfo * PGR_Replicator_4_Lifecheck = NULL;
1498+
1499+int
1500+PGR_Lifecheck_Main(void)
1501+{
1502+ int status = STATUS_OK;
1503+ int sock = -1;
1504+ int pid = 0;
1505+ /* Fork the lifecheck child; any nonzero fork() result is returned to the caller as-is. */
1506+ if ((pid = fork()) != 0 )
1507+ {
1508+ return pid;
1509+ }
1510+ /* child only from here on: install its own signal handlers, then poll forever */
1511+ pqsignal(SIGHUP, exit_lifecheck);
1512+ pqsignal(SIGTERM, exit_lifecheck);
1513+ pqsignal(SIGINT, exit_lifecheck);
1514+ pqsignal(SIGQUIT, exit_lifecheck);
1515+ pqsignal(SIGALRM, set_timeout);
1516+ PG_SETMASK(&UnBlockSig);
1517+
1518+ for (;;)
1519+ {
1520+ /* when the check yields no server: cancel any pending alarm, wait one interval, retry */
1521+ PGR_Replicator_4_Lifecheck = PGR_check_replicate_server_info();
1522+ if (PGR_Replicator_4_Lifecheck == NULL)
1523+ {
1524+ alarm(0);
1525+ sleep(PGR_Lifecheck_Interval);
1526+ continue;
1527+ }
1528+ /* get replication server information (replaces the pointer obtained by the check above) */
1529+ PGR_Replicator_4_Lifecheck = PGR_get_replicate_server_info();
1530+ if (PGR_Replicator_4_Lifecheck == NULL)
1531+ {
1532+ if (Debug_pretty_print)
1533+ {
1534+ elog(DEBUG1,"not found replication server");
1535+ }
1536+ return STATUS_ERROR;  /* NOTE(review): returns from the forked child into the caller's code path -- confirm intended */
1537+ }
1538+ sock = PGR_get_replicate_server_socket( PGR_Replicator_4_Lifecheck , PGR_QUERY_SOCKET );
1539+ if (sock < 0)
1540+ {
1541+ set_replication_server_status(DATA_ERR);
1542+ if (Debug_pretty_print)
1543+ elog(DEBUG1,"get_replicate_server_socket failed");
1544+ continue;  /* retried immediately: there is no sleep on this path */
1545+ }
1546+
1547+ /* arm SIGALRM (handled by set_timeout) at twice the lifecheck timeout */
1548+ alarm(PGR_Lifecheck_Timeout * 2);
1549+
1550+ /* send lifecheck to replication server */
1551+ status = send_lifecheck(sock);
1552+ if (status != STATUS_OK)
1553+ {
1554+ set_replication_server_status(DATA_ERR);
1555+ close(sock);
1556+ sock = -1;
1557+ if (Debug_pretty_print)
1558+ elog(DEBUG1,"send life check failed");
1559+ continue;  /* the alarm armed above is left pending; it is re-armed on the next pass */
1560+ }
1561+
1562+ /* receive lifecheck response */
1563+ status = recv_lifecheck(sock);
1564+ if (status != STATUS_OK)
1565+ {
1566+ set_replication_server_status(DATA_ERR);
1567+ close(sock);
1568+ sock = -1;
1569+ if (Debug_pretty_print)
1570+ elog(DEBUG1,"receive life check failed");
1571+ continue;
1572+ }
1573+
1574+ /* round trip succeeded: stop the alarm and report the server as usable */
1575+ alarm(0);
1576+ set_replication_server_status(DATA_USE);  /* NOTE(review): sock is not closed on this path -- confirm who owns it */
1577+
1578+ /* sleep for the configured interval before the next lifecheck */
1579+ sleep(PGR_Lifecheck_Interval);
1580+ }
1581+}
1582+
1583+static void
1584+set_replication_server_status(int status)
1585+{
1586+	/* Publish a status for the server under lifecheck; a DATA_ERR report */
1587+	/* is withheld until retry_count has gone past MAX_RETRY_TIMES. */
1588+	ReplicateServerInfo *server = PGR_Replicator_4_Lifecheck;
1589+
1590+	if (status != DATA_ERR)
1591+	{
1592+		server->retry_count = 0;	/* any other status clears the count */
1593+	}
1594+	else if (++server->retry_count <= MAX_RETRY_TIMES)
1595+	{
1596+		return;		/* still within the retry budget: publish nothing */
1597+	}
1598+	PGR_Set_Replication_Server_Status(server, status);
1599+}
1600+
1601+/*
1602+ * Send one zero-filled ReplicateHeader tagged CMD_SYS_LIFECHECK /
1603+ * CMD_STS_CLUSTER on sock, resuming after partial sends.
1604+ *
1605+ * Returns STATUS_OK once the whole header has been sent, and also when
1606+ * select() or send() is interrupted (EINTR); STATUS_ERROR otherwise.
1607+ */
1608+static int
1609+send_lifecheck(int sock)
1610+{
1611+	ReplicateHeader header;
1612+	fd_set		wmask;
1613+	struct timeval timeout;
1614+	int			send_size = 0;
1615+	int			buf_size = sizeof(ReplicateHeader);
1616+	char	   *send_ptr = (char *) &header;
1617+	int			s = 0;
1618+	int			rtn = 0;
1619+
1620+	memset(&header, 0, sizeof(ReplicateHeader));
1621+	header.cmdSys = CMD_SYS_LIFECHECK;
1622+	header.cmdSts = CMD_STS_CLUSTER;
1623+
1624+	for (;;)
1625+	{
1626+		/*
1627+		 * select() may overwrite the timeout it is given, so re-arm it on
1628+		 * every pass rather than once before the loop.
1629+		 */
1630+		timeout.tv_sec = PGR_Lifecheck_Timeout;
1631+		timeout.tv_usec = 0;
1632+		FD_ZERO(&wmask);
1633+		FD_SET(sock, &wmask);
1634+		rtn = select(sock + 1, (fd_set *) NULL, &wmask, (fd_set *) NULL, &timeout);
1635+		if (rtn < 0)
1636+		{
1637+			/* an interrupted wait is still reported as success */
1638+			if (errno == EINTR)
1639+				return STATUS_OK;
1640+			elog(DEBUG1, "send_lifecheck():select() failed");
1641+			return STATUS_ERROR;
1642+		}
1643+		if (rtn == 0 || !FD_ISSET(sock, &wmask))
1644+			continue;			/* not writable yet: wait again */
1645+
1646+		s = send(sock, send_ptr + send_size, buf_size - send_size, 0);
1647+		if (s < 0)
1648+		{
1649+			if (errno == EINTR)
1650+				return STATUS_OK;
1651+			if (errno == EAGAIN)
1652+				continue;
1653+			/* EPIPE || ENCONNREFUSED || ENSOCK || EHOSTUNREACH */
1654+			elog(DEBUG1, "send_lifecheck():send error");
1655+			return STATUS_ERROR;
1656+		}
1657+		if (s == 0)
1658+		{
1659+			elog(DEBUG1, "send_lifecheck():unexpected EOF");
1660+			return STATUS_ERROR;
1661+		}
1662+		send_size += s;
1663+		if (send_size == buf_size)
1664+			return STATUS_OK;
1665+	}
1666+}
1667+
1668+static int
1669+recv_lifecheck(int sock)
1670+{
1671+	/* Read the lifecheck reply into a zeroed scratch buffer; only the status is used. */
1672+	char		reply[PGR_MESSAGE_BUFSIZE] = {0};
1673+	int			rc;
1674+
1675+	rc = PGR_recv_replicate_result(sock, reply, PGR_Lifecheck_Timeout);
1676+	return (rc < 0) ? STATUS_ERROR : STATUS_OK;
1677+}
1678+
1679+static void
1680+set_timeout(SIGNAL_ARGS)
1681+{
1682+	/* SIGALRM handler: record the overdue lifecheck as a DATA_ERR report. */
1683+	if (PGR_Replicator_4_Lifecheck == NULL)
1684+		return;		/* no server selected, nothing to mark */
1685+	set_replication_server_status(DATA_ERR);
1686+	if (Debug_pretty_print)
1687+		elog(DEBUG1,"time out is occured in life check");
1688+}
1689+
1690+static void
1691+exit_lifecheck(SIGNAL_ARGS)  /* handler installed for SIGHUP, SIGTERM, SIGINT and SIGQUIT in the lifecheck child */
1692+{
1693+ fprintf(stderr,"lifecheck stopped\n");  /* NOTE(review): stdio inside a signal handler -- confirm this is acceptable */
1694+ exit(0);  /* ends the lifecheck process with status 0 */
1695+}
1696+
1697+#endif /* USE_REPLICATION */
1698diff -aruN postgresql-8.2.4/src/backend/libpq/recovery.c pgcluster-1.7.0rc7/src/backend/libpq/recovery.c
1699--- postgresql-8.2.4/src/backend/libpq/recovery.c 1970-01-01 01:00:00.000000000 +0100
1700+++ pgcluster-1.7.0rc7/src/backend/libpq/recovery.c 2007-02-18 22:52:16.000000000 +0100
1701@@ -0,0 +1,1566 @@
1702+/*--------------------------------------------------------------------
1703+ * FILE:
1704+ * recovery.c
1705+ *
1706+ * NOTE:
1707+ * This file contains the functions the backend calls to perform
1708+ * the recovery.
1709+ * The low-level I/O functions used by these functions are
1710+ * contained in 'replicate_com.c'.
1711+ *
1712+ *--------------------------------------------------------------------
1713+ */
1714+
1715+/*--------------------------------------
1716+ * INTERFACE ROUTINES
1717+ *
1718+ * I/O call:
1719+ * PGR_recovery_finish_send
1720+ * master module:
1721+ * PGR_Master_Main(void);
1722+ * recovery module:
1723+ * PGR_Recovery_Main
1724+ *-------------------------------------
1725+ */
1726+#ifdef USE_REPLICATION
1727+
1728+#include "postgres.h"
1729+
1730+#include <stdio.h>
1731+#include <unistd.h>
1732+#include <signal.h>
1733+#include <sys/wait.h>
1734+#include <ctype.h>
1735+#include <time.h>
1736+#include <pwd.h>
1737+#include <sys/time.h>
1738+#include <sys/types.h>
1739+#include <sys/stat.h>
1740+#include <sys/socket.h>
1741+#include <sys/ipc.h>
1742+#include <sys/shm.h>
1743+#include <netdb.h>
1744+#include <netinet/in.h>
1745+#include <errno.h>
1746+#include <fcntl.h>
1747+#include <time.h>
1748+#include <sys/param.h>
1749+#include <sys/select.h>
1750+#include <netinet/tcp.h>
1751+#include <arpa/inet.h>
1752+#include <sys/file.h>
1753+#include <dirent.h>
1754+
1755+#include "libpq/pqsignal.h"
1756+#include "utils/guc.h"
1757+#include "miscadmin.h"
1758+#include "nodes/nodes.h"
1759+#include "nodes/parsenodes.h"
1760+#include "access/xact.h"
1761+#include "access/xlog.h"
1762+#include "tcop/tcopprot.h"
1763+#include "postmaster/postmaster.h"
1764+
1765+#include "../interfaces/libpq/libpq-fe.h"
1766+#include "../interfaces/libpq/libpq-int.h"
1767+#include "../interfaces/libpq/fe-auth.h"
1768+
1769+#include "replicate.h"
1770+
1771+#ifdef WIN32
1772+#include "win32.h"
1773+#else
1774+#ifdef HAVE_NETINET_TCP_H
1775+#include <netinet/tcp.h>
1776+#endif
1777+#include <arpa/inet.h>
1778+#endif
1779+
1780+#ifndef HAVE_STRDUP
1781+#include "strdup.h"
1782+#endif
1783+#ifdef HAVE_CRYPT_H
1784+#include <crypt.h>
1785+#endif
1786+
1787+#ifdef MULTIBYTE
1788+#include "mb/pg_wchar.h"
1789+#endif
1790+
1791+#define RECOVERY_LOOP_END (0)
1792+#define RECOVERY_LOOP_CONTINUE (1)
1793+#define RECOVERY_LOOP_FAIL (2)
1794+char Local_Host_Name[HOSTNAME_MAX_LENGTH];
1795+int PGR_Recovery_Mode = 0;
1796+
1797+static int read_packet(int sock,RecoveryPacket * packet);
1798+static int send_recovery_packet(int sock, RecoveryPacket * packet);
1799+static int send_packet(int * sock, RecoveryPacket * packet );
1800+static void master_loop(int fd);
1801+static int start_recovery_send(int * sock, ReplicateServerInfo * host);
1802+static int stop_recovery_send(int * sock, ReplicateServerInfo * host);
1803+static int rsync_pg_data(char * src , char * dest);
1804+static int remove_dir(char * dir_name);
1805+static int clear_bkup_dir(char * dir_name);
1806+static int bkup_dir(char * dir_name);
1807+static int restore_dir(char * dir_name);
1808+static int rsync_global_dir(char * src, char * dest, int stage);
1809+static int first_recovery(char * src, char * dest, char * dir);
1810+static int second_recovery(char * src, char * dest, char * dir);
1811+static int recovery_rsync(char * src , char * dest, int stage);
1812+static int recovery_loop(int fd, int mode);
1813+static void show_recovery_packet(RecoveryPacket * packet);
1814+static int direct_send_packet(int packet_no);
1815+static void set_recovery_packet(RecoveryPacket * packet, int packet_no);
1816+static int cold_recovery(char * src, RecoveryPacket *packet, bool need_sync_table_space, int stage);
1817+static int hot_recovery(RecoveryPacket *packet, int stage);
1818+static int restore_from_dumpall( char * hostName, uint16_t portNum, char * userName);
1819+static int restore_from_dump( char * hostName, uint16_t portNum, char * userName, char * dbName);
1820+static int restore_from_each_dump( char * hostName, uint16_t portNum, char * userName);
1821+static PGresult * get_dbName(char * hostName, uint16_t portNum, char * userName);
1822+
1823+static int sync_table_space(char * hostName, uint16_t portNum, char * userName, int stage);
1824+static PGresult * get_table_space_location(char * hostName, uint16_t portNum, char * userName);
1825+static int rsync_table_space(char * hostName, char * location, int stage);
1826+
1827+int PGR_recovery_error_send(void);
1828+int PGR_recovery_finish_send(void);
1829+int PGR_recovery_queue_data_req(void);
1830+int PGR_Master_Main(void);
1831+int PGR_Recovery_Main(int mode);
1832+
1833+static int
1834+read_packet(int sock,RecoveryPacket * packet)
1835+{
1836+ int r;
1837+ char * read_ptr;
1838+ int read_size = 0;
1839+ int packet_size = 0;
1840+
1841+ read_ptr = (char*)packet;
1842+ packet_size = sizeof(RecoveryPacket);
1843+
1844+ for (;;){
1845+ r = recv(sock,read_ptr + read_size ,packet_size, MSG_WAITALL);
1846+ if (r < 0) {
1847+ if (errno == EINTR || errno == EAGAIN) {
1848+ continue;
1849+ } else {
1850+ elog(DEBUG1, "read_packet():recv failed");
1851+ return -1;
1852+ }
1853+ } else if (r == 0) {
1854+ elog(DEBUG1, "read_packet():unexpected EOF");
1855+ return -1;
1856+ } else /*if (r > 0)*/ {
1857+ read_size += r;
1858+ if (read_size == packet_size) {
1859+ show_recovery_packet(packet);
1860+ return read_size;
1861+ }
1862+ }
1863+ }
1864+ return -1;
1865+}
1866+
1867+static int
1868+send_recovery_packet(int sock, RecoveryPacket * packet)
1869+{
1870+ char * send_ptr;
1871+ int send_size= 0;
1872+ int buf_size = 0;
1873+ int s;
1874+ int rtn;
1875+ fd_set wmask;
1876+ struct timeval timeout;
1877+
1878+ timeout.tv_sec = RECOVERY_TIMEOUT;
1879+ timeout.tv_usec = 0;
1880+
1881+ /*
1882+ * Wait for something to happen.
1883+ */
1884+ rtn = 1;
1885+ while (rtn)
1886+ {
1887+ for (;;)
1888+ {
1889+ timeout.tv_sec = RECOVERY_TIMEOUT;
1890+ timeout.tv_usec = 0;
1891+
1892+ FD_ZERO(&wmask);
1893+ FD_SET(sock,&wmask);
1894+ rtn = select(sock+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
1895+
1896+ if (rtn < 0)
1897+ {
1898+ if (errno == EINTR || errno == EAGAIN)
1899+ {
1900+ continue;
1901+ }
1902+ else
1903+ {
1904+ rtn = 0;
1905+ break;
1906+ }
1907+ }
1908+ else if (rtn && FD_ISSET(sock, &wmask))
1909+ {
1910+ send_ptr = (char *)packet;
1911+ buf_size = sizeof(RecoveryPacket);
1912+
1913+ s = send(sock,send_ptr + send_size,buf_size - send_size ,0);
1914+ if (s < 0) {
1915+ if (errno == EINTR || errno == EAGAIN) {
1916+ continue;
1917+ }
1918+ elog(DEBUG1, "send_recovery_packet():send error");
1919+
1920+ /* EPIPE || ENCONNREFUSED || ENSOCK || EHOSTUNREACH */
1921+ return STATUS_ERROR;
1922+ } else if (s == 0) {
1923+ elog(DEBUG1, "send_recovery_packet():unexpected EOF");
1924+ return STATUS_ERROR;
1925+ } else /*if (s > 0)*/ {
1926+ send_size += s;
1927+ if (send_size == buf_size)
1928+ {
1929+ return STATUS_OK;
1930+ }
1931+ }
1932+ }
1933+ }
1934+ }
1935+ return STATUS_ERROR;
1936+}
1937+
1938+static int
1939+send_packet(int * sock, RecoveryPacket * packet )
1940+{
1941+ int count = 0;
1942+ ReplicateServerInfo * host = NULL;
1943+
1944+ host = PGR_get_replicate_server_info();
1945+ if (host == (ReplicateServerInfo*)NULL)
1946+ {
1947+ return STATUS_ERROR;
1948+ }
1949+ count = 0;
1950+ while (send_recovery_packet(*sock,packet) != STATUS_OK)
1951+ {
1952+ if (count < MAX_RETRY_TIMES )
1953+ {
1954+ count ++;
1955+ continue;
1956+ }
1957+ count = 0;
1958+ close(*sock);
1959+ PGR_Set_Replication_Server_Status(host,DATA_ERR);
1960+ host = PGR_get_replicate_server_info();
1961+ if (host == (ReplicateServerInfo*)NULL)
1962+ {
1963+ return STATUS_ERROR;
1964+ }
1965+ PGR_Set_Replication_Server_Status(host,DATA_USE);
1966+ PGR_Create_Socket_Connect(sock, host->hostName , host->recoveryPortNumber);
1967+ }
1968+ return STATUS_OK;
1969+}
1970+
1971+static void
1972+master_loop(int fd)
1973+{
1974+ int count;
1975+ int sock;
1976+ int status = STATUS_OK;
1977+ RecoveryPacket packet;
1978+ int r_size = 0;
1979+ bool loop_end = false;
1980+
1981+ count = 0;
1982+ while ((status = PGR_Create_Acception(fd,&sock,"",RecoveryPortNumber)) != STATUS_OK)
1983+ {
1984+ PGR_Close_Sock(&sock);
1985+ sock = -1;
1986+ if ( count > MAX_RETRY_TIMES)
1987+ {
1988+ return;
1989+ }
1990+ count ++;
1991+ }
1992+ for(;;)
1993+ {
1994+ int rtn;
1995+ fd_set rmask;
1996+ struct timeval timeout;
1997+
1998+ timeout.tv_sec = RECOVERY_TIMEOUT;
1999+ timeout.tv_usec = 0;
2000+
2001+ /*
2002+ * Wait for something to happen.
2003+ */
2004+ FD_ZERO(&rmask);
2005+ FD_SET(sock,&rmask);
2006+ memset(&packet,0,sizeof(RecoveryPacket));
2007+ rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
2008+ if (rtn && FD_ISSET(sock, &rmask))
2009+ {
2010+ r_size = read_packet(sock,&packet);
2011+ if (r_size == 0)
2012+ {
2013+ continue;
2014+ }
2015+ else if (r_size < 0)
2016+ {
2017+ loop_end=true;
2018+ break;
2019+ }
2020+ }
2021+ else
2022+ {
2023+ continue;
2024+ }
2025+ switch (ntohs(packet.packet_no))
2026+ {
2027+ case RECOVERY_PGDATA_REQ :
2028+ /*
2029+ * PGDATA information request
2030+ */
2031+ /*
2032+ * get master server information
2033+ */
2034+ memset(&packet,0,sizeof(packet));
2035+ set_recovery_packet(&packet, RECOVERY_PGDATA_ANS) ;
2036+ status = send_packet(&sock,&packet);
2037+ PGR_Set_Cluster_Status(STATUS_RECOVERY);
2038+ break;
2039+ case RECOVERY_FSYNC_REQ :
2040+ /*
2041+ * get master server information
2042+ */
2043+ memset(&packet,0,sizeof(packet));
2044+ set_recovery_packet(&packet, RECOVERY_FSYNC_ANS );
2045+ status = send_packet(&sock,&packet);
2046+ PGR_Set_Cluster_Status(STATUS_RECOVERY);
2047+ loop_end = true;
2048+ break;
2049+ case RECOVERY_ERROR_TARGET_ONLY:
2050+ memset(&packet,0,sizeof(packet));
2051+ set_recovery_packet(&packet, RECOVERY_ERROR_ANS );
2052+ status = send_packet(&sock,&packet);
2053+ PGR_Set_Cluster_Status(STATUS_REPLICATED);
2054+ break;
2055+ case RECOVERY_ERROR_CONNECTION:
2056+ memset(&packet,0,sizeof(packet));
2057+ set_recovery_packet(&packet, RECOVERY_ERROR_ANS );
2058+ status = send_packet(&sock,&packet);
2059+ PGR_Set_Cluster_Status(STATUS_REPLICATED);
2060+ /**
2061+ * kill broken cluster db.
2062+ * FIXME: missing MyProcPid here. It must be postmaster's pid.
2063+ * but here's a bug MyProcPid doesn't initialized properly , so MyProcPid = postmaster's pid.
2064+ * To fix this, define variable to set posmaster's pid.
2065+ */
2066+ kill(MyProcPid,SIGQUIT);
2067+ loop_end = true;
2068+ break;
2069+ case RECOVERY_ERROR_ANS:
2070+ /* TODO: recovery failed. close this postmaster */
2071+ loop_end = true;
2072+ break;
2073+ case RECOVERY_FINISH:
2074+ PGR_Set_Cluster_Status(STATUS_REPLICATED);
2075+ loop_end = true;
2076+ break;
2077+ default:
2078+ loop_end = true;
2079+ break;
2080+ }
2081+ if (loop_end)
2082+ {
2083+ break;
2084+ }
2085+ }
2086+ PGR_Close_Sock(&sock);
2087+}
2088+
2089+int
2090+PGR_Master_Main(void)
2091+{
2092+ int status;
2093+ int fd = -1;
2094+ int rtn;
2095+ int pid;
2096+
2097+ if ((pid = fork()) != 0 )
2098+ {
2099+ return pid;
2100+ }
2101+
2102+ memset(Local_Host_Name,0,sizeof(Local_Host_Name));
2103+ gethostname(Local_Host_Name,sizeof(Local_Host_Name));
2104+ pqsignal(SIGHUP, authdie);
2105+ pqsignal(SIGTERM, authdie);
2106+ pqsignal(SIGINT, authdie);
2107+ pqsignal(SIGQUIT, authdie);
2108+ pqsignal(SIGALRM, authdie);
2109+ PG_SETMASK(&UnBlockSig);
2110+
2111+ status = STATUS_ERROR;
2112+ status = PGR_Create_Socket_Bind(&fd, "", RecoveryPortNumber);
2113+
2114+ if (status != STATUS_OK)
2115+ {
2116+ return pid;
2117+ }
2118+ for (;;)
2119+ {
2120+ fd_set rmask;
2121+ struct timeval timeout;
2122+
2123+ timeout.tv_sec = 60;
2124+ timeout.tv_usec = 0;
2125+
2126+ /*
2127+ * Wait for something to happen.
2128+ */
2129+ FD_ZERO(&rmask);
2130+ FD_SET(fd,&rmask);
2131+ rtn = select(fd+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
2132+ if (rtn && FD_ISSET(fd, &rmask))
2133+ {
2134+ master_loop(fd);
2135+ }
2136+ }
2137+ return pid;
2138+}
2139+
2140+static int
2141+start_recovery_send(int * sock, ReplicateServerInfo * host)
2142+{
2143+ int status;
2144+ RecoveryPacket packet;
2145+ status = PGR_Create_Socket_Connect(sock, host->hostName, host->recoveryPortNumber);
2146+ if (status != STATUS_OK)
2147+ {
2148+ if (Debug_pretty_print)
2149+ {
2150+ elog(DEBUG1,"connection error to replication server");
2151+ }
2152+ return STATUS_ERROR;
2153+ }
2154+
2155+ memset(&packet,0,sizeof(packet));
2156+ set_recovery_packet(&packet, RECOVERY_PREPARE_REQ );
2157+ status = send_packet(sock,&packet);
2158+
2159+ return status;
2160+}
2161+
2162+static int
2163+stop_recovery_send(int * sock, ReplicateServerInfo * host)
2164+{
2165+ int status;
2166+ RecoveryPacket packet;
2167+
2168+ memset(&packet,0,sizeof(packet));
2169+ set_recovery_packet(&packet, RECOVERY_ERROR_ANS );
2170+ status = send_packet(sock,&packet);
2171+ return status;
2172+}
2173+
2174+static int
2175+direct_send_packet(int packet_no)
2176+{
2177+
2178+ int status;
2179+ int fd = -1;
2180+ ReplicateServerInfo * host;
2181+ RecoveryPacket packet;
2182+
2183+ host = PGR_get_replicate_server_info();
2184+ if (host == NULL)
2185+ {
2186+ return STATUS_ERROR;
2187+ }
2188+ status = PGR_Create_Socket_Connect(&fd, host->hostName, host->recoveryPortNumber);
2189+ if (status != STATUS_OK)
2190+ {
2191+ PGR_Set_Replication_Server_Status(host,DATA_ERR);
2192+ return STATUS_ERROR;
2193+ }
2194+
2195+ memset(&packet,0,sizeof(packet));
2196+ set_recovery_packet(&packet, packet_no );
2197+ status = send_packet(&fd,&packet);
2198+
2199+ close(fd);
2200+
2201+ return status;
2202+}
2203+
2204+int
2205+PGR_recovery_error_send(void)
2206+{
2207+ return direct_send_packet(RECOVERY_ERROR_ANS);
2208+}
2209+
2210+int
2211+PGR_recovery_finish_send(void)
2212+{
2213+ return direct_send_packet(RECOVERY_FINISH);
2214+}
2215+
2216+int
2217+PGR_recovery_queue_data_req(void)
2218+{
2219+ int status = STATUS_OK;
2220+ int r_size = 0;
2221+ int rtn = STATUS_OK;
2222+ int fd = -1;
2223+ ReplicateServerInfo * host = NULL;
2224+ RecoveryPacket packet;
2225+
2226+ host = PGR_get_replicate_server_info();
2227+ if (host == NULL)
2228+ {
2229+ return STATUS_ERROR;
2230+ }
2231+ status = PGR_Create_Socket_Connect(&fd, host->hostName, host->recoveryPortNumber);
2232+ if (status != STATUS_OK)
2233+ {
2234+ PGR_Set_Replication_Server_Status(host,DATA_ERR);
2235+ PGR_Set_Cluster_Status(STATUS_REPLICATED);
2236+ close(fd);
2237+ return STATUS_ERROR;
2238+ }
2239+
2240+ memset(&packet,0,sizeof(packet));
2241+ PGRset_recovery_packet_no(&packet, RECOVERY_QUEUE_DATA_REQ );
2242+ status = send_packet(&fd,&packet);
2243+ if (status != STATUS_OK)
2244+ {
2245+ status = stop_recovery_send(&fd,host);
2246+ PGR_Set_Cluster_Status(STATUS_REPLICATED);
2247+ close(fd);
2248+ return STATUS_ERROR;
2249+ }
2250+ memset(&packet,0,sizeof(RecoveryPacket));
2251+ r_size = read_packet(fd,&packet);
2252+ if (r_size <= 0)
2253+ {
2254+ rtn = STATUS_ERROR;
2255+ }
2256+ switch (ntohs(packet.packet_no))
2257+ {
2258+ case RECOVERY_QUEUE_DATA_ANS:
2259+ rtn = STATUS_OK;
2260+ break;
2261+ default:
2262+ rtn = STATUS_ERROR;
2263+ break;
2264+ }
2265+ PGR_Set_Cluster_Status(STATUS_REPLICATED);
2266+ close(fd);
2267+ return rtn;
2268+}
2269+
2270+static int
2271+rsync_pg_data(char * src, char * dest)
2272+{
2273+ int status;
2274+ char *args[12];
2275+ int pid, i = 0;
2276+
2277+ args[i++] = "rsync";
2278+ args[i++] = "-a";
2279+ args[i++] = "-r";
2280+ if (RsyncCompress)
2281+ args[i++] = "-z";
2282+ args[i++] = "--delete";
2283+ args[i++] = "-e";
2284+ args[i++] = RsyncOption;
2285+ args[i++] = src;
2286+ args[i++] = dest;
2287+ args[i++] = NULL;
2288+
2289+ pid = fork();
2290+ if (pid == 0)
2291+ {
2292+ status = execv(RsyncPath,args);
2293+ }
2294+ else
2295+ {
2296+ for (;;)
2297+ {
2298+ int result;
2299+ result = wait(&status);
2300+ if (result < 0)
2301+ {
2302+ if (errno == EINTR)
2303+ continue;
2304+ return STATUS_ERROR;
2305+ }
2306+
2307+ if (WIFEXITED(status) == 0 || WEXITSTATUS(status) != 0)
2308+ return STATUS_ERROR;
2309+ else
2310+ break;
2311+ }
2312+ }
2313+ return STATUS_OK;
2314+}
2315+
2316+static int
2317+remove_dir(char * dir_name)
2318+{
2319+ DIR * dp = NULL;
2320+ struct dirent *dirp = NULL;
2321+ char fname[256];
2322+ int status = 0;
2323+
2324+ if ((dp = opendir(dir_name)) == NULL)
2325+ {
2326+ return STATUS_ERROR;
2327+ }
2328+ while ((dirp = readdir(dp)) != NULL)
2329+ {
2330+ if ((!strcmp(dirp->d_name,".")) ||
2331+ (!strcmp(dirp->d_name,"..")))
2332+ {
2333+ continue;
2334+ }
2335+ sprintf(fname,"%s/%s",dir_name,dirp->d_name);
2336+ status = remove(fname);
2337+ if (status < 0)
2338+ {
2339+ remove_dir(fname);
2340+ }
2341+ }
2342+ closedir(dp);
2343+ if (remove(dir_name) < 0)
2344+ {
2345+ return STATUS_ERROR;
2346+ }
2347+ return STATUS_OK;
2348+}
2349+
2350+static int
2351+clear_bkup_dir(char * dir_name)
2352+{
2353+ char bkp_dir[256];
2354+ pid_t pid = getpid();
2355+
2356+ sprintf(bkp_dir,"%s_%d",dir_name,pid);
2357+ return (remove_dir(bkp_dir));
2358+}
2359+
2360+static int
2361+bkup_dir(char * dir_name)
2362+{
2363+ int status;
2364+ char org_dir[256];
2365+ char bkp_dir[256];
2366+ pid_t pid = getpid();
2367+
2368+ sprintf(org_dir,"%s",dir_name);
2369+ sprintf(bkp_dir,"%s_%d",dir_name,pid);
2370+ status = rename(org_dir,bkp_dir);
2371+ if (status < 0)
2372+ {
2373+ return STATUS_ERROR;
2374+ }
2375+ return STATUS_OK;
2376+}
2377+
2378+static int
2379+restore_dir(char * dir_name)
2380+{
2381+ int status;
2382+ char org_dir[256];
2383+ char bkp_dir[256];
2384+ pid_t pid = getpid();
2385+
2386+ sprintf(org_dir,"%s",dir_name);
2387+ sprintf(bkp_dir,"%s_%d",dir_name,pid);
2388+ status = rename(bkp_dir,org_dir);
2389+ if (status < 0)
2390+ {
2391+ remove_dir(org_dir);
2392+ status = rename(bkp_dir,org_dir);
2393+ if (status < 0)
2394+ {
2395+ return STATUS_ERROR;
2396+ }
2397+ }
2398+ return STATUS_OK;
2399+}
2400+
2401+static int
2402+rsync_global_dir(char * src, char * dest, int stage)
2403+{
2404+ int status;
2405+ char control_file[256];
2406+ char org_dir[256];
2407+ char src_dir[256];
2408+ struct stat fstat;
2409+ int cnt;
2410+
2411+ sprintf(org_dir,"%s/global",dest);
2412+ sprintf(control_file,"%s/global/pg_control",dest);
2413+ if ((stage == PGR_1ST_RECOVERY) && (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP))
2414+ {
2415+ if (bkup_dir(org_dir) != STATUS_OK)
2416+ {
2417+ return STATUS_ERROR;
2418+ }
2419+ }
2420+ sprintf(src_dir,"%s/global",src);
2421+ status = rsync_pg_data(src_dir, dest);
2422+ if (status != STATUS_OK )
2423+ {
2424+ restore_dir(org_dir);
2425+ return STATUS_ERROR;
2426+ }
2427+ /* check pg_control file */
2428+ cnt = 0;
2429+ while (stat(control_file, &fstat) < 0)
2430+ {
2431+ if (cnt > MAX_RETRY_TIMES )
2432+ {
2433+ restore_dir(org_dir);
2434+ return STATUS_ERROR;
2435+ }
2436+ cnt ++;
2437+ sleep(1);
2438+ }
2439+ if ((stage == PGR_2ND_RECOVERY) && (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP))
2440+ {
2441+ clear_bkup_dir(org_dir);
2442+ }
2443+ return STATUS_OK;
2444+}
2445+
2446+static int
2447+first_recovery(char * src, char * dest, char * dir)
2448+{
2449+ int status = STATUS_OK;
2450+ char src_dir[256];
2451+ char dest_dir[256];
2452+
2453+ memset(src_dir,0,sizeof(src_dir));
2454+ memset(dest_dir,0,sizeof(dest_dir));
2455+ sprintf(src_dir,"%s/%s",src,dir);
2456+ sprintf(dest_dir,"%s/%s",dest,dir);
2457+ if (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP)
2458+ {
2459+ status = bkup_dir(dest_dir);
2460+ if (status < 0)
2461+ {
2462+ return STATUS_ERROR;
2463+ }
2464+ }
2465+ status = rsync_pg_data(src_dir, dest);
2466+ if (status != STATUS_OK )
2467+ {
2468+ restore_dir(dest_dir);
2469+ return STATUS_ERROR;
2470+ }
2471+ return STATUS_OK;
2472+}
2473+
2474+static int
2475+second_recovery(char * src, char * dest, char * dir)
2476+{
2477+ int status = STATUS_OK;
2478+ char src_dir[256];
2479+ char dest_dir[256];
2480+
2481+ memset(src_dir,0,sizeof(src_dir));
2482+ memset(dest_dir,0,sizeof(dest_dir));
2483+ sprintf(src_dir,"%s/%s",src,dir);
2484+ sprintf(dest_dir,"%s/%s",dest,dir);
2485+
2486+ status = rsync_pg_data(src_dir, dest);
2487+ if (status != STATUS_OK )
2488+ {
2489+ restore_dir(dest_dir);
2490+ return STATUS_ERROR;
2491+ }
2492+ if (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP)
2493+ {
2494+ clear_bkup_dir(dest_dir);
2495+ }
2496+
2497+ return STATUS_OK;
2498+}
2499+
2500+static int
2501+recovery_rsync(char * src , char * dest, int stage)
2502+{
2503+ if ((src== NULL) || ( dest == NULL))
2504+ {
2505+ return STATUS_ERROR;
2506+ }
2507+
2508+ /* recovery step of "global" directory */
2509+ fprintf(stderr,"%s recovery step of [global] directory...",
2510+ ((stage == 1)?"1st":"2nd"));
2511+ if (rsync_global_dir(src, dest, stage) != STATUS_OK)
2512+ {
2513+ fprintf(stderr,"NG\n");
2514+ return STATUS_ERROR;
2515+ }
2516+ fprintf(stderr,"OK\n");
2517+
2518+ if (stage == PGR_1ST_RECOVERY)
2519+ {
2520+ /* 1st recovery step of "base" directory */
2521+ fprintf(stderr,"1st recovery step of [base] directory...");
2522+ if (first_recovery(src,dest,"base") != STATUS_OK)
2523+ {
2524+ fprintf(stderr,"NG\n");
2525+ return STATUS_ERROR;
2526+ }
2527+ fprintf(stderr,"OK\n");
2528+
2529+ fprintf(stderr,"1st recovery step of [pg_clog] directory...");
2530+ /* 1st recovery step of "pg_clog" directory */
2531+ if (first_recovery(src,dest,"pg_clog") != STATUS_OK)
2532+ {
2533+ fprintf(stderr,"NG\n");
2534+ return STATUS_ERROR;
2535+ }
2536+ fprintf(stderr,"OK\n");
2537+
2538+ /* 1st recovery step of "pg_xlog" directory */
2539+ fprintf(stderr,"1st recovery step of [pg_xlog] directory...");
2540+ if (first_recovery(src,dest,"pg_xlog") != STATUS_OK)
2541+ {
2542+ fprintf(stderr,"NG\n");
2543+ return STATUS_ERROR;
2544+ }
2545+ fprintf(stderr,"OK\n");
2546+ }
2547+ else
2548+ {
2549+ /* 2nd recovery step of "base" directory */
2550+ fprintf(stderr,"2nd recovery step of [base] directory...");
2551+ if (second_recovery(src,dest,"base") != STATUS_OK)
2552+ {
2553+ fprintf(stderr,"NG\n");
2554+ return STATUS_ERROR;
2555+ }
2556+ fprintf(stderr,"OK\n");
2557+
2558+ /* 2nd recovery step of "pg_clog" directory */
2559+ fprintf(stderr,"2nd recovery step of [pg_clog] directory...");
2560+ if (second_recovery(src,dest,"pg_clog") != STATUS_OK)
2561+ {
2562+ fprintf(stderr,"NG\n");
2563+ return STATUS_ERROR;
2564+ }
2565+ fprintf(stderr,"OK\n");
2566+
2567+ /* 2nd recovery step of "pg_xlog" directory */
2568+ fprintf(stderr,"2nd recovery step of [pg_xlog] directory...");
2569+ if (second_recovery(src,dest,"pg_xlog") != STATUS_OK)
2570+ {
2571+ fprintf(stderr,"NG\n");
2572+ return STATUS_ERROR;
2573+ }
2574+ fprintf(stderr,"OK\n");
2575+ }
2576+
2577+ return STATUS_OK;
2578+}
2579+
2580+static int
2581+recovery_loop(int fd, int mode)
2582+{
2583+
2584+ int status = STATUS_OK;
2585+ RecoveryPacket packet;
2586+ int r_size = 0;
2587+ int rtn = RECOVERY_LOOP_END;
2588+ char src[256];
2589+ bool need_sync_table_space = false;
2590+
2591+ memset(&packet,0,sizeof(RecoveryPacket));
2592+ r_size = read_packet(fd,&packet);
2593+ if (r_size <= 0)
2594+ {
2595+ rtn = RECOVERY_LOOP_FAIL;
2596+ }
2597+ switch (ntohs(packet.packet_no))
2598+ {
2599+ case RECOVERY_PREPARE_ANS :
2600+ /*
2601+ * get master information
2602+ */
2603+ /*
2604+ * sync master data before recovery
2605+ */
2606+ if (Debug_pretty_print)
2607+ {
2608+ elog(DEBUG1,"local host : %s master:%s",Local_Host_Name,packet.hostName);
2609+ }
2610+ if (!strncmp(Local_Host_Name,packet.hostName,strlen(Local_Host_Name)))
2611+ {
2612+ strcpy(src,packet.pg_data);
2613+ need_sync_table_space = false;
2614+ }
2615+ else
2616+ {
2617+ sprintf(src,"%s:%s",packet.hostName,packet.pg_data);
2618+ need_sync_table_space = true;
2619+ }
2620+ if (PGR_Recovery_Mode == PGR_COLD_RECOVERY)
2621+ {
2622+ rtn = cold_recovery(src,&packet,need_sync_table_space,PGR_1ST_RECOVERY);
2623+ }
2624+ else
2625+ {
2626+ rtn = hot_recovery(&packet,PGR_1ST_RECOVERY);
2627+ }
2628+ if (rtn != STATUS_OK)
2629+ {
2630+ rtn = RECOVERY_LOOP_FAIL;
2631+ break;
2632+ }
2633+
2634+ /*
2635+ * send recovery start request
2636+ */
2637+ PGRset_recovery_packet_no(&packet, RECOVERY_START_REQ );
2638+ status = send_packet(&fd,&packet);
2639+ if (status != STATUS_OK)
2640+ {
2641+ fprintf(stderr,"RECOVERY_START_REQ send error\n");
2642+ rtn = RECOVERY_LOOP_FAIL;
2643+ break;
2644+ }
2645+ rtn = RECOVERY_LOOP_CONTINUE;
2646+ break;
2647+ case RECOVERY_START_ANS :
2648+ /*
2649+ * sync master data for recovery
2650+ */
2651+ if (!strncmp(Local_Host_Name,packet.hostName,strlen(Local_Host_Name)))
2652+ {
2653+ strcpy(src,packet.pg_data);
2654+ need_sync_table_space = false;
2655+ }
2656+ else
2657+ {
2658+ sprintf(src,"%s:%s",packet.hostName,packet.pg_data);
2659+ need_sync_table_space = true;
2660+ }
2661+ if (PGR_Recovery_Mode == PGR_COLD_RECOVERY)
2662+ {
2663+ rtn = cold_recovery(src,&packet,need_sync_table_space,PGR_2ND_RECOVERY);
2664+ }
2665+ else
2666+ {
2667+ rtn = hot_recovery(&packet,PGR_2ND_RECOVERY);
2668+ }
2669+
2670+ if (rtn == STATUS_OK)
2671+ {
2672+ fprintf(stderr,"2nd recovery successed\n");
2673+ if (mode == PGR_HOT_RECOVERY)
2674+ {
2675+ rtn = RECOVERY_LOOP_CONTINUE;
2676+ /*
2677+ * send recovery queued data request
2678+ */
2679+ PGRset_recovery_packet_no(&packet, RECOVERY_QUEUE_DATA_REQ );
2680+ status = send_packet(&fd,&packet);
2681+ if (status != STATUS_OK)
2682+ {
2683+ fprintf(stderr,"RECOVERY_QUEUE_DATA_REQ send error\n");
2684+ rtn = RECOVERY_LOOP_FAIL;
2685+ break;
2686+ }
2687+ }
2688+ else
2689+ {
2690+ rtn = RECOVERY_LOOP_END;
2691+ }
2692+ }
2693+ else
2694+ {
2695+ fprintf(stderr,"2nd hot recovery failed\n");
2696+ rtn = RECOVERY_LOOP_FAIL;
2697+ }
2698+ break;
2699+ case RECOVERY_QUEUE_DATA_ANS:
2700+ rtn = RECOVERY_LOOP_END;
2701+ break;
2702+ case RECOVERY_ERROR_OCCUPIED:
2703+ fprintf(stderr,"already in use for another recovery\n");
2704+ rtn = RECOVERY_LOOP_FAIL;
2705+ break;
2706+ case RECOVERY_ERROR_CONNECTION:
2707+ fprintf(stderr,"connection failed\n");
2708+ rtn = RECOVERY_LOOP_FAIL;
2709+ break;
2710+ default:
2711+ fprintf(stderr,"unknown packet received\n");
2712+ rtn = RECOVERY_LOOP_FAIL;
2713+ break;
2714+ }
2715+
2716+ return rtn;
2717+}
2718+
2719+int
2720+PGR_Recovery_Main(int mode)
2721+{
2722+ int status;
2723+ int fd = -1;
2724+ int rtn;
2725+ ReplicateServerInfo * host;
2726+
2727+ memset(Local_Host_Name,0,sizeof(Local_Host_Name));
2728+ gethostname(Local_Host_Name,sizeof(Local_Host_Name));
2729+ PGR_Recovery_Mode = mode;
2730+
2731+ status = STATUS_ERROR;
2732+
2733+Retry_Start_Recovery:
2734+ host = PGR_get_replicate_server_info();
2735+ if (host == NULL)
2736+ {
2737+ if (Debug_pretty_print)
2738+ {
2739+ elog(DEBUG1,"not found replication server");
2740+ }
2741+ PGR_Set_Cluster_Status(STATUS_REPLICATED);
2742+ return STATUS_ERROR;
2743+ }
2744+
2745+ PGR_Set_Cluster_Status(STATUS_RECOVERY);
2746+ status = start_recovery_send(&fd,host);
2747+ if (status != STATUS_OK)
2748+ {
2749+ PGR_Set_Replication_Server_Status(host,DATA_ERR);
2750+ close(fd);
2751+ if (Debug_pretty_print)
2752+ {
2753+ elog(DEBUG1,"start recovery packet send error");
2754+ }
2755+ goto Retry_Start_Recovery;
2756+ }
2757+
2758+ for (;;)
2759+ {
2760+ fd_set rmask;
2761+ struct timeval timeout;
2762+
2763+ timeout.tv_sec = RECOVERY_TIMEOUT;
2764+ timeout.tv_usec = 0;
2765+
2766+ /*
2767+ * Wait for something to happen.
2768+ */
2769+ FD_ZERO(&rmask);
2770+ FD_SET(fd,&rmask);
2771+ rtn = select(fd+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
2772+ if (rtn && FD_ISSET(fd, &rmask))
2773+ {
2774+ status = recovery_loop(fd, mode);
2775+ if (status == RECOVERY_LOOP_CONTINUE)
2776+ {
2777+ continue;
2778+ }
2779+ else if (status == RECOVERY_LOOP_END)
2780+ {
2781+ close(fd);
2782+ break;
2783+ }
2784+ else if (status == RECOVERY_LOOP_FAIL)
2785+ {
2786+ status = stop_recovery_send(&fd,host);
2787+ PGR_Set_Cluster_Status(STATUS_REPLICATED);
2788+ if (status != STATUS_OK)
2789+ {
2790+ close(fd);
2791+ return STATUS_ERROR;
2792+ }
2793+ close(fd);
2794+ return STATUS_ERROR;
2795+ }
2796+ else
2797+ {
2798+ close(fd);
2799+ PGR_Set_Cluster_Status(STATUS_REPLICATED);
2800+ return STATUS_ERROR;
2801+ }
2802+ }
2803+ }
2804+ PGR_Set_Cluster_Status(STATUS_REPLICATED);
2805+ return STATUS_OK;
2806+}
2807+
2808+static void
2809+show_recovery_packet(RecoveryPacket * packet)
2810+{
2811+
2812+ if (Debug_pretty_print)
2813+ {
2814+ elog(DEBUG1,"no = %d",ntohs(packet->packet_no));
2815+ elog(DEBUG1,"max_connect = %d",ntohs(packet->max_connect));
2816+ elog(DEBUG1,"port = %d",ntohs(packet->port));
2817+ elog(DEBUG1,"recoveryPort = %d",ntohs(packet->recoveryPort));
2818+ if (packet->hostName != NULL)
2819+ elog(DEBUG1,"hostName = %s",packet->hostName);
2820+ if (packet->pg_data != NULL)
2821+ elog(DEBUG1,"pg_data = %s",packet->pg_data);
2822+ }
2823+}
2824+
2825+static void
2826+set_recovery_packet(RecoveryPacket * packet, int packet_no)
2827+{
2828+ struct passwd * pw = NULL;
2829+
2830+ if (packet == NULL)
2831+ {
2832+ return;
2833+ }
2834+ PGRset_recovery_packet_no(packet, packet_no );
2835+ packet->max_connect = htons(MaxBackends);
2836+ packet->port = htons(PostPortNumber);
2837+ packet->recoveryPort = htons(RecoveryPortNumber);
2838+ gethostname(packet->hostName,sizeof(packet->hostName));
2839+ memcpy(packet->pg_data,DataDir,sizeof(packet->pg_data));
2840+ memset(packet->userName,0,sizeof(packet->userName));
2841+ if ((pw = getpwuid(geteuid())) != NULL)
2842+ {
2843+ strncpy(packet->userName,pw->pw_name,sizeof(packet->userName));
2844+ }
2845+ else
2846+ {
2847+ cuserid(packet->userName);
2848+ }
2849+}
2850+
2851+static int
2852+sync_table_space(char * hostName, uint16_t portNum, char * userName, int stage)
2853+{
2854+ PGresult * res = (PGresult *)NULL;
2855+ int i = 0;
2856+ int row_num = 0;
2857+ char * location = NULL;
2858+ int rtn = STATUS_OK;
2859+
2860+ res = get_table_space_location(hostName, portNum, userName);
2861+ if (res == (PGresult *)NULL)
2862+ {
2863+ return STATUS_ERROR;
2864+ }
2865+ row_num = PQntuples(res);
2866+ for ( i = 0 ; i < row_num ; i ++)
2867+ {
2868+ location = PQgetvalue(res,i,0);
2869+ if (strlen(location) > 0 )
2870+ {
2871+ fprintf(stderr,"sync tablespace[%s]...",location);
2872+ rtn = rsync_table_space(hostName, location, stage);
2873+ fprintf(stderr,"%s\n", (rtn == STATUS_OK)?"OK":"NG");
2874+ }
2875+ }
2876+ if (res != (PGresult *)NULL)
2877+ {
2878+ PQclear(res);
2879+ }
2880+
2881+ return STATUS_OK;
2882+}
2883+
2884+static PGresult *
2885+get_table_space_location(char * hostName, uint16_t portNum, char * userName)
2886+{
2887+ PGresult * res = (PGresult *)NULL;
2888+ int cnt = 0;
2889+ PGconn * conn = (PGconn *)NULL;
2890+ char port[8];
2891+ char *database = "template1";
2892+ char * query = "select spclocation from pg_tablespace where spcname not like 'pg_%'";
2893+
2894+ if ( (hostName == NULL) ||
2895+ (portNum <= 0) ||
2896+ (userName == NULL))
2897+ {
2898+ return (PGresult *)NULL;
2899+ }
2900+ snprintf(port,sizeof(port),"%d", portNum);
2901+
2902+ /* create connection to master */
2903+ conn = PQsetdbLogin(hostName, port, NULL, NULL, database, userName, NULL);
2904+ if (conn == NULL)
2905+ {
2906+ return (PGresult *)NULL;
2907+ }
2908+ /* check to see that the backend Connection was successfully made */
2909+ cnt = 0;
2910+ while (PQstatus(conn) == CONNECTION_BAD)
2911+ {
2912+ if (conn != NULL)
2913+ {
2914+ PQfinish(conn);
2915+ }
2916+ if (cnt > MAX_RETRY_TIMES )
2917+ {
2918+ return (PGresult *)NULL;
2919+ }
2920+ conn = PQsetdbLogin(hostName, port, NULL, NULL, database, userName, NULL);
2921+ cnt ++;
2922+ }
2923+ res = PQexec(conn , query);
2924+ if ((res == NULL) ||
2925+ (PQresultStatus(res) != PGRES_TUPLES_OK))
2926+ {
2927+ PQclear(res);
2928+ res = (PGresult *)NULL;
2929+ }
2930+ if (conn != NULL)
2931+ {
2932+ PQfinish(conn);
2933+ }
2934+
2935+ return res;
2936+}
2937+
2938+static int
2939+rsync_table_space(char * hostName, char * location, int stage)
2940+{
2941+ int status = STATUS_OK;
2942+ char src_dir[256];
2943+ char dest_dir[256];
2944+ struct stat fstat;
2945+ int cnt = 0;
2946+
2947+ sprintf(src_dir,"%s:%s",hostName,location);
2948+ strncpy(dest_dir,location,sizeof(dest_dir));
2949+
2950+ if ((stage == PGR_1ST_RECOVERY) && (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP))
2951+ {
2952+ status = bkup_dir(location);
2953+ }
2954+ status = rsync_pg_data(src_dir, dest_dir);
2955+ if (status != STATUS_OK )
2956+ {
2957+ restore_dir(location);
2958+ return STATUS_ERROR;
2959+ }
2960+ /* check file status */
2961+ cnt = 0;
2962+ while (stat(location,&fstat) < 0)
2963+ {
2964+ if (cnt > MAX_RETRY_TIMES )
2965+ {
2966+ restore_dir(location);
2967+ return STATUS_ERROR;
2968+ }
2969+ cnt ++;
2970+ sleep(1);
2971+ }
2972+ if ((stage == PGR_2ND_RECOVERY) && (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP))
2973+ {
2974+ clear_bkup_dir(location);
2975+ }
2976+ return STATUS_OK;
2977+}
2978+
2979+static int
2980+cold_recovery(char * src, RecoveryPacket *packet, bool need_sync_table_space, int stage)
2981+{
2982+ int status = STATUS_OK;
2983+
2984+ status = recovery_rsync(src,DataDir,stage);
2985+ if (status != STATUS_OK)
2986+ {
2987+ if (Debug_pretty_print)
2988+ {
2989+ elog(DEBUG1,"%s rsync error",
2990+ ((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
2991+ }
2992+ return STATUS_ERROR;
2993+ }
2994+ if (need_sync_table_space == true)
2995+ {
2996+ status = sync_table_space(packet->hostName, ntohs(packet->port), packet->userName, stage);
2997+ fprintf(stderr,"%s sync_table_space ",
2998+ ((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
2999+ if (status != STATUS_OK)
3000+ {
3001+ if (Debug_pretty_print)
3002+ {
3003+ elog(DEBUG1,"%s sync table space error",
3004+ ((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
3005+ }
3006+ fprintf(stderr,"NG\n");
3007+ return STATUS_ERROR;
3008+ }
3009+ fprintf(stderr,"OK\n");
3010+ }
3011+ return STATUS_OK;
3012+}
3013+
3014+static int
3015+hot_recovery(RecoveryPacket *packet, int stage)
3016+{
3017+ int status = STATUS_OK;
3018+
3019+ fprintf(stderr,"%s restore from pg_dump ",
3020+ ((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
3021+ if (stage == PGR_1ST_RECOVERY)
3022+ {
3023+ status = restore_from_dumpall(packet->hostName, ntohs(packet->port), packet->userName );
3024+ }
3025+ else
3026+ {
3027+ status = restore_from_each_dump(packet->hostName, ntohs(packet->port), packet->userName );
3028+ }
3029+ if (status != STATUS_OK)
3030+ {
3031+ if (Debug_pretty_print)
3032+ {
3033+ elog(DEBUG1,"%s sync table space error",
3034+ ((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
3035+ }
3036+ fprintf(stderr,"->NG\n");
3037+ return STATUS_ERROR;
3038+ }
3039+ fprintf(stderr,"->OK\n");
3040+ return STATUS_OK;
3041+}
3042+
3043+static int
3044+restore_from_dumpall( char * hostName, uint16_t portNum, char * userName)
3045+{
3046+ int status;
3047+ char exec_command[512];
3048+ int pid;
3049+ char pg_dumpall[256];
3050+ char psql[256];
3051+ char *p=NULL;
3052+
3053+ /* set pg_dumpall path */
3054+ memset(pg_dumpall, 0, sizeof(pg_dumpall));
3055+ strncpy(pg_dumpall, PgDumpPath, sizeof(pg_dumpall));
3056+ p = strrchr(pg_dumpall,'/');
3057+ if (p == NULL)
3058+ {
3059+ return STATUS_ERROR;
3060+ }
3061+ p++;
3062+ strcpy(p,"pg_dumpall");
3063+
3064+ /* set psql path */
3065+ p = NULL;
3066+ memset(psql, 0, sizeof(psql));
3067+ strncpy(psql, PgDumpPath, sizeof(psql));
3068+ p = strrchr(psql,'/');
3069+ if (p == NULL)
3070+ {
3071+ return STATUS_ERROR;
3072+ }
3073+ p++;
3074+ strcpy(p,"psql");
3075+ p+=4;
3076+ *p = '\0';
3077+
3078+ snprintf(exec_command,sizeof(exec_command),"%s -i -o -c -h %s -p %d -U %s | %s -p %d template1",
3079+ pg_dumpall,
3080+ hostName,
3081+ portNum,
3082+ userName,
3083+ psql,
3084+ PostPortNumber
3085+ );
3086+ fprintf(stderr,"1st exec:[%s]\n",exec_command);
3087+
3088+ pid = fork();
3089+ if (pid == 0)
3090+ {
3091+ system(exec_command);
3092+ exit(0);
3093+ }
3094+ else
3095+ {
3096+ for (;;)
3097+ {
3098+ int result;
3099+ result = wait(&status);
3100+ if (result < 0)
3101+ {
3102+ if (errno == EINTR)
3103+ continue;
3104+ return STATUS_ERROR;
3105+ }
3106+
3107+ if (WIFEXITED(status) == 0 || WEXITSTATUS(status) != 0)
3108+ return STATUS_ERROR;
3109+ else
3110+ break;
3111+ }
3112+ }
3113+ return STATUS_OK;
3114+}
3115+
3116+static int
3117+restore_from_dump( char * hostName, uint16_t portNum, char * userName, char * dbName)
3118+{
3119+ int status;
3120+ char exec_command[512];
3121+ int pid= 0;
3122+ char pg_restore[256];
3123+ char *p=NULL;
3124+
3125+ /* set pq_restore path */
3126+ p = NULL;
3127+ memset(pg_restore, 0, sizeof(pg_restore));
3128+ strncpy(pg_restore, PgDumpPath, sizeof(pg_restore));
3129+ p = strrchr(pg_restore,'/');
3130+ if (p == NULL)
3131+ {
3132+ return STATUS_ERROR;
3133+ }
3134+ p++;
3135+ strcpy(p,"pg_restore");
3136+
3137+ snprintf(exec_command,sizeof(exec_command),"%s -i -Fc -o -b -h %s -p %d -U %s %s | %s -i -c -p %d -d %s",
3138+ PgDumpPath,
3139+ hostName,
3140+ portNum,
3141+ userName,
3142+ dbName,
3143+ pg_restore,
3144+ PostPortNumber,
3145+ dbName
3146+ );
3147+
3148+ fprintf(stderr,"2nd exec:[%s]\n",exec_command);
3149+ pid = fork();
3150+ if (pid == 0)
3151+ {
3152+ system(exec_command);
3153+ exit(0);
3154+ }
3155+ else
3156+ {
3157+ for (;;)
3158+ {
3159+ int result;
3160+ result = wait(&status);
3161+ if (result < 0)
3162+ {
3163+ if (errno == EINTR)
3164+ continue;
3165+ return STATUS_ERROR;
3166+ }
3167+
3168+ if (WIFEXITED(status) == 0 || WEXITSTATUS(status) != 0)
3169+ return STATUS_ERROR;
3170+ else
3171+ break;
3172+ }
3173+ }
3174+ return STATUS_OK;
3175+}
3176+
3177+static int
3178+restore_from_each_dump( char * hostName, uint16_t portNum, char * userName)
3179+{
3180+ PGresult * res = (PGresult *)NULL;
3181+ int i = 0;
3182+ int row_num = 0;
3183+ char * dbName = NULL;
3184+ int rtn = STATUS_OK;
3185+
3186+ res = get_dbName(hostName, portNum, userName);
3187+ if (res == (PGresult *)NULL)
3188+ {
3189+ return STATUS_ERROR;
3190+ }
3191+ row_num = PQntuples(res);
3192+ for ( i = 0 ; i < row_num ; i ++)
3193+ {
3194+ dbName = PQgetvalue(res,i,0);
3195+ if (strlen(dbName) > 0 )
3196+ {
3197+ if ((strcmp("template0",dbName)) &&
3198+ (strcmp("template1",dbName)))
3199+ {
3200+ rtn = restore_from_dump(hostName, portNum, userName, dbName);
3201+ fprintf(stderr,".");
3202+ }
3203+ }
3204+ }
3205+ if (res != (PGresult *)NULL)
3206+ {
3207+ PQclear(res);
3208+ }
3209+
3210+ return STATUS_OK;
3211+}
3212+
3213+static PGresult *
3214+get_dbName(char * hostName, uint16_t portNum, char * userName)
3215+{
3216+ PGresult * res = (PGresult *)NULL;
3217+ int cnt = 0;
3218+ PGconn * conn = (PGconn *)NULL;
3219+ char port[8];
3220+ char *database = "template1";
3221+ char * query = "SELECT datname FROM pg_database";
3222+
3223+ if ( (hostName == NULL) ||
3224+ (portNum <= 0) ||
3225+ (userName == NULL))
3226+ {
3227+ return (PGresult *)NULL;
3228+ }
3229+ snprintf(port,sizeof(port),"%d", portNum);
3230+
3231+ /* create connection to master */
3232+ conn = PQsetdbLogin(hostName, port, NULL, NULL, database, userName, NULL);
3233+ if (conn == NULL)
3234+ {
3235+ return (PGresult *)NULL;
3236+ }
3237+ /* check to see that the backend Connection was successfully made */
3238+ cnt = 0;
3239+ while (PQstatus(conn) == CONNECTION_BAD)
3240+ {
3241+ if (conn != NULL)
3242+ {
3243+ PQfinish(conn);
3244+ }
3245+ if (cnt > MAX_RETRY_TIMES )
3246+ {
3247+ return (PGresult *)NULL;
3248+ }
3249+ conn = PQsetdbLogin(hostName, port, NULL, NULL, database, userName, NULL);
3250+ cnt ++;
3251+ }
3252+ res = PQexec(conn , query);
3253+ if ((res == NULL) ||
3254+ (PQresultStatus(res) != PGRES_TUPLES_OK))
3255+ {
3256+ PQclear(res);
3257+ res = (PGresult *)NULL;
3258+ }
3259+ if (conn != NULL)
3260+ {
3261+ PQfinish(conn);
3262+ }
3263+
3264+ return res;
3265+}
3266+
3267+#endif /* USE_REPLICATION */
3268diff -aruN postgresql-8.2.4/src/backend/libpq/replicate.c pgcluster-1.7.0rc7/src/backend/libpq/replicate.c
3269--- postgresql-8.2.4/src/backend/libpq/replicate.c 1970-01-01 01:00:00.000000000 +0100
3270+++ pgcluster-1.7.0rc7/src/backend/libpq/replicate.c 2007-02-18 22:52:16.000000000 +0100
3271@@ -0,0 +1,4021 @@
3272+/*--------------------------------------------------------------------
3273+ * FILE:
3274+ * replicate.c
3275+ *
3276+ * NOTE:
3277+ * This file is composed of the functions to call with the source
3278+ * at backend for the replication.
3279+ * Low-level I/O functions called by these functions are
3280+ * contained in 'replicate_com.c'.
3281+ *
3282+ *--------------------------------------------------------------------
3283+ */
3284+
3285+/*--------------------------------------
3286+ * INTERFACE ROUTINES
3287+ *
3288+ * setup/teardown:
3289+ * PGR_Init_Replicate_Server_Data
3290+ * PGR_Set_Replicate_Server_Socket
3291+ * PGR_delete_shm
3292+ * I/O call:
3293+ * PGR_Send_Replicate_Command
3294+ * table handling:
3295+ * PGR_get_replicate_server_info
3296+ * status distinction:
3297+ * PGR_Is_Replicated_Command
3298+ * Xlog_Check_Replicatec
3299+ * replication main:
3300+ * PGR_replication
3301+ *-------------------------------------
3302+ */
3303+#ifdef USE_REPLICATION
3304+
3305+#include "postgres.h"
3306+
3307+#include <stdio.h>
3308+#include <strings.h>
3309+#include <signal.h>
3310+#include <errno.h>
3311+#include <fcntl.h>
3312+#include <grp.h>
3313+#include <unistd.h>
3314+#include <ctype.h>
3315+#include <time.h>
3316+#include <sys/time.h>
3317+#include <sys/types.h>
3318+#include <sys/stat.h>
3319+#include <sys/socket.h>
3320+#include <sys/ipc.h>
3321+#include <sys/shm.h>
3322+#include <netdb.h>
3323+#include <netinet/in.h>
3324+#ifdef HAVE_NETINET_TCP_H
3325+#include <netinet/tcp.h>
3326+#endif
3327+#include <arpa/inet.h>
3328+#include <sys/file.h>
3329+#include <netdb.h>
3330+
3331+#include "access/transam.h"
3332+#include "bootstrap/bootstrap.h"
3333+#include "libpq/libpq.h"
3334+#include "libpq/pqformat.h"
3335+#include "miscadmin.h"
3336+#include "commands/prepare.h"
3337+#include "nodes/nodes.h"
3338+#include "nodes/print.h"
3339+#include "utils/guc.h"
3340+#include "parser/parser.h"
3341+#include "access/xact.h"
3342+#include "storage/proc.h"
3343+#include "tcop/tcopprot.h"
3344+#include "tcop/utility.h"
3345+#include "postmaster/postmaster.h"
3346+#include "replicate.h"
3347+
3348+/* the source of this value is 'access/transam/varsup.c' */
3349+#define VAR_OID_PREFETCH (8192)
3350+
3351+PGR_ReplicationLog_Info ReplicationLog_Info;
3352+bool pgr_skip_in_prepared_query = false;
3353+
3354+/*--------------------------------------
3355+ * PROTOTYPE DECLARATION
3356+ *--------------------------------------
3357+ */
3358+static int set_command_args(char argv[PGR_CMD_ARG_NUM][256],char *str);
3359+static bool is_same_replication_server(ReplicateServerInfo * sp1, ReplicateServerInfo * sp2 );
3360+static ReplicateServerInfo * search_new_replication_server ( ReplicateServerInfo * sp , int socket_type );
3361+
3362+static int close_replicate_server_socket ( ReplicateServerInfo * sp , int socket_type );
3363+static int recv_message(int sock,char * buf,int flag);
3364+static int send_replicate_packet(int sock,ReplicateHeader * header, char * query_string);
3365+static bool is_copy_from(char * query);
3366+static int get_words( char words[MAX_WORDS][MAX_WORD_LETTERS] ,char * string,int length,int upper);
3367+static int get_table_name(char * table_name, char * query, int position );
3368+static bool is_not_replication_query(char * query_string, int query_len, char cmdType);
3369+static int Comp_Not_Replicate(PGR_Not_Replicate_Type * nrp1,PGR_Not_Replicate_Type* nrp2);
3370+static bool is_serial_control_query(char cmdType,char * query);
3371+static bool is_select_into_query(char cmdType,char * query);
3372+static int send_response_to_replication_server(const char * notice);
3373+static bool do_not_replication_command(const char * commandTag);
3374+static bool is_create_temp_table(char * query);
3375+static int add_replication_server(char * hostname,char * port, char * recovery_port);
3376+static int change_replication_server(char * hostname,char * port, char * recovery_port);
3377+static int get_new_replication_socket( ReplicateServerInfo * base, ReplicateServerInfo * sp, int socket_type);
3378+static char * get_hostName(char * str);
3379+static void set_response_mode(char * mode);
3380+static void PGR_Set_Current_Replication_Query_ID(char *id);
3381+#ifdef CONTROL_LOCK_CONFLICT
3382+static int wait_lock_answer(void);
3383+static int read_trigger(char * result, int buf_size);
3384+#endif /* CONTROL_LOCK_CONFLICT */
3385+static int check_conf_data(void);
3386+
3387+static unsigned int get_next_request_id(void);
3388+static bool is_this_query_replicated(char * id);
3389+static int set_replication_id(char * id);
3390+static int return_current_oid(void);
3391+static int sync_oid(char * oid);
3392+static bool is_concerned_with_prepared_select(char cmdType, char * query_string);
3393+static int skip_non_blank(char * ptr, int max);
3394+static int skip_blank(char * ptr, int max);
3395+static int parse_message(char * query_string);
3396+static bool is_prepared_as_select(char * query_string);
3397+static bool is_statement_as_select(char * query_string);
3398+
3399+extern ssize_t secure_read(Port *, void *, size_t);
3400+/*--------------------------------------------------------------------
3401+ * SYMBOL
3402+ * PGR_Init_Replicate_Server_Data()
3403+ * NOTES
3404+ * Read Configuration file and create ReplicateServerData table
3405+ * ARGS
3406+ * void
3407+ * RETURN
3408+ * OK: STATUS_OK
3409+ * NG: STATUS_ERROR
3410+ *--------------------------------------------------------------------
3411+ */
3412+int
3413+PGR_Init_Replicate_Server_Data(void)
3414+{
3415+ int table_size,str_size;
3416+ ReplicateServerInfo *sp;
3417+ PGR_Not_Replicate_Type * nrp;
3418+ ConfDataType * conf;
3419+ int rec_no,cnt;
3420+ unsigned int ip;
3421+ char HostName[HOSTNAME_MAX_LENGTH];
3422+
3423+ memset (HostName,0,sizeof(HostName));
3424+ if (ConfData_Top == (ConfDataType *)NULL)
3425+ {
3426+ return STATUS_ERROR;
3427+ }
3428+
3429+ /* allocate replication server information table */
3430+ table_size = sizeof(ReplicateServerInfo) * MAX_SERVER_NUM;
3431+ ReplicateServerShmid = shmget(IPC_PRIVATE,table_size,IPC_CREAT | IPC_EXCL | 0600);
3432+ if (ReplicateServerShmid < 0)
3433+ {
3434+ return STATUS_ERROR;
3435+ }
3436+ ReplicateServerData = (ReplicateServerInfo *)shmat(ReplicateServerShmid,0,0);
3437+ if (ReplicateServerData == (ReplicateServerInfo *)-1)
3438+ {
3439+ return STATUS_ERROR;
3440+ }
3441+ memset(ReplicateServerData,0,table_size);
3442+ sp = ReplicateServerData;
3443+
3444+ /* allocate cluster db information table */
3445+ ClusterDBShmid = shmget(IPC_PRIVATE,sizeof(ClusterDBInfo),IPC_CREAT | IPC_EXCL | 0600);
3446+ if (ClusterDBShmid < 0)
3447+ {
3448+ return STATUS_ERROR;
3449+ }
3450+ ClusterDBData = (ClusterDBInfo *)shmat(ClusterDBShmid,0,0);
3451+ if (ClusterDBData == (ClusterDBInfo *)-1)
3452+ {
3453+ return STATUS_ERROR;
3454+ }
3455+ memset(ClusterDBData,0,sizeof(ClusterDBInfo));
3456+ PGR_Set_Cluster_Status(STATUS_REPLICATED);
3457+
3458+ /* allocate partial replicate table */
3459+ table_size = sizeof(PGR_Not_Replicate_Type) * MAX_SERVER_NUM;
3460+ PGR_Not_Replicate = malloc(table_size);
3461+ if (PGR_Not_Replicate == (PGR_Not_Replicate_Type*)NULL)
3462+ {
3463+ return STATUS_ERROR;
3464+ }
3465+ memset(PGR_Not_Replicate, 0, table_size);
3466+ nrp = PGR_Not_Replicate;
3467+ cnt = 0;
3468+ conf = ConfData_Top;
3469+ while ((conf != (ConfDataType *)NULL) && (cnt < MAX_SERVER_NUM))
3470+ {
3471+ /* set replication server table */
3472+ if (!strcmp(conf->table,REPLICATION_SERVER_INFO_TAG))
3473+ {
3474+ rec_no = conf->rec_no;
3475+ cnt = rec_no;
3476+ if (!strcmp(conf->key,HOST_NAME_TAG))
3477+ {
3478+ strncpy((sp + rec_no)->hostName,conf->value,sizeof(sp->hostName));
3479+ conf = (ConfDataType *)conf->next;
3480+ continue;
3481+ }
3482+ if (!strcmp(conf->key,PORT_TAG))
3483+ {
3484+ (sp + rec_no)->portNumber = atoi(conf->value);
3485+ (sp + rec_no)->sock = -1;
3486+ if ((sp + rec_no)->useFlag != DATA_USE)
3487+ {
3488+ PGR_Set_Replication_Server_Status((sp+rec_no), DATA_INIT);
3489+ }
3490+ memset((sp + rec_no + 1)->hostName,0,sizeof(sp->hostName));
3491+ (sp + rec_no + 1)->useFlag = DATA_END;
3492+ conf = (ConfDataType *)conf->next;
3493+ continue;
3494+ }
3495+ if (!strcmp(conf->key,RECOVERY_PORT_TAG))
3496+ {
3497+ (sp + rec_no)->recoveryPortNumber = atoi(conf->value);
3498+ if ((sp + rec_no)->useFlag != DATA_USE)
3499+ {
3500+ PGR_Set_Replication_Server_Status((sp+rec_no), DATA_INIT);
3501+ }
3502+ memset((sp + rec_no + 1)->hostName,0,sizeof(sp->hostName));
3503+ (sp + rec_no + 1)->useFlag = DATA_END;
3504+ conf = (ConfDataType *)conf->next;
3505+ continue;
3506+ }
3507+ }
3508+ /* set part replication table */
3509+ if (!strcmp(conf->table,NOT_REPLICATE_INFO_TAG))
3510+ {
3511+ rec_no = conf->rec_no;
3512+ cnt = rec_no;
3513+ if (PGR_Not_Replicate_Rec_Num < rec_no +1)
3514+ {
3515+ PGR_Not_Replicate_Rec_Num = rec_no +1;
3516+ }
3517+ if (!strcmp(conf->key,DB_NAME_TAG))
3518+ {
3519+ strncpy((nrp + rec_no)->db_name,conf->value,sizeof(nrp->db_name));
3520+ conf = (ConfDataType *)conf->next;
3521+ continue;
3522+ }
3523+ if (!strcmp(conf->key,TABLE_NAME_TAG))
3524+ {
3525+ strncpy((nrp + rec_no)->table_name,conf->value,sizeof(nrp->table_name));
3526+ conf = (ConfDataType *)conf->next;
3527+ continue;
3528+ }
3529+ }
3530+ if (!strcmp(conf->key,HOST_NAME_TAG))
3531+ {
3532+ str_size = sizeof(HostName) ;
3533+ memset(HostName,0,str_size);
3534+ strncpy(HostName,conf->value,str_size-1);
3535+ }
3536+ else if (!strcmp(conf->key,RECOVERY_PORT_TAG))
3537+ {
3538+ RecoveryPortNumber = atoi(conf->value);
3539+ }
3540+ else if (!strcmp(conf->key,RSYNC_PATH_TAG))
3541+ {
3542+ str_size = strlen(conf->value) ;
3543+ RsyncPath = malloc(str_size + 1);
3544+ if (RsyncPath == NULL)
3545+ {
3546+ return STATUS_ERROR;
3547+ }
3548+ memset(RsyncPath,0,str_size + 1);
3549+ strncpy(RsyncPath,conf->value,str_size);
3550+ }
3551+ else if (!strcmp(conf->key,RSYNC_OPTION_TAG))
3552+ {
3553+ str_size = strlen(conf->value) ;
3554+ RsyncOption = malloc(str_size + 1);
3555+ if (RsyncOption == NULL)
3556+ {
3557+ return STATUS_ERROR;
3558+ }
3559+ memset(RsyncOption,0,str_size + 1);
3560+ strncpy(RsyncOption,conf->value,str_size);
3561+ }
3562+ else if (!strcmp(conf->key,RSYNC_COMPRESS_TAG))
3563+ {
3564+ if (!strcmp(conf->value, "yes"))
3565+ RsyncCompress = true;
3566+ else if (!strcmp(conf->value, "no"))
3567+ RsyncCompress = false;
3568+ }
3569+ else if (!strcmp(conf->key,PG_DUMP_PATH_TAG))
3570+ {
3571+ str_size = strlen(conf->value) ;
3572+ PgDumpPath = malloc(str_size + 1);
3573+ if (PgDumpPath == NULL)
3574+ {
3575+ return STATUS_ERROR;
3576+ }
3577+ memset(PgDumpPath,0,str_size + 1);
3578+ strncpy(PgDumpPath,conf->value,str_size);
3579+ }
3580+ else if (!strcmp(conf->key,STAND_ALONE_TAG))
3581+ {
3582+ PGR_Stand_Alone = (PGR_Stand_Alone_Type*)malloc(sizeof(PGR_Stand_Alone_Type));
3583+ if (PGR_Stand_Alone == (PGR_Stand_Alone_Type *)NULL)
3584+ {
3585+ return STATUS_ERROR;
3586+ }
3587+ PGR_Stand_Alone->is_stand_alone = false;
3588+ if (!strcmp(conf->value,READ_WRITE_IF_STAND_ALONE))
3589+ {
3590+ PGR_Stand_Alone->permit = PERMIT_READ_WRITE;
3591+ }
3592+ else
3593+ {
3594+ PGR_Stand_Alone->permit = PERMIT_READ_ONLY;
3595+ }
3596+ }
3597+ else if (!strcmp(conf->key,TIMEOUT_TAG))
3598+ {
3599+ /* get repliaction timeout */
3600+ PGR_Replication_Timeout = PGRget_time_value(conf->value);
3601+ if ((PGR_Replication_Timeout < 1) || (PGR_Replication_Timeout > 3600))
3602+ {
3603+ fprintf(stderr,"%s is out of range. It should be between 1sec-1hr.\n",TIMEOUT_TAG);
3604+ return STATUS_ERROR;
3605+ }
3606+ }
3607+ else if (!strcmp(conf->key,LIFECHECK_TIMEOUT_TAG))
3608+ {
3609+ /* get lifecheck timeout */
3610+ PGR_Lifecheck_Timeout = PGRget_time_value(conf->value);
3611+ if ((PGR_Lifecheck_Timeout < 1) || (PGR_Lifecheck_Timeout > 3600))
3612+ {
3613+ fprintf(stderr,"%s is out of range. It should be between 1sec-1hr.\n",LIFECHECK_TIMEOUT_TAG);
3614+ return STATUS_ERROR;
3615+ }
3616+ }
3617+ else if (!strcmp(conf->key,LIFECHECK_INTERVAL_TAG))
3618+ {
3619+ /* get lifecheck interval */
3620+ PGR_Lifecheck_Interval = PGRget_time_value(conf->value);
3621+ if ((PGR_Lifecheck_Interval < 1) || (PGR_Lifecheck_Interval > 3600))
3622+ {
3623+ fprintf(stderr,"%s is out of range. It should between 1sec-1hr.\n",LIFECHECK_INTERVAL_TAG);
3624+ return STATUS_ERROR;
3625+ }
3626+ }
3627+ conf = (ConfDataType *)conf->next;
3628+ }
3629+ TransactionSock = -1;
3630+ ReplicateCurrentTime = (ReplicateNow *)malloc(sizeof(ReplicateNow));
3631+ if (ReplicateCurrentTime == (ReplicateNow *)NULL)
3632+ {
3633+ return STATUS_ERROR;
3634+ }
3635+ memset(ReplicateCurrentTime,0,sizeof(ReplicateNow));
3636+
3637+ PGRCopyData = (CopyData *)malloc(sizeof(CopyData));
3638+ if (PGRCopyData == (CopyData *)NULL)
3639+ {
3640+ return STATUS_ERROR;
3641+ }
3642+ memset(PGRCopyData,0,sizeof(CopyData));
3643+
3644+ if (PGR_Not_Replicate_Rec_Num == 0)
3645+ {
3646+ free(PGR_Not_Replicate);
3647+ PGR_Not_Replicate = NULL;
3648+ }
3649+ else
3650+ {
3651+ qsort((char *)PGR_Not_Replicate,PGR_Not_Replicate_Rec_Num,sizeof(PGR_Not_Replicate_Type), (int (*)(const void*,const void*))Comp_Not_Replicate);
3652+ }
3653+
3654+ PGRSelfHostName = malloc(HOSTNAME_MAX_LENGTH);
3655+ if (PGRSelfHostName == NULL)
3656+ {
3657+ return STATUS_ERROR;
3658+ }
3659+ memset(PGRSelfHostName,0,HOSTNAME_MAX_LENGTH);
3660+
3661+ PGR_password = malloc(sizeof(PGR_Password_Info));
3662+ if (PGR_password == NULL)
3663+ {
3664+ return STATUS_ERROR;
3665+ }
3666+ memset(PGR_password,0,sizeof(PGR_Password_Info));
3667+ PGR_password->password = malloc(PASSWORD_MAX_LENGTH);
3668+ if (PGR_password->password == NULL)
3669+ {
3670+ return STATUS_ERROR;
3671+ }
3672+ memset(PGR_password->password,0,PASSWORD_MAX_LENGTH);
3673+
3674+ if (HostName[0] == 0)
3675+ {
3676+ if (gethostname(HostName,HOSTNAME_MAX_LENGTH) < 0)
3677+ {
3678+ return STATUS_ERROR;
3679+ }
3680+ }
3681+ ip=PGRget_ip_by_name(HostName);
3682+
3683+ sprintf(PGRSelfHostName,
3684+ "%d.%d.%d.%d",
3685+ (ip ) & 0xff ,
3686+ (ip >> 8) & 0xff ,
3687+ (ip >> 16) & 0xff ,
3688+ (ip >> 24) & 0xff );
3689+ if (RsyncPath == NULL)
3690+ {
3691+ RsyncPath = strdup(DEFAULT_RSYNC);
3692+ }
3693+ if (PgDumpPath == NULL)
3694+ {
3695+ PgDumpPath = strdup(DEFAULT_PG_DUMP);
3696+ }
3697+
3698+ return (check_conf_data());
3699+}
3700+
3701+static int
3702+check_conf_data(void)
3703+{
3704+ int i = 0;
3705+ ReplicateServerInfo *sp;
3706+ sp = ReplicateServerData;
3707+ while ((sp + i)->useFlag != DATA_END)
3708+ {
3709+ if (*((sp + i)->hostName) == 0)
3710+ {
3711+ fprintf(stderr,"Hostname of replication server is not valid.\n");
3712+ return STATUS_ERROR;
3713+ }
3714+ if ((sp + i)->portNumber < 1024)
3715+ {
3716+ fprintf(stderr,"Replication Port of replication server is not valid. It's required larger than 1024.\n");
3717+ return STATUS_ERROR;
3718+ }
3719+ if ((sp + i)->recoveryPortNumber < 1024)
3720+ {
3721+ fprintf(stderr,"RecoveryPort of replication server is not valid. It's required larger than 1024.\n");
3722+ return STATUS_ERROR;
3723+ }
3724+ if ((sp + i)->portNumber == (sp + i)->recoveryPortNumber)
3725+ {
3726+ fprintf(stderr,"Replication Port and RecoveryPort is conflicted.\n");
3727+ return STATUS_ERROR;
3728+ }
3729+ i++;
3730+ }
3731+ if (RecoveryPortNumber < 1024)
3732+ {
3733+ fprintf(stderr,"RecoveryPort of Cluster DB is not valid. It's required larger than 1024.\n");
3734+ return STATUS_ERROR;
3735+ }
3736+ if (PGR_Stand_Alone == NULL)
3737+ {
3738+ fprintf(stderr,"Stand Alone Mode is not specified.\n");
3739+ return STATUS_ERROR;
3740+ }
3741+ if (RsyncOption == NULL)
3742+ {
3743+ fprintf(stderr,"Option of rsync command is not specified.\n");
3744+ return STATUS_ERROR;
3745+ }
3746+ if (strlen(PGRSelfHostName) <= 0)
3747+ {
3748+ fprintf(stderr,"Hostname of Cluster DB is not valid.\n");
3749+ return STATUS_ERROR;
3750+ }
3751+ if (PGR_Lifecheck_Timeout > PGR_Lifecheck_Interval)
3752+ {
3753+ fprintf(stderr,"The lifecheck timeouti(%d) should be shorter than interval(%d).\n",PGR_Lifecheck_Timeout,PGR_Lifecheck_Interval);
3754+ return STATUS_ERROR;
3755+ }
3756+ return STATUS_OK;
3757+ }
3758+
3759+/*--------------------------------------------------------------------
3760+ * SYMBOL
3761+ * PGR_Set_Replicate_Server_Socket()
3762+ * NOTES
3763+ * Create new socket and set ReplicateServerData table
3764+ * ARGS
3765+ * void
3766+ * RETURN
3767+ * OK: STATUS_OK
3768+ * NG: STATUS_ERROR
3769+ *--------------------------------------------------------------------
3770+ */
3771+int
3772+PGR_Set_Replicate_Server_Socket(void)
3773+{
3774+ ReplicateServerInfo * sp;
3775+ if (ReplicateServerData == NULL)
3776+ {
3777+ return STATUS_ERROR;
3778+ }
3779+ sp = ReplicateServerData;
3780+ while (sp->useFlag != DATA_END){
3781+ sp->sock = -1;
3782+ PGR_Create_Socket_Connect(&(sp->sock),sp->hostName,sp->portNumber);
3783+ sp ++;
3784+ }
3785+ return STATUS_OK;
3786+}
3787+
3788+/*--------------------------------------------------------------------
3789+ * SYMBOL
3790+ * PGR_get_replicate_server_socket()
3791+ * NOTES
3792+ * search or create a socket to connect with the replication server
3793+ * ARGS
3794+ * ReplicateServerInfo * sp: replication server data (I)
3795+ * int socket_type: socket type (I)
3796+ * -PGR_TRANSACTION_SOCKET:
3797+ * -PGR_QUERY_SOCKET:
3798+ * RETURN
3799+ * OK: >0(socket)
3800+ * NG: -1
3801+ *--------------------------------------------------------------------
3802+ */
3803+int
3804+PGR_get_replicate_server_socket ( ReplicateServerInfo * sp , int socket_type )
3805+{
3806+ ReplicateServerInfo * tmp;
3807+ tmp = sp;
3808+ if (tmp == (ReplicateServerInfo *) NULL)
3809+ {
3810+ return -1;
3811+ }
3812+ if (tmp->hostName[0] == '\0')
3813+ {
3814+ return -1;
3815+ }
3816+
3817+ if (TransactionSock != -1)
3818+ {
3819+ return TransactionSock;
3820+ }
3821+
3822+ while(PGR_Create_Socket_Connect(&TransactionSock,tmp->hostName,tmp->portNumber) != STATUS_OK)
3823+ {
3824+ close(TransactionSock);
3825+ TransactionSock = -1;
3826+ PGR_Set_Replication_Server_Status(tmp, DATA_ERR);
3827+ usleep(20);
3828+ tmp = PGR_get_replicate_server_info();
3829+ if (tmp == (ReplicateServerInfo *)NULL)
3830+ {
3831+ return -1;
3832+ }
3833+ PGR_Set_Replication_Server_Status(tmp, DATA_USE);
3834+ usleep(10);
3835+ }
3836+ return TransactionSock;
3837+}
3838+
3839+/*--------------------------------------------------------------------
3840+ * SYMBOL
3841+ * close_replicate_server_socket()
3842+ * NOTES
3843+ * close the socket connected with the replication server
3844+ * ARGS
3845+ * ReplicateServerInfo * sp: replication server data (I)
3846+ * int socket_type: socket type (I)
3847+ * -PGR_TRANSACTION_SOCKET:
3848+ * -PGR_QUERY_SOCKET:
3849+ * RETURN
3850+ * OK: STATUS_OK
3851+ * NG: STATUS_ERROR
3852+ *--------------------------------------------------------------------
3853+ */
3854+static int
3855+close_replicate_server_socket ( ReplicateServerInfo * sp , int socket_type )
3856+{
3857+ if (sp == (ReplicateServerInfo *)NULL )
3858+ {
3859+ return STATUS_ERROR;
3860+ }
3861+ if (sp->hostName[0] == '\0')
3862+ {
3863+ return STATUS_ERROR;
3864+ }
3865+ if (TransactionSock != -1)
3866+ {
3867+ PGR_Close_Sock(&(TransactionSock));
3868+ TransactionSock = -1;
3869+ }
3870+ switch (socket_type)
3871+ {
3872+ case PGR_TRANSACTION_SOCKET:
3873+ if (TransactionSock != -1)
3874+ {
3875+ PGR_Close_Sock(&(TransactionSock));
3876+ }
3877+ TransactionSock = -1;
3878+ sp->sock = -1;
3879+ break;
3880+ case PGR_QUERY_SOCKET:
3881+ if (sp->sock != -1)
3882+ {
3883+ PGR_Close_Sock(&(sp->sock));
3884+ }
3885+ sp->sock = -1;
3886+ break;
3887+ }
3888+ PGR_Set_Replication_Server_Status(sp, DATA_INIT);
3889+ return STATUS_OK;
3890+}
3891+
3892+static bool
3893+is_same_replication_server(ReplicateServerInfo * sp1, ReplicateServerInfo * sp2 )
3894+{
3895+ if ((sp1 == NULL) || (sp2 == NULL))
3896+ {
3897+ return false;
3898+ }
3899+ if ((!strcmp(sp1->hostName,sp2->hostName)) &&
3900+ (sp1->portNumber == sp2->portNumber) &&
3901+ (sp1->recoveryPortNumber == sp2->recoveryPortNumber))
3902+ {
3903+ return true;
3904+ }
3905+ return false;
3906+}
3907+
3908+static ReplicateServerInfo *
3909+search_new_replication_server ( ReplicateServerInfo * sp , int socket_type )
3910+{
3911+ ReplicateHeader dummy_header;
3912+ ReplicateServerInfo * rs_tbl;
3913+ char command[256];
3914+ int sock = -1;
3915+ int cnt = 0;
3916+
3917+ if ((ReplicateServerData == NULL) || ( sp == NULL))
3918+ {
3919+ return NULL;
3920+ }
3921+ rs_tbl = sp;
3922+ close_replicate_server_socket ( sp , socket_type);
3923+ sp ++;
3924+ while (is_same_replication_server(sp,rs_tbl) != true)
3925+ {
3926+ if (sp->useFlag == DATA_END)
3927+ {
3928+ sp = ReplicateServerData;
3929+ }
3930+ sock = PGR_get_replicate_server_socket( sp , socket_type);
3931+ if (sock < 0 )
3932+ {
3933+ if (is_same_replication_server(sp,rs_tbl) == true)
3934+ {
3935+ return NULL;
3936+ }
3937+ else
3938+ {
3939+ sp++;
3940+ }
3941+ continue;
3942+ }
3943+ memset(&dummy_header, 0, sizeof(ReplicateHeader));
3944+ memset(command,0,sizeof(command));
3945+ snprintf(command,sizeof(command)-1,"SELECT %s(%d,%s,%d,%d)",
3946+ PGR_SYSTEM_COMMAND_FUNC,
3947+ PGR_CHANGE_REPLICATION_SERVER_FUNC_NO,
3948+ sp->hostName,
3949+ sp->portNumber,
3950+ sp->recoveryPortNumber);
3951+ dummy_header.cmdSys = CMD_SYS_CALL;
3952+ dummy_header.cmdSts = CMD_STS_NOTICE;
3953+ dummy_header.query_size = htonl(strlen(command));
3954+ if (send_replicate_packet(sock,&dummy_header,command) != STATUS_OK)
3955+ {
3956+ cnt ++;
3957+ close_replicate_server_socket ( sp , socket_type);
3958+ PGR_Set_Replication_Server_Status(sp, DATA_ERR);
3959+ }
3960+ else
3961+ {
3962+ PGR_Set_Replication_Server_Status(sp, DATA_USE);
3963+ return sp;
3964+ }
3965+ if (cnt > MAX_RETRY_TIMES )
3966+ {
3967+ sp++;
3968+ cnt = 0;
3969+ }
3970+ else
3971+ {
3972+ continue;
3973+ }
3974+ }
3975+ return NULL;
3976+}
3977+
/*
 * get_table_name()
 *
 * Copy the 'position'-th word of 'query' (counted from 1) into
 * 'table_name'.  Words are separated by white space; a word also ends in
 * front of ';' or '(', and no later word is looked at once one of these
 * two characters was reached.
 *
 * table_name: output buffer, always '\0' terminated on STATUS_OK; it is
 *             empty when the query has no such word
 * query:      '\0' terminated query string
 * position:   number of the wanted word, 1 or greater
 *
 * The query is scanned up to its terminator.  The former character counter
 * was advanced once per character and once more per word, so it ran ahead
 * of the scan and could end the loop before a short last word was copied
 * (e.g. "DELETE FROM t", position 3).
 *
 * NOTE(review): the size of table_name is not known here, so a word longer
 * than the caller's buffer still overruns it; confirm the buffer sizes at
 * the call sites.
 *
 * Returns STATUS_ERROR for a NULL argument or a position below 1,
 * STATUS_OK otherwise.
 */
static int
get_table_name(char * table_name, char * query, int position )
{
	char	   *src;
	char	   *dst;
	int			word_no;

	if ((table_name == NULL) || (query == NULL) || (position < 1))
	{
		return STATUS_ERROR;
	}

	src = query;
	dst = table_name;
	*dst = '\0';

	for (word_no = 1 ; word_no <= position ; word_no ++)
	{
		/* skip the separators in front of the word */
		while (isspace((unsigned char) *src))
		{
			src++;
		}
		/* walk over the word and copy it when it is the wanted one */
		while ((*src != '\0') && (! isspace((unsigned char) *src)) &&
			   (*src != ';') && (*src != '('))
		{
			if (word_no == position)
			{
				*dst = *src;
				dst++;
			}
			src++;
		}
		/* nothing can follow the end of the query, ';' or '(' */
		if ((*src == '\0') || (*src == ';') || (*src == '('))
		{
			break;
		}
	}
	*dst = '\0';
	return STATUS_OK;
}
4023+
4024+static bool
4025+is_not_replication_query(char * query_string, int query_len, char cmdType)
4026+{
4027+ PGR_Not_Replicate_Type key;
4028+ PGR_Not_Replicate_Type * ptr = NULL;
4029+
4030+ if (PGR_Not_Replicate_Rec_Num <= 0)
4031+ return false;
4032+ if (query_string == NULL)
4033+ return true;
4034+ memset(&key,0,sizeof(PGR_Not_Replicate_Type));
4035+ strncpy(key.db_name ,(char *)(MyProcPort->database_name),sizeof(key.db_name)-1);
4036+ switch (cmdType)
4037+ {
4038+ case CMD_TYPE_INSERT:
4039+ get_table_name(key.table_name,query_string,3);
4040+ break;
4041+ case CMD_TYPE_UPDATE:
4042+ get_table_name(key.table_name,query_string,2);
4043+ break;
4044+ case CMD_TYPE_DELETE:
4045+ get_table_name(key.table_name,query_string,3);
4046+ break;
4047+ case CMD_TYPE_COPY:
4048+ get_table_name(key.table_name,query_string,2);
4049+ break;
4050+ default:
4051+ return false;
4052+ }
4053+ ptr = (PGR_Not_Replicate_Type*)bsearch((void*)&key,(void*)PGR_Not_Replicate,PGR_Not_Replicate_Rec_Num,sizeof(PGR_Not_Replicate_Type), (int (*)(const void*,const void*))Comp_Not_Replicate);
4054+ if (ptr == NULL)
4055+ {
4056+ return false;
4057+ }
4058+ return true;
4059+
4060+}
4061+
4062+/*--------------------------------------------------------------------
4063+ * SYMBOL
4064+ * PGR_Send_Replicate_Command()
4065+ * NOTES
4066+ * create new socket
4067+ * ARGS
4068+ * char * query_string: query strings (I)
4069+ * char cmdSts:
4070+ * char cmdType:
4071+ * RETURN
4072+ * OK: result
4073+ * NG: NULL
4074+ *--------------------------------------------------------------------
4075+ */
4076+char *
4077+PGR_Send_Replicate_Command(char * query_string, int query_len, char cmdSts ,char cmdType)
4078+{
4079+ int sock = -1;
4080+ int cnt = 0;
4081+ ReplicateHeader header;
4082+ char * serverName = NULL;
4083+ int portNumber=0;
4084+ char * result = NULL;
4085+ ReplicateServerInfo * sp = NULL;
4086+ ReplicateServerInfo * base = NULL;
4087+ int socket_type = 0;
4088+ char argv[ PGR_CMD_ARG_NUM ][256];
4089+ int argc = 0;
4090+ int func_no = 0;
4091+ int check_flag =0;
4092+ bool in_transaction = false;
4093+
4094+
4095+ /*
4096+ * check query string
4097+ */
4098+ if ((query_string == NULL) ||
4099+ (query_len < 0))
4100+ {
4101+ return NULL;
4102+ }
4103+ /* check not replication query */
4104+ if (is_not_replication_query(query_string, query_len, cmdType) == true)
4105+ {
4106+ PGR_Copy_Data_Need_Replicate = false;
4107+ return NULL;
4108+ }
4109+
4110+ if ((cmdSts == CMD_STS_TRANSACTION ) ||
4111+ (cmdSts == CMD_STS_SET_SESSION_AUTHORIZATION ) ||
4112+ (cmdSts == CMD_STS_TEMP_TABLE ))
4113+ {
4114+ socket_type = PGR_TRANSACTION_SOCKET ;
4115+ }
4116+ else
4117+ {
4118+ socket_type = PGR_QUERY_SOCKET ;
4119+ }
4120+
4121+ if(cmdSts==CMD_STS_TRANSACTION
4122+ && (cmdType!=CMD_TYPE_BEGIN && cmdType!=CMD_TYPE_ROLLBACK))
4123+ {
4124+ in_transaction = true;
4125+ }
4126+
4127+ sp = PGR_get_replicate_server_info();
4128+ if (sp == NULL)
4129+ {
4130+ if (Debug_pretty_print)
4131+ elog(DEBUG1,"PGR_get_replicate_server_info get error");
4132+ return NULL;
4133+ }
4134+ sock = PGR_get_replicate_server_socket( sp , socket_type);
4135+ if (sock < 0)
4136+ {
4137+ if (Debug_pretty_print)
4138+ elog(DEBUG1,"PGR_get_replicate_server_socket fail");
4139+ return NULL;
4140+ }
4141+ result = malloc(PGR_MESSAGE_BUFSIZE + 4);
4142+ if (result == NULL)
4143+ {
4144+ return NULL;
4145+ }
4146+
4147+ serverName = sp->hostName;
4148+ portNumber = (int)sp->portNumber;
4149+ memset(&header,0,sizeof(ReplicateHeader));
4150+
4151+ header.cmdSts = cmdSts;
4152+ header.cmdType = cmdType;
4153+ header.port = htons(PostPortNumber);
4154+ header.pid = htons(getpid());
4155+ header.query_size = htonl(query_len);
4156+ strncpy(header.dbName ,(char *)(MyProcPort->database_name),sizeof(header.dbName)-1);
4157+ strncpy(header.userName , (char *)(MyProcPort->user_name),sizeof(header.userName)-1);
4158+ strncpy(header.password , PGR_password->password, PASSWORD_MAX_LENGTH );
4159+ memcpy(header.md5Salt ,MyProcPort->md5Salt, sizeof(header.md5Salt));
4160+ memcpy(header.cryptSalt ,MyProcPort->cryptSalt, sizeof(header.cryptSalt));
4161+ header.request_id = htonl(get_next_request_id());
4162+ header.rlog = 0;
4163+
4164+ if (PGRSelfHostName != NULL)
4165+ {
4166+ strncpy(header.from_host, PGRSelfHostName, HOSTNAME_MAX_LENGTH);
4167+ }
4168+
4169+ base = sp;
4170+ PGR_Sock_To_Replication_Server = sock;
4171+
4172+retry_send_prereplicate_packet:
4173+
4174+ memset(result,0,PGR_MESSAGE_BUFSIZE + 4);
4175+ cnt = 0;
4176+ header.cmdSys=CMD_SYS_PREREPLICATE;
4177+
4178+ while (send_replicate_packet(sock,&header,query_string) != STATUS_OK)
4179+ {
4180+ cnt++;
4181+ if (cnt >= MAX_RETRY_TIMES )
4182+ {
4183+ sock = get_new_replication_socket( base, sp, socket_type);
4184+ if (sock < 0)
4185+ {
4186+ if (Debug_pretty_print)
4187+ elog(DEBUG1,"all replication servers may be down");
4188+ PGR_Stand_Alone->is_stand_alone = true;
4189+ if (cmdSts == CMD_STS_TRANSACTION )
4190+ {
4191+ strcpy(result,PGR_REPLICATION_ABORT_MSG);
4192+ return result;
4193+ }
4194+ free(result);
4195+ result = NULL;
4196+ return NULL;
4197+
4198+ }
4199+ if(in_transaction)
4200+ {
4201+ elog(ERROR,"replicate server down during replicating transaction. aborted.");
4202+ free(result);
4203+ return NULL;
4204+ }
4205+ PGR_Sock_To_Replication_Server = sock;
4206+ cnt = 0;
4207+ }
4208+ }
4209+
4210+ memset(result,0,PGR_MESSAGE_BUFSIZE);
4211+ if (PGR_recv_replicate_result(sock,result,0) < 0)
4212+ {
4213+
4214+ sock = get_new_replication_socket( base, sp, socket_type);
4215+ if (sock < 0)
4216+ {
4217+ if (Debug_pretty_print)
4218+ elog(DEBUG1,"all replication servers may be down");
4219+ PGR_Stand_Alone->is_stand_alone = true;
4220+
4221+ if (cmdSts == CMD_STS_TRANSACTION )
4222+ {
4223+ strcpy(result,PGR_REPLICATION_ABORT_MSG);
4224+ return result;
4225+ }
4226+ if(result!=NULL) {
4227+ free(result);
4228+ result = NULL;
4229+ }
4230+ return NULL;
4231+ }
4232+ PGR_Sock_To_Replication_Server = sock;
4233+ /* replication server should be down */
4234+
4235+ if(in_transaction)
4236+ {
4237+ elog(ERROR,"replicate server down during replicating transaction. aborted.");
4238+ free(result);
4239+ return NULL;
4240+ }
4241+
4242+ goto retry_send_prereplicate_packet;
4243+ }
4244+
4245+
4246+ argc = set_command_args(argv,result);
4247+ func_no=atoi(argv[0]);
4248+ if(func_no==0) {
4249+ /* this server is not primary replicate server*/
4250+ sock=-1;
4251+ goto retry_send_prereplicate_packet;
4252+ }
4253+retry_send_replicate_packet:
4254+
4255+ memset(result,0,PGR_MESSAGE_BUFSIZE + 4);
4256+ cnt = 0;
4257+ header.cmdSys = CMD_SYS_REPLICATE;
4258+ while (send_replicate_packet(sock,&header,query_string) != STATUS_OK)
4259+ {
4260+ if (cnt > MAX_RETRY_TIMES )
4261+ {
4262+ sock = get_new_replication_socket( base, sp, socket_type);
4263+ if (sock < 0)
4264+ {
4265+ if (Debug_pretty_print)
4266+ elog(DEBUG1,"all replication servers may be down");
4267+ PGR_Stand_Alone->is_stand_alone = true;
4268+ if (cmdSts == CMD_STS_TRANSACTION )
4269+ {
4270+ strcpy(result,PGR_REPLICATION_ABORT_MSG);
4271+ return result;
4272+ }
4273+ free(result);
4274+ result = NULL;
4275+ return NULL;
4276+
4277+ }
4278+ PGR_Sock_To_Replication_Server = sock;
4279+ header.rlog = CONNECTION_SUSPENDED_TYPE;
4280+ cnt = 0;
4281+ }
4282+ cnt ++;
4283+ }
4284+
4285+ memset(result,0,PGR_MESSAGE_BUFSIZE);
4286+ if (PGR_recv_replicate_result(sock,result,0) < 0)
4287+ {
4288+ /* replication server should be down */
4289+ sock = get_new_replication_socket( base, sp, socket_type);
4290+ if (sock < 0)
4291+ {
4292+ if (Debug_pretty_print)
4293+ elog(DEBUG1,"all replication servers may be down");
4294+ PGR_Stand_Alone->is_stand_alone = true;
4295+
4296+ if (cmdSts == CMD_STS_TRANSACTION )
4297+ {
4298+ strcpy(result,PGR_REPLICATION_ABORT_MSG);
4299+ return result;
4300+ }
4301+ if(result!=NULL) {
4302+ free(result);
4303+ result = NULL;
4304+ }
4305+ return NULL;
4306+ }
4307+ PGR_Sock_To_Replication_Server = sock;
4308+ header.rlog = CONNECTION_SUSPENDED_TYPE;
4309+
4310+ goto retry_send_replicate_packet;
4311+ }
4312+
4313+ argc = set_command_args(argv,result);
4314+ if (argc >= 1)
4315+ {
4316+ func_no = atoi(argv[0]);
4317+ if (func_no == PGR_SET_CURRENT_TIME_FUNC_NO)
4318+ {
4319+ if(! in_transaction)
4320+ PGR_Set_Current_Time(argv[1],argv[2]);
4321+ set_replication_id(argv[3]);
4322+ set_response_mode(argv[4]);
4323+ PGR_Set_Current_Replication_Query_ID(argv[5]);
4324+ }
4325+ else if (func_no == PGR_NOTICE_DEADLOCK_DETECTION_FUNC_NO)
4326+ {
4327+ memset(result,0,PGR_MESSAGE_BUFSIZE);
4328+ strcpy(result,PGR_DEADLOCK_DETECTION_MSG);
4329+ }
4330+ else if (func_no == PGR_SET_CURRENT_REPLICATION_QUERY_ID_NO)
4331+ {
4332+ PGR_Set_Current_Replication_Query_ID(argv[1]);
4333+ }
4334+ else if (func_no == PGR_QUERY_CONFIRM_ANSWER_FUNC_NO)
4335+ {
4336+ check_flag = atoi(argv[1]);
4337+ if (check_flag == PGR_ALREADY_COMMITTED )
4338+ {
4339+ if(! in_transaction)
4340+ PGR_Set_Current_Time(argv[2],argv[3]);
4341+ set_replication_id(argv[4]);
4342+ }
4343+ else
4344+ {
4345+ if(! in_transaction)
4346+ PGR_Set_Current_Time(argv[1],argv[2]);
4347+ set_replication_id(argv[3]);
4348+ /* this query is not replicated */
4349+ /*
4350+ free(result);
4351+ return NULL;
4352+ */
4353+ }
4354+ }
4355+ }
4356+ return result;
4357+}
4358+
4359+uint32_t
4360+PGRget_replication_id(void)
4361+{
4362+ return (ReplicationLog_Info.PGR_Replicate_ID);
4363+}
4364+
4365+static int
4366+set_replication_id(char * id)
4367+{
4368+ uint32_t rid=0;
4369+ uint32_t saved_id;
4370+ if (id == NULL)
4371+ {
4372+ return STATUS_ERROR;
4373+ }
4374+
4375+ rid=(uint32_t)atol(id);
4376+ if(rid==0)
4377+ return STATUS_OK;
4378+
4379+ needToUpdateReplicateIdOnNextQueryIsDone=true;
4380+ saved_id=ReplicationLog_Info.PGR_Replicate_ID;
4381+
4382+ ReplicationLog_Info.PGR_Replicate_ID =rid;
4383+
4384+
4385+ /*set replicate id in this process */
4386+
4387+
4388+ if (CurrentReplicateServer == NULL)
4389+ {
4390+ PGR_get_replicate_server_info();
4391+ }
4392+ if (CurrentReplicateServer != NULL)
4393+ {
4394+ /* set replicate id in this system */
4395+ saved_id=CurrentReplicateServer->replicate_id;
4396+ elog(DEBUG1, "replication id set from %d to %d", saved_id, rid);
4397+
4398+ CurrentReplicateServer->replicate_id = (uint32_t)(atol(id));
4399+ }
4400+
4401+ return STATUS_OK;
4402+}
4403+
4404+
4405+static unsigned int
4406+get_next_request_id(void)
4407+{
4408+ if (ReplicationLog_Info.PGR_Request_ID +1 < PGR_MAX_COUNTER)
4409+ {
4410+ ReplicationLog_Info.PGR_Request_ID ++;
4411+ }
4412+ else
4413+ {
4414+ ReplicationLog_Info.PGR_Request_ID = 0;
4415+ }
4416+ return ReplicationLog_Info.PGR_Request_ID ;
4417+
4418+}
4419+
4420+static bool
4421+is_this_query_replicated(char * id)
4422+{
4423+ uint32_t replicate_id = 0;
4424+ uint32_t saved_id = 0;
4425+ int32_t diff=0;
4426+ ReplicateServerInfo * replicate_server_info = NULL;
4427+
4428+ if (id == NULL)
4429+ {
4430+ return false;
4431+ }
4432+ replicate_id = (uint32_t)atol(id);
4433+ elog(DEBUG1, "check for replication id , input=%u", replicate_id);
4434+
4435+ if (CurrentReplicateServer == NULL)
4436+ {
4437+ PGR_get_replicate_server_info();
4438+ }
4439+
4440+ if (CurrentReplicateServer != NULL)
4441+ {
4442+ replicate_server_info = CurrentReplicateServer;
4443+ }
4444+ else if (LastReplicateServer != NULL)
4445+ {
4446+ replicate_server_info = LastReplicateServer;
4447+ }
4448+ if (replicate_server_info != NULL)
4449+ {
4450+
4451+ saved_id=replicate_server_info->replicate_id;
4452+ saved_id = saved_id < ReplicationLog_Info.PGR_Replicate_ID
4453+ ? ReplicationLog_Info.PGR_Replicate_ID
4454+ : saved_id;
4455+
4456+ elog(DEBUG1, "check for replication id , now=%u", saved_id);
4457+ /* check replicate_id < saved_id logically
4458+ *
4459+ * see also:
4460+ * backend/transam/transam.c#TransactionIdPrecedes
4461+ */
4462+
4463+ diff = (int32) (saved_id-replicate_id);
4464+ return (diff > 0);
4465+ }
4466+ elog(DEBUG1, "check for replication id check failed. no replication server");
4467+ return false;
4468+}
4469+
4470+
4471+static int
4472+get_new_replication_socket( ReplicateServerInfo * base, ReplicateServerInfo * sp, int socket_type)
4473+{
4474+ int sock = -1;
4475+
4476+ if (( base == NULL) ||
4477+ ( sp == NULL))
4478+ {
4479+ return -1;
4480+ }
4481+ close_replicate_server_socket ( sp , socket_type);
4482+ PGR_Set_Replication_Server_Status(sp, DATA_ERR);
4483+ sp = search_new_replication_server(base, socket_type);
4484+ if (sp == NULL)
4485+ {
4486+ if (Debug_pretty_print)
4487+ elog(DEBUG1,"all replication servers may be down");
4488+ PGR_Stand_Alone->is_stand_alone = true;
4489+ return -1;
4490+ }
4491+ sock = PGR_get_replicate_server_socket( sp , socket_type);
4492+ return sock;
4493+}
4494+
4495+
4496+int
4497+PGR_recv_replicate_result(int sock,char * result,int user_timeout)
4498+{
4499+ fd_set rmask;
4500+ struct timeval timeout;
4501+ int rtn;
4502+
4503+ if (result == NULL)
4504+ {
4505+ return -1;
4506+ }
4507+
4508+ /*
4509+ * Wait for something to happen.
4510+ */
4511+ for (;;)
4512+ {
4513+ if (user_timeout == 0)
4514+ timeout.tv_sec = PGR_Replication_Timeout;
4515+ else
4516+ timeout.tv_sec = user_timeout;
4517+
4518+ timeout.tv_usec = 0;
4519+
4520+ FD_ZERO(&rmask);
4521+ FD_SET(sock,&rmask);
4522+ rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
4523+ if (rtn <= 0)
4524+ {
4525+ if (errno != EINTR)
4526+ return -1;
4527+ }
4528+
4529+ else if ((rtn > 0) && (FD_ISSET(sock, &rmask)))
4530+ {
4531+ return (recv_message(sock, result,0));
4532+ }
4533+ }
4534+ return -1;
4535+}
4536+
4537+static int
4538+recv_message(int sock,char * buf,int flag)
4539+{
4540+ int cnt = 0;
4541+ int r = 0;
4542+ char * read_ptr;
4543+ int read_size = 0;
4544+ cnt = 0;
4545+ read_ptr = buf;
4546+
4547+ for (;;)
4548+ {
4549+ r = recv(sock,read_ptr + read_size ,PGR_MESSAGE_BUFSIZE - read_size, flag);
4550+ if (r < 0) {
4551+ if (errno == EINTR || errno == EAGAIN) {
4552+ continue;
4553+ } else {
4554+ elog(DEBUG1, "recv_message():recv failed");
4555+ return -1;
4556+ }
4557+ } else if (r == 0) {
4558+ elog(DEBUG1, "recv_message():unexpected EOF");
4559+ return -1;
4560+ } else /*if (r > 0)*/ {
4561+ read_size += r;
4562+ if (read_size == PGR_MESSAGE_BUFSIZE)
4563+ {
4564+ return read_size;
4565+ }
4566+ }
4567+ }
4568+ return -1;
4569+}
4570+
4571+static int
4572+send_replicate_packet(int sock,ReplicateHeader * header, char * query_string)
4573+{
4574+ int s = 0;
4575+ char * send_ptr = NULL;
4576+ char * buf = NULL;
4577+ int send_size = 0;
4578+ int buf_size = 0;
4579+ int header_size = 0;
4580+ int rtn = 0;
4581+ fd_set wmask;
4582+ struct timeval timeout;
4583+ int query_size = 0;
4584+
4585+ /* check parameter */
4586+ if ((sock < 0) || (header == NULL))
4587+ {
4588+ return STATUS_ERROR;
4589+ }
4590+
4591+ query_size = ntohl(header->query_size);
4592+ header_size = sizeof(ReplicateHeader);
4593+ buf_size = header_size + query_size + 4;
4594+ buf = malloc(buf_size);
4595+ if (buf == NULL)
4596+ {
4597+ return STATUS_ERROR;
4598+ }
4599+ memset(buf,0,buf_size);
4600+ buf_size -= 4;
4601+ memcpy(buf,header,header_size);
4602+ if (query_string != NULL)
4603+ {
4604+ memcpy((char *)(buf+header_size),query_string,query_size+1);
4605+ }
4606+ send_ptr = buf;
4607+
4608+ /*
4609+ * Wait for something to happen.
4610+ */
4611+ rtn = 1;
4612+ for (;;)
4613+ {
4614+ timeout.tv_sec = PGR_Replication_Timeout;
4615+ timeout.tv_usec = 0;
4616+
4617+ FD_ZERO(&wmask);
4618+ FD_SET(sock,&wmask);
4619+ rtn = select(sock+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
4620+ if (rtn < 0)
4621+ {
4622+ if (errno == EINTR)
4623+ continue;
4624+ else
4625+ {
4626+ elog(DEBUG1, "send_replicate_packet():select() failed");
4627+ return STATUS_ERROR;
4628+ }
4629+ }
4630+ else if (rtn && FD_ISSET(sock, &wmask))
4631+ {
4632+
4633+
4634+ s = send(sock,send_ptr + send_size,buf_size - send_size ,0);
4635+ if (s < 0){
4636+ if (errno == EINTR || errno == EAGAIN)
4637+ {
4638+ continue;
4639+ }
4640+ elog(DEBUG1, "send_replicate_packet():send error");
4641+
4642+ /* EPIPE || ENCONNREFUSED || ENSOCK || EHOSTUNREACH */
4643+ return STATUS_ERROR;
4644+ } else if (s == 0) {
4645+ free(buf);
4646+ buf = NULL;
4647+ elog(DEBUG1, "send_replicate_packet():unexpected EOF");
4648+ return STATUS_ERROR;
4649+ } else /*if (s > 0)*/ {
4650+ send_size += s;
4651+ if (send_size == buf_size)
4652+ {
4653+ free(buf);
4654+ buf = NULL;
4655+ return STATUS_OK;
4656+ }
4657+ }
4658+ }
4659+ }
4660+ if (buf != NULL)
4661+ {
4662+ free(buf);
4663+ buf = NULL;
4664+ }
4665+ return STATUS_ERROR;
4666+}
4667+
/*
 * PGR_Is_Replicated_Command()
 *
 * Thin wrapper: the answer is exactly what PGR_Is_System_Command() reports
 * for the same query text.
 */
bool
PGR_Is_Replicated_Command(char * query)
{
	bool		is_system = PGR_Is_System_Command(query);

	return is_system;
}
4674+
4675+int
4676+Xlog_Check_Replicate(int operation)
4677+{
4678+ if (PGR_Get_Cluster_Status() == STATUS_RECOVERY)
4679+ {
4680+ return STATUS_OK;
4681+ /* elog(WARNING, "This query is not permitted while recovery db "); */
4682+ }
4683+ else if ((operation == CMD_UTILITY ) ||
4684+ (operation == CMD_INSERT ) ||
4685+ (operation == CMD_UPDATE ) ||
4686+ (operation == CMD_DELETE ))
4687+ {
4688+ return (PGR_Replicate_Function_Call());
4689+ }
4690+ return STATUS_OK;
4691+}
4692+
4693+int
4694+PGR_Replicate_Function_Call(void)
4695+{
4696+ char *result = NULL;
4697+ int status = STATUS_OK;
4698+
4699+ if ((PGR_Get_Cluster_Status() == STATUS_RECOVERY) ||
4700+ (PGR_Stand_Alone == NULL))
4701+ {
4702+ return STATUS_OK;
4703+ }
4704+ if (Query_String != NULL)
4705+ {
4706+ if (PGR_Is_Stand_Alone() == true)
4707+ {
4708+ if (PGR_Stand_Alone->permit == PERMIT_READ_ONLY)
4709+ {
4710+ Query_String = NULL;
4711+ return STATUS_ERROR;
4712+ }
4713+ }
4714+ PGR_Need_Notice = true;
4715+ PGR_Check_Lock.check_lock_conflict = true;
4716+ result = PGR_Send_Replicate_Command(Query_String,strlen(Query_String), CMD_STS_QUERY,CMD_TYPE_SELECT);
4717+ if (result != NULL)
4718+ {
4719+ PGR_Reload_Start_Time();
4720+ if (!strncmp(result,PGR_DEADLOCK_DETECTION_MSG,strlen(PGR_DEADLOCK_DETECTION_MSG)))
4721+ {
4722+ status = STATUS_DEADLOCK_DETECT;
4723+ }
4724+ free(result);
4725+ result = NULL;
4726+ }
4727+ else
4728+ {
4729+ status = STATUS_ERROR;
4730+ }
4731+ Query_String = NULL;
4732+ }
4733+ return status;
4734+}
4735+
4736+void
4737+PGR_delete_shm(void)
4738+{
4739+
4740+ if (ReplicateServerData != NULL)
4741+ {
4742+ shmdt(ReplicateServerData);
4743+ ReplicateServerData = NULL;
4744+ shmctl(ReplicateServerShmid,IPC_RMID,(struct shmid_ds *)NULL);
4745+ }
4746+ if (ClusterDBData != NULL)
4747+ {
4748+ shmdt(ClusterDBData);
4749+ ClusterDBData = NULL;
4750+ shmctl(ClusterDBShmid,IPC_RMID,(struct shmid_ds *)NULL);
4751+ }
4752+
4753+ if (TransactionSock != -1)
4754+ {
4755+ close(TransactionSock);
4756+ }
4757+
4758+ if (RsyncPath != NULL)
4759+ {
4760+ free(RsyncPath);
4761+ RsyncPath = NULL;
4762+ }
4763+ if (RsyncOption != NULL)
4764+ {
4765+ free(RsyncOption);
4766+ RsyncOption = NULL;
4767+ }
4768+
4769+ if (ReplicateCurrentTime != NULL)
4770+ {
4771+ free(ReplicateCurrentTime);
4772+ ReplicateCurrentTime = NULL;
4773+ }
4774+
4775+ if (PGRCopyData != NULL)
4776+ {
4777+ free (PGRCopyData);
4778+ PGRCopyData = NULL;
4779+ }
4780+
4781+ if (PGR_Stand_Alone != NULL)
4782+ {
4783+ free(PGR_Stand_Alone);
4784+ PGR_Stand_Alone = NULL;
4785+ }
4786+
4787+ if (PGR_Not_Replicate != NULL)
4788+ {
4789+ free(PGR_Not_Replicate);
4790+ PGR_Not_Replicate = NULL;
4791+ }
4792+ if (PGRSelfHostName != NULL)
4793+ {
4794+ free(PGRSelfHostName);
4795+ PGRSelfHostName = NULL;
4796+ }
4797+ if (PGR_password != NULL)
4798+ {
4799+ if (PGR_password->password != NULL)
4800+ {
4801+ free(PGR_password->password);
4802+ PGR_password->password = NULL;
4803+ }
4804+ free(PGR_password);
4805+ PGR_password = NULL;
4806+ }
4807+}
4808+
4809+ReplicateServerInfo *
4810+PGR_get_replicate_server_info(void)
4811+{
4812+
4813+ ReplicateServerInfo * sp;
4814+
4815+ if (ReplicateServerData == NULL)
4816+ {
4817+ return (ReplicateServerInfo *)NULL;
4818+ }
4819+ /* check current using replication server */
4820+ sp = PGR_check_replicate_server_info();
4821+ if (sp != NULL)
4822+ {
4823+ if (CurrentReplicateServer != NULL)
4824+ {
4825+ LastReplicateServer = CurrentReplicateServer;
4826+ CurrentReplicateServer->replicate_id = LastReplicateServer->replicate_id;
4827+ }
4828+ CurrentReplicateServer = sp;
4829+ return sp;
4830+ }
4831+ /* there is no used replication server */
4832+ /* however it may exist still in initial status */
4833+ sp = ReplicateServerData;
4834+ while (sp->useFlag != DATA_END)
4835+ {
4836+ if (sp->useFlag != DATA_ERR )
4837+ {
4838+ if (CurrentReplicateServer != NULL)
4839+ {
4840+ LastReplicateServer = CurrentReplicateServer;
4841+ CurrentReplicateServer->replicate_id = LastReplicateServer-> replicate_id;
4842+ }
4843+ CurrentReplicateServer = sp;
4844+ PGR_Set_Replication_Server_Status(sp, DATA_USE);
4845+ return sp;
4846+ }
4847+ sp++;
4848+ }
4849+ PGR_Stand_Alone->is_stand_alone = true;
4850+ if (CurrentReplicateServer != NULL)
4851+ {
4852+ LastReplicateServer = CurrentReplicateServer;
4853+ CurrentReplicateServer->replicate_id = LastReplicateServer-> replicate_id;
4854+ }
4855+ CurrentReplicateServer = NULL;
4856+ return (ReplicateServerInfo *)NULL;
4857+}
4858+
4859+ReplicateServerInfo *
4860+PGR_check_replicate_server_info(void)
4861+{
4862+
4863+ ReplicateServerInfo * sp;
4864+
4865+ if (ReplicateServerData == NULL)
4866+ {
4867+ return (ReplicateServerInfo *)NULL;
4868+ }
4869+ sp = ReplicateServerData;
4870+ while (sp->useFlag != DATA_END)
4871+ {
4872+ if (sp->useFlag == DATA_USE )
4873+ {
4874+ return sp;
4875+ }
4876+ sp++;
4877+ }
4878+ return NULL;
4879+}
4880+
4881+int
4882+PGR_Send_Copy(CopyData * copy,int end )
4883+{
4884+
4885+ char cmdSts,cmdType;
4886+ char * p = NULL;
4887+ char *result = NULL;
4888+ char term[8];
4889+ /*int status = 0; */
4890+
4891+ if (copy == NULL)
4892+ {
4893+ return STATUS_ERROR;
4894+ }
4895+
4896+ cmdSts = CMD_STS_COPY;
4897+
4898+ if (Transaction_Mode > 0)
4899+ {
4900+ cmdSts = CMD_STS_TRANSACTION ;
4901+ }
4902+ if (Session_Authorization_Mode)
4903+ {
4904+ cmdSts = CMD_STS_SET_SESSION_AUTHORIZATION ;
4905+ }
4906+ cmdType = CMD_TYPE_COPY_DATA;
4907+
4908+ copy->copy_data[copy->cnt] = '\0';
4909+ if (end)
4910+ {
4911+ memset(term,0,sizeof(term));
4912+ term[0]='\\';
4913+ term[1]='.';
4914+ term[2]='\n';
4915+
4916+ cmdType = CMD_TYPE_COPY_DATA_END;
4917+ p = NULL;
4918+ if (copy->cnt > 0)
4919+ {
4920+ copy->copy_data[copy->cnt] = '\0';
4921+ p = strstr(copy->copy_data,term);
4922+ if (p == NULL)
4923+ {
4924+ p = &(copy->copy_data[copy->cnt-1]);
4925+ copy->cnt--;
4926+ }
4927+ else
4928+ {
4929+ p = NULL;
4930+ }
4931+ }
4932+ if (p != NULL)
4933+ {
4934+ strncpy(p,term,sizeof(term));
4935+ copy->cnt += 4;
4936+ }
4937+ }
4938+ result = PGR_Send_Replicate_Command(copy->copy_data, copy->cnt, cmdSts, cmdType);
4939+ memset(copy,0,sizeof(CopyData));
4940+
4941+ if (result != NULL)
4942+ {
4943+ PGR_Reload_Start_Time();
4944+ free(result);
4945+ result = NULL;
4946+ return STATUS_OK;
4947+ }
4948+ else
4949+ {
4950+ return STATUS_ERROR;
4951+ }
4952+}
4953+
4954+CopyData *
4955+PGR_Set_Copy_Data(CopyData * copy, char *str, int len,int end)
4956+{
4957+ CopyData save;
4958+ int save_len = 0;
4959+ int read_index = 0;
4960+ int send_size = 0;
4961+ int buf_size = 0;
4962+ int rest_len = 0;
4963+ int rest_buf_size = 0;
4964+ int status = STATUS_OK;
4965+ char * ep = NULL;
4966+ char term[4];
4967+
4968+ #define BUFF_OFFSET (8)
4969+
4970+ if ((PGR_Copy_Data_Need_Replicate == false) ||
4971+ (copy == NULL))
4972+ {
4973+ return (CopyData *)NULL;
4974+ }
4975+ memset(term,0,sizeof(term));
4976+ term[0]='\n';
4977+ term[1]='\\';
4978+ term[2]='.';
4979+ buf_size = COPYBUFSIZ - BUFF_OFFSET;
4980+ read_index = 0;
4981+ rest_len = len;
4982+ rest_buf_size = buf_size - copy->cnt;
4983+ while ((rest_len > 0) && (rest_buf_size > 0))
4984+ {
4985+ if (rest_buf_size < rest_len)
4986+ {
4987+ send_size = rest_buf_size;
4988+ rest_len -= send_size;
4989+ }
4990+ else
4991+ {
4992+ send_size = rest_len;
4993+ rest_len = 0;
4994+ }
4995+ memcpy(&(copy->copy_data[copy->cnt]) ,str + read_index ,send_size);
4996+ copy->cnt += send_size;
4997+ read_index += send_size;
4998+ rest_buf_size = buf_size - copy->cnt;
4999+ if (strstr(copy->copy_data,term) != NULL)
5000+ {
5001+ break;
5002+ }
5003+ if (rest_buf_size <= 0)
5004+ {
5005+ ep = strrchr(copy->copy_data,'\n');
5006+ if (ep != NULL)
5007+ {
5008+ *ep = '\0';
5009+ save_len = copy->cnt - strlen(copy->copy_data) -1;
5010+ copy->cnt -= save_len ;
5011+ memset(&save,0,sizeof(CopyData));
5012+ memcpy(save.copy_data,(ep+1),save_len+1);
5013+ save.cnt = save_len;
5014+ *ep = '\n';
5015+ *(ep+1) = '\0';
5016+ status = PGR_Send_Copy(copy,0);
5017+ memset(copy,0,sizeof(CopyData));
5018+ if (save_len > 0)
5019+ {
5020+ memcpy(copy,&save,sizeof(CopyData));
5021+ }
5022+ rest_buf_size = buf_size - copy->cnt;
5023+
5024+ }
5025+ else
5026+ {
5027+ /* one record is bigger than COPYBUFSIZ */
5028+ /* buffer would be over flow*/
5029+ status = PGR_Send_Copy(copy,0);
5030+ memset(copy,0,sizeof(CopyData));
5031+ rest_buf_size = buf_size - copy->cnt;
5032+ }
5033+ }
5034+ }
5035+ if (end)
5036+ {
5037+ status = PGR_Send_Copy(copy,end);
5038+ memset(copy,0,sizeof(CopyData));
5039+ }
5040+ if (status != STATUS_OK)
5041+ {
5042+ return (CopyData *)NULL;
5043+ }
5044+ return copy;
5045+}
5046+
5047+int
5048+PGR_replication(char * query_string, CommandDest dest, Node *parsetree, const char * commandTag)
5049+{
5050+ char *result = NULL;
5051+ char cmdSts = CMD_STS_OTHER;
5052+ char cmdType = CMD_TYPE_OTHER;
5053+ int query_len = 0;
5054+
5055+ if ((query_string == NULL) ||
5056+ (commandTag == NULL))
5057+ {
5058+ return STATUS_ERROR;
5059+ }
5060+
5061+ Query_String = NULL;
5062+ query_len = strlen(query_string);
5063+
5064+ /* save query data for retry */
5065+ PGR_Retry_Query.query_string = query_string;
5066+ PGR_Retry_Query.query_len = query_len;
5067+ PGR_Retry_Query.cmdSts = cmdSts;
5068+ PGR_Retry_Query.cmdType = cmdType;
5069+ PGR_Retry_Query.useFlag = DATA_USE;
5070+ /* set cmdType */
5071+ if (!strcmp(commandTag,"BEGIN")) cmdType = CMD_TYPE_BEGIN ;
5072+ else if (!strcmp(commandTag,"COMMIT")) cmdType = CMD_TYPE_COMMIT ;
5073+ else if (!strcmp(commandTag,"SELECT")) cmdType = CMD_TYPE_SELECT ;
5074+ else if (!strcmp(commandTag,"INSERT")) cmdType = CMD_TYPE_INSERT ;
5075+ else if (!strcmp(commandTag,"UPDATE")) cmdType = CMD_TYPE_UPDATE ;
5076+ else if (!strcmp(commandTag,"DELETE")) cmdType = CMD_TYPE_DELETE ;
5077+ else if (!strcmp(commandTag,"VACUUM")) cmdType = CMD_TYPE_VACUUM ;
5078+ else if (!strcmp(commandTag,"ANALYZE")) cmdType = CMD_TYPE_ANALYZE ;
5079+ else if (!strcmp(commandTag,"REINDEX")) cmdType = CMD_TYPE_REINDEX ;
5080+ else if (!strcmp(commandTag,"ROLLBACK")) cmdType = CMD_TYPE_ROLLBACK ;
5081+ else if (!strcmp(commandTag,"RESET")) cmdType = CMD_TYPE_RESET ;
5082+ else if (!strcmp(commandTag,"START TRANSACTION")) cmdType = CMD_TYPE_BEGIN ;
5083+
5084+ /* only "replication_server" statement-name is replicated for SHOW. */
5085+ /* see CreateCommandTag() @ backend/tcop/postgres.c */
5086+
5087+ else if (!strcmp(commandTag,"COPY"))
5088+ {
5089+ cmdType = CMD_TYPE_COPY ;
5090+ if (is_copy_from(query_string))
5091+ {
5092+ PGR_Copy_Data_Need_Replicate = true;
5093+ }
5094+ else
5095+ {
5096+ PGR_Copy_Data_Need_Replicate = false;
5097+ return STATUS_NOT_REPLICATE;
5098+ }
5099+ }
5100+ else if (!strcmp(commandTag,"SET"))
5101+ {
5102+ cmdType = CMD_TYPE_SET;
5103+ /*
5104+ VariableSetStmt *stmt = (VariableSetStmt *)parsetree;
5105+ if (strcmp(stmt->name, "TRANSACTION ISOLATION LEVEL") &&
5106+ strcmp(stmt->name, "datestyle") &&
5107+ strcmp(stmt->name, "autocommit") &&
5108+ strcmp(stmt->name, "client_encoding") &&
5109+ strcmp(stmt->name, "password_encryption") &&
5110+ strcmp(stmt->name, "search_path") &&
5111+ strcmp(stmt->name, "session_authorization") &&
5112+ strcmp(stmt->name, "timezone"))
5113+
5114+ return STATUS_NOT_REPLICATE;
5115+ */
5116+ if (strstr(query_string,SYS_QUERY_1) != NULL)
5117+ {
5118+ return STATUS_NOT_REPLICATE;
5119+ }
5120+ }
5121+ else if (!strcmp(commandTag,"CREATE TABLE"))
5122+ {
5123+ if (is_create_temp_table(query_string))
5124+ {
5125+ Create_Temp_Table_Mode = true;
5126+ }
5127+ }
5128+ if (Create_Temp_Table_Mode)
5129+ {
5130+ cmdSts = CMD_STS_TEMP_TABLE ;
5131+ }
5132+ if (Transaction_Mode > 0)
5133+ {
5134+ cmdSts = CMD_STS_TRANSACTION ;
5135+ }
5136+ else
5137+ {
5138+ if ((cmdType == CMD_TYPE_COMMIT ) ||
5139+ (cmdType == CMD_TYPE_ROLLBACK ))
5140+ {
5141+ cmdSts = CMD_STS_TRANSACTION ;
5142+ if (ReplicateCurrentTime != NULL)
5143+ {
5144+ ReplicateCurrentTime->useFlag = DATA_INIT;
5145+ ReplicateCurrentTime->use_seed = 0;
5146+ }
5147+ }
5148+ }
5149+ if (Session_Authorization_Mode)
5150+ {
5151+ cmdSts = CMD_STS_SET_SESSION_AUTHORIZATION ;
5152+ if (cmdType == CMD_TYPE_SESSION_AUTHORIZATION_END)
5153+ {
5154+ Session_Authorization_Mode = false;
5155+ }
5156+ }
5157+ if ((cmdSts == CMD_STS_TRANSACTION ) ||
5158+ (cmdSts == CMD_STS_SET_SESSION_AUTHORIZATION ) ||
5159+ (cmdSts == CMD_STS_TEMP_TABLE ))
5160+ {
5161+ /* check partitional replication table */
5162+ if (is_not_replication_query(query_string, query_len, cmdType)== true )
5163+ {
5164+ PGR_Copy_Data_Need_Replicate = false;
5165+ return STATUS_NOT_REPLICATE;
5166+ }
5167+ Query_String = NULL;
5168+ if (( do_not_replication_command(commandTag) == true) &&
5169+ (strcmp(commandTag,"SELECT")))
5170+ {
5171+ return STATUS_NOT_REPLICATE;
5172+ }
5173+
5174+ if (Debug_pretty_print)
5175+ elog(DEBUG1,"transaction query send :%s",(char *)query_string);
5176+ PGR_Retry_Query.cmdSts = cmdSts;
5177+ PGR_Retry_Query.cmdType = cmdType;
5178+ result = PGR_Send_Replicate_Command(query_string,query_len, cmdSts,cmdType);
5179+ if (result != NULL)
5180+ {
5181+ if (!strncmp(result,PGR_DEADLOCK_DETECTION_MSG,strlen(PGR_DEADLOCK_DETECTION_MSG)))
5182+ {
5183+ /*
5184+ PGR_Send_Message_To_Frontend(result);
5185+ */
5186+ free(result);
5187+ result = NULL;
5188+ return STATUS_DEADLOCK_DETECT;
5189+ }
5190+ else if (!strncmp(result,PGR_REPLICATION_ABORT_MSG,strlen(PGR_REPLICATION_ABORT_MSG)))
5191+ {
5192+ free(result);
5193+ result = NULL;
5194+ return STATUS_REPLICATION_ABORT;
5195+ }
5196+ free(result);
5197+ result = NULL;
5198+ return STATUS_CONTINUE;
5199+ }
5200+ else
5201+ {
5202+ return STATUS_ERROR;
5203+ }
5204+ }
5205+ else
5206+ {
5207+ cmdSts = CMD_STS_QUERY ;
5208+ if ( do_not_replication_command(commandTag) == false)
5209+ {
5210+ Query_String = NULL;
5211+ /* check partitional replication table */
5212+ if (is_not_replication_query(query_string, query_len, cmdType)== true )
5213+ {
5214+ PGR_Copy_Data_Need_Replicate = false;
5215+ return STATUS_NOT_REPLICATE;
5216+ }
5217+ result = PGR_Send_Replicate_Command(query_string,query_len,cmdSts,cmdType);
5218+ if (result != NULL)
5219+ {
5220+ if (!strncmp(result,PGR_DEADLOCK_DETECTION_MSG,strlen(PGR_DEADLOCK_DETECTION_MSG)))
5221+ {
5222+ free(result);
5223+ result = NULL;
5224+ return STATUS_DEADLOCK_DETECT;
5225+ }
5226+ else if (!strncmp(result,PGR_REPLICATION_ABORT_MSG,strlen(PGR_REPLICATION_ABORT_MSG)))
5227+ {
5228+ free(result);
5229+ result = NULL;
5230+ return STATUS_REPLICATION_ABORT;
5231+ }
5232+ /*
5233+ PGR_Send_Message_To_Frontend(result);
5234+ */
5235+ free(result);
5236+ result = NULL;
5237+ return STATUS_CONTINUE;
5238+ }
5239+ else
5240+ {
5241+ return STATUS_ERROR;
5242+ }
5243+ }
5244+ else
5245+ {
5246+ if (( is_serial_control_query(cmdType,query_string) == true) ||
5247+ ( is_select_into_query(cmdType,query_string) == true))
5248+ {
5249+ Query_String = NULL;
5250+ PGR_Need_Notice = true;
5251+ PGR_Check_Lock.check_lock_conflict = true;
5252+ result = PGR_Send_Replicate_Command(query_string,query_len,cmdSts,cmdType);
5253+ if (result != NULL)
5254+ {
5255+ /*
5256+ PGR_Send_Message_To_Frontend(result);
5257+ */
5258+ if (!strncmp(result,PGR_DEADLOCK_DETECTION_MSG,strlen(PGR_DEADLOCK_DETECTION_MSG)))
5259+ {
5260+ free(result);
5261+ return STATUS_DEADLOCK_DETECT;
5262+ }
5263+ free(result);
5264+ result = NULL;
5265+ return STATUS_CONTINUE;
5266+ }
5267+ else
5268+ {
5269+ return STATUS_ERROR;
5270+ }
5271+ }
5272+ else
5273+ {
5274+ Query_String = query_string;
5275+ /*PGR_Sock_To_Replication_Server = -1;*/
5276+ }
5277+ return STATUS_CONTINUE_SELECT;
5278+ }
5279+ }
5280+ return STATUS_CONTINUE;
5281+}
5282+
5283+
5284+bool
5285+PGR_Is_System_Command(char * query)
5286+{
5287+ char * ptr;
5288+
5289+ if (query == NULL)
5290+ {
5291+ return false;
5292+ }
5293+ ptr = strstr(query,PGR_SYSTEM_COMMAND_FUNC);
5294+ if (ptr != NULL)
5295+ {
5296+ ptr = strchr(ptr,'(');
5297+ if (ptr == NULL)
5298+ return false;
5299+ return true;
5300+ }
5301+ return false;
5302+}
5303+
5304+static int
5305+set_command_args(char argv[ PGR_CMD_ARG_NUM ][256],char *str)
5306+{
5307+ int i,j,cnt,len;
5308+ char * ptr = str;
5309+
5310+ if (str == NULL)
5311+ {
5312+ return 0;
5313+ }
5314+ len = strlen(str);
5315+ cnt = j = 0;
5316+ for ( i = 0 ; i < len ; i++,ptr++)
5317+ {
5318+ if (cnt >= PGR_CMD_ARG_NUM)
5319+ break;
5320+ if (( *ptr == ',') || (*ptr == ')'))
5321+ {
5322+ argv[cnt][j] = '\0';
5323+ cnt ++;
5324+ j = 0;
5325+ continue;
5326+ }
5327+ argv[cnt][j] = *ptr;
5328+ j++;
5329+ }
5330+ if (cnt < PGR_CMD_ARG_NUM)
5331+ argv[cnt][j] = '\0';
5332+ cnt ++;
5333+
5334+ return cnt;
5335+}
5336+
5337+static int
5338+add_replication_server(char * hostname,char * port, char * recovery_port)
5339+{
5340+ int cnt;
5341+ int portNumber;
5342+ int recoveryPortNumber;
5343+ ReplicateServerInfo * sp;
5344+
5345+ if ((hostname == NULL) ||
5346+ (port == NULL ) ||
5347+ (recovery_port == NULL ))
5348+ {
5349+ return STATUS_ERROR;
5350+ }
5351+ if (ReplicateServerData == NULL)
5352+ {
5353+ return STATUS_ERROR;
5354+ }
5355+ portNumber = atoi(port);
5356+ recoveryPortNumber = atoi(recovery_port);
5357+ cnt = 0;
5358+ sp = ReplicateServerData;
5359+ while (sp->useFlag != DATA_END){
5360+ if((!strncmp(sp->hostName,hostname,sizeof(sp->hostName))) &&
5361+ (sp->portNumber == portNumber) &&
5362+ (sp->recoveryPortNumber == recoveryPortNumber))
5363+ {
5364+ if (sp->useFlag != DATA_USE)
5365+ {
5366+ PGR_Set_Replication_Server_Status(sp, DATA_INIT);
5367+ }
5368+ return STATUS_OK;
5369+ }
5370+ sp ++;
5371+ cnt ++;
5372+ }
5373+ if (cnt < MAX_SERVER_NUM)
5374+ {
5375+ strncpy(sp->hostName,hostname,sizeof(sp->hostName));
5376+ sp->portNumber = portNumber;
5377+ sp->recoveryPortNumber = recoveryPortNumber;
5378+ PGR_Set_Replication_Server_Status(sp, DATA_INIT);
5379+ memset((sp+1),0,sizeof(ReplicateServerInfo));
5380+ (sp + 1)->useFlag = DATA_END;
5381+ }
5382+ else
5383+ {
5384+ return STATUS_ERROR;
5385+ }
5386+ return STATUS_OK;
5387+}
5388+
5389+static int
5390+change_replication_server(char * hostname,char * port, char * recovery_port)
5391+{
5392+ int cnt;
5393+ int portNumber;
5394+ int recoveryPortNumber;
5395+ ReplicateServerInfo * sp;
5396+
5397+ if ((hostname == NULL) ||
5398+ (port == NULL ) ||
5399+ (recovery_port == NULL ))
5400+ {
5401+ return STATUS_ERROR;
5402+ }
5403+ if (ReplicateServerData == NULL)
5404+ {
5405+ return STATUS_ERROR;
5406+ }
5407+ portNumber = atoi(port);
5408+ recoveryPortNumber = atoi(recovery_port);
5409+ cnt = 0;
5410+ sp = ReplicateServerData;
5411+ while (sp->useFlag != DATA_END){
5412+ if((!strcmp(sp->hostName,hostname)) &&
5413+ (sp->portNumber == portNumber) &&
5414+ (sp->recoveryPortNumber == recoveryPortNumber))
5415+ {
5416+ PGR_Set_Replication_Server_Status(sp, DATA_USE);
5417+ }
5418+ else
5419+ {
5420+ if (sp->useFlag == DATA_USE)
5421+ {
5422+ PGR_Set_Replication_Server_Status(sp, DATA_INIT);
5423+ }
5424+ }
5425+ sp ++;
5426+ cnt ++;
5427+ }
5428+ return STATUS_OK;
5429+}
5430+
5431+int
5432+PGR_Set_Current_Time(char * sec, char * usec)
5433+{
5434+ int rtn = 0;
5435+ struct timeval local_tp;
5436+ struct timezone local_tpz;
5437+ struct timeval tv;
5438+
5439+ if ((sec == NULL) ||
5440+ (usec == NULL))
5441+ {
5442+ return STATUS_ERROR;
5443+ }
5444+ rtn = gettimeofday(&local_tp, &local_tpz);
5445+ tv.tv_sec = atol(sec);
5446+ tv.tv_usec = atol(usec);
5447+ ReplicateCurrentTime->offset_sec = local_tp.tv_sec - tv.tv_sec;
5448+ ReplicateCurrentTime->offset_usec = local_tp.tv_usec - tv.tv_usec;
5449+ ReplicateCurrentTime->tp.tv_sec = tv.tv_sec;
5450+ ReplicateCurrentTime->tp.tv_usec = tv.tv_usec;
5451+ ReplicateCurrentTime->useFlag = DATA_USE;
5452+ ReplicateCurrentTime->use_seed = 0;
5453+
5454+ return STATUS_OK;
5455+}
5456+
5457+static void
5458+PGR_Set_Current_Replication_Query_ID(char *id) {
5459+ MyProc->replicationId=atol(id);
5460+ return;
5461+}
5462+
5463+static void
5464+set_response_mode(char * mode)
5465+{
5466+ int response_mode = 0;
5467+
5468+ if (mode == NULL)
5469+ return;
5470+ response_mode = atoi(mode);
5471+ if (response_mode < 0)
5472+ return;
5473+ if (CurrentReplicateServer == NULL)
5474+ {
5475+ PGR_get_replicate_server_info();
5476+ if (CurrentReplicateServer == NULL)
5477+ {
5478+ return;
5479+ }
5480+ }
5481+ if (CurrentReplicateServer->response_mode != response_mode)
5482+ {
5483+ CurrentReplicateServer->response_mode = response_mode;
5484+ }
5485+}
5486+
5487+int
5488+PGR_Call_System_Command(char * command)
5489+{
5490+ char * ptr;
5491+ char * args;
5492+ char argv[ PGR_CMD_ARG_NUM ][256];
5493+ int argc = 0;
5494+ int func_no;
5495+ char * hostName = NULL;
5496+
5497+ if ((command == NULL) || (ReplicateCurrentTime == NULL))
5498+ {
5499+ return STATUS_ERROR;
5500+ }
5501+ ptr = strstr(command,PGR_SYSTEM_COMMAND_FUNC);
5502+ if (ptr == NULL)
5503+ return STATUS_ERROR;
5504+ ptr = strchr(ptr,'(');
5505+ if (ptr == NULL)
5506+ return STATUS_ERROR;
5507+ args = ptr+1;
5508+ ptr = strchr(ptr,')');
5509+ if (ptr == NULL)
5510+ return STATUS_ERROR;
5511+ *ptr = '\0';
5512+ argc = set_command_args(argv,args);
5513+ if (argc < 1)
5514+ return STATUS_ERROR;
5515+ func_no = atoi(argv[0]);
5516+ switch (func_no)
5517+ {
5518+ /* set current system time */
5519+ case PGR_SET_CURRENT_TIME_FUNC_NO:
5520+ if (atol(argv[1]) == 0)
5521+ {
5522+ CreateCheckPoint(false,true);
5523+ }
5524+ else
5525+ {
5526+ /*
5527+ if ((atoi(argv[3]) > 0) &&
5528+ (is_this_query_replicated(argv[3]) == true))
5529+ {
5530+ return STATUS_SKIP_QUERY;
5531+ }
5532+ */
5533+ PGR_Set_Current_Time(argv[1],argv[2]);
5534+ set_replication_id(argv[3]);
5535+ set_response_mode(argv[4]);
5536+ PGR_Set_Current_Replication_Query_ID(argv[5]);
5537+
5538+ }
5539+ break;
5540+ /* add new replication server data */
5541+ case PGR_STARTUP_REPLICATION_SERVER_FUNC_NO:
5542+ hostName = get_hostName(argv[1]);
5543+ add_replication_server(hostName,argv[2],argv[3]);
5544+ break;
5545+ /* change new replication server */
5546+ case PGR_CHANGE_REPLICATION_SERVER_FUNC_NO:
5547+ hostName = get_hostName(argv[1]);
5548+ change_replication_server(hostName,argv[2],argv[3]);
5549+ break;
5550+ case PGR_SET_CURRENT_REPLICATION_QUERY_ID_NO:
5551+ PGR_Set_Current_Replication_Query_ID(argv[1]);
5552+ break;
5553+ case PGR_QUERY_CONFIRM_ANSWER_FUNC_NO:
5554+ if ((atoi(argv[3]) > 0) &&
5555+ (is_this_query_replicated(argv[3]) == true))
5556+ {
5557+ /* skip this query */
5558+ return STATUS_SKIP_QUERY;
5559+ }
5560+ else
5561+ {
5562+ PGR_Set_Current_Time(argv[1],argv[2]);
5563+ set_replication_id(argv[3]);
5564+ }
5565+ break;
5566+ /* get current oid */
5567+ case PGR_GET_OID_FUNC_NO:
5568+ return_current_oid();
5569+ break;
5570+ /* set current oid */
5571+ case PGR_SET_OID_FUNC_NO:
5572+ sync_oid(argv[1]);
5573+ break;
5574+ /* set noticed session abort */
5575+ case PGR_NOTICE_ABORT_FUNC_NO:
5576+ PGR_Noticed_Abort = true;
5577+ break;
5578+ }
5579+ return STATUS_OK;
5580+}
5581+
5582+int
5583+PGR_GetTimeOfDay(struct timeval *tp, struct timezone *tpz)
5584+{
5585+
5586+ int rtn;
5587+
5588+ rtn = gettimeofday(tp, tpz);
5589+ if (ReplicateCurrentTime == NULL)
5590+ {
5591+ return rtn;
5592+ }
5593+ if (ReplicateCurrentTime->useFlag == DATA_USE)
5594+ {
5595+ if (ReplicateCurrentTime->use_seed != 0)
5596+ {
5597+ tp->tv_sec -= ReplicateCurrentTime->offset_sec;
5598+ if (tp->tv_usec < ReplicateCurrentTime->offset_usec)
5599+ {
5600+ tp->tv_usec += (1000000 - ReplicateCurrentTime->offset_usec);
5601+ tp->tv_sec -= 1;
5602+ }
5603+ else
5604+ {
5605+ tp->tv_usec -= ReplicateCurrentTime->offset_usec;
5606+ }
5607+ }
5608+ else
5609+ {
5610+ tp->tv_sec = ReplicateCurrentTime->tp.tv_sec;
5611+ tp->tv_usec = ReplicateCurrentTime->tp.tv_usec;
5612+ }
5613+ rtn = 0;
5614+ }
5615+ return rtn;
5616+}
5617+
5618+long
5619+PGR_Random(void)
5620+{
5621+ double rtn;
5622+ if (ReplicateCurrentTime != NULL)
5623+ {
5624+ if ( ReplicateCurrentTime->use_seed == 0)
5625+ {
5626+ srand( ReplicateCurrentTime->tp.tv_usec );
5627+ ReplicateCurrentTime->use_seed = 1;
5628+ }
5629+ }
5630+ rtn = random();
5631+ return rtn;
5632+}
5633+
/*
 * PGR_scan_terminate
 *
 * Find the ';' that terminates the first statement in `str`.
 *
 * Skipped while scanning:
 *  - "--" and "//" comments, up to the end of the line;
 *  - text between single quotes, double quotes or a pair of dollar-quote
 *    delimiters ($$ or $tag$);
 *  - the character following a backslash.
 *
 * Returns a pointer to the terminating ';' inside `str`, or NULL when
 * `str` is NULL or contains no terminator.
 *
 * Fixes over the earlier version:
 *  - the character that ends the scan after a '$' is examined again
 *    instead of being skipped, so "... $1;" finds its terminator and a
 *    '$' at the very end of the string no longer steps past the NUL;
 *  - a backslash as the last character no longer steps past the NUL;
 *  - isalnum() receives an unsigned char value.
 */
char *
PGR_scan_terminate( char * str)
{
    char       *p;
    int         in_single = 0;  /* inside '...' */
    int         in_double = 0;  /* inside "..." */
    int         in_dollar = 0;  /* inside $tag$...$tag$ */
    size_t      i;
    char        tag[256];

    if (str == NULL)
        return NULL;

    memset(tag, 0, sizeof(tag));
    p = str;
    while (*p != '\0')
    {
        if ((!strncmp(p, "--", 2)) ||
            (!strncmp(p, "//", 2)))
        {
            /* line comment: resume at the newline (or at the end) */
            while ((*p != '\n') && (*p != '\0'))
                p++;
            continue;
        }

        switch (*p)
        {
            case '\'':
                in_single ^= 1;
                break;

            case '\"':
                in_double ^= 1;
                break;

            case '$':
                /*
                 * Read the optional tag.  While opening, the tag is
                 * remembered; while closing, it must match what was
                 * remembered.
                 */
                i = 0;
                p++;
                while (isalnum((unsigned char) *p))
                {
                    if (i >= sizeof(tag))
                        break;
                    if (in_dollar == 0)
                        tag[i] = *p;
                    else if (tag[i] != *p)
                        break;
                    i++;
                    p++;
                }
                if (*p == '$')
                {
                    /* complete delimiter: enter or leave the quoted body */
                    in_dollar ^= 1;
                    p++;
                }
                /* whatever stopped the scan is looked at by the main loop */
                continue;

            case '\\':
                if (p[1] == '\0')
                    return NULL;    /* dangling backslash ends the string */
                p += 2;
                continue;

            case ';':
                if ((!in_single) && (!in_double) && (!in_dollar))
                    return p;
                break;
        }
        p++;
    }
    return NULL;
}
5714+
/*
 * is_copy_from
 *
 * Decide whether a COPY statement reads data in ("COPY tbl [(cols)] FROM
 * ...") rather than writing it out.
 *
 * The first two words of `query` are skipped without being inspected (the
 * caller is expected to pass a COPY statement), then an optional
 * parenthesized column list, and the next word is compared
 * case-insensitively with "FROM".
 *
 * Returns true for FROM, false otherwise and for a NULL query.
 *
 * All <ctype.h> calls receive an unsigned char value; passing a plain char
 * is undefined for bytes with the high bit set.
 */
static bool
is_copy_from(char * query)
{
    const char *p;
    char        keyword[12];
    size_t      n = 0;
    int         word;
    int         depth;

    if (query == NULL)
        return false;

    p = query;

    /* skip "copy" and the table name */
    for (word = 0; word < 2; word++)
    {
        while (isspace((unsigned char) *p))
            p++;
        while ((*p != '\0') && (*p != '(') && (!isspace((unsigned char) *p)))
            p++;
    }
    while (isspace((unsigned char) *p))
        p++;

    /* skip the column list, honoring nested parentheses */
    if (*p == '(')
    {
        depth = 1;
        p++;
        while (*p != '\0')
        {
            if (*p == '(')
                depth++;
            if (*p == ')')
                depth--;
            p++;
            if (depth == 0)
                break;
        }
        while (isspace((unsigned char) *p))
            p++;
    }

    /* upper-case the next word: it is either FROM or TO */
    memset(keyword, 0, sizeof(keyword));
    while ((*p != '\0') &&
           (!isspace((unsigned char) *p)) &&
           (n < sizeof(keyword) - 1))
    {
        keyword[n] = (char) toupper((unsigned char) *p);
        n++;
        p++;
    }

    return (strcmp(keyword, "FROM") == 0) ? true : false;
}
5774+
5775+static bool
5776+is_create_temp_table(char * query)
5777+{
5778+ int len,wc;
5779+ char buf[MAX_WORDS][MAX_WORD_LETTERS];
5780+
5781+ if (query == NULL)
5782+ return false;
5783+ len = strlen(query);
5784+ wc = get_words(buf,query,len,1);
5785+ if (wc < 4)
5786+ return false;
5787+ if ((!strncmp(buf[0],"CREATE", strlen("CREATE"))) &&
5788+ (!strncmp(buf[1],"TEMP",strlen("TEMP"))) &&
5789+ (!strncmp(buf[2],"TABLE",strlen("TABLE"))))
5790+ {
5791+ return true;
5792+ }
5793+ return false;
5794+}
5795+
5796+static int
5797+get_words( char words[MAX_WORDS][MAX_WORD_LETTERS] ,char * string,int length,int upper)
5798+{
5799+ int i,wc,lc;
5800+ char * p = NULL;
5801+ char * buf = NULL;
5802+
5803+ if (string == NULL)
5804+ return STATUS_ERROR;
5805+ buf = malloc(length);
5806+ if (buf == NULL)
5807+ return STATUS_ERROR;
5808+
5809+ memset(buf,0,length);
5810+ p = string;
5811+ wc = 0;
5812+ for (i = 0 ; i < length ; i ++)
5813+ {
5814+ if ((*p == '\0') || (wc >= MAX_WORDS))
5815+ break;
5816+ while (isspace(*p))
5817+ {
5818+ p++;
5819+ i++;
5820+ }
5821+ lc = 0;
5822+ while ((*p != '\0') && (! isspace(*p)))
5823+ {
5824+ if (upper)
5825+ *(buf+lc) = (char)toupper(*p);
5826+ else
5827+ *(buf+lc) = *p;
5828+
5829+ p++;
5830+ i++;
5831+ lc++;
5832+ }
5833+ memset(words[wc],0,MAX_WORD_LETTERS);
5834+ memcpy(words[wc],buf,lc);
5835+ memset(buf,0,length);
5836+ wc++;
5837+ }
5838+ free(buf);
5839+ buf = NULL;
5840+ return wc;
5841+}
5842+
5843+static int
5844+Comp_Not_Replicate(PGR_Not_Replicate_Type * nrp1,PGR_Not_Replicate_Type* nrp2)
5845+{
5846+ int rtn;
5847+
5848+ if ((nrp1 == NULL) ||
5849+ (nrp2 == NULL))
5850+ {
5851+ return 0;
5852+ }
5853+ rtn = strcasecmp(nrp1->table_name,nrp2->table_name);
5854+ if (rtn == 0)
5855+ {
5856+ rtn = strcasecmp(nrp1->db_name,nrp2->db_name);
5857+ }
5858+ return rtn;
5859+}
5860+
5861+bool
5862+PGR_Is_Stand_Alone(void)
5863+{
5864+ ReplicateServerInfo * sp = NULL;
5865+
5866+ if (PGR_Stand_Alone == NULL)
5867+ return true;
5868+ if (PGR_Stand_Alone->is_stand_alone == true)
5869+ {
5870+ sp = PGR_get_replicate_server_info();
5871+ if (sp == NULL)
5872+ {
5873+ return true;
5874+ }
5875+ }
5876+ return false;
5877+}
5878+
5879+void
5880+PGR_Send_Message_To_Frontend(char * msg)
5881+{
5882+ StringInfoData msgbuf;
5883+
5884+ pq_beginmessage(&msgbuf, 'N');
5885+
5886+ if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
5887+ {
5888+ /* New style with separate fields */
5889+ char tbuf[12];
5890+ int ssval;
5891+ int i;
5892+
5893+ pq_sendbyte(&msgbuf, PG_DIAG_SEVERITY);
5894+ pq_sendstring(&msgbuf, "NOTICE" );
5895+
5896+ /* unpack MAKE_SQLSTATE code */
5897+ ssval = ERRCODE_WARNING ;
5898+ for (i = 0; i < 5; i++)
5899+ {
5900+ tbuf[i] = PGUNSIXBIT(ssval);
5901+ ssval >>= 6;
5902+ }
5903+ tbuf[i] = '\0';
5904+
5905+ pq_sendbyte(&msgbuf, PG_DIAG_SQLSTATE);
5906+ pq_sendstring(&msgbuf, tbuf);
5907+
5908+ /* M field is required per protocol, so always send something */
5909+ pq_sendbyte(&msgbuf, PG_DIAG_MESSAGE_PRIMARY);
5910+ if (msg)
5911+ pq_sendstring(&msgbuf, msg);
5912+ else
5913+ pq_sendstring(&msgbuf, _("missing error text"));
5914+
5915+ pq_sendbyte(&msgbuf, '\0'); /* terminator */
5916+ }
5917+ else
5918+ {
5919+ /* Old style --- gin up a backwards-compatible message */
5920+ StringInfoData buf;
5921+
5922+ initStringInfo(&buf);
5923+
5924+ appendStringInfo(&buf, "%s: ", "NOTICE");
5925+
5926+ if (msg)
5927+ appendStringInfoString(&buf, msg);
5928+ else
5929+ appendStringInfoString(&buf, _("missing error text"));
5930+
5931+ appendStringInfoChar(&buf, '\n');
5932+
5933+ pq_sendstring(&msgbuf, buf.data);
5934+
5935+ pfree(buf.data);
5936+ }
5937+
5938+ pq_endmessage(&msgbuf);
5939+
5940+ /*
5941+ * This flush is normally not necessary, since postgres.c will flush out
5942+ * waiting data when control returns to the main loop. But it seems best
5943+ * to leave it here, so that the client has some clue what happened if the
5944+ * backend dies before getting back to the main loop ... error/notice
5945+ * messages should not be a performance-critical path anyway, so an extra
5946+ * flush won't hurt much ...
5947+ */
5948+ pq_flush();
5949+}
5950+
5951+static bool
5952+is_serial_control_query(char cmdType,char * query)
5953+{
5954+ char * buf = NULL;
5955+ int len = 0;
5956+ int i = 0;
5957+ char * p = NULL;
5958+
5959+ if ((cmdType != CMD_TYPE_SELECT ) ||
5960+ ( query == NULL))
5961+ {
5962+ return false;
5963+ }
5964+
5965+ p = query;
5966+ len = strlen(query) +1;
5967+ buf = malloc(len);
5968+ if (buf == NULL)
5969+ return false;
5970+
5971+ memset(buf,0,len);
5972+ for ( i = 0 ; i < len ; i ++)
5973+ {
5974+ *(buf+i) = toupper(*(query+i));
5975+ }
5976+ if ((strstr(buf,"NEXTVAL") != NULL) ||
5977+ (strstr(buf,"SETVAL") != NULL))
5978+ {
5979+ free(buf);
5980+ buf = NULL;
5981+ return true;
5982+ }
5983+ free(buf);
5984+ buf = NULL;
5985+ return false;
5986+}
5987+
5988+static bool
5989+is_select_into_query(char cmdType,char * query)
5990+{
5991+ char * buf = NULL;
5992+ int len = 0;
5993+ int i = 0;
5994+ char * p = NULL;
5995+
5996+ if ((cmdType != CMD_TYPE_SELECT ) ||
5997+ ( query == NULL))
5998+ {
5999+ return false;
6000+ }
6001+
6002+ p = query;
6003+ len = strlen(query) +1;
6004+ buf = malloc(len);
6005+ if (buf == NULL)
6006+ return false;
6007+
6008+ memset(buf,0,len);
6009+ for ( i = 0 ; i < len ; i ++)
6010+ {
6011+ *(buf+i) = toupper(*(query+i));
6012+ }
6013+ if (strstr(buf,"INTO") != NULL)
6014+ {
6015+ free(buf);
6016+ buf = NULL;
6017+ return true;
6018+ }
6019+ if (strstr(buf,"CREATE") != NULL)
6020+ {
6021+ free(buf);
6022+ buf = NULL;
6023+ return true;
6024+ }
6025+ free(buf);
6026+ buf = NULL;
6027+ return false;
6028+}
6029+
6030+static int
6031+send_response_to_replication_server(const char * notice)
6032+{
6033+ ReplicateHeader header;
6034+ int status;
6035+
6036+ if (PGR_Lock_Noticed)
6037+ {
6038+ return STATUS_OK;
6039+ }
6040+ if ((notice == NULL) ||
6041+ (PGR_Sock_To_Replication_Server < 0))
6042+ {
6043+ return STATUS_ERROR;
6044+ }
6045+
6046+ memset(&header,0,sizeof(ReplicateHeader));
6047+ header.cmdSys = CMD_SYS_CALL;
6048+ header.cmdSts = CMD_STS_RESPONSE;
6049+ if (!strcmp(notice,PGR_QUERY_ABORTED_NOTICE_CMD))
6050+ {
6051+ header.cmdType = CMD_TYPE_FRONTEND_CLOSED;
6052+ }
6053+ header.query_size = htonl(strlen(notice));
6054+ status = send_replicate_packet(PGR_Sock_To_Replication_Server,&header,(char *)notice);
6055+ return status;
6056+}
6057+
6058+void
6059+PGR_Notice_Transaction_Query_Done(void)
6060+{
6061+ send_response_to_replication_server(PGR_QUERY_DONE_NOTICE_CMD);
6062+}
6063+
6064+void
6065+PGR_Notice_Transaction_Query_Aborted(void)
6066+{
6067+ send_response_to_replication_server(PGR_QUERY_ABORTED_NOTICE_CMD);
6068+}
6069+
6070+int
6071+PGR_Notice_Conflict(void)
6072+{
6073+ const char * msg = NULL ;
6074+ int rtn = STATUS_OK;
6075+
6076+ msg = PGR_LOCK_CONFLICT_NOTICE_CMD ;
6077+ if (PGR_Check_Lock.deadlock == true)
6078+ {
6079+ msg = PGR_DEADLOCK_DETECT_NOTICE_CMD ;
6080+ }
6081+ if (PGR_Check_Lock.dest == TO_FRONTEND)
6082+ {
6083+ ReadyForQuery(DestRemote);
6084+ EndCommand(msg,DestRemote);
6085+#ifdef CONTROL_LOCK_CONFLICT
6086+ rtn = wait_lock_answer();
6087+#endif /* CONTROL_LOCK_CONFLICT */
6088+ }
6089+ else
6090+ {
6091+ send_response_to_replication_server(msg);
6092+#ifdef CONTROL_LOCK_CONFLICT
6093+ rtn = PGR_Recv_Trigger (PGR_Replication_Timeout);
6094+#endif /* CONTROL_LOCK_CONFLICT */
6095+ }
6096+ return rtn;
6097+}
6098+
6099+#ifdef CONTROL_LOCK_CONFLICT
6100+static int
6101+wait_lock_answer(void)
6102+{
6103+ char result[PGR_MESSAGE_BUFSIZE+4];
6104+ int rtn = 0;
6105+
6106+ memset(result,0,sizeof(result));
6107+ rtn = read_trigger(result, PGR_MESSAGE_BUFSIZE);
6108+ if (rtn < 0)
6109+ return STATUS_ERROR;
6110+ return STATUS_OK;
6111+}
6112+
/*
 * read_trigger
 *
 * Read bytes one at a time with pq_getbytes() into `result` until a NUL
 * byte has been stored, buf_size - 1 bytes have been stored, or
 * pq_getbytes() reports something other than 0.
 *
 * Returns EOF for a NULL buffer or a non-positive size, otherwise the
 * result of the last pq_getbytes() call.
 */
static int
read_trigger(char * result, int buf_size)
{
    int         pos = 0;
    int         rc;
    char        ch;

    if ((result == NULL) || (buf_size <= 0))
        return EOF;

    for (;;)
    {
        rc = pq_getbytes(&ch, 1);
        if (rc != 0)
            break;              /* read failed or connection closed */
        if (pos >= buf_size - 1)
            break;              /* buffer full: the byte is dropped */
        result[pos] = ch;
        if (ch == '\0')
            break;              /* end of message */
        pos++;
    }
    return rc;
}
6144+#endif /* CONTROL_LOCK_CONFLICT */
6145+
6146+int
6147+PGR_Recv_Trigger (int user_timeout)
6148+{
6149+ char result[PGR_MESSAGE_BUFSIZE];
6150+ int rtn = 0;
6151+ int func_no = 0;
6152+
6153+
6154+ if (PGR_Lock_Noticed)
6155+ {
6156+ return STATUS_OK;
6157+ }
6158+ if (PGR_Sock_To_Replication_Server < 0)
6159+ return STATUS_ERROR;
6160+ memset(result,0,sizeof(result));
6161+ rtn = PGR_recv_replicate_result(PGR_Sock_To_Replication_Server,result,user_timeout);
6162+ if (rtn > 0)
6163+ {
6164+ func_no = atoi(result);
6165+ if (func_no <= 0)
6166+ {
6167+ func_no = STATUS_OK;
6168+ }
6169+ return func_no;
6170+ }
6171+ else
6172+ {
6173+ if (user_timeout == 0)
6174+ {
6175+ PGR_Set_Replication_Server_Status(CurrentReplicateServer, DATA_ERR);
6176+ }
6177+ return STATUS_ERROR;
6178+ }
6179+ return STATUS_OK;
6180+}
6181+
6182+
/*
 * PGR_Set_Transaction_Mode
 *
 * Track the transaction nesting counter across command tags.
 *
 * mode:       current counter.
 * commandTag: completed command's tag, may be NULL.
 *
 * Returns mode + 1 for exactly "BEGIN" or "START TRANSACTION"; mode - 1
 * for a tag starting with "COMMIT" or "ROLLBACK" while mode is positive;
 * the unchanged mode in every other case.
 */
int
PGR_Set_Transaction_Mode(int mode,const char * commandTag)
{
    if (commandTag == NULL)
        return mode;

    if ((strcmp(commandTag, "BEGIN") == 0) ||
        (strcmp(commandTag, "START TRANSACTION") == 0))
        return mode + 1;

    /* only an open transaction can be closed */
    if (mode <= 0)
        return mode;

    if ((strncmp(commandTag, "COMMIT", strlen("COMMIT")) == 0) ||
        (strncmp(commandTag, "ROLLBACK", strlen("ROLLBACK")) == 0))
        return mode - 1;

    return mode;
}
6205+
/*
 * do_not_replication_command
 *
 * Return true when the command identified by `commandTag` is not to be
 * replicated: the tag is NULL or exactly one of SELECT, CLOSE CURSOR,
 * MOVE, FETCH or EXPLAIN.  Any other tag yields false.
 */
static bool
do_not_replication_command(const char * commandTag)
{
    static const char *const local_only[] = {
        "SELECT",
        "CLOSE CURSOR",
        "MOVE",
        "FETCH",
        "EXPLAIN"
    };
    size_t      i;

    if (commandTag == NULL)
        return true;

    for (i = 0; i < sizeof(local_only) / sizeof(local_only[0]); i++)
    {
        if (strcmp(commandTag, local_only[i]) == 0)
            return true;
    }
    return false;
}
6226+
6227+void
6228+PGR_Set_Replication_Server_Status( ReplicateServerInfo * sp, int status)
6229+{
6230+ if (sp == NULL)
6231+ {
6232+ return;
6233+ }
6234+ if (sp->useFlag != status)
6235+ {
6236+ sp->useFlag = status;
6237+ }
6238+}
6239+
6240+int
6241+PGR_Is_Skip_Replication(char * query)
6242+{
6243+ char skip_2[256];
6244+
6245+ if ((query == NULL) ||
6246+ (MyProcPort == NULL))
6247+ {
6248+ return -1;
6249+ }
6250+ snprintf(skip_2,sizeof(skip_2),SKIP_QUERY_2,MyProcPort->user_name);
6251+ if ((strncmp(query,SKIP_QUERY_1,strlen(SKIP_QUERY_1)) == 0) ||
6252+ (strncmp(query,skip_2,strlen(skip_2)) == 0))
6253+ {
6254+ return 3;
6255+ }
6256+ if ((strncmp(query,SKIP_QUERY_3,strlen(SKIP_QUERY_3)) == 0) ||
6257+ (strncmp(query,SKIP_QUERY_4,strlen(SKIP_QUERY_4)) == 0))
6258+ {
6259+ return 1;
6260+ }
6261+ return 0;
6262+}
6263+
6264+bool
6265+PGR_Did_Commit_Transaction(void)
6266+{
6267+
6268+ int sock = -1;
6269+ int cnt = 0;
6270+ ReplicateHeader header;
6271+ char * serverName = NULL;
6272+ int portNumber=0;
6273+ char * result = NULL;
6274+ ReplicateServerInfo * sp = NULL;
6275+ ReplicateServerInfo * base = NULL;
6276+ int socket_type = 0;
6277+ char argv[ PGR_CMD_ARG_NUM ][256];
6278+ int argc = 0;
6279+ int func_no = 0;
6280+
6281+ if (ReplicateCurrentTime->useFlag != DATA_USE)
6282+ {
6283+ return false;
6284+ }
6285+ sp = PGR_get_replicate_server_info();
6286+ if (sp == NULL)
6287+ {
6288+ if (Debug_pretty_print)
6289+ elog(DEBUG1,"PGR_get_replicate_server_info get error");
6290+ return false;
6291+ }
6292+ sock = PGR_get_replicate_server_socket( sp , PGR_QUERY_SOCKET);
6293+ if (sock < 0)
6294+ {
6295+ if (Debug_pretty_print)
6296+ elog(DEBUG1,"PGR_get_replicate_server_socket fail");
6297+ return false;
6298+ }
6299+ result = malloc(PGR_MESSAGE_BUFSIZE);
6300+ if (result == NULL)
6301+ {
6302+ return false;
6303+ }
6304+ memset(result,0,PGR_MESSAGE_BUFSIZE);
6305+
6306+ serverName = sp->hostName;
6307+ portNumber = (int)sp->portNumber;
6308+ header.cmdSys = CMD_SYS_CALL;
6309+ header.cmdSts = CMD_STS_TRANSACTION_ABORT;
6310+ header.cmdType = CMD_TYPE_COMMIT_CONFIRM;
6311+ header.port = htons(PostPortNumber);
6312+ header.pid = htons(getpid());
6313+ header.tv.tv_sec = htonl(ReplicateCurrentTime->tp.tv_sec);
6314+ header.tv.tv_usec = htonl(ReplicateCurrentTime->tp.tv_usec);
6315+ header.query_size = htonl(0);
6316+ strncpy(header.dbName ,(char *)(MyProcPort->database_name),sizeof(header.dbName)-1);
6317+ strncpy(header.userName , (char *)(MyProcPort->user_name),sizeof(header.userName)-1);
6318+ strncpy(header.password , PGR_password->password, PASSWORD_MAX_LENGTH );
6319+ memcpy(header.md5Salt ,MyProcPort->md5Salt, sizeof(header.md5Salt));
6320+ memcpy(header.cryptSalt ,MyProcPort->cryptSalt, sizeof(header.cryptSalt));
6321+ if (PGRSelfHostName != NULL)
6322+ {
6323+ strncpy(header.from_host, PGRSelfHostName, HOSTNAME_MAX_LENGTH);
6324+ }
6325+ header.replicate_id = htonl(ReplicationLog_Info.PGR_Replicate_ID);
6326+ header.request_id = 0;
6327+
6328+ base = sp;
6329+ PGR_Sock_To_Replication_Server = sock;
6330+
6331+ cnt = 0;
6332+ while (send_replicate_packet(sock,&header,"") != STATUS_OK)
6333+ {
6334+ if (cnt > MAX_RETRY_TIMES )
6335+ {
6336+ sock = get_new_replication_socket( base, sp, socket_type);
6337+ if (sock < 0)
6338+ {
6339+ if (Debug_pretty_print)
6340+ elog(DEBUG1,"all replication servers may be down");
6341+ PGR_Stand_Alone->is_stand_alone = true;
6342+ free(result);
6343+ result = NULL;
6344+ return false;
6345+ }
6346+ PGR_Sock_To_Replication_Server = sock;
6347+ cnt = 0;
6348+ }
6349+ cnt ++;
6350+ }
6351+
6352+ if (PGR_recv_replicate_result(sock,result,6) < 0)
6353+ {
6354+ free(result);
6355+ result = NULL;
6356+ return false;
6357+ }
6358+ /* read answer */
6359+ argc = set_command_args(argv,result);
6360+ if (argc >= 1)
6361+ {
6362+ func_no = atoi(argv[0]);
6363+ if (func_no == PGR_TRANSACTION_CONFIRM_ANSWER_FUNC_NO)
6364+ {
6365+ /* the transaction was commited in other server */
6366+ if (atoi(argv[1]) == PGR_ALREADY_COMMITTED)
6367+ {
6368+ free(result);
6369+ result = NULL;
6370+ return true;
6371+ }
6372+ }
6373+ }
6374+ free(result);
6375+ result = NULL;
6376+ return false;
6377+}
6378+
6379+int
6380+PGRsend_system_command(char cmdSts, char cmdType)
6381+{
6382+ ReplicateServerInfo * sp = NULL;
6383+ int sock = -1;
6384+ int socket_type = 0;
6385+ char * result = NULL;
6386+ char * serverName = NULL;
6387+ int portNumber=0;
6388+ ReplicateHeader header;
6389+ int cnt = 0;
6390+ ReplicateServerInfo * base = NULL;
6391+
6392+ sp = PGR_get_replicate_server_info();
6393+ if (sp == NULL)
6394+ {
6395+ if (Debug_pretty_print)
6396+ elog(DEBUG1,"PGR_get_replicate_server_info get error");
6397+ return STATUS_ERROR;
6398+ }
6399+ sock = PGR_get_replicate_server_socket( sp , PGR_QUERY_SOCKET);
6400+ if (sock < 0)
6401+ {
6402+ if (Debug_pretty_print)
6403+ elog(DEBUG1,"PGR_get_replicate_server_socket fail");
6404+ return STATUS_ERROR;
6405+ }
6406+ result = malloc(PGR_MESSAGE_BUFSIZE);
6407+ if (result == NULL)
6408+ {
6409+ return STATUS_ERROR;
6410+ }
6411+ memset(result,0,PGR_MESSAGE_BUFSIZE);
6412+
6413+ serverName = sp->hostName;
6414+ portNumber = (int)sp->portNumber;
6415+ header.cmdSys = CMD_SYS_CALL;
6416+ header.cmdSts = cmdSts;
6417+ header.cmdType = cmdType;
6418+ header.port = htons(PostPortNumber);
6419+ header.pid = htons(getpid());
6420+ header.tv.tv_sec = htonl(ReplicateCurrentTime->tp.tv_sec);
6421+ header.tv.tv_usec = htonl(ReplicateCurrentTime->tp.tv_usec);
6422+ header.query_size = htonl(0);
6423+ strncpy(header.dbName ,(char *)(MyProcPort->database_name),sizeof(header.dbName)-1);
6424+ strncpy(header.userName , (char *)(MyProcPort->user_name),sizeof(header.userName)-1);
6425+ strncpy(header.password , PGR_password->password, PASSWORD_MAX_LENGTH );
6426+ memcpy(header.md5Salt ,MyProcPort->md5Salt, sizeof(header.md5Salt));
6427+ memcpy(header.cryptSalt ,MyProcPort->cryptSalt, sizeof(header.cryptSalt));
6428+ if (PGRSelfHostName != NULL)
6429+ {
6430+ strncpy(header.from_host, PGRSelfHostName, HOSTNAME_MAX_LENGTH);
6431+ }
6432+ header.replicate_id = htonl(ReplicationLog_Info.PGR_Replicate_ID);
6433+ header.request_id = 0;
6434+
6435+ base = sp;
6436+ PGR_Sock_To_Replication_Server = sock;
6437+ cnt = 0;
6438+ while (send_replicate_packet(sock,&header,"") != STATUS_OK)
6439+ {
6440+ if (cnt > MAX_RETRY_TIMES )
6441+ {
6442+ sock = get_new_replication_socket( base, sp, socket_type);
6443+ if (sock < 0)
6444+ {
6445+ if (Debug_pretty_print)
6446+ elog(DEBUG1,"all replication servers may be down");
6447+ PGR_Stand_Alone->is_stand_alone = true;
6448+ free(result);
6449+ result = NULL;
6450+ return STATUS_ERROR;
6451+ }
6452+ PGR_Sock_To_Replication_Server = sock;
6453+ cnt = 0;
6454+ }
6455+ cnt ++;
6456+ }
6457+ free(result);
6458+ result = NULL;
6459+ return STATUS_OK;
6460+}
6461+
/*
 * get_hostName
 *
 * Extract a single-quoted host name in place: every single quote in `str`
 * is overwritten with NUL and a pointer to the character following the
 * first quote is returned, so "'db1'" yields "db1" and "''" yields "".
 *
 * Returns NULL when `str` is NULL or contains no single quote.
 *
 * The earlier loop advanced twice after each quote.  That skipped the next
 * character unexamined (so "''" came back as "'") and, when the closing
 * quote was the last character, moved the cursor past the terminating NUL
 * and kept scanning whatever followed the string.
 */
static char *
get_hostName(char * str)
{
    char       *top = NULL;
    char       *p;

    if (str == NULL)
        return NULL;

    for (p = str; *p != '\0'; p++)
    {
        if (*p != '\'')
            continue;

        *p = '\0';
        if (top == NULL)
            top = p + 1;        /* the name starts right after the first quote */
    }
    return top;
}
6484+
/*
 * PGR_Remove_Comment
 *
 * Skip leading whitespace and leading "--" / "//" line comments.
 *
 * Returns a pointer into `str` at the first character that is neither
 * whitespace nor part of a leading comment (possibly the terminating NUL),
 * or NULL when `str` is NULL.  The string itself is not modified.
 *
 * Fixes over the earlier version:
 *  - the comment markers are tested with strncmp(); memcmp() with a length
 *    of 2 could read one byte past the terminating NUL when only
 *    whitespace was left;
 *  - a NULL argument is tolerated;
 *  - isspace() receives an unsigned char value.
 */
char *
PGR_Remove_Comment(char * str)
{
    char       *p;

    if (str == NULL)
        return NULL;

    p = str;
    for (;;)
    {
        while (isspace((unsigned char) *p))
            p++;

        if ((strncmp(p, "--", 2) != 0) &&
            (strncmp(p, "//", 2) != 0))
            break;              /* real text, or end of string */

        /* drop the comment up to, but not including, the line end */
        while ((*p != '\n') && (*p != '\0'))
            p++;
    }
    return p;
}
6509+
6510+void
6511+PGR_Force_Replicate_Query(void)
6512+{
6513+ if (PGR_Retry_Query.useFlag == DATA_USE)
6514+ {
6515+ PGR_Send_Replicate_Command(PGR_Retry_Query.query_string,
6516+ PGR_Retry_Query.query_len,
6517+ PGR_Retry_Query.cmdSts,
6518+ PGR_Retry_Query.cmdType);
6519+ }
6520+}
6521+
6522+void
6523+PGR_Notice_DeadLock(void)
6524+{
6525+ ReplicateHeader header;
6526+
6527+ memset(&header,0,sizeof(ReplicateHeader));
6528+ header.cmdSys = CMD_SYS_CALL;
6529+ header.cmdSts = CMD_STS_NOTICE;
6530+ header.cmdType = CMD_TYPE_DEADLOCK_DETECT;
6531+ header.query_size = 0;
6532+ send_replicate_packet(PGR_Sock_To_Replication_Server,&header,(char *)NULL);
6533+}
6534+
6535+void
6536+PGR_Set_Cluster_Status(int status)
6537+{
6538+ if (ClusterDBData != NULL)
6539+ {
6540+ if (ClusterDBData->status != status)
6541+ {
6542+ ClusterDBData->status = status;
6543+ }
6544+ }
6545+}
6546+
6547+int
6548+PGR_Get_Cluster_Status(void)
6549+{
6550+ if (ClusterDBData != NULL)
6551+ {
6552+ return (ClusterDBData->status);
6553+ }
6554+ return 0;
6555+}
6556+
6557+int
6558+PGR_Check_Replicate_Server_Status(ReplicateServerInfo * sp)
6559+{
6560+ ReplicateHeader header;
6561+ char * result = NULL;
6562+ int status;
6563+ int fdP;
6564+
6565+ result = malloc(PGR_MESSAGE_BUFSIZE + 4);
6566+ if (result == NULL)
6567+ {
6568+ if (Debug_pretty_print)
6569+ elog(DEBUG1,"malloc failed in PGR_Check_Replicate_Server_Status()");
6570+ return STATUS_ERROR;
6571+ }
6572+
6573+ memset(&header, 0, sizeof(ReplicateHeader));
6574+ memset(result, 0, PGR_MESSAGE_BUFSIZE + 4);
6575+
6576+ header.cmdSys = CMD_SYS_PREREPLICATE;
6577+ header.cmdSts = CMD_STS_OTHER;
6578+ header.cmdType = CMD_TYPE_OTHER;
6579+ header.port = htons(PostPortNumber);
6580+ header.pid = htons(getpid());
6581+ header.query_size = 0;
6582+ strncpy(header.dbName ,(char *)(MyProcPort->database_name),sizeof(header.dbName)-1);
6583+ strncpy(header.userName , (char *)(MyProcPort->user_name),sizeof(header.userName)-1);
6584+ strncpy(header.password , PGR_password->password, PASSWORD_MAX_LENGTH );
6585+ memcpy(header.md5Salt ,MyProcPort->md5Salt, sizeof(header.md5Salt));
6586+ memcpy(header.cryptSalt ,MyProcPort->cryptSalt, sizeof(header.cryptSalt));
6587+ header.request_id = htonl(get_next_request_id());
6588+ header.rlog = 0;
6589+ if (PGRSelfHostName != NULL) {
6590+ strncpy(header.from_host, PGRSelfHostName, HOSTNAME_MAX_LENGTH);
6591+ }
6592+
6593+ /* open a new socket for lifecheck */
6594+ if ((status = PGR_Create_Socket_Connect(&fdP, sp->hostName, sp->portNumber)) == STATUS_ERROR) {
6595+ if (Debug_pretty_print) {
6596+ elog(DEBUG1,"create socket failed in PGR_Check_Replicate_Server_Status()");
6597+ }
6598+
6599+ /* status = STATUS_OK */
6600+ } else {
6601+ if ((status = send_replicate_packet(fdP, &header, (char *)NULL)) == STATUS_OK) {
6602+ /* receive result to check for possible deadlock */
6603+ status = (0 >= PGR_recv_replicate_result(fdP, result ,0))
6604+ ? STATUS_OK : STATUS_ERROR;
6605+ }
6606+ }
6607+
6608+ free(result);
6609+ PGR_Close_Sock(&fdP);
6610+
6611+ return status;
6612+}
6613+
6614+static int
6615+return_current_oid(void)
6616+{
6617+ char msg[PGR_MESSAGE_BUFSIZE];
6618+
6619+ LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
6620+
6621+ if (ShmemVariableCache->nextOid < ((Oid) FirstBootstrapObjectId))
6622+ {
6623+ ShmemVariableCache->nextOid = FirstBootstrapObjectId;
6624+ ShmemVariableCache->oidCount = 0;
6625+ }
6626+
6627+ if (ShmemVariableCache->oidCount == 0)
6628+ {
6629+ XLogPutNextOid(ShmemVariableCache->nextOid + VAR_OID_PREFETCH);
6630+ ShmemVariableCache->oidCount = VAR_OID_PREFETCH;
6631+ }
6632+ LWLockRelease(OidGenLock);
6633+
6634+ memset(msg,0,sizeof(msg));
6635+ snprintf(msg, sizeof(msg), "%u", ShmemVariableCache->nextOid);
6636+ if (PGR_Check_Lock.dest == TO_FRONTEND)
6637+ {
6638+ pq_puttextmessage('C',msg);
6639+ pq_flush();
6640+ }
6641+ else
6642+ {
6643+ send_response_to_replication_server(msg);
6644+ }
6645+ return STATUS_OK;
6646+}
6647+
6648+ static int
6649+ sync_oid(char * oid)
6650+ {
6651+ uint32_t next_oid = 0;
6652+ int offset = 0;
6653+ char msg[PGR_MESSAGE_BUFSIZE];
6654+ /* Move the shared nextOid counter forward to the OID after the decimal one in oid, then report it. */
6655+ /* Parse before locking, so that a bad argument cannot return with OidGenLock still held. */
6656+ next_oid = (oid != NULL) ? strtoul(oid, NULL, 10) : 0;
6657+ if (next_oid == 0)
6658+ return STATUS_ERROR;
6659+ next_oid ++;
6660+ LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
6661+ offset = next_oid - ShmemVariableCache->nextOid ;
6662+ if (offset <= 0)
6663+ {
6664+ LWLockRelease(OidGenLock); /* this early return used to leak the lock */
6665+ return STATUS_ERROR;
6666+ }
6667+ if (next_oid < FirstBootstrapObjectId)
6668+ {
6669+ ShmemVariableCache->nextOid = FirstBootstrapObjectId;
6670+ ShmemVariableCache->oidCount = 0;
6671+ }
6672+ /* If we run out of logged for use oids then we must log more (compared without a subtraction that could wrap) */
6673+ while (ShmemVariableCache->oidCount <= (uint32_t) offset)
6674+ {
6675+ offset -= (ShmemVariableCache->oidCount) ;
6676+ (ShmemVariableCache->nextOid) += (ShmemVariableCache->oidCount);
6677+ XLogPutNextOid(ShmemVariableCache->nextOid + VAR_OID_PREFETCH);
6678+ ShmemVariableCache->oidCount = VAR_OID_PREFETCH;
6679+ }
6680+ (ShmemVariableCache->nextOid) += offset;
6681+ (ShmemVariableCache->oidCount) -= offset;
6682+ next_oid = ShmemVariableCache->nextOid; /* value to report, read while still locked */
6683+ LWLockRelease(OidGenLock);
6684+
6685+ memset(msg,0,sizeof(msg));
6686+ snprintf(msg, sizeof(msg), "%u", next_oid);
6687+ if (PGR_Check_Lock.dest == TO_FRONTEND)
6688+ {
6689+ pq_puttextmessage('C',msg);
6690+ pq_flush();
6691+ }
6692+ else
6693+ {
6694+ send_response_to_replication_server(msg);
6695+ }
6696+ return STATUS_OK;
6697+ }
6698+
6699+ int
6700+ PGR_lo_import(char * filename)
6701+ {
6702+ LOArgs *request;
6703+ char * reply = NULL;
6704+ int name_len = 0;
6705+ int request_size = 0;
6706+
6707+ /* each of these states returns STATUS_OK without sending anything */
6708+ if ((PGR_Is_Replicated_Query == true) ||
6709+ (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION) ||
6710+ (PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6711+ (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6712+ {
6713+ return STATUS_OK;
6714+ }
6715+
6716+ /* sizeof(LOArgs) plus the name length is sent; 4 further zeroed bytes are allocated */
6717+ name_len = strlen(filename);
6718+ request_size = sizeof(LOArgs) + name_len;
6719+ request = (LOArgs *)malloc(request_size + 4);
6720+ if (request == (LOArgs *)NULL)
6721+ {
6722+ return STATUS_ERROR;
6723+ }
6724+ memset(request, 0, request_size + 4);
6725+ request->arg1 = htonl((uint32_t)name_len);
6726+ memcpy(request->buf, filename, name_len);
6727+
6728+ /* arg1 = name length (network byte order), buf = the name itself */
6729+ reply = PGR_Send_Replicate_Command((char *)request,
6730+ request_size,
6731+ CMD_STS_LARGE_OBJECT,
6732+ CMD_TYPE_LO_IMPORT);
6733+ free(request);
6734+
6735+ if (reply == NULL)
6736+ {
6737+ return STATUS_ERROR;
6738+ }
6739+
6740+ free(reply);
6741+ return STATUS_OK;
6742+ }
6743+
6744+ int
6745+ PGR_lo_create(int flags)
6746+ {
6747+ LOArgs request;
6748+ char * reply = NULL;
6749+
6750+ /* each of these states returns STATUS_OK without sending anything */
6751+ if ((PGR_Is_Replicated_Query == true) ||
6752+ (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION) ||
6753+ (PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6754+ (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6755+ {
6756+ return STATUS_OK;
6757+ }
6758+
6759+ /* arg1 = flags in network byte order, every other field zero */
6760+ memset(&request, 0, sizeof(request));
6761+ request.arg1 = htonl(flags);
6762+
6763+ reply = PGR_Send_Replicate_Command((char *)&request,
6764+ sizeof(LOArgs),
6765+ CMD_STS_LARGE_OBJECT,
6766+ CMD_TYPE_LO_CREATE);
6767+
6768+ if (reply == NULL)
6769+ {
6770+ return STATUS_ERROR;
6771+ }
6772+
6773+ free(reply);
6774+ return STATUS_OK;
6775+ }
6776+
6777+ int
6778+ PGR_lo_open(Oid lobjId,int32 mode)
6779+ {
6780+ LOArgs request;
6781+ char * reply = NULL;
6782+
6783+ /* each of these states returns STATUS_OK without sending anything */
6784+ if ((PGR_Is_Replicated_Query == true) ||
6785+ (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION) ||
6786+ (PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6787+ (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6788+ {
6789+ return STATUS_OK;
6790+ }
6791+
6792+ /* arg1 = large object id, arg2 = mode, both in network byte order */
6793+ memset(&request, 0, sizeof(request));
6794+ request.arg1 = htonl((uint32_t)lobjId);
6795+ request.arg2 = htonl((uint32_t)mode);
6796+
6797+ reply = PGR_Send_Replicate_Command((char *)&request,
6798+ sizeof(LOArgs),
6799+ CMD_STS_LARGE_OBJECT,
6800+ CMD_TYPE_LO_OPEN);
6801+
6802+ if (reply == NULL)
6803+ {
6804+ return STATUS_ERROR;
6805+ }
6806+
6807+ free(reply);
6808+ return STATUS_OK;
6809+ }
6810+
6811+ int
6812+ PGR_lo_close(int32 fd)
6813+ {
6814+ LOArgs request;
6815+ char * reply = NULL;
6816+
6817+ /* each of these states returns STATUS_OK without sending anything */
6818+ if ((PGR_Is_Replicated_Query == true) ||
6819+ (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION) ||
6820+ (PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6821+ (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6822+ {
6823+ return STATUS_OK;
6824+ }
6825+
6826+ /* arg1 = descriptor in network byte order, every other field zero */
6827+ memset(&request, 0, sizeof(request));
6828+ request.arg1 = htonl((uint32_t)fd);
6829+
6830+ reply = PGR_Send_Replicate_Command((char *)&request,
6831+ sizeof(LOArgs),
6832+ CMD_STS_LARGE_OBJECT,
6833+ CMD_TYPE_LO_CLOSE);
6834+
6835+ if (reply == NULL)
6836+ {
6837+ return STATUS_ERROR;
6838+ }
6839+
6840+ free(reply);
6841+ return STATUS_OK;
6842+ }
6843+
6844+ int
6845+ PGR_lo_write(int fd, char *buf, int len)
6846+ {
6847+ LOArgs *request = NULL;
6848+ char * reply = NULL;
6849+ int request_size = 0;
6850+
6851+ /* each of these states returns STATUS_OK without sending anything */
6852+ if ((PGR_Is_Replicated_Query == true) ||
6853+ (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION) ||
6854+ (PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6855+ (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6856+ {
6857+ return STATUS_OK;
6858+ }
6859+
6860+ /* sizeof(LOArgs) plus len bytes are sent; 4 further zeroed bytes are allocated */
6861+ request_size = sizeof(LOArgs) + len;
6862+ request = malloc(request_size + 4);
6863+ if (request == (LOArgs *)NULL)
6864+ {
6865+ return STATUS_ERROR;
6866+ }
6867+ memset(request, 0, request_size + 4);
6868+ request->arg1 = htonl((uint32_t)fd);
6869+ request->arg2 = htonl((uint32_t)len);
6870+ memcpy(request->buf, buf, len);
6871+ reply = PGR_Send_Replicate_Command((char *)request,
6872+ request_size,
6873+ CMD_STS_LARGE_OBJECT,
6874+ CMD_TYPE_LO_WRITE);
6875+ free(request);
6876+
6877+ if (reply == NULL)
6878+ {
6879+ return STATUS_ERROR;
6880+ }
6881+
6882+ free(reply);
6883+ return STATUS_OK;
6884+ }
6885+
6886+ int
6887+ PGR_lo_lseek(int32 fd, int32 offset, int32 whence)
6888+ {
6889+ LOArgs request;
6890+ char * reply = NULL;
6891+
6892+ /* each of these states returns STATUS_OK without sending anything */
6893+ if ((PGR_Is_Replicated_Query == true) ||
6894+ (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION) ||
6895+ (PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6896+ (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6897+ {
6898+ return STATUS_OK;
6899+ }
6900+
6901+ /* arg1 = descriptor, arg2 = offset, arg3 = whence, all in network byte order */
6902+ memset(&request, 0, sizeof(request));
6903+ request.arg1 = htonl((uint32_t)fd);
6904+ request.arg2 = htonl((uint32_t)offset);
6905+ request.arg3 = htonl((uint32_t)whence);
6906+
6907+ reply = PGR_Send_Replicate_Command((char *)&request,
6908+ sizeof(LOArgs),
6909+ CMD_STS_LARGE_OBJECT,
6910+ CMD_TYPE_LO_LSEEK);
6911+
6912+ if (reply == NULL)
6913+ {
6914+ return STATUS_ERROR;
6915+ }
6916+
6917+ free(reply);
6918+ return STATUS_OK;
6919+ }
6920+
6921+ int
6922+ PGR_lo_unlink(Oid lobjId)
6923+ {
6924+ LOArgs request;
6925+ char * reply = NULL;
6926+
6927+ /* each of these states returns STATUS_OK without sending anything */
6928+ if ((PGR_Is_Replicated_Query == true) ||
6929+ (PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION) ||
6930+ (PGR_Retry_Query.cmdSts != CMD_STS_QUERY) ||
6931+ (PGR_Retry_Query.cmdType != CMD_TYPE_SELECT))
6932+ {
6933+ return STATUS_OK;
6934+ }
6935+
6936+ /* arg1 = large object id in network byte order, every other field zero */
6937+ memset(&request, 0, sizeof(request));
6938+ request.arg1 = htonl((uint32_t)lobjId);
6939+
6940+ reply = PGR_Send_Replicate_Command((char *)&request,
6941+ sizeof(LOArgs),
6942+ CMD_STS_LARGE_OBJECT,
6943+ CMD_TYPE_LO_UNLINK);
6944+
6945+ if (reply == NULL)
6946+ {
6947+ return STATUS_ERROR;
6948+ }
6949+
6950+ free(reply);
6951+ return STATUS_OK;
6952+ }
6953+
6954+ Oid
6955+ PGRGetNewObjectId(Oid last_id)
6956+ {
6957+ /*
6958+ * Return the id that follows last_id.  When last_id is 0 the value
6959+ * comes from PGRget_replication_id() instead of being derived from
6960+ * last_id.
6961+ */
6962+ if (last_id != 0)
6963+ {
6964+ return last_id + 1;
6965+ }
6966+
6967+ return (Oid)PGRget_replication_id();
6968+ }
6969+
6970+ int
6971+ PGR_Send_Input_Message(char cmdType,StringInfo input_message)
6972+ {
6973+ char * payload = NULL;
6974+ int payload_len = 0;
6975+ char * reply = NULL;
6976+
6977+ /* a missing message is an error; an already replicated query is reported as done */
6978+ if (input_message == NULL)
6979+ {
6980+ return STATUS_ERROR;
6981+ }
6982+ if (PGR_Is_Replicated_Query == true)
6983+ {
6984+ return STATUS_OK;
6985+ }
6986+ payload = input_message->data;
6987+ payload_len = input_message->len+1;
6988+
6989+ /* check setting of configuration value */
6990+ if ((PGRnotReplicatePreparedSelect == true) &&
6991+ (is_concerned_with_prepared_select(cmdType, payload+1) == true))
6992+ {
6993+ return STATUS_OK;
6994+ }
6995+
6996+ /* send the message data (len+1 bytes) as a CMD_STS_PREPARE command of type cmdType */
6997+ reply = PGR_Send_Replicate_Command(payload,payload_len, CMD_STS_PREPARE,cmdType);
6998+ if (reply == NULL)
6999+ {
7000+ return STATUS_ERROR;
7001+ }
7002+
7003+ /* a reply was received: call PGR_Reload_Start_Time() and drop the reply buffer */
7004+ PGR_Reload_Start_Time();
7005+ free(reply);
7006+
7007+ return STATUS_OK;
7008+ }
7009+
7010+ static bool
7011+ is_concerned_with_prepared_select(char cmdType, char * query_string)
7012+ {
7013+ int kind;
7014+
7015+ /* a CMD_TYPE_P_PARSE message may switch pgr_skip_in_prepared_query on, depending on its statement */
7016+ if (cmdType == CMD_TYPE_P_PARSE)
7017+ {
7018+ kind = parse_message(query_string);
7019+ if (kind == PGR_MESSAGE_SELECT)
7020+ {
7021+ pgr_skip_in_prepared_query = true;
7022+ }
7023+ else if (kind == PGR_MESSAGE_PREPARE)
7024+ {
7025+ if (is_prepared_as_select(query_string) == true)
7026+ pgr_skip_in_prepared_query = true;
7027+ }
7028+ else if ((kind == PGR_MESSAGE_EXECUTE) || (kind == PGR_MESSAGE_DEALLOCATE))
7029+ {
7030+ if (is_statement_as_select(query_string) == true)
7031+ pgr_skip_in_prepared_query = true;
7032+ }
7033+ if (pgr_skip_in_prepared_query == true)
7034+ {
7035+ return true;
7036+ }
7037+ }
7038+ /* while the flag is set every message yields true; a CMD_TYPE_P_SYNC message also clears it */
7039+ if (pgr_skip_in_prepared_query != true)
7040+ {
7041+ return false;
7042+ }
7043+ if (cmdType == CMD_TYPE_P_SYNC)
7044+ {
7045+ pgr_skip_in_prepared_query = false;
7046+ }
7047+ return true;
7048+ }
7049+
7050+ static int
7051+ skip_non_blank(char * ptr, int max)
7052+ {
7053+ int i= 0;
7054+ /* Length of the word at ptr; the word ends at white space or a parenthesis.  -1 once i exceeds max. */
7055+ while(!isspace((unsigned char) *(ptr+i)))
7056+ {
7057+ /* test the current character; this used to look at ptr[1] on every pass */
7058+ if ((*(ptr+i) == '(') || (*(ptr+i) == ')'))
7059+ return i;
7060+ i++;
7061+ if (i > max)
7062+ return -1;
7063+ }
7064+ return i;
7065+ }
7066+
7067+ static int
7068+ skip_blank(char * ptr, int max)
7069+ {
7070+ int count;
7071+ /* number of leading white-space characters at ptr, or -1 once the count exceeds max */
7072+ for (count = 0; isspace(*(ptr+count)); )
7073+ {
7074+ if (++count > max)
7075+ return -1;
7076+ }
7077+ return count;
7078+ }
7079+
7080+ static int
7081+ parse_message(char * query_string)
7082+ {
7083+ char * word = NULL;
7084+ int blanks = 0;
7085+ int len = 0;
7086+
7087+ /* Classify a statement by its first keyword; anything unrecognized is PGR_MESSAGE_OTHER. */
7088+ if (query_string == NULL)
7089+ {
7090+ return PGR_MESSAGE_OTHER;
7091+ }
7092+ len = strlen (query_string);
7093+ if (len <= 0)
7094+ {
7095+ return PGR_MESSAGE_OTHER;
7096+ }
7097+
7098+ /* skip space */
7099+ blanks = skip_blank(query_string, len);
7100+ if (blanks < 0)
7101+ return PGR_MESSAGE_OTHER;
7102+ word = query_string + blanks;
7103+ /* case-insensitive comparison over the length of each keyword only */
7104+ if (strncasecmp(word,"SELECT",strlen("SELECT")) == 0)
7105+ {
7106+ return PGR_MESSAGE_SELECT;
7107+ }
7108+ if (strncasecmp(word,"PREPARE",strlen("PREPARE")) == 0)
7109+ {
7110+ return PGR_MESSAGE_PREPARE;
7111+ }
7112+ if (strncasecmp(word,"EXECUTE",strlen("EXECUTE")) == 0)
7113+ {
7114+ return PGR_MESSAGE_EXECUTE;
7115+ }
7116+ if (strncasecmp(word,"DEALLOCATE",strlen("DEALLOCATE")) == 0)
7117+ {
7118+ return PGR_MESSAGE_DEALLOCATE;
7119+ }
7120+ return PGR_MESSAGE_OTHER;
7121+ }
7122+
7123+ static bool
7124+ is_prepared_as_select(char * query_string)
7125+ {
7126+ char * ptr =NULL;
7127+ int rtn = 0; /* result of the last skip_* call: characters skipped, negative on failure */
7128+ int i = 0; /* current position inside query_string */
7129+ int len = 0;
7130+ int args =0; /* nesting depth of the parenthesized argument list */
7131+ if (query_string == NULL)
7132+ {
7133+ return false;
7134+ }
7135+ ptr = (char *)query_string;
7136+ len = strlen (query_string);
7137+ i = 0;
7138+ /* skip "PREPARE" word (the first word is skipped without being compared) */
7139+ rtn = skip_non_blank(ptr+i, len-i);
7140+ if (rtn < 0)
7141+ return false;
7142+ i += rtn;
7143+ /* skip space */
7144+ rtn = skip_blank(ptr+i, len-i);
7145+ if (rtn < 0)
7146+ return false;
7147+ i += rtn;
7148+ /* skip plan_name */
7149+ rtn = skip_non_blank(ptr+i, len-i);
7150+ if (rtn < 0)
7151+ return false;
7152+ i += rtn;
7153+ /* skip space */
7154+ rtn = skip_blank(ptr+i, len-i);
7155+ if (rtn < 0)
7156+ return false;
7157+ i += rtn;
7158+ /* skip args: an optional "( ... )" list, nested parentheses are counted */
7159+ args = 0;
7160+ if (*(ptr+i) == '(')
7161+ {
7162+ args ++;
7163+ i++;
7164+ while(args > 0)
7165+ {
7166+ if (*(ptr+i) == ')')
7167+ args --;
7168+ else if (*(ptr+i) == '(')
7169+ args ++;
7170+ i++;
7171+ if (i >= len) /* ran out of text, possibly before the list was closed */
7172+ return false;
7173+ }
7174+ /* skip space */
7175+ rtn = skip_blank(ptr+i, len-i);
7176+ if (rtn < 0)
7177+ return false;
7178+ i += rtn;
7179+ }
7180+ /* skip "AS" word: the position simply advances by two characters, the text is not compared */
7181+ i += strlen("AS");
7182+ if (i >= len)
7183+ return false;
7184+ /* skip space */
7185+ rtn = skip_blank(ptr+i, len-i);
7186+ if (rtn < 0)
7187+ return false;
7188+ i += rtn;
7189+ /* check "SELECT" word (case-insensitive, prefix match) */
7190+ if (len-i < strlen("SELECT"))
7191+ return false;
7192+ if (!strncasecmp(ptr+i,"SELECT",strlen("SELECT")))
7193+ {
7194+ return true;
7195+ }
7196+ return false;
7197+
7198+ }
7199+
7200+ static bool
7201+ is_statement_as_select(char * query_string)
7202+ {
7203+ char * ptr =NULL;
7204+ int rtn = 0;
7205+ int i = 0;
7206+ int j = 0;
7207+ int len = 0;
7208+ bool result = false;
7209+ PrepareStmt stmt;
7210+ char * name = NULL;
7211+ /* Take the name that follows the first word and ask PGR_is_select_prepared_statement() about it. */
7212+ if (query_string == NULL)
7213+ return false;
7214+
7215+ ptr = (char *)query_string;
7216+ len = strlen (query_string);
7217+ i = 0;
7218+ /* skip "EXECUTE" or "DEALLOCATE" word */
7219+ rtn = skip_non_blank(ptr+i, len-i);
7220+ if (rtn < 0)
7221+ return false;
7222+ i += rtn;
7223+ /* skip space */
7224+ rtn = skip_blank(ptr+i, len-i);
7225+ if (rtn < 0)
7226+ return false;
7227+ i += rtn;
7228+ /* len + 1 bytes: the name cannot outgrow the query and always keeps its terminating NUL */
7229+ if ((name = malloc(len + 1)) == NULL)
7230+ return false;
7231+ memset(name,0,len + 1);
7232+ j = 0;
7233+ /* NOTE(review): only alphanumerics are copied, so a name containing '_' is cut short - confirm this is intended */
7234+ while((i < len) && isalnum((unsigned char) *(ptr+i)))
7235+ {
7236+ *(name+j) = *(ptr+i);
7237+ i++;
7238+ j++;
7239+ }
7240+ stmt.name = name;
7241+ result = PGR_is_select_prepared_statement(&stmt);
7242+ free(name);
7243+ return result;
7244+ }
7245+
7246+ bool
7247+ PGR_is_select_prepare_query(void)
7248+ {
7249+ /* without debug_query_string there is no statement text to inspect */
7250+ if (debug_query_string == NULL)
7251+ return false;
7252+
7253+ return (is_prepared_as_select((char *)debug_query_string));
7254+ }
7255+
7256+ char *
7257+ PGR_get_md5salt(char * md5Salt, char * string)
7258+ {
7259+ char buf[24];
7260+ char * ptr = NULL;
7261+ int len = 0;
7262+ int i = 0;
7263+ int cnt = 0;
7264+ int index = 0;
7265+ bool set_flag = false;
7266+ /* Each "(number)" group found in string becomes one byte of md5Salt, in order of appearance. */
7267+ ptr = (char *)md5Salt;
7268+ len = strlen(string);
7269+ for ( i = 0 ; i < len ; i ++)
7270+ {
7271+ if (*(string+i) == ')')
7272+ {
7273+ buf[index] = '\0'; /* terminate without advancing, so repeated ')' cannot walk past buf */
7274+ *ptr = (char)atoi(buf);
7275+ set_flag = false;
7276+ }
7277+ if (set_flag && (index < (int) sizeof(buf) - 1)) /* an over-long group is truncated instead of overrunning buf */
7278+ {
7279+ buf[index++] = *(string+i);
7280+ }
7281+ if (*(string+i) == '(')
7282+ {
7283+ set_flag = true;
7284+ index = 0;
7285+ ptr = (char *)(md5Salt + cnt); /* NOTE(review): cnt is not bounded by the size of md5Salt - callers must size it */
7286+ cnt++;
7287+ }
7288+ }
7289+ return md5Salt;
7290+ }
7291+
7292+#endif /* USE_REPLICATION */
7293diff -aruN postgresql-8.2.4/src/backend/libpq/replicate_com.c pgcluster-1.7.0rc7/src/backend/libpq/replicate_com.c
7294--- postgresql-8.2.4/src/backend/libpq/replicate_com.c 1970-01-01 01:00:00.000000000 +0100
7295+++ pgcluster-1.7.0rc7/src/backend/libpq/replicate_com.c 2007-02-18 22:52:16.000000000 +0100
7296@@ -0,0 +1,675 @@
7297+/*--------------------------------------------------------------------
7298+ * FILE:
7299+ * replicate_com.c
7300+ *
7301+ * NOTE:
7302+ * This file contains the functions that the backend source calls
7303+ * for replication.
7304+ * The low-level I/O functions that these functions call are
7305+ * contained in 'replicate_com.c'.
7306+ *
7307+ *--------------------------------------------------------------------
7308+ */
7309+
7310+/*--------------------------------------
7311+ * INTERFACE ROUTINES
7312+ *
7313+ * setup/teardown:
7314+ * PGR_Close_Sock
7315+ * PGR_Free_Conf_Data
7316+ * I/O call:
7317+ * PGR_Create_Socket_Connect
7318+ * PGR_Create_Socket_Bind
7319+ * PGR_Create_Acception
7320+ * table handling:
7321+ * PGR_Get_Conf_Data
7322+ *-------------------------------------
7323+ */
7324+#ifdef USE_REPLICATION
7325+
7326+#include "postgres.h"
7327+
7328+#include <signal.h>
7329+#include <errno.h>
7330+#include <fcntl.h>
7331+#include <grp.h>
7332+#include <unistd.h>
7333+#include <ctype.h>
7334+#include <time.h>
7335+#include <sys/types.h>
7336+#include <sys/stat.h>
7337+#include <sys/socket.h>
7338+#include <sys/ipc.h>
7339+#include <sys/shm.h>
7340+#include <netdb.h>
7341+#include <netinet/in.h>
7342+#ifdef HAVE_NETINET_TCP_H
7343+#include <netinet/tcp.h>
7344+#endif
7345+#include <arpa/inet.h>
7346+#include <sys/file.h>
7347+#include <netdb.h>
7348+
7349+#include "libpq/libpq.h"
7350+#include "miscadmin.h"
7351+#include "nodes/print.h"
7352+#include "utils/guc.h"
7353+#include "parser/parser.h"
7354+#include "access/xact.h"
7355+#include "replicate_com.h"
7356+
7357+int PGR_Create_Socket_Connect(int * fdP, char * hostName , unsigned short portNumber);
7358+void PGR_Close_Sock(int * sock);
7359+int PGR_Create_Socket_Bind(int * fdP, char * hostName , unsigned short portNumber);
7360+int PGR_Create_Acception(int fd, int * sockP, char * hostName , unsigned short portNumber);
7361+int PGR_Free_Conf_Data(void);
7362+int PGR_Get_Conf_Data(char * dir , char * fname);
7363+void PGRset_recovery_packet_no(RecoveryPacket * packet, int packet_no);
7364+unsigned int PGRget_ip_by_name(char * host);
7365+int PGRget_time_value(char *str);
7366+
7367+static char * get_string(char * buf);
7368+static bool is_start_tag(char * ptr);
7369+static bool is_end_tag(char * ptr);
7370+static void init_conf_data(ConfDataType *conf);
7371+static int get_key(char * key, char * str);
7372+static int get_conf_key_value(char * key, char * value , char * str);
7373+static int add_conf_data(char *table,int rec_no, char *key,char * value);
7374+static int get_table_data(FILE * fp,char * table, int rec_no);
7375+static int get_single_data(char * str);
7376+static int get_conf_file(char * fname);
7377+
7378+/*--------------------------------------------------------------------
7379+ * SYMBOL
7380+ * PGR_Create_Socket_Connect()
7381+ * NOTES
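7382+ * create a new TCP socket and connect it to hostName/portNumber
7383+ * ARGS
7384+ * int * fdP: receives the connected socket (-1 on failure)
7385+ * char * hostName: host to connect to (must not be empty)
7386+ * unsigned short portNumber: port to connect to (values below 1000 are rejected)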
7387+ * RETURN
7388+ * OK: STATUS_OK
7389+ * NG: STATUS_ERROR
7390+ *--------------------------------------------------------------------
7391+ */
7392+ int
7393+ PGR_Create_Socket_Connect(int * fdP, char * hostName , unsigned short portNumber)
7394+ {
7395+
7396+ int sock;
7397+ size_t len = 0;
7398+ struct sockaddr_in addr;
7399+ int one = 1;
7400+ /* test hostName for NULL before looking at its first character (it used to be dereferenced first) */
7401+ if ((hostName == NULL) || (*hostName == '\0') || (portNumber < 1000))
7402+ {
7403+ * fdP = -1;
7404+ return STATUS_ERROR;
7405+ }
7406+ if ((*fdP = socket(AF_INET, SOCK_STREAM, 0)) < 0)
7407+ {
7408+ * fdP = -1;
7409+ return STATUS_ERROR;
7410+ }
7411+ if ((setsockopt(*fdP, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one))) == -1)
7412+ {
7413+ PGR_Close_Sock(fdP);
7414+ return STATUS_ERROR;
7415+ }
7416+ if (setsockopt(*fdP, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) < 0)
7417+ {
7418+ PGR_Close_Sock(fdP);
7419+ return STATUS_ERROR;
7420+ }
7421+ memset(&addr, 0, sizeof(addr)); /* start from a fully zeroed address structure */
7422+ addr.sin_family = AF_INET;
7423+ if ((hostName == NULL ) || (hostName[0] == '\0'))
7424+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
7425+ else
7426+ {
7427+ struct hostent *hp;
7428+
7429+ hp = gethostbyname(hostName);
7430+ if ((hp == NULL) || (hp->h_addrtype != AF_INET))
7431+ {
7432+ PGR_Close_Sock(fdP);
7433+ return STATUS_ERROR;
7434+ }
7435+ memmove((char *) &(addr.sin_addr), (char *) hp->h_addr, hp->h_length);
7436+ }
7437+
7438+ addr.sin_port = htons(portNumber);
7439+ len = sizeof(struct sockaddr_in);
7440+
7441+ if ((sock = connect(*fdP,(struct sockaddr*)&addr,len)) < 0)
7442+ {
7443+ PGR_Close_Sock(fdP);
7444+ return STATUS_ERROR;
7445+ }
7446+
7447+ return STATUS_OK;
7448+ }
7449+
7450+ int
7451+ PGR_Create_Socket_Bind(int * fdP, char * hostName , unsigned short portNumber)
7452+ {
7453+ /* Create a TCP socket, bind it to hostName (any address when NULL or empty) and portNumber, and listen on it. */
7454+ int err;
7455+ size_t len = 0;
7456+ struct sockaddr_in addr;
7457+ int one = 1;
7458+ memset(&addr, 0, sizeof(addr)); /* bind() should not see uninitialized bytes of the address structure */
7459+ if ((*fdP = socket(AF_INET, SOCK_STREAM, 0)) < 0)
7460+ {
7461+ return STATUS_ERROR;
7462+ }
7463+ if ((setsockopt(*fdP, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one))) == -1)
7464+ {
7465+ PGR_Close_Sock(fdP);
7466+ return STATUS_ERROR;
7467+ }
7468+ addr.sin_family = AF_INET;
7469+ if ((hostName == NULL ) || (hostName[0] == '\0'))
7470+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
7471+ else
7472+ {
7473+ struct hostent *hp;
7474+
7475+ hp = gethostbyname(hostName);
7476+ if ((hp == NULL) || (hp->h_addrtype != AF_INET))
7477+ {
7478+ PGR_Close_Sock(fdP);
7479+ return STATUS_ERROR;
7480+ }
7481+ memmove((char *) &(addr.sin_addr), (char *) hp->h_addr, hp->h_length);
7482+ }
7483+
7484+ addr.sin_port = htons(portNumber);
7485+ len = sizeof(struct sockaddr_in);
7486+
7487+ err = bind(*fdP, (struct sockaddr *) & addr, len);
7488+ if (err < 0)
7489+ {
7490+ PGR_Close_Sock(fdP);
7491+ return STATUS_ERROR;
7492+ }
7493+ err = listen(*fdP, MAX_SOCKET_QUEUE );
7494+ if (err < 0)
7495+ {
7496+ PGR_Close_Sock(fdP);
7497+ return STATUS_ERROR;
7498+ }
7499+ return STATUS_OK;
7500+ }
7501+
7502+ int
7503+ PGR_Create_Acception(int fd, int * sockP, char * hostName , unsigned short portNumber)
7504+ {
7505+ int sock;
7506+ struct sockaddr addr;
7507+ socklen_t len = sizeof(struct sockaddr); /* accept() takes a socklen_t *, not a size_t * */
7508+ int one = 1;
7509+
7510+ /*
7511+ * Accept one connection on the listening socket fd and set TCP_NODELAY and SO_KEEPALIVE on it.
7512+ * hostName and portNumber are not used.  On every failure *sockP is set to -1.
7513+ */
7514+ if ((sock = accept(fd, &addr, &len)) < 0)
7515+ {
7516+ *sockP = -1;
7517+ return STATUS_ERROR;
7518+ }
7519+ if ((setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) < 0) ||
7520+ (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one)) < 0))
7521+ {
7522+ close(sock); /* the accepted descriptor used to leak on this path */
7523+ *sockP = -1;
7524+ return STATUS_ERROR;
7525+ }
7526+ *sockP = sock;
7527+ return STATUS_OK;
7528+ }
7529+
7530+ void
7531+ PGR_Close_Sock(int * sock)
7532+ {
7533+ close( (int)*sock); /* the result of close() is not checked */
7534+ *sock = -1; /* mark the caller's descriptor variable as closed */
7535+ }
7536+
7537+ static char *
7538+ get_string(char * buf)
7539+ {
7540+ int i,len1,len2,start_flag;
7541+ char *readp, *writep;
7542+ /* Trim buf in place: drop leading white space, cut at '#', drop white space after the last counted character. */
7543+ writep = readp = buf;
7544+ i = len1 = 0;
7545+ while (*(readp +i) != '\0') /* first pass: len1 = number of non-space characters in the whole buffer */
7546+ {
7547+ if (!isspace(*(readp+ i)))
7548+ {
7549+ len1 ++;
7550+ }
7551+ i++;
7552+ }
7553+ start_flag = len2 = 0;
7554+ while (*readp != '\0') /* second pass: copy characters down towards the start of buf */
7555+ {
7556+ if (*readp == '#') /* the rest of the line is discarded */
7557+ {
7558+ *writep = '\0';
7559+ break;
7560+ }
7561+ if (isspace(*readp))
7562+ {
7563+ if ((len2 >= len1) || (!start_flag)) /* white space before the first or after the last non-space character */
7564+ {
7565+ readp++;
7566+ continue;
7567+ }
7568+ *writep = *readp; /* white space between words is kept */
7569+ }
7570+ else
7571+ {
7572+ start_flag = 1;
7573+ *writep = *readp;
7574+ len2 ++; /* non-space characters copied so far */
7575+ }
7576+ readp ++;
7577+ writep ++;
7578+ }
7579+ *writep = '\0';
7580+ return buf; /* same pointer that was passed in */
7581+ }
7582+
7583+ static bool
7584+ is_start_tag(char * ptr)
7585+ {
7586+ /* a start tag opens with '<' that is not followed by '/' */
7587+ if (*ptr != '<')
7588+ return false;
7589+
7590+ return (*(ptr+1) != '/') ? true : false;
7591+ }
7592+
7593+ static bool
7594+ is_end_tag(char * ptr)
7595+ {
7596+ /* an end tag opens with the two characters "</" */
7597+ if (*ptr != '<')
7598+ return false;
7599+
7600+ return (*(ptr+1) == '/') ? true : false;
7601+ }
7602+
7603+ static void
7604+ init_conf_data(ConfDataType *conf)
7605+ {
7606+ conf->next = NULL; /* list links first: the record is not linked to any neighbour */
7607+ conf->last = NULL;
7608+ conf->rec_no = 0;
7609+ memset(conf->value,0,sizeof(conf->value)); /* the three text fields are zero filled */
7610+ memset(conf->key,0,sizeof(conf->key));
7611+ memset(conf->table,0,sizeof(conf->table));
7612+ }
7613+
7614+ static int
7615+ get_key(char * key, char * str)
7616+ {
7617+ char * open_mark;
7618+ char * close_mark;
7619+ int skip;
7620+
7621+ /* both markers are searched from the start of str */
7622+ open_mark = strchr(str,'<');
7623+ if (open_mark == NULL)
7624+ return STATUS_ERROR;
7625+ close_mark = strchr(str,'>');
7626+ if (close_mark == NULL)
7627+ return STATUS_ERROR;
7628+
7629+ /* "</name>" has one more character than "<name>" in front of the name */
7630+ skip = (*(open_mark+1) == '/') ? 2 : 1;
7631+
7632+ /* cut str at '>' just long enough to copy the name, then restore the '>' */
7633+ *close_mark = '\0';
7634+ strcpy(key,open_mark + skip);
7635+ *close_mark = '>';
7636+
7637+ return STATUS_OK;
7638+ }
7639+
7640+ static int
7641+ get_conf_key_value(char * key, char * value , char * str)
7642+ {
7643+ int i;
7644+ int len1,len2,start_flag;
7645+ char * ptr_s,*ptr_e;
7646+ /* Split a "<key> value </key>" line: key receives the tag name, value the trimmed text after the first '>'. */
7647+ if(get_key(key,str) == STATUS_ERROR)
7648+ {
7649+ return STATUS_ERROR;
7650+ }
7651+ ptr_e = strchr(str,'>');
7652+ if (ptr_e == NULL)
7653+ {
7654+ return STATUS_ERROR;
7655+ }
7656+ ptr_s = ptr_e + 1;
7657+
7658+ len1 = 0;
7659+ while ((*ptr_s != '<') && (*ptr_s != '\0')) /* first pass: len1 = non-space characters up to the next '<' or the end */
7660+ {
7661+ if (! isspace(*ptr_s))
7662+ {
7663+ len1 ++;
7664+ }
7665+ ptr_s ++;
7666+ }
7667+ ptr_s = ptr_e + 1; /* rewind to the character after '>' for the copy pass */
7668+ i = len2 = start_flag = 0;
7669+ while ((*ptr_s != '<') && (*ptr_s != '\0'))
7670+ {
7671+ if (isspace(*ptr_s))
7672+ {
7673+ if ((len2 >= len1) || (!start_flag)) /* leading or trailing white space is not copied */
7674+ {
7675+ ptr_s ++;
7676+ continue;
7677+ }
7678+ *(value + i) = *ptr_s; /* white space inside the value is kept */
7679+ }
7680+ else
7681+ {
7682+ start_flag = 1;
7683+ *(value + i) = *ptr_s;
7684+ len2 ++; /* non-space characters copied so far */
7685+ }
7686+ i++;
7687+ ptr_s ++;
7688+ }
7689+ *(value + i) = '\0';
7690+ return STATUS_OK;
7691+ }
7692+
7693+ static int
7694+ add_conf_data(char *table,int rec_no, char *key,char * value)
7695+ {
7696+ ConfDataType * conf_data;
7697+
7698+ /* Append one (table, rec_no, key, value) record to the list kept in ConfData_Top / ConfData_End. */
7699+ conf_data = (ConfDataType *)malloc(sizeof(ConfDataType));
7700+ if (conf_data == NULL)
7701+ {
7702+ return STATUS_ERROR;
7703+ }
7704+ init_conf_data(conf_data);
7705+
7706+ /*
7707+ * The arguments are C strings that may be shorter or longer than the
7708+ * fixed-size fields.  Copy at most size-1 characters, so that nothing is
7709+ * read past the terminator of the source and the zero fill done by
7710+ * init_conf_data() always leaves a terminating NUL in the field.
7711+ */
7712+ if (table != NULL)
7713+ strncpy(conf_data->table,table,sizeof(conf_data->table) - 1);
7714+ strncpy(conf_data->key,key,sizeof(conf_data->key) - 1);
7715+ strncpy(conf_data->value,value,sizeof(conf_data->value) - 1);
7716+ conf_data->rec_no = rec_no;
7717+
7718+ /* link the record behind the current tail; the first record also becomes the head */
7719+ if (ConfData_Top == (ConfDataType *)NULL)
7720+ {
7721+ ConfData_Top = conf_data;
7722+ }
7723+ conf_data->last = (char *)ConfData_End;
7724+ if (ConfData_End != (ConfDataType *)NULL)
7725+ {
7726+ ConfData_End->next = (char *)conf_data;
7727+ }
7728+ ConfData_End = conf_data;
7729+ conf_data->next = (char *)NULL;
7730+
7731+ return STATUS_OK;
7732+ }
7733+
7734+ static int
7735+ get_table_data(FILE * fp,char * table, int rec_no)
7736+ {
7737+ char line[1024];
7738+ char key_buf[1024];
7739+ char value_buf[1024];
7740+ char * text;
7741+
7742+ /* Store the entries of one table section until the end tag named table is read. */
7743+ while (fgets(line,sizeof(line),fp) != NULL)
7744+ {
7745+ /*
7746+ * pick up a data string, ignoring lines that are empty after trimming
7747+ */
7748+ text = get_string(line);
7749+ if (*text == '\0')
7750+ {
7751+ continue;
7752+ }
7753+
7754+ if (is_end_tag(text))
7755+ {
7756+ /* the section is complete at the end tag whose name equals table */
7757+ if(get_key(key_buf,text) == STATUS_ERROR)
7758+ {
7759+ return STATUS_ERROR;
7760+ }
7761+ if (strcmp(key_buf,table) == 0)
7762+ return STATUS_OK;
7763+ }
7764+ else if (is_start_tag(text))
7765+ {
7766+ /* key/value line: stored under this table name and record number */
7767+ if(get_conf_key_value(key_buf,value_buf,text) == STATUS_ERROR)
7768+ return STATUS_ERROR;
7769+ add_conf_data(table,rec_no,key_buf,value_buf);
7770+ }
7771+ }
7772+
7773+ /* the input ended before the end tag was seen */
7774+ return STATUS_ERROR;
7775+ }
7776+
7777+ static int
7778+ get_single_data(char * str)
7779+ {
7780+ char name[1024];
7781+ char text[1024];
7782+ /* a key/value line outside any table is stored with no table name and record number 0 */
7783+ if(get_conf_key_value(name,text,str) == STATUS_ERROR)
7784+ return STATUS_ERROR;
7785+
7786+ add_conf_data(NULL,0,name,text);
7787+ return STATUS_OK;
7788+ }
7789+
7790+
7791+ static int
7792+ get_conf_file(char * fname)
7793+ {
7794+ FILE * conf_fp = NULL;
7795+ char line[1024];
7796+ char key_buf[1024];
7797+ char last_key_buf[1024];
7798+ char *text;
7799+ int rec_no = 0;
7800+
7801+ /*
7802+ * Parse the configuration file fname line by line.
7803+ */
7804+ conf_fp = fopen(fname,"r");
7805+ if (conf_fp == NULL)
7806+ {
7807+ return STATUS_ERROR;
7808+ }
7809+ memset(last_key_buf,0,sizeof(last_key_buf));
7810+ memset(key_buf,0,sizeof(key_buf));
7811+
7812+ while (fgets(line,sizeof(line),conf_fp) != NULL)
7813+ {
7814+ /*
7815+ * pick up a data string; only lines that begin with a start tag are used here
7816+ */
7817+ text = get_string(line);
7818+ if ((strlen(text) == 0) || (!is_start_tag(text)))
7819+ {
7820+ continue;
7821+ }
7822+ if(get_key(key_buf,text) == STATUS_ERROR)
7823+ {
7824+ fclose(conf_fp);
7825+ return STATUS_ERROR;
7826+ }
7827+
7828+ /* a start tag with "</" on the same line is a single key/value entry */
7829+ if (strstr(text,"</") != NULL)
7830+ {
7831+ get_single_data(text);
7832+ continue;
7833+ }
7834+
7835+ /*
7836+ * otherwise the tag opens a table: a table name different from the
7837+ * previous table's restarts the record number at 0, a repeated name continues it
7838+ */
7839+ if (strcmp(last_key_buf,key_buf) != 0)
7840+ {
7841+ rec_no = 0;
7842+ strcpy(last_key_buf,key_buf);
7843+ }
7844+ get_table_data(conf_fp,key_buf,rec_no);
7845+ rec_no ++;
7846+ }
7847+
7848+ fclose(conf_fp);
7849+ return STATUS_OK;
7850+ }
7851+
7852+ int
7853+ PGR_Free_Conf_Data(void)
7854+ {
7855+ ConfDataType * node;
7856+ ConfDataType * following;
7857+
7858+ /* an empty list is reported as STATUS_ERROR */
7859+ if (ConfData_Top == (ConfDataType *)NULL)
7860+ return STATUS_ERROR;
7861+
7862+ /* fetch each successor before the record in front of it is freed */
7863+ for (node = ConfData_Top; node != (ConfDataType *)NULL; node = following)
7864+ {
7865+ following = (ConfDataType*)node->next;
7866+ free (node);
7867+ }
7868+
7869+ ConfData_Top = ConfData_End = (ConfDataType *)NULL;
7870+ return STATUS_OK;
7871+ }
7872+
7873+ int
7874+ PGR_Get_Conf_Data(char * dir , char * fname)
7875+ {
7876+ char * path;
7877+ int rc;
7878+
7879+ /* Read the file dir/fname with get_conf_file(); the list pointers are reset (not freed) first. */
7880+ if ((dir == NULL) || ( fname == NULL))
7881+ {
7882+ return STATUS_ERROR;
7883+ }
7884+
7885+ /* room for both parts, the '/' between them and the terminating NUL */
7886+ path = malloc(strlen(dir) + strlen(fname) + 2);
7887+ if (path == NULL)
7888+ {
7889+ return STATUS_ERROR;
7890+ }
7891+ sprintf(path,"%s/%s",dir,fname);
7892+
7893+ ConfData_Top = ConfData_End = (ConfDataType * )NULL;
7894+ rc = get_conf_file(path);
7895+ free (path);
7896+ return rc;
7897+ }
7898+
7899+ void
7900+ PGRset_recovery_packet_no(RecoveryPacket * packet, int packet_no)
7901+ {
7902+ /* store packet_no converted with htons(); a NULL packet is ignored */
7903+ if (packet != NULL)
7904+ {
7905+ packet->packet_no = htons(packet_no) ;
7906+ }
7907+
7908+ }
7909+
7910+ unsigned int
7911+ PGRget_ip_by_name(char * host)
7912+ {
7913+ struct hostent *hp = NULL;
7914+ unsigned int ip = 0;
7915+ unsigned char uc = 0;
7916+ int i;
7917+ /* Resolve host and pack the first four address bytes into an unsigned int (byte 0 in the lowest bits); 0 on failure. */
7918+ if ((host == NULL) || (*host == '\0'))
7919+ {
7920+ return 0;
7921+ }
7922+ hp = gethostbyname( host );
7923+ /* only an AF_INET answer with an address entry has the four bytes read below */
7924+ if ((hp == NULL) || (hp->h_addrtype != AF_INET) || (hp->h_addr_list[0] == NULL))
7925+ return 0;
7926+
7927+ for (i = 3 ; i>= 0 ; i --)
7928+ {
7929+ uc = (unsigned char)hp->h_addr_list[0][i];
7930+ ip = ip | uc;
7931+ if (i > 0)
7932+ ip = ip << 8;
7933+ }
7934+ return ip;
7935+ }
7936+
7937+ int
7938+ PGRget_time_value(char *str)
7939+ {
7940+ char * cursor;
7941+ int multiplier = 1;
7942+
7943+ /* Return the number in str times 60 for an m/M suffix, times 3600 for h/H, times 1 otherwise; -1 for NULL. */
7944+ if (str == NULL)
7945+ return -1;
7946+
7947+ /* stop at the first character that is neither a digit nor white space */
7948+ cursor = str;
7949+ while ((*cursor != '\0') && (isdigit(*cursor) || isspace(*cursor)))
7950+ {
7951+ cursor++;
7952+ }
7953+
7954+ if (*cursor != '\0')
7955+ {
7956+ if ((*cursor == 'm') || (*cursor == 'M'))
7957+ {
7958+ multiplier = 60;
7959+ }
7960+ else if ((*cursor == 'h') || (*cursor == 'H'))
7961+ {
7962+ multiplier = 60*60;
7963+ }
7964+ /* the caller's string is cut here so that atoi() sees the number only */
7965+ *cursor = '\0';
7966+ }
7967+
7968+ return (atoi(str) * multiplier);
7969+ }
7970+
7971+#endif /* USE_REPLICATION */
7972diff -aruN postgresql-8.2.4/src/backend/main/main.c pgcluster-1.7.0rc7/src/backend/main/main.c
7973--- postgresql-8.2.4/src/backend/main/main.c 2007-01-04 01:58:01.000000000 +0100
7974+++ pgcluster-1.7.0rc7/src/backend/main/main.c 2007-02-18 22:52:16.000000000 +0100
7975@@ -316,6 +316,13 @@
7976 printf(_(" -r FILENAME send stdout and stderr to given file\n"));
7977 printf(_(" -x NUM internal use\n"));
7978
7979+#ifdef USE_REPLICATION
7980+ printf(_("\nOptions for PGCluster only:\n"));
7981+ printf(_(" -R recovery startup with rsync\n"));
7982+ printf(_(" -u recovery startup with rsync(it is not create backup files.\n"));
7983+ printf(_(" -U recovery startup with pg_dump\n"));
7984+#endif /* USE_REPLICATION */
7985+
7986 printf(_("\nPlease read the documentation for the complete list of run-time\n"
7987 "configuration settings and how to set them on the command line or in\n"
7988 "the configuration file.\n\n"
7989diff -aruN postgresql-8.2.4/src/backend/parser/gram.y pgcluster-1.7.0rc7/src/backend/parser/gram.y
7990--- postgresql-8.2.4/src/backend/parser/gram.y 2006-11-05 23:42:09.000000000 +0100
7991+++ pgcluster-1.7.0rc7/src/backend/parser/gram.y 2007-02-18 22:52:16.000000000 +0100
7992@@ -412,10 +412,10 @@
7993 QUOTE
7994
7995 READ REAL REASSIGN RECHECK REFERENCES REINDEX RELATIVE_P RELEASE RENAME
7996- REPEATABLE REPLACE RESET RESTART RESTRICT RETURNING RETURNS REVOKE RIGHT
7997+ REPEATABLE REPLACE REPLICATION RESET RESTART RESTRICT RETURNING RETURNS REVOKE RIGHT
7998 ROLE ROLLBACK ROW ROWS RULE
7999
8000- SAVEPOINT SCHEMA SCROLL SECOND_P SECURITY SELECT SEQUENCE
8001+ SAVEPOINT SCHEMA SCROLL SECOND_P SECURITY SELECT SEQUENCE SERVER
8002 SERIALIZABLE SESSION SESSION_USER SET SETOF SHARE
8003 SHOW SIMILAR SIMPLE SMALLINT SOME STABLE START STATEMENT
8004 STATISTICS STDIN STDOUT STORAGE STRICT_P SUBSTRING SUPERUSER_P SYMMETRIC
8005@@ -1224,6 +1224,12 @@
8006 n->name = $2;
8007 $$ = (Node *) n;
8008 }
8009+ | SHOW REPLICATION SERVER
8010+ {
8011+ VariableShowStmt *n = makeNode(VariableShowStmt);
8012+ n->name = "replication_server";
8013+ $$ = (Node *) n;
8014+ }
8015 | SHOW TIME ZONE
8016 {
8017 VariableShowStmt *n = makeNode(VariableShowStmt);
8018@@ -8678,6 +8684,7 @@
8019 | RENAME
8020 | REPEATABLE
8021 | REPLACE
8022+ | REPLICATION
8023 | RESET
8024 | RESTART
8025 | RESTRICT
8026@@ -8692,6 +8699,7 @@
8027 | SCROLL
8028 | SECOND_P
8029 | SECURITY
8030+ | SERVER
8031 | SEQUENCE
8032 | SERIALIZABLE
8033 | SESSION
8034diff -aruN postgresql-8.2.4/src/backend/parser/keywords.c pgcluster-1.7.0rc7/src/backend/parser/keywords.c
8035--- postgresql-8.2.4/src/backend/parser/keywords.c 2006-10-07 23:51:02.000000000 +0200
8036+++ pgcluster-1.7.0rc7/src/backend/parser/keywords.c 2007-02-18 22:52:16.000000000 +0100
8037@@ -281,6 +281,7 @@
8038 {"relative", RELATIVE_P},
8039 {"release", RELEASE},
8040 {"rename", RENAME},
8041 {"repeatable", REPEATABLE},
8042 {"replace", REPLACE},
8043+ {"replication", REPLICATION}, /* must stay in sorted position: keyword lookup is a binary search */
8044 {"reset", RESET},
8045diff -aruN postgresql-8.2.4/src/backend/parser/parse_clause.c pgcluster-1.7.0rc7/src/backend/parser/parse_clause.c
8046--- postgresql-8.2.4/src/backend/parser/parse_clause.c 2006-11-28 13:54:41.000000000 +0100
8047+++ pgcluster-1.7.0rc7/src/backend/parser/parse_clause.c 2007-02-18 22:52:16.000000000 +0100
8048@@ -34,6 +34,9 @@
8049 #include "rewrite/rewriteManip.h"
8050 #include "utils/guc.h"
8051
8052+#ifdef USE_REPLICATION
8053+#include "replicate.h"
8054+#endif /* USE_REPLICATION */
8055
8056 #define ORDER_CLAUSE 0
8057 #define GROUP_CLAUSE 1
8058@@ -154,7 +157,18 @@
8059 * analyze.c will eventually do the corresponding heap_close(), but *not*
8060 * release the lock.
8061 */
8062+#ifdef USE_REPLICATION
8063+ if (PGRautoLockTable == true)
8064+ {
8065+ pstate->p_target_relation = heap_openrv(relation, ShareRowExclusiveLock);
8066+ }
8067+ else
8068+ {
8069+ pstate->p_target_relation = heap_openrv(relation, RowExclusiveLock);
8070+ }
8071+#else
8072 pstate->p_target_relation = heap_openrv(relation, RowExclusiveLock);
8073+#endif /* USE_REPLICATION */
8074
8075 /*
8076 * Now build an RTE.
8077diff -aruN postgresql-8.2.4/src/backend/parser/parse_relation.c pgcluster-1.7.0rc7/src/backend/parser/parse_relation.c
8078--- postgresql-8.2.4/src/backend/parser/parse_relation.c 2006-10-04 02:29:56.000000000 +0200
8079+++ pgcluster-1.7.0rc7/src/backend/parser/parse_relation.c 2007-02-18 22:52:16.000000000 +0100
8080@@ -30,6 +30,9 @@
8081 #include "utils/lsyscache.h"
8082 #include "utils/syscache.h"
8083
8084+#ifdef USE_REPLICATION
8085+#include "replicate.h"
8086+#endif /* USE_REPLICATION */
8087
8088 /* GUC parameter */
8089 bool add_missing_from;
8090@@ -636,7 +639,14 @@
8091 * to a rel in a statement, be careful to get the right access level
8092 * depending on whether we're doing SELECT FOR UPDATE/SHARE.
8093 */
8094+#ifdef USE_REPLICATION
8095+ if (PGRautoLockTable == true)
8096+ lockmode = isLockedRel(pstate, refname) ? ShareRowExclusiveLock : AccessShareLock;
8097+ else
8098+ lockmode = isLockedRel(pstate, refname) ? RowShareLock : AccessShareLock;
8099+#else
8100 lockmode = isLockedRel(pstate, refname) ? RowShareLock : AccessShareLock;
8101+#endif /* USE_REPLICATION */
8102 rel = heap_openrv(relation, lockmode);
8103 rte->relid = RelationGetRelid(rel);
8104
8105diff -aruN postgresql-8.2.4/src/backend/postmaster/postmaster.c pgcluster-1.7.0rc7/src/backend/postmaster/postmaster.c
8106--- postgresql-8.2.4/src/backend/postmaster/postmaster.c 2007-01-04 01:58:01.000000000 +0100
8107+++ pgcluster-1.7.0rc7/src/backend/postmaster/postmaster.c 2007-02-18 22:52:16.000000000 +0100
8108@@ -122,6 +122,9 @@
8109 #include "storage/spin.h"
8110 #endif
8111
8112+#ifdef USE_REPLICATION
8113+#include "replicate.h"
8114+#endif /* USE_REPLICATION */
8115
8116 /*
8117 * List of active backends (or child processes anyway; we don't actually
8118@@ -363,6 +366,61 @@
8119 #define EXIT_STATUS_0(st) ((st) == 0)
8120 #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1)
8121
8122+#ifdef USE_REPLICATION
8123+char * Query_String = NULL;
8124+ReplicateServerInfo * ReplicateServerData = NULL;
8125+ReplicateServerInfo * CurrentReplicateServer = NULL;
8126+ReplicateServerInfo * LastReplicateServer = NULL;
8127+int ReplicateServerShmid = -1;
8128+int TransactionQuery = 0;
8129+int TransactionSock = -1;
8130+int Transaction_Mode = 0;
8131+bool PGR_Noticed_Abort = false;
8132+bool Session_Authorization_Mode = false;
8133+bool Create_Temp_Table_Mode = false;
8134+ConfDataType * ConfData_Top = (ConfDataType *)NULL;
8135+ConfDataType * ConfData_End = (ConfDataType *)NULL;
8136+int RecoveryPortNumber = 0;
8137+char * RsyncPath = NULL;
8138+char * RsyncOption = NULL;
8139+char * PgDumpPath = NULL;
8140+bool RsyncCompress = true;
8141+ReplicateNow * ReplicateCurrentTime = NULL;
8142+CopyData * PGRCopyData = NULL;
8143+bool PGR_Copy_Data_Need_Replicate = false;
8144+PGR_Stand_Alone_Type * PGR_Stand_Alone = NULL;
8145+PGR_Not_Replicate_Type * PGR_Not_Replicate = NULL;
8146+int PGR_Not_Replicate_Rec_Num = 0;
8147+bool PGR_Is_Replicated_Query = false;
8148+PGR_Check_Lock_Type PGR_Check_Lock;
8149+int PGR_Sock_To_Replication_Server = -1;
8150+bool PGR_Need_Notice = false;
8151+bool PGR_Lock_Noticed = false;
8152+bool PGR_Recovery_Option = false;
8153+int PGR_recovery_mode = 0;
8154+char * PGRSelfHostName = NULL;
8155+int PGR_Pending_Sem_Num = 0;
8156+bool PGR_Reliable_Mode_Wait = true;
8157+PGR_Retry_Query_Type PGR_Retry_Query;
8158+int ClusterDBShmid = -1;
8159+ClusterDBInfo * ClusterDBData = NULL;
8160+PGR_Password_Info * PGR_password = NULL;
8161+int PGR_Replication_Timeout = 60;
8162+int PGR_Lifecheck_Timeout = 3;
8163+int PGR_Lifecheck_Interval = 11;
8164+
8165+/* initialize in utils/misc/guc.c */
8166+bool PGRforceLoadBalance = false;
8167+bool PGRcheckConstraintWithLock = false;
8168+bool PGRautoLockTable = true;
8169+bool PGRnotReplicatePreparedSelect = false;
8170+
8171+bool needToUpdateReplicateIdOnNextQueryIsDone=false;
8172+bool PGR_Is_Sync_OID = false;
8173+
8174+static int Master_Pid = 0;
8175+static int Lifecheck_Pid = 0;
8176+#endif /* USE_REPLICATION */
8177
8178 /*
8179 * Postmaster main entry point
8180@@ -375,6 +433,11 @@
8181 char *userDoption = NULL;
8182 int i;
8183
8184+#ifdef USE_REPLICATION
8185+ PGR_Check_Lock.check_lock_conflict = false;
8186+ PGR_Check_Lock.status_lock_conflict = STATUS_OK;
8187+#endif /* USE_REPLICATION */
8188+
8189 MyProcPid = PostmasterPid = getpid();
8190
8191 IsPostmasterEnvironment = true;
8192@@ -420,10 +483,24 @@
8193 * tcop/postgres.c (the option sets should not conflict)
8194 * and with the common help() function in main/main.c.
8195 */
8196- while ((opt = getopt(argc, argv, "A:B:c:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:")) != -1)
8197+ while ((opt = getopt(argc, argv, "A:B:c:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:URu")) != -1)
8198 {
8199 switch (opt)
8200 {
8201+#ifdef USE_REPLICATION
8202+ case 'U':
8203+ PGR_Recovery_Option = true;
8204+ PGR_recovery_mode = PGR_HOT_RECOVERY;
8205+ break;
8206+ case 'R':
8207+ PGR_Recovery_Option = true;
8208+ PGR_recovery_mode = PGR_COLD_RECOVERY;
8209+ break;
8210+ case 'u':
8211+ PGR_Recovery_Option = true;
8212+ PGR_recovery_mode = PGR_WITHOUT_BACKUP;
8213+ break;
8214+#endif /* USE_REPLICATION */
8215 case 'A':
8216 SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
8217 break;
8218@@ -696,6 +773,30 @@
8219 */
8220 CreateDataDirLockFile(true);
8221
8222+#ifdef USE_REPLICATION
8223+ if (PGR_Get_Conf_Data( DataDir, CLUSTER_CONF_FILE ) == STATUS_OK) /* cluster setup runs only when the conf file was read */
8224+ {
8225+ if (PGR_Init_Replicate_Server_Data() != STATUS_OK)
8226+ {
8227+ fprintf(stderr,"PGR_Init_Replicate_Server_Data failed\n");
8228+ ExitPostmaster(1); /* fatal: exit non-zero so the failure is visible to the caller */
8229+ }
8230+ PGR_Set_Replicate_Server_Socket();
8231+ PGR_Free_Conf_Data();
8232+ if ((PGR_Recovery_Option) &&
8233+ (PGR_recovery_mode != PGR_HOT_RECOVERY)) /* hot recovery is handled later, in a forked child of ServerLoop */
8234+ {
8235+ fprintf(stderr,"Start in recovery mode! \n");
8236+ fprintf(stderr,"Please wait until a data synchronization finishes from Master DB... \n");
8237+ if (PGR_Recovery_Main(PGR_recovery_mode) != STATUS_OK)
8238+ {
8239+ fprintf(stderr,"PGR_Recovery_Main() failed with cold recovery\n");
8240+ ExitPostmaster(1); /* fatal: same non-zero status as the other PGR startup failures */
8241+ }
8242+ }
8243+ }
8244+#endif /* USE_REPLICATION */
8245+
8246 /*
8247 * If timezone is not set, determine what the OS uses. (In theory this
8248 * should be done during GUC initialization, but because it can take as
8249@@ -960,6 +1061,21 @@
8250 */
8251 StartupPID = StartupDataBase();
8252
8253+#ifdef USE_REPLICATION
8254+ Master_Pid = PGR_Master_Main();
8255+ if (Master_Pid < 0)
8256+ {
8257+ elog(DEBUG1,"PGR_Master_Main failed");
8258+ ExitPostmaster(1);
8259+ }
8260+ Lifecheck_Pid = PGR_Lifecheck_Main();
8261+ if (Lifecheck_Pid < 0)
8262+ {
8263+ elog(DEBUG1,"PGR_Lifecheck_Main failed");
8264+ ExitPostmaster(1);
8265+ }
8266+#endif /* USE_REPLICATION */
8267+
8268 status = ServerLoop();
8269
8270 /*
8271@@ -1133,6 +1249,60 @@
8272 last_touch_time = time(NULL);
8273
8274 nSockets = initMasks(&readmask);
8275+#ifdef USE_REPLICATION
8276+ if (PGR_Recovery_Option)
8277+ {
8278+ int pid = 0;
8279+ pid = fork_process();
8280+ if (pid == 0) /* child */
8281+ {
8282+ fprintf(stderr,"Start in recovery mode! \n");
8283+ fprintf(stderr,"Please wait until a data synchronization finishes from Master DB... \n");
8284+ IsUnderPostmaster = true; /* we are a postmaster subprocess now */
8285+
8286+ /* Close the postmaster's sockets */
8287+ ClosePostmasterPorts(false);
8288+ /* Lose the postmaster's on-exit routines and port connections */
8289+ on_exit_reset();
8290+ /* Release postmaster's working memory context */
8291+ MemoryContextSwitchTo(TopMemoryContext);
8292+ MemoryContextDelete(PostmasterContext);
8293+ PostmasterContext = NULL;
8294+ if (PGR_recovery_mode == PGR_HOT_RECOVERY)
8295+ {
8296+ if (PGR_Recovery_Main(PGR_recovery_mode) != STATUS_OK)
8297+ {
8298+ elog(DEBUG1,"PGR_Recovery_Main() failed with hot recovery.");
8299+ ExitPostmaster(1);
8300+ }
8301+ }
8302+ else
8303+ {
8304+ if (PGR_recovery_queue_data_req() != STATUS_OK)
8305+ {
8306+ elog(DEBUG1,"PGR_recovery_queue_data_req failed");
8307+ ExitPostmaster(1);
8308+ }
8309+ }
8310+ PGR_recovery_finish_send();
8311+ PGR_Recovery_Option = false;
8312+ fprintf(stderr,"OK! The data synchronization with Master DB was finished. \n");
8313+
8314+ ExitPostmaster(0);
8315+ }
8316+ else if (pid < 0)
8317+ {
8318+ ExitPostmaster(1);
8319+ }
8320+ }
8321+ if (PGR_password != NULL)
8322+ {
8323+ if(PGR_password->password != NULL)
8324+ memset(PGR_password->password,0,PASSWORD_MAX_LENGTH);
8325+ memset(PGR_password->md5Salt,0,sizeof(PGR_password->md5Salt));
8326+ memset(PGR_password->cryptSalt,0,sizeof(PGR_password->cryptSalt));
8327+ }
8328+#endif /* USE_REPLICATION */
8329
8330 for (;;)
8331 {
8332@@ -1591,6 +1761,9 @@
8333 ereport(FATAL,
8334 (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
8335 errmsg("sorry, too many clients already")));
8336+#ifdef USE_REPLICATION
8337+ return STATUS_ERROR;
8338+#endif
8339 break;
8340 case CAC_OK:
8341 default:
8342@@ -1858,6 +2031,23 @@
8343 (errmsg_internal("postmaster received signal %d",
8344 postgres_signal_arg)));
8345
8346+#ifdef USE_REPLICATION
8347+ if (PGR_Get_Cluster_Status() == STATUS_RECOVERY)
8348+ {
8349+ PGR_recovery_error_send();
8350+ PGR_Recovery_Option = false;
8351+ }
8352+ if (Master_Pid > 0)
8353+ {
8354+ kill (Master_Pid,postgres_signal_arg);
8355+ }
8356+ if (Lifecheck_Pid > 0)
8357+ {
8358+ kill (Lifecheck_Pid,postgres_signal_arg);
8359+ }
8360+ PGR_delete_shm();
8361+#endif /* USE_REPLICATION */
8362+
8363 switch (postgres_signal_arg)
8364 {
8365 case SIGTERM:
8366@@ -3452,6 +3642,16 @@
8367 * MUST -- vadim 05-10-1999
8368 */
8369
8370+#ifdef USE_REPLICATION
8371+ if (PGR_Get_Cluster_Status() == STATUS_RECOVERY)
8372+ {
8373+ write_stderr("sorry, recovery failed.");
8374+ PGR_recovery_error_send();
8375+ PGR_Recovery_Option = false;
8376+ }
8377+ PGR_delete_shm();
8378+#endif /* USE_REPLICATION */
8379+
8380 proc_exit(status);
8381 }
8382
8383diff -aruN postgresql-8.2.4/src/backend/storage/large_object/inv_api.c pgcluster-1.7.0rc7/src/backend/storage/large_object/inv_api.c
8384--- postgresql-8.2.4/src/backend/storage/large_object/inv_api.c 2006-09-07 17:37:25.000000000 +0200
8385+++ pgcluster-1.7.0rc7/src/backend/storage/large_object/inv_api.c 2007-02-18 22:52:16.000000000 +0100
8386@@ -36,6 +36,10 @@
8387 #include "utils/fmgroids.h"
8388 #include "utils/resowner.h"
8389
8390+#ifdef USE_REPLICATION
8391+#include "replicate.h"
8392+#endif /* USE_REPLICATION */
8393+
8394
8395 /*
8396 * All accesses to pg_largeobject and its index make use of a single Relation
8397@@ -188,6 +192,9 @@
8398 * use. We can use the index on pg_largeobject for checking OID
8399 * uniqueness, even though it has additional columns besides OID.
8400 */
8401+#ifdef USE_REPLICATION
8402+ PGR_Is_Sync_OID = true;
8403+#endif /* USE_REPLICATION */
8404 if (!OidIsValid(lobjId))
8405 {
8406 open_lo_relation();
8407@@ -206,6 +213,9 @@
8408 */
8409 CommandCounterIncrement();
8410
8411+#ifdef USE_REPLICATION
8412+ PGR_Is_Sync_OID = false;
8413+#endif /* USE_REPLICATION */
8414 return lobjId;
8415 }
8416
8417diff -aruN postgresql-8.2.4/src/backend/storage/lmgr/deadlock.c pgcluster-1.7.0rc7/src/backend/storage/lmgr/deadlock.c
8418--- postgresql-8.2.4/src/backend/storage/lmgr/deadlock.c 2006-09-23 01:20:13.000000000 +0200
8419+++ pgcluster-1.7.0rc7/src/backend/storage/lmgr/deadlock.c 2007-02-18 22:52:16.000000000 +0100
8420@@ -30,6 +30,9 @@
8421 #include "storage/proc.h"
8422 #include "utils/memutils.h"
8423
8424+#ifdef USE_REPLICATION
8425+#include "replicate.h"
8426+#endif /* USE_REPLICATION */
8427
8428 /* One edge in the waits-for graph */
8429 typedef struct
8430@@ -217,6 +220,13 @@
8431 if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges))
8432 elog(FATAL, "deadlock seems to have disappeared");
8433
8434+#ifdef USE_REPLICATION
8435+ if (PGR_Notice_Conflict() == STATUS_ERROR)
8436+ {
8437+ return FALSE;
8438+ }
8439+ PGR_Lock_Noticed =true;
8440+#endif
8441 return true; /* cannot find a non-deadlocked state */
8442 }
8443
8444@@ -426,6 +436,18 @@
8445 int numLockModes,
8446 lm;
8447
8448+#ifdef USE_REPLICATION
8449+ /*
8450+ * In PGCluster mode, conflicts with procs that have a younger (larger)
8451+ * rep-id do not matter here; they are handled by the younger proc's own
8452+ * CheckDeadLock(). This is necessary so that all nodes resolve deadlocks in
8453+ * the same order: only the youngest (by rep-id) process rolls back on deadlock.
8454+ */
8455+ if ( MyProc->replicationId!=0 &&
8456+ MyProc -> replicationId < checkProc->replicationId)
8457+ return false;
8458+
8459+#endif
8460 /*
8461 * Have we already seen this proc?
8462 */
8463diff -aruN postgresql-8.2.4/src/backend/storage/lmgr/lmgr.c pgcluster-1.7.0rc7/src/backend/storage/lmgr/lmgr.c
8464--- postgresql-8.2.4/src/backend/storage/lmgr/lmgr.c 2006-10-04 02:29:57.000000000 +0200
8465+++ pgcluster-1.7.0rc7/src/backend/storage/lmgr/lmgr.c 2007-02-18 22:52:16.000000000 +0100
8466@@ -26,6 +26,9 @@
8467 #include "utils/inval.h"
8468 #include "utils/lsyscache.h"
8469
8470+#ifdef USE_REPLICATION
8471+#include "replicate.h"
8472+#endif /* USE_REPLICATION */
8473
8474 /*
8475 * RelationInitLockInfo
8476@@ -476,9 +479,16 @@
8477
8478 SET_LOCKTAG_TRANSACTION(tag, xid);
8479
8480+#ifdef USE_REPLICATION
8481+ if (!LockAcquire(&tag, ExclusiveLock, false,false))
8482+ elog(ERROR, "XactLockTableWait: LockAcquire failed");
8483+
8484+ LockRelease(&tag, ExclusiveLock,false);
8485+#else
8486 (void) LockAcquire(&tag, ShareLock, false, false);
8487
8488 LockRelease(&tag, ShareLock, false);
8489+#endif /* USE_REPLICATION */
8490
8491 if (!TransactionIdIsInProgress(xid))
8492 break;
8493@@ -635,3 +645,37 @@
8494 }
8495 return false; /* default case */
8496 }
8497+
8498+#ifdef USE_REPLICATION
8499+/*
8500+ * XactLockTableWaitForCluster
8501+ *
8502+ * Unlock buffer, wait for transaction xid to commit or abort, then re-lock buffer exclusively.
8503+ */
8504+void
8505+XactLockTableWaitForCluster(TransactionId xid,Buffer buffer)
8506+{
8507+ LOCKTAG tag;
8508+ TransactionId myxid = GetCurrentTransactionId();
8509+
8510+ Assert(!TransactionIdEquals( xid, myxid )); /* must not wait on our own transaction */
8511+
8512+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK); /* drop the buffer lock before blocking on the xact lock */
8513+
8514+ SET_LOCKTAG_TRANSACTION(tag, xid);
8515+
8516+ if (!LockAcquire(&tag, ExclusiveLock, false,false))
8517+ elog(ERROR, "XactLockTableWaitForCluster: LockAcquire failed");
8518+
8519+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); /* re-take the buffer lock while still holding the xact lock */
8520+
8521+ LockRelease(&tag, ExclusiveLock,false);
8522+
8523+ /*
8524+ * Transaction was committed/aborted/crashed - we have to update
8525+ * pg_clog if transaction is still marked as running.
8526+ */
8527+ if (!TransactionIdDidCommit(xid) && !TransactionIdDidAbort(xid))
8528+ TransactionIdAbort(xid);
8529+}
8530+#endif /*USE_REPLICATION*/
8531diff -aruN postgresql-8.2.4/src/backend/storage/lmgr/lock.c pgcluster-1.7.0rc7/src/backend/storage/lmgr/lock.c
8532--- postgresql-8.2.4/src/backend/storage/lmgr/lock.c 2006-10-04 02:29:57.000000000 +0200
8533+++ pgcluster-1.7.0rc7/src/backend/storage/lmgr/lock.c 2007-02-18 22:52:16.000000000 +0100
8534@@ -42,6 +42,10 @@
8535 #include "utils/ps_status.h"
8536 #include "utils/resowner.h"
8537
8538+#ifdef USE_REPLICATION
8539+#include "storage/lmgr.h"
8540+#include "replicate.h"
8541+#endif /* USE_REPLICATION */
8542
8543 /* This configuration variable is used to set the lock table size */
8544 int max_locks_per_xact; /* set by guc.c */
8545@@ -737,6 +741,10 @@
8546 status = LockCheckConflicts(lockMethodTable, lockmode,
8547 lock, proclock, MyProc);
8548
8549+#ifdef USE_REPLICATION
8550+ PGR_Check_Lock.status_lock_conflict = status;
8551+ PGR_Check_Lock.deadlock = false;
8552+#endif /* USE_REPLICATION */
8553 if (status == STATUS_OK)
8554 {
8555 /* No conflict with held or previously requested locks */
8556@@ -746,6 +754,17 @@
8557 else
8558 {
8559 Assert(status == STATUS_FOUND);
8560+#ifdef USE_REPLICATION
8561+ if ((PGR_Need_Notice == true) &&
8562+ (PGR_Check_Lock.check_lock_conflict == true))
8563+ {
8564+ if (!PGR_Lock_Noticed && PGR_Notice_Conflict() == STATUS_ERROR)
8565+ {
8566+ return FALSE;
8567+ }
8568+ PGR_Lock_Noticed = true;
8569+ }
8570+#endif /* USE_REPLICATION */
8571
8572 /*
8573 * We can't acquire the lock immediately. If caller specified no
8574diff -aruN postgresql-8.2.4/src/backend/storage/lmgr/proc.c pgcluster-1.7.0rc7/src/backend/storage/lmgr/proc.c
8575--- postgresql-8.2.4/src/backend/storage/lmgr/proc.c 2006-11-21 21:59:52.000000000 +0100
8576+++ pgcluster-1.7.0rc7/src/backend/storage/lmgr/proc.c 2007-02-18 22:52:16.000000000 +0100
8577@@ -43,6 +43,9 @@
8578 #include "storage/procarray.h"
8579 #include "storage/spin.h"
8580
8581+#ifdef USE_REPLICATION
8582+#include "replicate.h"
8583+#endif /* USE_REPLICATION */
8584
8585 /* GUC variables */
8586 int DeadlockTimeout = 1000;
8587@@ -263,6 +266,9 @@
8588 MyProc->lwWaitLink = NULL;
8589 MyProc->waitLock = NULL;
8590 MyProc->waitProcLock = NULL;
8591+#ifdef USE_REPLICATION
8592+ MyProc->replicationId = 0;
8593+#endif
8594 for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
8595 SHMQueueInit(&(MyProc->myProcLocks[i]));
8596
8597@@ -395,6 +401,9 @@
8598 MyProc->lwWaitLink = NULL;
8599 MyProc->waitLock = NULL;
8600 MyProc->waitProcLock = NULL;
8601+#ifdef USE_REPLICATION
8602+ MyProc->replicationId = 0;
8603+#endif
8604 for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
8605 SHMQueueInit(&(MyProc->myProcLocks[i]));
8606
8607@@ -737,6 +746,17 @@
8608 GrantAwaitedLock();
8609 return STATUS_OK;
8610 }
8611+#ifdef USE_REPLICATION
8612+ if(proc->replicationId == 0 ||
8613+ (MyProc->replicationId > proc->replicationId &&
8614+ proc->heldLocks & aheadRequests) ) {
8615+ elog(DEBUG1,"origin's RId = %d , MyProc->RId = %d , skip",proc->replicationId,MyProc->replicationId);
8616+ aheadRequests |= (1 << proc->waitLockMode);
8617+ proc = (PGPROC *) MAKE_PTR(proc->links.next);
8618+ continue;
8619+ }
8620+
8621+#endif
8622 /* Break out of loop to put myself before him */
8623 break;
8624 }
8625@@ -752,8 +772,21 @@
8626 }
8627 else
8628 {
8629+#ifdef USE_REPLICATION
8630+ proc = (PGPROC *) &(waitQueue->links);
8631+ for (i = 0; i < waitQueue->size+1; i++){
8632+ elog(DEBUG1,"origin's RId = %d , MyProc->RId = %d",proc->replicationId,MyProc->replicationId);
8633+ if(proc->replicationId == 0 ||
8634+ MyProc->replicationId > proc->replicationId) {
8635+ proc= (PGPROC *) MAKE_PTR(proc->links.next);
8636+ }else {
8637+ break;
8638+ }
8639+ }
8640+#else
8641 /* I hold no locks, so I can't push in front of anyone. */
8642 proc = (PGPROC *) &(waitQueue->links);
8643+#endif /* USE_REPLICATION */
8644 }
8645
8646 /*
8647@@ -776,7 +809,11 @@
8648 * CheckDeadLock's recovery code, except that we shouldn't release the
8649 * semaphore since we haven't tried to lock it yet.
8650 */
8651+#ifdef USE_REPLICATION
8652+ if (early_deadlock && proc->replicationId < MyProc->replicationId)
8653+#else
8654 if (early_deadlock)
8655+#endif
8656 {
8657 RemoveFromWaitQueue(MyProc, hashcode);
8658 return STATUS_ERROR;
8659@@ -976,6 +1013,9 @@
8660 CheckDeadLock(void)
8661 {
8662 int i;
8663+#ifdef USE_REPLICATION
8664+ bool pgr_notice = false;
8665+#endif /* USE_REPLICATION */
8666
8667 /*
8668 * Acquire exclusive lock on the entire shared lock data structures. Must
8669@@ -1047,6 +1087,10 @@
8670 * such processes.
8671 */
8672
8673+#ifdef USE_REPLICATION
8674+ pgr_notice = true;
8675+#endif
8676+
8677 /*
8678 * Release locks acquired at head of routine. Order is not critical, so
8679 * do it back-to-front to avoid waking another CheckDeadLock instance
8680@@ -1055,6 +1099,12 @@
8681 check_done:
8682 for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
8683 LWLockRelease(FirstLockMgrLock + i);
8684+#ifdef USE_REPLICATION
8685+ if (pgr_notice == true)
8686+ {
8687+ PGR_Notice_DeadLock();
8688+ }
8689+#endif
8690 }
8691
8692
8693@@ -1110,6 +1160,15 @@
8694 {
8695 TimestampTz fin_time;
8696 struct itimerval timeval;
8697+#ifdef USE_REPLICATION
8698+ int useFlag = 0;
8699+
8700+ if (ReplicateCurrentTime != NULL)
8701+ {
8702+ useFlag = ReplicateCurrentTime->useFlag;
8703+ ReplicateCurrentTime->useFlag = DATA_INIT;
8704+ }
8705+#endif /* USE_REPLICATION */
8706
8707 if (is_statement_timeout)
8708 {
8709@@ -1154,6 +1213,12 @@
8710 fin_time = GetCurrentTimestamp();
8711 fin_time = TimestampTzPlusMilliseconds(fin_time, delayms);
8712 deadlock_timeout_active = true;
8713+#ifdef USE_REPLICATION
8714+ if (ReplicateCurrentTime != NULL)
8715+ {
8716+ ReplicateCurrentTime->useFlag = useFlag;
8717+ }
8718+#endif /* USE_REPLICATION */
8719 if (fin_time >= statement_fin_time)
8720 return true;
8721 }
8722@@ -1167,6 +1232,12 @@
8723 MemSet(&timeval, 0, sizeof(struct itimerval));
8724 timeval.it_value.tv_sec = delayms / 1000;
8725 timeval.it_value.tv_usec = (delayms % 1000) * 1000;
8726+#ifdef USE_REPLICATION
8727+ if (ReplicateCurrentTime != NULL)
8728+ {
8729+ ReplicateCurrentTime->useFlag = useFlag;
8730+ }
8731+#endif /* USE_REPLICATION */
8732 if (setitimer(ITIMER_REAL, &timeval, NULL))
8733 return false;
8734 return true;
8735@@ -1232,12 +1303,30 @@
8736 CheckStatementTimeout(void)
8737 {
8738 TimestampTz now;
8739+#ifdef USE_REPLICATION
8740+ int useFlag = 0;
8741+#endif /* USE_REPLICATION */
8742
8743 if (!statement_timeout_active)
8744 return true; /* do nothing if not active */
8745
8746+#ifdef USE_REPLICATION
8747+ if (ReplicateCurrentTime != NULL)
8748+ {
8749+ useFlag = ReplicateCurrentTime->useFlag;
8750+ ReplicateCurrentTime->useFlag = DATA_INIT;
8751+ }
8752+#endif /* USE_REPLICATION */
8753+
8754 now = GetCurrentTimestamp();
8755
8756+#ifdef USE_REPLICATION
8757+ if (ReplicateCurrentTime != NULL)
8758+ {
8759+ ReplicateCurrentTime->useFlag = useFlag;
8760+ }
8761+#endif /* USE_REPLICATION */
8762+
8763 if (now >= statement_fin_time)
8764 {
8765 /* Time to die */
8766diff -aruN postgresql-8.2.4/src/backend/tcop/postgres.c pgcluster-1.7.0rc7/src/backend/tcop/postgres.c
8767--- postgresql-8.2.4/src/backend/tcop/postgres.c 2007-01-04 01:58:01.000000000 +0100
8768+++ pgcluster-1.7.0rc7/src/backend/tcop/postgres.c 2007-02-18 22:52:16.000000000 +0100
8769@@ -68,6 +68,10 @@
8770
8771 #include "pgstat.h"
8772
8773+#ifdef USE_REPLICATION
8774+#include "replicate.h"
8775+#endif /* USE_REPLICATION */
8776+
8777 extern int optind;
8778 extern char *optarg;
8779
8780@@ -91,7 +95,9 @@
8781 /* wait N seconds to allow attach from a debugger */
8782 int PostAuthDelay = 0;
8783
8784-
8785+#ifdef USE_REPLICATION
8786+bool PGR_Not_Replication_Query = false;
8787+#endif /* USE_REPLICATION */
8788
8789 /* ----------------
8790 * private variables
8791@@ -753,6 +759,24 @@
8792 bool was_logged = false;
8793 char msec_str[32];
8794
8795+#ifdef USE_REPLICATION
8796+ char * query_ptr = NULL;
8797+ char * null_ptr = NULL;
8798+ int skip_cnt = 0;
8799+ int status = 0;
8800+
8801+ PGR_Reliable_Mode_Wait = false;
8802+ query_ptr = (char *)query_string;
8803+ if (PGR_Is_Replicated_Query == false)
8804+ {
8805+ PGR_Is_Replicated_Query = PGR_Is_Replicated_Command(query_ptr);
8806+ }
8807+ PGR_Retry_Query.query_string = (char *)query_string;
8808+ PGR_Retry_Query.query_len = strlen(query_string);
8809+ PGR_Retry_Query.cmdSts = CMD_STS_OTHER;
8810+ PGR_Retry_Query.cmdType = CMD_TYPE_OTHER;
8811+#endif /* USE_REPLICATION */
8812+
8813 /*
8814 * Report query to various monitoring facilities.
8815 */
8816@@ -831,6 +855,18 @@
8817 DestReceiver *receiver;
8818 int16 format;
8819
8820+#ifdef USE_REPLICATION
8821+ PGR_Not_Replication_Query = false;
8822+ PGR_Reliable_Mode_Wait = false;
8823+
8824+ PGR_Retry_Query.query_string = NULL;
8825+ PGR_Retry_Query.query_len = 0;
8826+ PGR_Retry_Query.cmdSts = CMD_STS_OTHER;
8827+ PGR_Retry_Query.cmdType = CMD_TYPE_OTHER;
8828+ PGR_Retry_Query.useFlag = DATA_INIT;
8829+ PGR_Lock_Noticed = false;
8830+#endif /* USE_REPLICATION */
8831+
8832 /*
8833 * Get the command name for use in status display (it also becomes the
8834 * default completion tag, down inside PortalRun). Set ps_status and
8835@@ -853,10 +889,232 @@
8836 */
8837 if (IsAbortedTransactionBlockState() &&
8838 !IsTransactionExitStmt(parsetree))
8839+ {
8840+#ifdef USE_REPLICATION
8841+ Transaction_Mode = 0;
8842+#endif
8843 ereport(ERROR,
8844 (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
8845 errmsg("current transaction is aborted, "
8846 "commands ignored until end of transaction block")));
8847+ }
8848+
8849+#ifdef USE_REPLICATION
8850+ Query_String = NULL;
8851+ query_ptr = PGR_Remove_Comment(query_ptr);
8852+ PGR_Check_Lock.dest = TO_FRONTEND;
8853+ PGR_Need_Notice = false;
8854+ PGR_Check_Lock.check_lock_conflict = false;
8855+
8856+ /* skip replication during recovery mode runing */
8857+ if (PGR_Get_Cluster_Status() == STATUS_RECOVERY)
8858+ {
8859+ /*
8860+ PGR_Not_Replication_Query = true;
8861+ */
8862+ PGR_Is_Replicated_Query = true;
8863+ if (!strcmp(commandTag,"SELECT"))
8864+ {
8865+ if (PGR_Is_System_Command(query_ptr))
8866+ {
8867+ status = PGR_Call_System_Command(query_ptr);
8868+ if (status == STATUS_SKIP_QUERY)
8869+ {
8870+ EndCommand(PGR_ALREADY_REPLICATED_NOTICE_CMD,dest);
8871+ break;
8872+ }
8873+ else
8874+ {
8875+ EndCommand("SYSTEM_COMMAND",dest);
8876+ continue;
8877+ }
8878+ }
8879+ }
8880+ Transaction_Mode = PGR_Set_Transaction_Mode(Transaction_Mode,commandTag);
8881+ if (Transaction_Mode > 0)
8882+ {
8883+ PGR_Need_Notice = true;
8884+ PGR_Check_Lock.check_lock_conflict = true;
8885+ }
8886+ goto Skip_Replication;
8887+ }
8888+
8889+ /*
8890+ if (!xact_started)
8891+ {
8892+ start_xact_command();
8893+ xact_started = true;
8894+ }
8895+ */
8896+ if (skip_cnt == 0)
8897+ {
8898+ skip_cnt = PGR_Is_Skip_Replication(query_ptr);
8899+ }
8900+ null_ptr = PGR_scan_terminate (query_ptr);
8901+ if(null_ptr != NULL)
8902+ {
8903+ *null_ptr = '\0';
8904+ }
8905+ Transaction_Mode = PGR_Set_Transaction_Mode(Transaction_Mode,commandTag);
8906+ if ((PGR_Is_Replicated_Query ) ||
8907+ (skip_cnt != 0))
8908+ {
8909+ if (skip_cnt > 0)
8910+ {
8911+ skip_cnt --;
8912+ }
8913+ else
8914+ {
8915+ skip_cnt = 0;
8916+ }
8917+ PGR_Copy_Data_Need_Replicate = false;
8918+ if (!strncmp(commandTag,"SELECT",strlen("SELECT")))
8919+ {
8920+ if (PGR_Is_System_Command(query_ptr))
8921+ {
8922+ status = PGR_Call_System_Command(query_ptr);
8923+ if (status == STATUS_SKIP_QUERY)
8924+ {
8925+ EndCommand(PGR_ALREADY_REPLICATED_NOTICE_CMD,dest);
8926+ break;
8927+ }
8928+ else
8929+ {
8930+ EndCommand("SYSTEM_COMMAND",dest);
8931+ continue;
8932+ }
8933+ }
8934+ }
8935+ PGR_Check_Lock.status_lock_conflict = STATUS_OK;
8936+ PGR_Check_Lock.dest = TO_FRONTEND;
8937+ }
8938+ else
8939+ {
8940+ PGR_Copy_Data_Need_Replicate = false;
8941+
8942+ /* check cluster db status */
8943+ /*
8944+ if ((PGR_Get_Cluster_Status() == STATUS_RECOVERY) &&
8945+ (PGR_Not_Replication_Query == false) &&
8946+ (Transaction_Mode == 0 ) )
8947+ {
8948+ elog(WARNING, "This query is not permitted while recovery db ");
8949+ if(null_ptr != NULL)
8950+ {
8951+ *null_ptr = ';';
8952+ query_ptr = null_ptr +1;
8953+ }
8954+ continue;
8955+ }
8956+ */
8957+ if (PGR_Is_Stand_Alone() == true)
8958+ {
8959+ if (PGR_Stand_Alone->permit == PERMIT_READ_ONLY)
8960+ {
8961+ if (!strcmp(commandTag, "SHOW")) {
8962+ VariableShowStmt *stmt = (VariableShowStmt *)parsetree;
8963+ if (!strcmp(stmt->name, "replication_server")) {
8964+ PGR_Not_Replication_Query = true;
8965+ }
8966+ }
8967+
8968+ if (PGR_Not_Replication_Query == false)
8969+ elog(ERROR, "This query is not permitted when all replication servers fell down ");
8970+ }
8971+ }
8972+ else if ((PGRforceLoadBalance == false) &&
8973+ ((PGR_Not_Replication_Query == false ) ||
8974+ (!strcmp(commandTag,"SELECT"))))
8975+ {
8976+ status = PGR_replication(query_ptr,dest,parsetree,commandTag);
8977+ if (status == STATUS_REPLICATED)
8978+ {
8979+ if (xact_started)
8980+ {
8981+ finish_xact_command();
8982+ xact_started = false;
8983+ }
8984+ CommandCounterIncrement();
8985+ continue;
8986+ }
8987+ else if (status == STATUS_ERROR)
8988+ {
8989+ if (!strcmp(commandTag, "SHOW")) {
8990+ VariableShowStmt *stmt = (VariableShowStmt *)parsetree;
8991+ if (!strcmp(stmt->name, "replication_server")) {
8992+ PGR_Not_Replication_Query = true;
8993+ }
8994+ }
8995+ else if (PGR_Stand_Alone->permit == PERMIT_READ_ONLY)
8996+ {
8997+ elog(ERROR, "This query is not permitted when all replication servers fell down ");
8998+ }
8999+ }
9000+ else if (status == STATUS_DEADLOCK_DETECT)
9001+ {
9002+ PGR_Need_Notice = false;
9003+ elog(ERROR, "postmaster deadlock detected");
9004+ continue;
9005+ }
9006+ else if (status == STATUS_REPLICATION_ABORT)
9007+ {
9008+ PGR_Need_Notice = false;
9009+ elog(ERROR, "replication server should be down, transaction aborted.");
9010+ continue;
9011+ }
9012+ else if (status != STATUS_CONTINUE)
9013+ {
9014+ PGR_Check_Lock.dest = TO_FRONTEND;
9015+ }
9016+ else
9017+ {
9018+ PGR_Check_Lock.dest = TO_REPLICATION_SERVER;
9019+ PGR_Reliable_Mode_Wait = true;
9020+ }
9021+ }
9022+ }
9023+ if(null_ptr != NULL)
9024+ {
9025+ *null_ptr = ';';
9026+ query_ptr = null_ptr +1;
9027+ }
9028+ if (!PGR_Is_Replicated_Query )
9029+ {
9030+ if ((!strcmp(commandTag,"BEGIN")) ||
9031+ (!strcmp(commandTag, "START TRANSACTION")) ||
9032+ (Transaction_Mode == 0 ) )
9033+ {
9034+ PGR_Reload_Start_Time();
9035+ }
9036+ }
9037+ if (((IsA(parsetree, TransactionStmt)) ||
9038+ (Transaction_Mode > 0) ||
9039+ (Create_Temp_Table_Mode == true) ||
9040+ (Session_Authorization_Mode == true)) ||
9041+ (!strcmp(commandTag,"COPY")))
9042+ {
9043+ PGR_Need_Notice = true;
9044+ PGR_Check_Lock.check_lock_conflict = true;
9045+ }
9046+ else
9047+ {
9048+ if (PGR_Not_Replication_Query == false)
9049+ {
9050+ PGR_Need_Notice = true;
9051+ PGR_Check_Lock.check_lock_conflict = true;
9052+ }
9053+ else
9054+ {
9055+ if ((PGR_Is_Replicated_Query ) &&
9056+ (!strncmp(commandTag, "SELECT",strlen("SELECT"))))
9057+ {
9058+ PGR_Need_Notice = true;
9059+ PGR_Check_Lock.check_lock_conflict = true;
9060+ }
9061+ }
9062+ }
9063+Skip_Replication:
9064+#endif /* USE_REPLICATION */
9065
9066 /* Make sure we are in a transaction command */
9067 start_xact_command();
9068@@ -983,7 +1241,44 @@
9069 * command the client sent, regardless of rewriting. (But a command
9070 * aborted by error will not send an EndCommand report at all.)
9071 */
9072+#ifdef USE_REPLICATION
9073+ /*
9074+ * In Non-CONTROL LOCK CONFLICT mode, we *MUST NOT* send command tag twice.
9075+ * So , if it was already sent for lock notification , we didn't send
9076+ * tag here. also ReadyForQuery,too.
9077+ */
9078+ if(!(PGR_Is_Replicated_Query && PGR_Lock_Noticed))
9079+#endif
9080 EndCommand(completionTag, dest);
9081+
9082+#ifdef USE_REPLICATION
9083+ if(PGR_Is_Replicated_Query &&
9084+ needToUpdateReplicateIdOnNextQueryIsDone) {
9085+ ++(ReplicationLog_Info.PGR_Replicate_ID); /* always advance the ID kept in ReplicationLog_Info */
9086+
9087+ if (CurrentReplicateServer != NULL)
9088+ {
9089+ /* set replicate id in this system; log inside the guard so a NULL server is never dereferenced */
9090+ ++(CurrentReplicateServer->replicate_id);
9091+ elog(DEBUG1,"increased replicate_id to %d",CurrentReplicateServer->replicate_id);
9092+ }
9093+ needToUpdateReplicateIdOnNextQueryIsDone=false; /* one-shot flag: consumed by this query */
9094+ }
9095+
9096+ if (PGR_Get_Cluster_Status() != STATUS_RECOVERY)
9097+ {
9098+ if ((PGR_Need_Notice == true) &&
9099+ (PGRforceLoadBalance == false))
9100+ {
9101+ PGR_Notice_Transaction_Query_Done();
9102+ }
9103+ if ((Transaction_Mode == 0) &&
9104+ (ReplicateCurrentTime != NULL))
9105+ {
9106+ ReplicateCurrentTime->use_seed = 1;
9107+ }
9108+ }
9109+#endif
9110 } /* end loop over parsetrees */
9111
9112 /*
9113@@ -1144,11 +1439,15 @@
9114 */
9115 if (IsAbortedTransactionBlockState() &&
9116 !IsTransactionExitStmt(parsetree))
9117+ {
9118+#ifdef USE_REPLICATION
9119+ Transaction_Mode = 0;
9120+#endif
9121 ereport(ERROR,
9122 (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
9123 errmsg("current transaction is aborted, "
9124 "commands ignored until end of transaction block")));
9125-
9126+ }
9127 /*
9128 * OK to analyze, rewrite, and plan this query. Note that the
9129 * originally specified parameter set is not required to be complete,
9130@@ -1382,11 +1681,15 @@
9131 if (IsAbortedTransactionBlockState() &&
9132 (!IsTransactionExitStmtList(pstmt->query_list) ||
9133 numParams != 0))
9134+ {
9135+#ifdef USE_REPLICATION
9136+ Transaction_Mode = 0;
9137+#endif
9138 ereport(ERROR,
9139 (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
9140 errmsg("current transaction is aborted, "
9141 "commands ignored until end of transaction block")));
9142-
9143+ }
9144 /*
9145 * Create the portal. Allow silent replacement of an existing portal only
9146 * if the unnamed portal is specified.
9147@@ -1769,11 +2072,15 @@
9148 */
9149 if (IsAbortedTransactionBlockState() &&
9150 !IsTransactionExitStmtList(portal->parseTrees))
9151+ {
9152+#ifdef USE_REPLICATION
9153+ Transaction_Mode = 0;
9154+#endif
9155 ereport(ERROR,
9156 (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
9157 errmsg("current transaction is aborted, "
9158 "commands ignored until end of transaction block")));
9159-
9160+ }
9161 /* Check for cancel signal before we start execution */
9162 CHECK_FOR_INTERRUPTS();
9163
9164@@ -2101,11 +2408,15 @@
9165 */
9166 if (IsAbortedTransactionBlockState() &&
9167 PreparedStatementReturnsTuples(pstmt))
9168+ {
9169+#ifdef USE_REPLICATION
9170+ Transaction_Mode = 0;
9171+#endif
9172 ereport(ERROR,
9173 (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
9174 errmsg("current transaction is aborted, "
9175 "commands ignored until end of transaction block")));
9176-
9177+ }
9178 if (whereToSendOutput != DestRemote)
9179 return; /* can't actually do anything... */
9180
9181@@ -2171,11 +2482,15 @@
9182 */
9183 if (IsAbortedTransactionBlockState() &&
9184 portal->tupDesc)
9185+ {
9186+#ifdef USE_REPLICATION
9187+ Transaction_Mode = 0;
9188+#endif
9189 ereport(ERROR,
9190 (errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
9191 errmsg("current transaction is aborted, "
9192 "commands ignored until end of transaction block")));
9193-
9194+ }
9195 if (whereToSendOutput != DestRemote)
9196 return; /* can't actually do anything... */
9197
9198@@ -2332,6 +2647,9 @@
9199 * backend. This is necessary precisely because we don't clean up our
9200 * shared memory state.
9201 */
9202+#ifdef USE_REPLICATION
9203+ PGR_delete_shm();
9204+#endif /* USE_REPLICATION */
9205 exit(2);
9206 }
9207
9208@@ -2369,6 +2687,9 @@
9209 }
9210 }
9211
9212+#ifdef USE_REPLICATION
9213+ PGR_delete_shm();
9214+#endif /* USE_REPLICATION */
9215 errno = save_errno;
9216 }
9217
9218@@ -2383,6 +2704,9 @@
9219 void
9220 authdie(SIGNAL_ARGS)
9221 {
9222+#ifdef USE_REPLICATION
9223+ PGR_delete_shm();
9224+#endif /* USE_REPLICATION */
9225 exit(1);
9226 }
9227
9228@@ -3369,6 +3693,14 @@
9229 pgstat_report_activity("<IDLE>");
9230 }
9231
9232+#ifdef USE_REPLICATION
9233+ /*
9234+ * In Non-CONTROL LOCK CONFLICT mode, we *MUST NOT* send the command tag twice.
9235+ * So, if it was already sent for lock notification, we do not send the
9236+ * tag again. The same applies to ReadyForQuery, which is skipped here.
9237+ */
9238+ if(!(PGR_Is_Replicated_Query && PGR_Lock_Noticed))
9239+#endif
9240 ReadyForQuery(whereToSendOutput);
9241 send_ready_for_query = false;
9242 }
9243@@ -3409,6 +3741,26 @@
9244 if (ignore_till_sync && firstchar != EOF)
9245 continue;
9246
9247+#ifdef USE_REPLICATION
9248+ if ((firstchar == CMD_TYPE_P_PARSE) ||
9249+ (firstchar == CMD_TYPE_P_BIND) ||
9250+ (firstchar == CMD_TYPE_P_DESCRIBE) ||
9251+ (firstchar == CMD_TYPE_P_EXECUTE) ||
9252+ (firstchar == CMD_TYPE_P_SYNC) ||
9253+ (firstchar == CMD_TYPE_P_CLOSE))
9254+ {
9255+ if (PGR_Send_Input_Message(firstchar, &input_message) != STATUS_OK)
9256+ {
9257+ if ((PGR_Is_Stand_Alone() == true) &&
9258+ (PGR_Stand_Alone->permit == PERMIT_READ_ONLY))
9259+ {
9260+ elog(WARNING, "This query is not permitted when all replication servers fell down ");
9261+ break;
9262+ }
9263+ }
9264+ }
9265+#endif /* USE_REPLICATION */
9266+
9267 switch (firstchar)
9268 {
9269 case 'Q': /* simple query */
9270@@ -3622,6 +3974,27 @@
9271 case 'X':
9272 case EOF:
9273
9274+#ifdef USE_REPLICATION
9275+ if (PGRforceLoadBalance == false)
9276+ {
9277+ if (PGR_Is_Replicated_Query == false)
9278+ {
9279+ PGR_Noticed_Abort = true;
9280+ PGRsend_system_command(CMD_STS_TRANSACTION_ABORT, CMD_TYPE_FRONTEND_CLOSED);
9281+ }
9282+ else if ((Transaction_Mode >= 1) && (PGR_Noticed_Abort == false))
9283+ {
9284+ if (PGR_Did_Commit_Transaction() == true)
9285+ {
9286+ pgstat_report_activity("commit");
9287+ exec_simple_query("commit");
9288+ }
9289+ }
9290+ }
9291+ /*
9292+ PGR_Notice_Transaction_Query_Aborted();
9293+ */
9294+#endif /* USE_REPLICATION */
9295 /*
9296 * Reset whereToSendOutput to prevent ereport from attempting
9297 * to send any more messages to client.
9298diff -aruN postgresql-8.2.4/src/backend/tcop/pquery.c pgcluster-1.7.0rc7/src/backend/tcop/pquery.c
9299--- postgresql-8.2.4/src/backend/tcop/pquery.c 2006-10-04 02:29:58.000000000 +0200
9300+++ pgcluster-1.7.0rc7/src/backend/tcop/pquery.c 2007-02-18 22:52:16.000000000 +0100
9301@@ -24,6 +24,9 @@
9302 #include "tcop/utility.h"
9303 #include "utils/memutils.h"
9304
9305+#ifdef USE_REPLICATION
9306+#include "replicate.h"
9307+#endif /* USE_REPLICATION */
9308
9309 /*
9310 * ActivePortal is the currently executing Portal (the most closely nested,
9311@@ -188,6 +191,19 @@
9312 strcpy(completionTag, "???");
9313 break;
9314 }
9315+#ifdef USE_REPLICATION
9316+ if ((PGR_Is_Replicated_Query == true ) &&
9317+ (PGR_Get_Cluster_Status() != STATUS_RECOVERY))
9318+ {
9319+ /*
9320+ * A replicated *SELECT* query is used ONLY to replicate
9321+ * locks and function execution. All of its results
9322+ * will be discarded by the pgrp processes,
9323+ * so we don't need to send them.
9324+ */
9325+ dest = None_Receiver;
9326+ }
9327+#endif /*USE_REPLICATION */
9328 }
9329
9330 /* Now take care of any queued AFTER triggers */
9331diff -aruN postgresql-8.2.4/src/backend/tcop/utility.c pgcluster-1.7.0rc7/src/backend/tcop/utility.c
9332--- postgresql-8.2.4/src/backend/tcop/utility.c 2006-10-04 02:29:58.000000000 +0200
9333+++ pgcluster-1.7.0rc7/src/backend/tcop/utility.c 2007-02-18 22:52:16.000000000 +0100
9334@@ -54,6 +54,9 @@
9335 #include "utils/guc.h"
9336 #include "utils/syscache.h"
9337
9338+#ifdef USE_REPLICATION
9339+#include "replicate.h"
9340+#endif /* USE_REPLICATION */
9341
9342 /*
9343 * Error-checking support for DROP commands
9344@@ -1289,29 +1292,48 @@
9345
9346 case T_SelectStmt:
9347 tag = "SELECT";
9348+#ifdef USE_REPLICATION
9349+ PGR_Not_Replication_Query = true;
9350+#endif /* USE_REPLICATION */
9351 break;
9352
9353 case T_TransactionStmt:
9354 {
9355 TransactionStmt *stmt = (TransactionStmt *) parsetree;
9356
9357+#ifdef USE_REPLICATION
9358+ bool isInTransaction=IsTransactionBlock();
9359+#endif /* USE_REPLICATION */
9360+
9361 switch (stmt->kind)
9362 {
9363 case TRANS_STMT_BEGIN:
9364 tag = "BEGIN";
9365+#ifdef USE_REPLICATION
9366+ PGR_Not_Replication_Query=isInTransaction;
9367+#endif /* USE_REPLICATION */
9368 break;
9369
9370 case TRANS_STMT_START:
9371 tag = "START TRANSACTION";
9372+#ifdef USE_REPLICATION
9373+ PGR_Not_Replication_Query=isInTransaction;
9374+#endif /* USE_REPLICATION */
9375 break;
9376
9377 case TRANS_STMT_COMMIT:
9378 tag = "COMMIT";
9379+#ifdef USE_REPLICATION
9380+ PGR_Not_Replication_Query=!isInTransaction;
9381+#endif /* USE_REPLICATION */
9382 break;
9383
9384 case TRANS_STMT_ROLLBACK:
9385 case TRANS_STMT_ROLLBACK_TO:
9386 tag = "ROLLBACK";
9387+#ifdef USE_REPLICATION
9388+ PGR_Not_Replication_Query=!isInTransaction;
9389+#endif /* USE_REPLICATION */
9390 break;
9391
9392 case TRANS_STMT_SAVEPOINT:
9393@@ -1343,10 +1365,16 @@
9394
9395 case T_DeclareCursorStmt:
9396 tag = "DECLARE CURSOR";
9397+#ifdef USE_REPLICATION
9398+ PGR_Not_Replication_Query = true;
9399+#endif /* USE_REPLICATION */
9400 break;
9401
9402 case T_ClosePortalStmt:
9403 tag = "CLOSE CURSOR";
9404+#ifdef USE_REPLICATION
9405+ PGR_Not_Replication_Query = true;
9406+#endif /* USE_REPLICATION */
9407 break;
9408
9409 case T_FetchStmt:
9410@@ -1355,6 +1383,9 @@
9411
9412 tag = (stmt->ismove) ? "MOVE" : "FETCH";
9413 }
9414+#ifdef USE_REPLICATION
9415+ PGR_Not_Replication_Query = true;
9416+#endif /* USE_REPLICATION */
9417 break;
9418
9419 case T_CreateDomainStmt:
9420@@ -1677,10 +1708,16 @@
9421 tag = "VACUUM";
9422 else
9423 tag = "ANALYZE";
9424+#ifdef USE_REPLICATION
9425+ PGR_Not_Replication_Query = true;
9426+#endif /* USE_REPLICATION */
9427 break;
9428
9429 case T_ExplainStmt:
9430 tag = "EXPLAIN";
9431+#ifdef USE_REPLICATION
9432+ PGR_Not_Replication_Query = true;
9433+#endif /* USE_REPLICATION */
9434 break;
9435
9436 case T_VariableSetStmt:
9437@@ -1689,6 +1726,14 @@
9438
9439 case T_VariableShowStmt:
9440 tag = "SHOW";
9441+#ifdef USE_REPLICATION
9442+ {
9443+ VariableShowStmt *stmt = (VariableShowStmt *)parsetree;
9444+ if (strcasecmp(stmt->name, "replication_server")) {
9445+ PGR_Not_Replication_Query = true;
9446+ }
9447+ }
9448+#endif /* USE_REPLICATION */
9449 break;
9450
9451 case T_VariableResetStmt:
9452@@ -1755,10 +1800,16 @@
9453
9454 case T_CheckPointStmt:
9455 tag = "CHECKPOINT";
9456+#ifdef USE_REPLICATION
9457+ PGR_Not_Replication_Query = true;
9458+#endif /* USE_REPLICATION */
9459 break;
9460
9461 case T_ReindexStmt:
9462 tag = "REINDEX";
9463+#ifdef USE_REPLICATION
9464+ PGR_Not_Replication_Query = true;
9465+#endif /* USE_REPLICATION */
9466 break;
9467
9468 case T_CreateConversionStmt:
9469@@ -1783,14 +1834,35 @@
9470
9471 case T_PrepareStmt:
9472 tag = "PREPARE";
9473+#ifdef USE_REPLICATION
9474+ if ((PGRnotReplicatePreparedSelect == true) &&
9475+ (PGR_is_select_prepare_query() == true))
9476+ {
9477+ PGR_Not_Replication_Query = true;
9478+ }
9479+#endif /* USE_REPLICATION */
9480 break;
9481
9482 case T_ExecuteStmt:
9483 tag = "EXECUTE";
9484+#ifdef USE_REPLICATION
9485+ if ((PGRnotReplicatePreparedSelect == true) &&
9486+ (PGR_is_select_prepared_statement((PrepareStmt *)parsetree) == true))
9487+ {
9488+ PGR_Not_Replication_Query = true;
9489+ }
9490+#endif /* USE_REPLICATION */
9491 break;
9492
9493 case T_DeallocateStmt:
9494 tag = "DEALLOCATE";
9495+#ifdef USE_REPLICATION
9496+ if ((PGRnotReplicatePreparedSelect == true) &&
9497+ (PGR_is_select_prepared_statement((PrepareStmt *)parsetree) == true))
9498+ {
9499+ PGR_Not_Replication_Query = true;
9500+ }
9501+#endif /* USE_REPLICATION */
9502 break;
9503
9504 default:
9505@@ -1800,6 +1872,13 @@
9506 break;
9507 }
9508
9509+#ifdef USE_REPLICATION
9510+ if(PGRforceLoadBalance == true)
9511+ {
9512+ PGR_Not_Replication_Query = true;
9513+ }
9514+#endif /* USE_REPLICATION */
9515+
9516 return tag;
9517 }
9518
9519@@ -1835,7 +1914,12 @@
9520 tag = "SELECT FOR SHARE";
9521 }
9522 else
9523+ {
9524 tag = "SELECT";
9525+#ifdef USE_REPLICATION
9526+ PGR_Not_Replication_Query = true;
9527+#endif /* USE_REPLICATION */
9528+ }
9529 break;
9530 case CMD_UPDATE:
9531 tag = "UPDATE";
9532@@ -1853,6 +1937,9 @@
9533 elog(WARNING, "unrecognized commandType: %d",
9534 (int) parsetree->commandType);
9535 tag = "???";
9536+#ifdef USE_REPLICATION
9537+ PGR_Not_Replication_Query = true;
9538+#endif /* USE_REPLICATION */
9539 break;
9540 }
9541
9542diff -aruN postgresql-8.2.4/src/backend/utils/adt/float.c pgcluster-1.7.0rc7/src/backend/utils/adt/float.c
9543--- postgresql-8.2.4/src/backend/utils/adt/float.c 2006-10-05 03:40:45.000000000 +0200
9544+++ pgcluster-1.7.0rc7/src/backend/utils/adt/float.c 2007-02-18 22:52:16.000000000 +0100
9545@@ -66,6 +66,9 @@
9546 #include "utils/array.h"
9547 #include "utils/builtins.h"
9548
9549+#ifdef USE_REPLICATION
9550+#include "replicate.h"
9551+#endif /* USE_REPLICATION */
9552
9553 #ifndef M_PI
9554 /* from my RH5.2 gcc math.h file - thomas 2000-04-03 */
9555@@ -1886,7 +1889,11 @@
9556 float8 result;
9557
9558 /* result [0.0 - 1.0) */
9559+#ifdef USE_REPLICATION
9560+ result = ((double) PGR_Random()) / ((double) MAX_RANDOM_VALUE + 1);
9561+#else
9562 result = (double) random() / ((double) MAX_RANDOM_VALUE + 1);
9563+#endif /* USE_REPLICATION */
9564
9565 PG_RETURN_FLOAT8(result);
9566 }
9567diff -aruN postgresql-8.2.4/src/backend/utils/adt/nabstime.c pgcluster-1.7.0rc7/src/backend/utils/adt/nabstime.c
9568--- postgresql-8.2.4/src/backend/utils/adt/nabstime.c 2006-07-14 16:52:24.000000000 +0200
9569+++ pgcluster-1.7.0rc7/src/backend/utils/adt/nabstime.c 2007-02-18 22:52:16.000000000 +0100
9570@@ -27,6 +27,10 @@
9571 #include "utils/builtins.h"
9572 #include "utils/nabstime.h"
9573
9574+#ifdef USE_REPLICATION
9575+#include "replicate.h"
9576+#endif /* USE_REPLICATION */
9577+
9578 #define MIN_DAYNUM (-24856) /* December 13, 1901 */
9579 #define MAX_DAYNUM 24854 /* January 18, 2038 */
9580
9581@@ -92,7 +96,13 @@
9582 {
9583 time_t now;
9584
9585+#ifdef USE_REPLICATION
9586+ struct timeval tp;
9587+ PGR_GetTimeOfDay(&tp,NULL);
9588+ now = tp.tv_sec;
9589+#else
9590 now = time(NULL);
9591+#endif /* USE_REPLICATION */
9592 return (AbsoluteTime) now;
9593 }
9594
9595@@ -1031,9 +1041,14 @@
9596 {
9597 time_t sec;
9598
9599+#ifdef USE_REPLICATION
9600+ struct timeval tp;
9601+ PGR_GetTimeOfDay(&tp,NULL);
9602+ sec = tp.tv_sec;
9603+#else
9604 if (time(&sec) < 0)
9605 PG_RETURN_ABSOLUTETIME(INVALID_ABSTIME);
9606-
9607+#endif
9608 PG_RETURN_ABSOLUTETIME((AbsoluteTime) sec);
9609 }
9610
9611@@ -1588,7 +1603,11 @@
9612 int len;
9613 pg_time_t tt;
9614
9615+#ifdef USE_REPLICATION
9616+ PGR_GetTimeOfDay(&tp,NULL);
9617+#else
9618 gettimeofday(&tp, NULL);
9619+#endif /* USE_REPLICATION */
9620 tt = (pg_time_t) tp.tv_sec;
9621 pg_strftime(templ, sizeof(templ), "%a %b %d %H:%M:%S.%%06d %Y %Z",
9622 pg_localtime(&tt, global_timezone));
9623diff -aruN postgresql-8.2.4/src/backend/utils/adt/ri_triggers.c pgcluster-1.7.0rc7/src/backend/utils/adt/ri_triggers.c
9624--- postgresql-8.2.4/src/backend/utils/adt/ri_triggers.c 2006-10-04 02:29:59.000000000 +0200
9625+++ pgcluster-1.7.0rc7/src/backend/utils/adt/ri_triggers.c 2007-02-18 22:52:16.000000000 +0100
9626@@ -40,6 +40,9 @@
9627 #include "utils/typcache.h"
9628 #include "miscadmin.h"
9629
9630+#ifdef USE_REPLICATION
9631+#include "replicate.h"
9632+#endif /* USE_REPLICATION */
9633
9634 /* ----------
9635 * Local definitions
9636@@ -271,8 +274,18 @@
9637 * ----------
9638 */
9639 quoteRelationName(pkrelname, pk_rel);
9640+#ifdef USE_REPLICATION
9641+ if (PGRcheckConstraintWithLock)
9642+ snprintf(querystr, sizeof(querystr), "SELECT 1 FROM ONLY %s x FOR UPDATE OF x",
9643+ pkrelname);
9644+ else
9645+ snprintf(querystr, sizeof(querystr), "SELECT 1 FROM ONLY %s x ",
9646+ pkrelname);
9647+
9648+#else
9649 snprintf(querystr, sizeof(querystr), "SELECT 1 FROM ONLY %s x FOR SHARE OF x",
9650 pkrelname);
9651+#endif /* USE_REPLICATION */
9652
9653 /* Prepare and save the plan */
9654 qplan = ri_PlanCheck(querystr, 0, NULL,
9655@@ -416,6 +429,9 @@
9656 queryoids[i] = SPI_gettypeid(fk_rel->rd_att,
9657 qkey.keypair[i][RI_KEYPAIR_FK_IDX]);
9658 }
9659+#ifdef USE_REPLICATION
9660+ if (PGRcheckConstraintWithLock)
9661+#endif /* USE_REPLICATION */
9662 strcat(querystr, " FOR SHARE OF x");
9663
9664 /* Prepare and save the plan */
9665@@ -577,6 +593,9 @@
9666 queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
9667 qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
9668 }
9669+#ifdef USE_REPLICATION
9670+ if (PGRcheckConstraintWithLock)
9671+#endif /* USE_REPLICATION */
9672 strcat(querystr, " FOR SHARE OF x");
9673
9674 /* Prepare and save the plan */
9675@@ -733,6 +752,9 @@
9676 queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
9677 qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
9678 }
9679+#ifdef USE_REPLICATION
9680+ if (PGRcheckConstraintWithLock)
9681+#endif /* USE_REPLICATION */
9682 strcat(querystr, " FOR SHARE OF x");
9683
9684 /* Prepare and save the plan */
9685@@ -922,6 +944,9 @@
9686 queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
9687 qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
9688 }
9689+#ifdef USE_REPLICATION
9690+ if (PGRcheckConstraintWithLock)
9691+#endif /* USE_REPLICATION */
9692 strcat(querystr, " FOR SHARE OF x");
9693
9694 /* Prepare and save the plan */
9695@@ -1428,6 +1453,9 @@
9696 queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
9697 qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
9698 }
9699+#ifdef USE_REPLICATION
9700+ if (PGRcheckConstraintWithLock)
9701+#endif /* USE_REPLICATION */
9702 strcat(querystr, " FOR SHARE OF x");
9703
9704 /* Prepare and save the plan */
9705@@ -1607,6 +1635,9 @@
9706 queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
9707 qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
9708 }
9709+#ifdef USE_REPLICATION
9710+ if (PGRcheckConstraintWithLock)
9711+#endif /* USE_REPLICATION */
9712 strcat(querystr, " FOR SHARE OF x");
9713
9714 /* Prepare and save the plan */
9715diff -aruN postgresql-8.2.4/src/backend/utils/adt/timestamp.c pgcluster-1.7.0rc7/src/backend/utils/adt/timestamp.c
9716--- postgresql-8.2.4/src/backend/utils/adt/timestamp.c 2006-11-11 02:14:19.000000000 +0100
9717+++ pgcluster-1.7.0rc7/src/backend/utils/adt/timestamp.c 2007-02-18 22:52:16.000000000 +0100
9718@@ -39,6 +39,9 @@
9719 #error -ffast-math is known to break this code
9720 #endif
9721
9722+#ifdef USE_REPLICATION
9723+#include "replicate.h"
9724+#endif /* USE_REPLICATION */
9725
9726 /* Set at postmaster start */
9727 TimestampTz PgStartTime;
9728@@ -948,7 +951,11 @@
9729 TimestampTz result;
9730 struct timeval tp;
9731
9732+#ifdef USE_REPLICATION
9733+ PGR_GetTimeOfDay(&tp,NULL);
9734+#else
9735 gettimeofday(&tp, NULL);
9736+#endif
9737
9738 result = (TimestampTz) tp.tv_sec -
9739 ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY);
9740diff -aruN postgresql-8.2.4/src/backend/utils/error/assert.c pgcluster-1.7.0rc7/src/backend/utils/error/assert.c
9741--- postgresql-8.2.4/src/backend/utils/error/assert.c 2006-03-05 16:58:46.000000000 +0100
9742+++ pgcluster-1.7.0rc7/src/backend/utils/error/assert.c 2007-02-18 22:52:16.000000000 +0100
9743@@ -19,6 +19,10 @@
9744
9745 #include <unistd.h>
9746
9747+#ifdef USE_REPLICATION
9748+#include "replicate.h"
9749+#endif /* USE_REPLICATION */
9750+
9751 /*
9752 * ExceptionalCondition - Handles the failure of an Assert()
9753 */
9754@@ -39,6 +43,18 @@
9755 fileName, lineNumber);
9756 }
9757
9758+#ifdef USE_REPLICATION
9759+ if ((PGR_Check_Lock.dest == TO_REPLICATION_SERVER ) &&
9760+ (PGR_Need_Notice == true))
9761+ {
9762+ PGR_Notice_Transaction_Query_Aborted();
9763+ }
9764+ if (PGR_Copy_Data_Need_Replicate)
9765+ {
9766+ PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
9767+ }
9768+#endif /* USE_REPLICATION */
9769+
9770 #ifdef SLEEP_ON_ASSERT
9771
9772 /*
9773diff -aruN postgresql-8.2.4/src/backend/utils/error/elog.c pgcluster-1.7.0rc7/src/backend/utils/error/elog.c
9774--- postgresql-8.2.4/src/backend/utils/error/elog.c 2006-11-28 13:54:42.000000000 +0100
9775+++ pgcluster-1.7.0rc7/src/backend/utils/error/elog.c 2007-02-18 22:52:16.000000000 +0100
9776@@ -70,6 +70,9 @@
9777 #include "utils/memutils.h"
9778 #include "utils/ps_status.h"
9779
9780+#ifdef USE_REPLICATION
9781+#include "replicate.h"
9782+#endif /* USE_REPLICATION */
9783
9784 /* Global variables */
9785 ErrorContextCallback *error_context_stack = NULL;
9786@@ -314,6 +317,16 @@
9787 MemoryContext oldcontext;
9788 ErrorContextCallback *econtext;
9789
9790+#ifdef USE_REPLICATION
9791+ int status = 0;
9792+ bool parse_error_flag = false;
9793+
9794+ if ((edata->message) && (strstr(edata->message,"parse error") != NULL))
9795+ {
9796+ parse_error_flag = true;
9797+ }
9798+#endif /* USE_REPLICATION */
9799+
9800 recursion_depth++;
9801 CHECK_STACK_DEPTH();
9802
9803@@ -363,6 +376,24 @@
9804 * handler should reset it to something else soon.
9805 */
9806
9807+#ifdef USE_REPLICATION
9808+ if (parse_error_flag)
9809+ {
9810+ if ((PGR_Check_Lock.dest != TO_FRONTEND) &&
9811+ (Transaction_Mode > 0))
9812+ {
9813+ PGR_Force_Replicate_Query();
9814+ }
9815+ }
9816+ if (PGR_Copy_Data_Need_Replicate)
9817+ {
9818+ PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
9819+ }
9820+ else if (PGR_Need_Notice == true)
9821+ {
9822+ PGR_Notice_Transaction_Query_Done();
9823+ }
9824+#endif /* USE_REPLICATION */
9825 recursion_depth--;
9826 PG_RE_THROW();
9827 }
9828@@ -377,7 +408,16 @@
9829 * client_min_messages above FATAL, so don't look at output_to_client.
9830 */
9831 if (elevel >= FATAL && whereToSendOutput == DestRemote)
9832+ {
9833+#ifdef USE_REPLICATION
9834+ if (PGR_Copy_Data_Need_Replicate)
9835+ {
9836+ PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
9837+ }
9838+#endif /* USE_REPLICATION */
9839 pq_endcopyout(true);
9840+ }
9841+
9842
9843 /* Emit the message to the right places */
9844 EmitErrorReport();
9845@@ -417,6 +457,34 @@
9846 if (PG_exception_stack == NULL && whereToSendOutput == DestRemote)
9847 whereToSendOutput = DestNone;
9848
9849+#ifdef USE_REPLICATION
9850+ if (CurrentReplicateServer != NULL)
9851+ {
9852+ if (PGR_Need_Notice == true)
9853+ {
9854+ PGR_Notice_Transaction_Query_Aborted();
9855+ }
9856+ if (PGR_Copy_Data_Need_Replicate)
9857+ {
9858+ PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
9859+ }
9860+ else
9861+ {
9862+ if ((!PGR_Is_Replicated_Query ) &&
9863+ (PGR_Check_Lock.dest != TO_FRONTEND) &&
9864+ (PGR_Reliable_Mode_Wait == true) &&
9865+ (CurrentReplicateServer->response_mode == PGR_RELIABLE_MODE))
9866+ {
9867+ status = PGR_Recv_Trigger(0);
9868+ }
9869+ }
9870+ }
9871+ if (TransactionSock != -1)
9872+ {
9873+ close (TransactionSock);
9874+ TransactionSock = -1;
9875+ }
9876+#endif /* USE_REPLICATION */
9877 /*
9878 * fflush here is just to improve the odds that we get to see the
9879 * error message, in case things are so hosed that proc_exit crashes.
9880@@ -436,6 +504,34 @@
9881
9882 if (elevel >= PANIC)
9883 {
9884+#ifdef USE_REPLICATION
9885+ if (CurrentReplicateServer != NULL)
9886+ {
9887+ if (PGR_Need_Notice == true)
9888+ {
9889+ PGR_Notice_Transaction_Query_Aborted();
9890+ }
9891+ if (PGR_Copy_Data_Need_Replicate)
9892+ {
9893+ PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
9894+ }
9895+ else
9896+ {
9897+ if ((!PGR_Is_Replicated_Query ) &&
9898+ (PGR_Check_Lock.dest != TO_FRONTEND) &&
9899+ (PGR_Reliable_Mode_Wait == true) &&
9900+ (CurrentReplicateServer->response_mode == PGR_RELIABLE_MODE))
9901+ {
9902+ status = PGR_Recv_Trigger(PGR_Replication_Timeout);
9903+ }
9904+ }
9905+ }
9906+ if (TransactionSock != -1)
9907+ {
9908+ close (TransactionSock);
9909+ TransactionSock = -1;
9910+ }
9911+#endif /* USE_REPLICATION */
9912 /*
9913 * Serious crash time. Postmaster will observe SIGABRT process exit
9914 * status and kill the other backends too.
9915diff -aruN postgresql-8.2.4/src/backend/utils/fmgr/fmgr.c pgcluster-1.7.0rc7/src/backend/utils/fmgr/fmgr.c
9916--- postgresql-8.2.4/src/backend/utils/fmgr/fmgr.c 2006-10-04 02:30:01.000000000 +0200
9917+++ pgcluster-1.7.0rc7/src/backend/utils/fmgr/fmgr.c 2007-02-18 22:52:16.000000000 +0100
9918@@ -25,6 +25,9 @@
9919 #include "utils/fmgrtab.h"
9920 #include "utils/lsyscache.h"
9921 #include "utils/syscache.h"
9922+#ifdef USE_REPLICATION
9923+#include "replicate.h"
9924+#endif /* USE_REPLICATION */
9925
9926 /*
9927 * Declaration for old-style function pointer type. This is now used only
9928@@ -218,7 +221,12 @@
9929 ReleaseSysCache(procedureTuple);
9930 return;
9931 }
9932-
9933+#ifdef USE_REPLICATION
9934+ if (PGR_Replicate_Function_Call() != STATUS_OK)
9935+ {
9936+ return;
9937+ }
9938+#endif /* USE_REPLICATION */
9939 switch (procedureStruct->prolang)
9940 {
9941 case INTERNALlanguageId:
9942diff -aruN postgresql-8.2.4/src/backend/utils/mb/mbutils.c pgcluster-1.7.0rc7/src/backend/utils/mb/mbutils.c
9943--- postgresql-8.2.4/src/backend/utils/mb/mbutils.c 2006-10-04 02:30:02.000000000 +0200
9944+++ pgcluster-1.7.0rc7/src/backend/utils/mb/mbutils.c 2007-02-18 22:52:16.000000000 +0100
9945@@ -15,6 +15,9 @@
9946 #include "utils/memutils.h"
9947 #include "utils/syscache.h"
9948
9949+#ifdef USE_REPLICATION
9950+#include "replicate.h"
9951+#endif /* USE_REPLICATION */
9952 /*
9953 * We handle for actual FE and BE encoding setting encoding-identificator
9954 * and encoding-name too. It prevent searching and conversion from encoding
9955@@ -442,6 +445,11 @@
9956 dest_encoding;
9957 FmgrInfo *flinfo;
9958
9959+#ifdef USE_REPLICATION
9960+ if (PGR_Is_Replicated_Query)
9961+ return (char *)src;
9962+#endif /* USE_REPLICATION */
9963+
9964 if (is_client_to_server)
9965 {
9966 src_encoding = ClientEncoding->encoding;
9967diff -aruN postgresql-8.2.4/src/backend/utils/misc/guc.c pgcluster-1.7.0rc7/src/backend/utils/misc/guc.c
9968--- postgresql-8.2.4/src/backend/utils/misc/guc.c 2006-11-29 15:50:07.000000000 +0100
9969+++ pgcluster-1.7.0rc7/src/backend/utils/misc/guc.c 2007-02-18 22:52:16.000000000 +0100
9970@@ -25,6 +25,9 @@
9971 #include <syslog.h>
9972 #endif
9973
9974+#ifdef USE_REPLICATION
9975+#include "replicate.h"
9976+#endif /* USE_REPLICATION */
9977
9978 #include "access/gin.h"
9979 #include "access/twophase.h"
9980@@ -236,6 +239,9 @@
9981 char *role_string;
9982 char *session_authorization_string;
9983
9984+#ifdef USE_REPLICATION
9985+static void ShowReplicationServerConfig(DestReceiver *dest);
9986+#endif /* USE_REPLICATION */
9987
9988 /*
9989 * Displayable names for context types (enum GucContext)
9990@@ -970,6 +976,40 @@
9991 &pg_krb_caseins_users,
9992 false, NULL, NULL
9993 },
9994+#ifdef USE_REPLICATION
9995+ {
9996+ {"pgr_force_loadbalance", PGC_USERSET, CLIENT_CONN_STATEMENT,
9997+ gettext_noop("force loadbalance mode"),
9998+ NULL
9999+ },
10000+ &PGRforceLoadBalance,
10001+ false, NULL, NULL
10002+ },
10003+ {
10004+ {"check_constraint_with_lock", PGC_USERSET, CLIENT_CONN_STATEMENT,
10005+ gettext_noop("check constrain with lock"),
10006+ NULL
10007+ },
10008+ &PGRcheckConstraintWithLock,
10009+ false, NULL, NULL
10010+ },
10011+ {
10012+ {"auto_lock_table", PGC_USERSET, CLIENT_CONN_STATEMENT,
10013+ gettext_noop("auto lock table"),
10014+ NULL
10015+ },
10016+ &PGRautoLockTable,
10017+ true, NULL, NULL
10018+ },
10019+ {
10020+ {"not_replicate_prepared_select", PGC_USERSET, CLIENT_CONN_STATEMENT,
10021+ gettext_noop("not replicate the prepared as select"),
10022+ NULL
10023+ },
10024+ &PGRnotReplicatePreparedSelect,
10025+ false, NULL, NULL
10026+ },
10027+#endif
10028
10029 {
10030 {"escape_string_warning", PGC_USERSET, COMPAT_OPTIONS_PREVIOUS,
10031@@ -4830,6 +4870,10 @@
10032 {
10033 if (pg_strcasecmp(name, "all") == 0)
10034 ShowAllGUCConfig(dest);
10035+#ifdef USE_REPLICATION
10036+ else if (strcasecmp(name, "replication_server") == 0)
10037+ ShowReplicationServerConfig(dest);
10038+#endif
10039 else
10040 ShowGUCConfigOption(name, dest);
10041 }
10042@@ -6512,5 +6556,72 @@
10043 return nbuf;
10044 }
10045
10046+#ifdef USE_REPLICATION
10047+/*
10048+ * SHOW replication_server: send one row (status, host_name, port_num, recovery_port_num) per ReplicateServerData entry to dest
10049+ */
10050+static void
10051+ShowReplicationServerConfig(DestReceiver *dest)
10052+{
10053+ TupOutputState *tstate;
10054+ TupleDesc tupdesc;
10055+ char *values[4];
10056+ char buffer[256];
10057+ ReplicateServerInfo *sp;
10058+
10059+ /* need a tuple descriptor representing four TEXT columns */
10060+ tupdesc = CreateTemplateTupleDesc(4, false);
10061+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "status",
10062+ TEXTOID, -1, 0 );
10063+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "host_name",
10064+ TEXTOID, -1, 0 );
10065+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "port_num",
10066+ TEXTOID, -1, 0 );
10067+ TupleDescInitEntry(tupdesc, (AttrNumber) 4, "recovery_port_num",
10068+ TEXTOID, -1, 0 );
10069+
10070+ /* prepare for projection of tuples */
10071+ tstate = begin_tup_output_tupdesc(dest, tupdesc);
10072+
10073+ sp = ReplicateServerData; /* NOTE(review): assumes a non-NULL array terminated by a DATA_END entry -- confirm */
10074+ while (sp->useFlag != DATA_END) { /* pass 1: re-check each entry; mark it DATA_ERR when the check returns STATUS_ERROR */
10075+ if (PGR_Check_Replicate_Server_Status(sp) == STATUS_ERROR) {
10076+ PGR_Set_Replication_Server_Status(sp, DATA_ERR);
10077+ }
10078+
10079+ sp++;
10080+ }
10081+
10082+ sp = ReplicateServerData; /* rewind for the output pass */
10083+ while (sp->useFlag != DATA_END) { /* pass 2: emit one row per entry */
10084+ if (sp->useFlag == DATA_USE) { /* map useFlag to the text shown in the "status" column */
10085+ values[0] = "ALIVE";
10086+ } else if (sp->useFlag == DATA_ERR) {
10087+ values[0] = "DEAD";
10088+ } else if (sp->useFlag == DATA_INIT) {
10089+ values[0] = "STANDBY";
10090+ } else {
10091+ values[0] = "UNKNOWN";
10092+ }
10093+
10094+ values[1] = (char *) sp->hostName; /* used in place, not copied */
10095+
10096+ snprintf(buffer, sizeof(buffer), "%d", sp->portNumber);
10097+ values[2] = pstrdup(buffer); /* copy, because buffer is reused for the next column */
10098+
10099+ snprintf(buffer, sizeof(buffer), "%d", sp->recoveryPortNumber);
10100+ values[3] = pstrdup(buffer);
10101+
10102+ do_tup_output(tstate, values);
10103+
10104+ pfree(values[2]); /* release the two per-row pstrdup copies */
10105+ pfree(values[3]);
10106+
10107+ sp++;
10108+ }
10109+
10110+ end_tup_output(tstate);
10111+}
10112+#endif /* USE_REPLICATION */
10113
10114 #include "guc-file.c"
10115diff -aruN postgresql-8.2.4/src/backend/utils/misc/postgresql.conf.sample pgcluster-1.7.0rc7/src/backend/utils/misc/postgresql.conf.sample
10116--- postgresql-8.2.4/src/backend/utils/misc/postgresql.conf.sample 2007-01-20 22:42:06.000000000 +0100
10117+++ pgcluster-1.7.0rc7/src/backend/utils/misc/postgresql.conf.sample 2007-02-18 22:52:16.000000000 +0100
10118@@ -469,3 +469,12 @@
10119 #---------------------------------------------------------------------------
10120
10121 #custom_variable_classes = '' # list of custom variable class names
10122+
10123+
10124+#---------------------------------------------------------------------------
10125+# PGCluster
10126+#---------------------------------------------------------------------------
10127+
10128+# auto_lock_table = true
10129+# check_constraint_with_lock = false
10130+# not_replicate_prepared_select = false
10131diff -aruN postgresql-8.2.4/src/bin/initdb/initdb.c pgcluster-1.7.0rc7/src/bin/initdb/initdb.c
10132--- postgresql-8.2.4/src/bin/initdb/initdb.c 2006-10-04 20:58:08.000000000 +0200
10133+++ pgcluster-1.7.0rc7/src/bin/initdb/initdb.c 2007-02-18 22:52:16.000000000 +0100
10134@@ -122,6 +122,11 @@
10135 static int n_buffers = 50;
10136 static int n_fsm_pages = 20000;
10137
10138+#ifdef USE_REPLICATION
10139+static char *cluster_conf_file;
10140+static char *pgreplicate_conf_file;
10141+static char *pglb_conf_file;
10142+#endif /* USE_REPLICATION */
10143 /*
10144 * Warning messages for authentication methods
10145 */
10146@@ -1352,6 +1357,14 @@
10147
10148 free(conflines);
10149
10150+#ifdef USE_REPLICATION
10151+ /* cluster.conf */
10152+ conflines = readfile(cluster_conf_file);
10153+ snprintf(path, sizeof(path), "%s/cluster.conf", pg_data);
10154+ writefile(path, conflines);
10155+ chmod(path, 0600);
10156+ free(conflines);
10157+#endif /* USE_REPLICATION */
10158 check_ok();
10159 }
10160
10161@@ -2712,6 +2725,11 @@
10162 set_input(&info_schema_file, "information_schema.sql");
10163 set_input(&features_file, "sql_features.txt");
10164 set_input(&system_views_file, "system_views.sql");
10165+#ifdef USE_REPLICATION
10166+ set_input(&cluster_conf_file, "cluster.conf.sample");
10167+ set_input(&pgreplicate_conf_file, "pgreplicate.conf.sample");
10168+ set_input(&pglb_conf_file, "pglb.conf.sample");
10169+#endif /* USE_REPLICATION */
10170
10171 set_info_version();
10172
10173@@ -2730,6 +2748,16 @@
10174 desc_file, shdesc_file,
10175 conf_file,
10176 hba_file, ident_file);
10177+#ifdef USE_REPLICATION
10178+ fprintf(stderr,
10179+ "PGCLUSTER_VERSION=%s\n"
10180+ "CLUSTER_CONF_SAMPLE=%s\nPGREPLICATE_CONF_SAMPLE=%s\n"
10181+ "PGLB_CONF_SAMPLE=%s\n",
10182+ PGCLUSTER_VERSION,
10183+ cluster_conf_file,
10184+ pgreplicate_conf_file,
10185+ pglb_conf_file);
10186+#endif /* USE_REPLICATION */
10187 if (show_setting)
10188 exit(0);
10189 }
10190@@ -2744,6 +2772,11 @@
10191 check_input(info_schema_file);
10192 check_input(features_file);
10193 check_input(system_views_file);
10194+#ifdef USE_REPLICATION
10195+ check_input(cluster_conf_file);
10196+ check_input(pgreplicate_conf_file);
10197+ check_input(pglb_conf_file);
10198+#endif /* USE_REPLICATION */
10199
10200 setlocales();
10201
10202diff -aruN postgresql-8.2.4/src/bin/pg_dump/pg_dump.c pgcluster-1.7.0rc7/src/bin/pg_dump/pg_dump.c
10203--- postgresql-8.2.4/src/bin/pg_dump/pg_dump.c 2006-10-10 01:36:59.000000000 +0200
10204+++ pgcluster-1.7.0rc7/src/bin/pg_dump/pg_dump.c 2007-02-18 22:52:16.000000000 +0100
10205@@ -119,6 +119,9 @@
10206 /* flag to turn on/off dollar quoting */
10207 static int disable_dollar_quoting = 0;
10208
10209+#ifdef USE_REPLICATION
10210+ bool nonReplicate=true;
10211+#endif
10212
10213 static void help(const char *progname);
10214 static void expand_schema_name_patterns(SimpleStringList *patterns,
10215@@ -235,6 +238,9 @@
10216 {"column-inserts", no_argument, NULL, 'D'},
10217 {"host", required_argument, NULL, 'h'},
10218 {"ignore-version", no_argument, NULL, 'i'},
10219+#ifdef USE_REPLICATION
10220+ {"non-replicate", no_argument ,NULL, 'r'},
10221+#endif
10222 {"no-reconnect", no_argument, NULL, 'R'},
10223 {"oids", no_argument, NULL, 'o'},
10224 {"no-owner", no_argument, NULL, 'O'},
10225@@ -368,6 +374,11 @@
10226 pgport = optarg;
10227 break;
10228
10229+#ifdef USE_REPLICATION
10230+ case 'r':
10231+ nonReplicate = true;
10232+ break;
10233+#endif
10234 case 'R':
10235 /* no-op, still accepted for backwards compatibility */
10236 break;
10237@@ -553,6 +564,11 @@
10238 /*
10239 * Start serializable transaction to dump consistent data.
10240 */
10241+#ifdef USE_REPLICATION
10242+ if(nonReplicate) {
10243+ do_sql_command(g_conn, "set pgr_force_loadbalance to on");
10244+ }
10245+#endif /* USE_REPLICATION */
10246 do_sql_command(g_conn, "BEGIN");
10247
10248 do_sql_command(g_conn, "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE");
10249@@ -751,6 +767,9 @@
10250 printf(_(" -o, --oids include OIDs in dump\n"));
10251 printf(_(" -O, --no-owner skip restoration of object ownership\n"
10252 " in plain text format\n"));
10253+#ifdef USE_REPLICATION
10254+ printf(_(" -r, --non-replicate No queries replicate. Available only in pgcluster.\n"));
10255+#endif
10256 printf(_(" -s, --schema-only dump only the schema, no data\n"));
10257 printf(_(" -S, --superuser=NAME specify the superuser user name to use in\n"
10258 " plain text format\n"));
10259diff -aruN postgresql-8.2.4/src/bin/pg_dump/pg_dumpall.c pgcluster-1.7.0rc7/src/bin/pg_dump/pg_dumpall.c
10260--- postgresql-8.2.4/src/bin/pg_dump/pg_dumpall.c 2006-11-21 23:19:46.000000000 +0100
10261+++ pgcluster-1.7.0rc7/src/bin/pg_dump/pg_dumpall.c 2007-02-18 22:52:16.000000000 +0100
10262@@ -97,6 +97,9 @@
10263 {"oids", no_argument, NULL, 'o'},
10264 {"no-owner", no_argument, NULL, 'O'},
10265 {"port", required_argument, NULL, 'p'},
10266+#ifdef USE_REPLICATION
10267+ {"non-replicate", no_argument ,NULL, 'r'},
10268+#endif
10269 {"password", no_argument, NULL, 'W'},
10270 {"schema-only", no_argument, NULL, 's'},
10271 {"superuser", required_argument, NULL, 'S'},
10272@@ -161,7 +164,7 @@
10273
10274 pgdumpopts = createPQExpBuffer();
10275
10276- while ((c = getopt_long(argc, argv, "acdDgh:ioOp:sS:U:vWxX:", long_options, &optindex)) != -1)
10277+ while ((c = getopt_long(argc, argv, "acdDgh:ioOp:rsS:U:vWxX:", long_options, &optindex)) != -1)
10278 {
10279 switch (c)
10280 {
10281@@ -215,6 +218,11 @@
10282 #endif
10283 break;
10284
10285+#ifdef USE_REPLICATION
10286+ case 'r':
10287+ appendPQExpBuffer(pgdumpopts, " -r");
10288+ break;
10289+#endif /* USE_REPLICATION */
10290 case 's':
10291 schema_only = true;
10292 appendPQExpBuffer(pgdumpopts, " -s");
10293@@ -397,6 +405,9 @@
10294 printf(_("\nConnection options:\n"));
10295 printf(_(" -h, --host=HOSTNAME database server host or socket directory\n"));
10296 printf(_(" -p, --port=PORT database server port number\n"));
10297+#ifdef USE_REPLICATION
10298+ printf(_(" -r, --non-replicate No queries replicate. Available only in pgcluster.\n"));
10299+#endif /* USE_REPLICATION */
10300 printf(_(" -U, --username=NAME connect as specified database user\n"));
10301 printf(_(" -W, --password force password prompt (should happen automatically)\n"));
10302
10303diff -aruN postgresql-8.2.4/src/include/commands/prepare.h pgcluster-1.7.0rc7/src/include/commands/prepare.h
10304--- postgresql-8.2.4/src/include/commands/prepare.h 2006-10-04 02:30:08.000000000 +0200
10305+++ pgcluster-1.7.0rc7/src/include/commands/prepare.h 2007-02-18 22:52:16.000000000 +0100
10306@@ -64,4 +64,8 @@
10307 extern bool PreparedStatementReturnsTuples(PreparedStatement *stmt);
10308 extern List *FetchPreparedStatementTargetList(PreparedStatement *stmt);
10309
10310+#ifdef USE_REPLICATION
10311+extern bool PGR_is_select_prepared_statement(PrepareStmt *stmt);
10312+#endif /* USE_REPLICATION */
10313+
10314 #endif /* PREPARE_H */
10315diff -aruN postgresql-8.2.4/src/include/pg_config.h.in pgcluster-1.7.0rc7/src/include/pg_config.h.in
10316--- postgresql-8.2.4/src/include/pg_config.h.in 2006-11-06 04:44:38.000000000 +0100
10317+++ pgcluster-1.7.0rc7/src/include/pg_config.h.in 2007-02-18 22:52:17.000000000 +0100
10318@@ -673,3 +673,7 @@
10319 /* Define to empty if the keyword `volatile' does not work. Warning: valid
10320 code using `volatile' can become incorrect without. Disable with care. */
10321 #undef volatile
10322+
10323+/* PGCluster version */
10324+#undef PGCLUSTER_VERSION
10325+
10326diff -aruN postgresql-8.2.4/src/include/replicate.h pgcluster-1.7.0rc7/src/include/replicate.h
10327--- postgresql-8.2.4/src/include/replicate.h 1970-01-01 01:00:00.000000000 +0100
10328+++ pgcluster-1.7.0rc7/src/include/replicate.h 2007-02-18 22:52:17.000000000 +0100
10329@@ -0,0 +1,223 @@
10330+/*-------------------------------------------------------------------------
10331+ *
10332+ * replicate.h
10333+ * Primary include file for replicate server .c files
10334+ *
10335+ * This should be the first file included by replicate modules.
10336+ *
10337+ *-------------------------------------------------------------------------
10338+ */
10339+#ifndef REPLICATE_H
10340+#define REPLICATE_H
10341+
10342+#ifndef _SYS_TIME_H
10343+#include <sys/time.h>
10344+#endif
10345+#include "tcop/dest.h"
10346+#include "storage/proc.h"
10347+#include "lib/stringinfo.h"
10348+#include "replicate_com.h"
10349+
10350+#define STAND_ALONE_TAG "When_Stand_Alone"
10351+#define NOT_REPLICATE_INFO_TAG "Not_Replicate_Info"
10352+#define DB_NAME_TAG "DB_Name"
10353+#define TABLE_NAME_TAG "Table_Name"
10354+#define RSYNC_PATH_TAG "Rsync_Path"
10355+#define RSYNC_OPTION_TAG "Rsync_Option"
10356+#define RSYNC_COMPRESS_TAG "Rsync_Compress"
10357+#define PG_DUMP_PATH_TAG "Pg_Dump_Path"
10358+
10359+#define CLUSTER_CONF_FILE "cluster.conf"
10360+#define DEFAULT_RSYNC "/usr/bin/rsync"
10361+#define DEFAULT_PG_DUMP "/usr/local/pgsql/bin/pg_dump"
10362+#define NOT_SESSION_AUTHORIZATION (0)
10363+#define SESSION_AUTHORIZATION_BEGIN (1)
10364+#define SESSION_AUTHORIZATION_END (2)
10365+
10366+#define READ_ONLY_IF_STAND_ALONE "read_only"
10367+#define READ_WRITE_IF_STAND_ALONE "read_write"
10368+#define PERMIT_READ_ONLY (1)
10369+#define PERMIT_READ_WRITE (2)
10370+#define STATUS_REPLICATED (3)
10371+#define STATUS_CONTINUE (4)
10372+#define STATUS_CONTINUE_SELECT (5)
10373+#define STATUS_NOT_REPLICATE (6)
10374+#define STATUS_SKIP_QUERY (7)
10375+#define STATUS_RECOVERY (11)
10376+#define STATUS_REPLICATION_ABORT (98)
10377+#define STATUS_DEADLOCK_DETECT (99)
10378+
10379+#define TO_REPLICATION_SERVER (0)
10380+#define TO_FRONTEND (1)
10381+
10382+#define PGR_DEADLOCK_DETECTION_MSG "deadlock detected!"
10383+#define PGR_REPLICATION_ABORT_MSG "replication aborted!"
10384+#define SKIP_QUERY_1 "begin; select getdatabaseencoding(); commit"
10385+#define SKIP_QUERY_2 "BEGIN; SELECT usesuper FROM pg_catalog.pg_user WHERE usename = '%s'; COMMIT"
10386+#define SKIP_QUERY_3 "SET autocommit TO 'on'"
10387+#define SKIP_QUERY_4 "SET search_path = public"
10388+#define SYS_QUERY_1 "set pgr_force_loadbalance to on"
10389+
10390+#define PGR_1ST_RECOVERY (1)
10391+#define PGR_2ND_RECOVERY (2)
10392+#define PGR_COLD_RECOVERY (1)
10393+#define PGR_HOT_RECOVERY (2)
10394+#define PGR_WITHOUT_BACKUP (3)
10395+
10396+#define PGR_MESSAGE_OTHER (0)
10397+#define PGR_MESSAGE_SELECT (1)
10398+#define PGR_MESSAGE_PREPARE (2)
10399+#define PGR_MESSAGE_EXECUTE (3)
10400+#define PGR_MESSAGE_DEALLOCATE (4)
10401+
10402+typedef struct
10403+{
10404+ bool is_stand_alone;
10405+ int permit;
10406+} PGR_Stand_Alone_Type;
10407+
10408+typedef struct
10409+{
10410+ char db_name[DBNAME_MAX_LENGTH];
10411+ char table_name[TABLENAME_MAX_LENGTH];
10412+} PGR_Not_Replicate_Type;
10413+
10414+typedef struct
10415+{
10416+ bool check_lock_conflict;
10417+ bool deadlock;
10418+ int status_lock_conflict;
10419+ int dest;
10420+} PGR_Check_Lock_Type;
10421+
10422+typedef struct
10423+{
10424+ char * query_string;
10425+ int query_len;
10426+ char cmdSts;
10427+ char cmdType;
10428+ char useFlag;
10429+} PGR_Retry_Query_Type;
10430+
10431+
10432+/* replication log */
10433+typedef struct {
10434+ uint32_t PGR_Replicate_ID;
10435+ uint32_t PGR_Request_ID;
10436+} PGR_ReplicationLog_Info;
10437+
10438+typedef struct {
10439+ char * password;
10440+ char md5Salt[4];
10441+ char cryptSalt[2];
10442+} PGR_Password_Info;
10443+
10444+extern char * Query_String;
10445+extern int TransactionQuery;
10446+extern int Transaction_Mode;
10447+extern bool PGR_Noticed_Abort;
10448+extern bool Session_Authorization_Mode;
10449+extern bool Create_Temp_Table_Mode;
10450+extern int RecoveryPortNumber;
10451+extern char * RsyncPath;
10452+extern char * RsyncOption;
10453+extern bool RsyncCompress;
10454+extern char * PgDumpPath;
10455+extern int TransactionSock;
10456+extern ReplicateNow * ReplicateCurrentTime;
10457+extern CopyData * PGRCopyData;
10458+extern bool PGR_Copy_Data_Need_Replicate;
10459+extern PGR_Stand_Alone_Type * PGR_Stand_Alone;
10460+extern PGR_Not_Replicate_Type * PGR_Not_Replicate;
10461+extern int PGR_Not_Replicate_Rec_Num;
10462+extern bool autocommit;
10463+extern bool PGR_Is_Replicated_Query;
10464+extern PGR_Check_Lock_Type PGR_Check_Lock;
10465+extern int PGR_Sock_To_Replication_Server;
10466+extern bool PGR_Need_Notice;
10467+extern bool PGR_Lock_Noticed;
10468+extern bool PGR_Recovery_Option;
10469+extern int PGR_recovery_mode;
10470+extern ReplicateServerInfo * CurrentReplicateServer;
10471+extern ReplicateServerInfo * LastReplicateServer;
10472+extern char * PGRSelfHostName;
10473+extern int PGR_Pending_Sem_Num;
10474+extern int PGR_Response_Mode;
10475+extern bool PGR_Reliable_Mode_Wait;
10476+extern PGR_Retry_Query_Type PGR_Retry_Query;
10477+extern bool needToUpdateReplicateIdOnNextQueryIsDone;
10478+extern PGR_ReplicationLog_Info ReplicationLog_Info;
10479+extern bool PGR_Not_Replication_Query;
10480+extern bool PGR_Is_Sync_OID;
10481+extern PGR_Password_Info * PGR_password;
10482+
10483+/* backend/utils/misc/guc.c */
10484+extern bool PGRforceLoadBalance;
10485+extern bool PGRcheckConstraintWithLock;
10486+extern bool PGRautoLockTable;
10487+extern bool PGRnotReplicatePreparedSelect;
10488+
10489+/* in backend/libpq/replicate.c */
10490+extern int PGR_Init_Replicate_Server_Data(void);
10491+extern int PGR_Set_Replicate_Server_Socket(void);
10492+extern int PGR_get_replicate_server_socket ( ReplicateServerInfo * sp , int socket_type );
10493+extern ReplicateServerInfo * PGR_get_replicate_server_info(void);
10494+extern ReplicateServerInfo * PGR_check_replicate_server_info(void);
10495+extern char * PGR_Send_Replicate_Command(char * query_string, int query_len, char cmdSts ,char cmdType);
10496+extern bool PGR_Is_Replicated_Command(char * query);
10497+extern int Xlog_Check_Replicate(int operation);
10498+extern int PGR_Replicate_Function_Call(void);
10499+extern void PGR_delete_shm(void);
10500+extern int PGR_replication(char * query_string, CommandDest dest, Node *parsetree, const char * commandTag);
10501+extern bool PGR_Is_System_Command(char * query);
10502+extern int PGR_Call_System_Command(char * command);
10503+extern int PGR_GetTimeOfDay(struct timeval *tp,struct timezone *tpz);
10504+extern long PGR_Random(void);
10505+extern int PGR_Set_Current_Time(char * sec, char * usec);
10506+extern int PGR_Send_Copy(CopyData * copy, int end);
10507+extern CopyData * PGR_Set_Copy_Data(CopyData * copy, char *str, int len, int end);
10508+extern char * PGR_scan_terminate( char * str);
10509+extern bool PGR_Is_Stand_Alone(void);
10510+extern void PGR_Send_Message_To_Frontend(char * msg);
10511+extern void PGR_Notice_Transaction_Query_Done(void);
10512+extern void PGR_Notice_Transaction_Query_Aborted(void);
10513+extern int PGRsend_system_command(char cmdSts, char cmdType);
10514+extern int PGR_Notice_Conflict(void);
10515+extern int PGR_Recv_Trigger (int user_timeout);
10516+extern void PGR_Set_Replication_Server_Status( ReplicateServerInfo * sp, int status);
10517+extern int PGR_Is_Skip_Replication(char * query);
10518+extern bool PGR_Did_Commit_Transaction(void);
10519+extern int PGR_Set_Transaction_Mode(int mode,const char * commandTag);
10520+extern char * PGR_Remove_Comment(char * str);
10521+extern void PGR_Force_Replicate_Query(void);
10522+extern void PGR_Notice_DeadLock(void);
10523+extern void PGR_Set_Cluster_Status(int status);
10524+extern int PGR_Get_Cluster_Status(void);
10525+extern int PGR_Check_Replicate_Server_Status(ReplicateServerInfo * sp);
10526+extern int PGR_lo_import(char * filename);
10527+extern int PGR_lo_create(int flags);
10528+extern int PGR_lo_open(Oid lobjId,int32 mode);
10529+extern int PGR_lo_close(int32 fd);
10530+extern int PGR_lo_write(int fd, char *buf, int len);
10531+extern int PGR_lo_lseek(int32 fd, int32 offset, int32 whence);
10532+extern int PGR_lo_unlink(Oid lobjId);
10533+extern uint32_t PGRget_replication_id(void);
10534+extern Oid PGRGetNewObjectId(Oid last_id);
10535+extern int PGR_Send_Input_Message(char cmdType,StringInfo input_message);
10536+extern bool PGR_is_select_prepare_query(void);
10537+extern char * PGR_get_md5salt(char * md5Salt, char * string);
10538+extern int PGR_recv_replicate_result(int sock,char * result,int user_timeout);
10539+
10540+/* in backend/libpq/recovery.c */
10541+extern int PGR_Master_Main(void);
10542+extern int PGR_Recovery_Main(int mode);
10543+extern int PGR_recovery_error_send(void);
10544+extern int PGR_recovery_finish_send(void);
10545+extern int PGR_recovery_queue_data_req(void);
10546+
10547+/* in backend/libpq/lifecheck.c */
10548+extern int PGR_Lifecheck_Main(void);
10549+
10550+/* in backend/access/transam/xact.c */
10551+extern void PGR_Reload_Start_Time(void);
10552+#endif /* REPLICATE_H */
10553diff -aruN postgresql-8.2.4/src/include/replicate_com.h pgcluster-1.7.0rc7/src/include/replicate_com.h
10554--- postgresql-8.2.4/src/include/replicate_com.h 1970-01-01 01:00:00.000000000 +0100
10555+++ pgcluster-1.7.0rc7/src/include/replicate_com.h 2007-03-01 16:27:15.000000000 +0100
10556@@ -0,0 +1,432 @@
10557+/*-------------------------------------------------------------------------
10558+ *
10559+ * replicate_com.h
10560+ * Primary include file for replicate server .c files
10561+ *
10562+ * This should be the first file included by replicate modules.
10563+ *
10564+ *-------------------------------------------------------------------------
10565+ */
10566+#ifndef REPLICATE_COM_H
10567+#define REPLICATE_COM_H 1
10568+
10569+#ifndef _SYS_TYPES_H
10570+#include <sys/types.h>
10571+#endif
10572+#ifndef _INTTYPES_H
10573+#include <inttypes.h>
10574+#endif
10575+#ifndef _NETINET_IN_H
10576+#include <netinet/in.h>
10577+#endif
10578+
10579+#include "c.h"
10580+#include "pg_config.h"
10581+
10582+/* default values */
10583+#define DEFAULT_PGLB_PORT (6001)
10584+#define DEFAULT_PGLB_RECOVERY_PORT (6101)
10585+#define DEFAULT_PGLB_LIFECHECK_PORT (6201)
10586+#define DEFAULT_CLUSTER_PORT (5432)
10587+#define DEFAULT_CLUSTER_RECOVERY_PORT (7101)
10588+#define DEFAULT_CLUSTER_LIFECHECK_PORT (7201)
10589+#define DEFAULT_PGRP_PORT (8001)
10590+#define DEFAULT_PGRP_RECOVERY_PORT (8101)
10591+#define DEFAULT_PGRP_LIFECHECK_PORT (8201)
10592+#define DEFAULT_PGRP_RLOG_PORT (8301)
10593+#define MAX_DB_SERVER (32)
10594+
10595+/**************************
10596+* *
10597+* Packet ID definition *
10598+* *
10599+***************************/
10600+/*=========================
10601+ Replication packet id
10602+===========================*/
10603+#define CMD_SYS_REPLICATE 'R'
10604+/*-------------------------
10605+ Simple Query
10606+--------------------------*/
10607+#define CMD_STS_SET_SESSION_AUTHORIZATION 'S'
10608+#define CMD_STS_TRANSACTION 'T'
10609+#define CMD_STS_TEMP_TABLE 'E'
10610+#define CMD_STS_QUERY 'Q'
10611+#define CMD_STS_OTHER 'O'
10612+
10613+#define CMD_TYPE_VACUUM 'V'
10614+#define CMD_TYPE_ANALYZE 'A'
10615+#define CMD_TYPE_REINDEX 'N'
10616+#define CMD_TYPE_SELECT 'S'
10617+#define CMD_TYPE_EXPLAIN 'X'
10618+#define CMD_TYPE_SET 'T'
10619+#define CMD_TYPE_RESET 't'
10620+#define CMD_TYPE_INSERT 'I'
10621+#define CMD_TYPE_DELETE 'D'
10622+#define CMD_TYPE_EXECUTE 'U'
10623+#define CMD_TYPE_UPDATE 'U'
10624+#define CMD_TYPE_BEGIN 'B'
10625+#define CMD_TYPE_COMMIT 'E'
10626+#define CMD_TYPE_ROLLBACK 'R'
10627+#define CMD_TYPE_CONNECTION_CLOSE 'x'
10628+#define CMD_TYPE_SESSION_AUTHORIZATION_BEGIN 'a'
10629+#define CMD_TYPE_SESSION_AUTHORIZATION_END 'b'
10630+#define CMD_TYPE_SAVEPOINT 's'
10631+#define CMD_TYPE_ROLLBACK_TO_SAVEPOINT 'r'
10632+#define CMD_TYPE_RELEASE_SAVEPOINT 'l'
10633+#define CMD_TYPE_OTHER 'O'
10634+
10635+/*=========================
10636+ System call packet id
10637+===========================*/
10638+#define CMD_SYS_CALL 'S'
10639+#define CMD_SYS_PREREPLICATE 'Z'
10640+
10641+#define CMD_STS_NOTICE 'N'
10642+#define CMD_STS_RESPONSE 'R'
10643+#define CMD_STS_TRANSACTION_ABORT 'A'
10644+#define CMD_STS_QUERY_SUSPEND 'P'
10645+#define CMD_STS_QUERY_DONE 'D'
10646+
10647+#define CMD_TYPE_COMMIT_CONFIRM 'c'
10648+#define CMD_TYPE_QUERY_CONFIRM 'q'
10649+#define CMD_TYPE_DEADLOCK_DETECT 'd'
10650+#define CMD_TYPE_FRONTEND_CLOSED 'x'
10651+
10652+/*----------------------------
10653+ Copy Command
10654+------------------------------*/
10655+#define CMD_STS_COPY 'C'
10656+
10657+#define CMD_TYPE_COPY 'C'
10658+#define CMD_TYPE_COPY_DATA 'd'
10659+#define CMD_TYPE_COPY_DATA_END 'e'
10660+
10661+/*----------------------------
10662+ Large Object
10663+------------------------------*/
10664+#define CMD_STS_LARGE_OBJECT 'L'
10665+
10666+#define CMD_TYPE_LO_IMPORT 'I'
10667+#define CMD_TYPE_LO_CREATE 'C'
10668+#define CMD_TYPE_LO_OPEN 'O'
10669+#define CMD_TYPE_LO_WRITE 'W'
10670+#define CMD_TYPE_LO_LSEEK 'S'
10671+#define CMD_TYPE_LO_CLOSE 'X'
10672+#define CMD_TYPE_LO_UNLINK 'U'
10673+
10674+/*-------------------------
10675+ Prepare/Params Query
10676+--------------------------*/
10677+#define CMD_STS_PREPARE 'P'
10678+
10679+#define CMD_TYPE_P_PARSE 'P'
10680+#define CMD_TYPE_P_BIND 'B'
10681+#define CMD_TYPE_P_EXECUTE 'E'
10682+#define CMD_TYPE_P_FASTPATH 'F'
10683+#define CMD_TYPE_P_CLOSE 'C'
10684+#define CMD_TYPE_P_DESCRIBE 'D'
10685+#define CMD_TYPE_P_FLUSH 'H'
10686+#define CMD_TYPE_P_SYNC 'S'
10687+
10688+/*=========================
10689+ Lifecheck packet id
10690+===========================*/
10691+#define CMD_SYS_LIFECHECK 'W'
10692+#define CMD_STS_LOADBALANCER 'A'
10693+#define CMD_STS_CLUSTER 'B'
10694+#define CMD_STS_REPLICATOR 'C'
10695+
10696+#define PGR_TRANSACTION_SOCKET (0)
10697+#define PGR_QUERY_SOCKET (1)
10698+
10699+#define DATA_FREE (0)
10700+#define DATA_INIT (1)
10701+#define DATA_USE (2)
10702+#define DATA_ERR (90)
10703+#define DATA_END (-1)
10704+#define HOSTNAME_MAX_LENGTH (128)
10705+#define DBNAME_MAX_LENGTH (128)
10706+#define USERNAME_MAX_LENGTH (128)
10707+#define PASSWORD_MAX_LENGTH (128)
10708+#define TABLENAME_MAX_LENGTH (128)
10709+#define PATH_MAX_LENGTH (256)
10710+#define MAX_SERVER_NUM (128)
10711+#define MAX_RETRY_TIMES (3)
10712+#define MAX_SOCKET_QUEUE (100000)
10713+#define TRANSACTION_ERROR_RESULT "TRANSACTION_ERROR"
10714+#define REPLICATE_SERVER_SHM_KEY (1020)
10715+/* target -> replicate */
10716+#define RECOVERY_PREPARE_REQ (1)
10717+/* replicate -> master */
10718+#define RECOVERY_PGDATA_REQ (2)
10719+/* master -> replicate */
10720+#define RECOVERY_PGDATA_ANS (3)
10721+/* replicate -> target */
10722+#define RECOVERY_PREPARE_ANS (4)
10723+/* target -> replicate */
10724+#define RECOVERY_START_REQ (5)
10725+/* replicate -> master */
10726+#define RECOVERY_FSYNC_REQ (6)
10727+/* master -> replicate */
10728+#define RECOVERY_FSYNC_ANS (7)
10729+/* replicate -> target */
10730+#define RECOVERY_START_ANS (8)
10731+/* target -> replicate */
10732+#define RECOVERY_QUEUE_DATA_REQ (9)
10733+/* replicate -> target */
10734+#define RECOVERY_QUEUE_DATA_ANS (10)
10735+/* target -> replicate */
10736+#define RECOVERY_FINISH (11)
10737+
10738+#define RECOVERY_ERROR_OCCUPIED (100)
10739+#define RECOVERY_ERROR_CONNECTION (101)
10740+#define RECOVERY_ERROR_TARGET_ONLY (102)
10741+#define RECOVERY_ERROR_ANS (200)
10742+
10743+/* lifecheck ask from cluster db */
10744+#define LIFECHECK_ASK_FROM_CLUSTER (1)
10745+/* lifecheck response from replication server */
10746+#define LIFECHECK_RES_FROM_REPLICATOR (2)
10747+/* lifecheck ask from replication server */
10748+#define LIFECHECK_ASK_FROM_REPLICATOR (3)
10749+/* lifecheck response from cluster db */
10750+#define LIFECHECK_RES_FROM_CLUSTER (4)
10751+
10752+#define REPLICATION_SERVER_INFO_TAG "Replicate_Server_Info"
10753+#define HOST_NAME_TAG "Host_Name"
10754+#define PORT_TAG "Port"
10755+#define RECOVERY_PORT_TAG "Recovery_Port"
10756+#define LIFECHECK_PORT_TAG "LifeCheck_Port"
10757+#define TIMEOUT_TAG "Replication_Timeout"
10758+#define LIFECHECK_TIMEOUT_TAG "LifeCheck_Timeout"
10759+#define LIFECHECK_INTERVAL_TAG "LifeCheck_Interval"
10760+
10761+#define RECOVERY_INIT (0)
10762+#define RECOVERY_PREPARE_START (1)
10763+#define RECOVERY_START_1 (2)
10764+#define RECOVERY_CLEARED (3)
10765+#define RECOVERY_WAIT_CLEAN (10)
10766+#define RECOVERY_ERROR (99)
10767+
10768+/* response mode */
10769+#define PGR_FAST_MODE (0)
10770+#define PGR_NORMAL_MODE (1)
10771+#define PGR_RELIABLE_MODE (2)
10772+
10773+#define RECOVERY_TIMEOUT (600)
10774+#ifndef COMPLETION_TAG_BUFSIZE
10775+#define COMPLETION_TAG_BUFSIZE (128)
10776+#endif
10777+
10778+/* replicate log type */
10779+#define FROM_R_LOG_TYPE (1)
10780+#define FROM_C_DB_TYPE (2)
10781+#define CONNECTION_SUSPENDED_TYPE (3)
10782+
10783+#define PGR_SYSTEM_COMMAND_FUNC "PGR_SYSTEM_COMMAND_FUNCTION"
10784+#define PGR_STARTUP_REPLICATION_SERVER_FUNC_NO (1)
10785+#define PGR_CHANGE_REPLICATION_SERVER_FUNC_NO (2)
10786+#define PGR_SET_CURRENT_TIME_FUNC_NO (3)
10787+#define PGR_NOTICE_DEADLOCK_DETECTION_FUNC_NO (4)
10788+#define PGR_TRANSACTION_CONFIRM_ANSWER_FUNC_NO (5)
10789+#define PGR_RELIABLE_MODE_DONE_FUNC_NO (6)
10790+#define PGR_NOTICE_ABORT_FUNC_NO (7)
10791+#define PGR_SET_CURRENT_REPLICATION_QUERY_ID_NO (8)
10792+#define PGR_QUERY_CONFIRM_ANSWER_FUNC_NO (9)
10793+#define PGR_GET_OID_FUNC_NO (10)
10794+#define PGR_SET_OID_FUNC_NO (11)
10795+
10796+#define PGR_CMD_ARG_NUM (10)
10797+#define PGR_LOCK_CONFLICT_NOTICE_CMD "PGR_LOCK_CONFLICT_NOTICE_CMD"
10798+#define PGR_DEADLOCK_DETECT_NOTICE_CMD "PGR_DEADLOCK_DETECT_NOTICE_CMD"
10799+#define PGR_QUERY_DONE_NOTICE_CMD "PGR_QUERY_DONE_NOTICE_CMD"
10800+#define PGR_QUERY_ABORTED_NOTICE_CMD "PGR_QUERY_ABORTED_NOTICE_CMD"
10801+#define PGR_RETRY_LOCK_QUERY_CMD "PGR_RETRY_LOCK_QUERY_CMD"
10802+#define PGR_NOT_YET_REPLICATE_NOTICE_CMD "PGR_NOT_YET_REPLICATE_NOTICE_CMD"
10803+#define PGR_ALREADY_REPLICATED_NOTICE_CMD "PGR_ALREADY_REPLICATED_NOTICE_CMD"
10804+#define PGR_NOT_YET_COMMIT (0)
10805+#define PGR_ALREADY_COMMITTED (1)
10806+
10807+#define COPYBUFSIZ (8192)
10808+#define MAX_WORDS (24)
10809+#define MAX_WORD_LETTERS (48)
10810+#define PGR_MESSAGE_BUFSIZE (128)
10811+#define INT_LENGTH (12)
10812+#define PGR_MAX_COUNTER (0x0FFFFFFF)
10813+#define PGR_GET_OVER_FLOW_FILTER (0xF0000000)
10814+#define PGR_GET_DATA_FILTER (0x0FFFFFFF)
10815+#define PGR_SET_OVER_FLOW (0x10000000)
10816+#define PGR_MIN_COUNTER (0x0000000F)
10817+
10818+#define STRCMP(x,y) (strncmp(x,y,strlen(y)))
10819+
10820+/* life check target */
10821+#define SYN_TO_LOAD_BALANCER (0)
10822+#define SYN_TO_CLUSTER_DB (1)
10823+#define SYN_TO_REPLICATION_SERVER (2)
10824+#define LIFE_CHECK_TRY_COUNT (2)
10825+#define LIFE_CHECK_STOP (0)
10826+#define LIFE_CHECK_START (1)
10827+
10828+#ifndef HAVE_UNION_SEMUN
10829+union semun {
10830+ int val;
10831+ struct semid_ds *buf;
10832+ unsigned short int *array;
10833+ struct seminfo *__buf;
10834+};
10835+#endif
10836+
10837+typedef struct ReplicateHeaderType
10838+{
10839+ char cmdSys;
10840+ char cmdSts; /*
10841+ Q:query
10842+ T:transaction
10843+ */
10844+ char cmdType; /*
10845+ S:select
10846+ I:insert
10847+ D:delete
10848+ U:update
10849+ B:begin
10850+ E:commit/rollback/end
10851+ O:others
10852+ */
10853+ char rlog; /*
10854+ -- kind of replication log --
10855+ 1: send from replication log
10856+ 2: send from cluster db (should be retry)
10857+ 3: connection suspended
10858+ */
10859+ uint16_t port;
10860+ uint16_t pid;
10861+ uint32_t query_size;
10862+ char from_host[HOSTNAME_MAX_LENGTH];
10863+ char dbName[DBNAME_MAX_LENGTH];
10864+ char userName[USERNAME_MAX_LENGTH];
10865+ struct timeval tv;
10866+ uint32_t query_id;
10867+ int isAutoCommit; /* 0 if autocommit is off. 1 if autocommit is on */
10868+ uint32_t request_id;
10869+ uint32_t replicate_id;
10870+ char password[PASSWORD_MAX_LENGTH];
10871+ char md5Salt[4];
10872+ char cryptSalt[2];
10873+ char dummySalt[2];
10874+} ReplicateHeader;
10875+
10876+typedef struct RecoveryPacketType
10877+{
10878+ uint16_t packet_no; /*
10879+ 1:start recovery prepare
10880+ 2:ask pgdata
10881+ 3:ans pgdata
10882+ 4:send master info
10883+ 5:start queueing query
10884+ 6:request fsync
10885+ 7:ready to fsync
10886+ 8:prepared master
10887+ 9:finished rsync
10888+ */
10889+ uint16_t max_connect;
10890+ uint16_t port;
10891+ uint16_t recoveryPort;
10892+ char hostName[HOSTNAME_MAX_LENGTH];
10893+ char pg_data[PATH_MAX_LENGTH];
10894+ char userName[USERNAME_MAX_LENGTH];
10895+} RecoveryPacket;
10896+
10897+typedef struct
10898+{
10899+ char table[128];
10900+ int rec_no;
10901+ char key[128];
10902+ char value[128];
10903+ char * last;
10904+ char * next;
10905+} ConfDataType;
10906+
10907+
10908+typedef struct ReplicateServerInfoType
10909+{
10910+ uint32_t useFlag;
10911+ char hostName[HOSTNAME_MAX_LENGTH];
10912+ uint16_t portNumber;
10913+ uint16_t recoveryPortNumber;
10914+ uint16_t lifecheckPortNumber;
10915+ uint16_t RLogPortNumber;
10916+ uint32_t sock;
10917+ uint32_t rlog_sock;
10918+ uint32_t replicate_id;
10919+ uint16_t response_mode;
10920+ uint16_t retry_count;
10921+} ReplicateServerInfo;
10922+
10923+
10924+typedef struct ReplicateNowType
10925+{
10926+ uint32_t replicate_id;
10927+ int useFlag;
10928+ int use_seed;
10929+ int use_time;
10930+ int offset_sec;
10931+ int offset_usec;
10932+ struct timeval tp;
10933+} ReplicateNow;
10934+
10935+typedef struct CopyDataType
10936+{
10937+ int cnt;
10938+ char copy_data[COPYBUFSIZ];
10939+} CopyData;
10940+
10941+typedef struct ClusterDBInfoType
10942+{
10943+ int status;
10944+} ClusterDBInfo;
10945+
10946+typedef struct
10947+{
10948+ uint32_t arg1;
10949+ uint32_t arg2;
10950+ uint32_t arg3;
10951+ char buf[1];
10952+} LOArgs;
10953+
10954+typedef struct
10955+{
10956+ int length;
10957+ char data[1];
10958+} ArrayData;
10959+
10960+extern ConfDataType * ConfData_Top;
10961+extern ConfDataType * ConfData_End;
10962+extern ReplicateServerInfo * ReplicateServerData;
10963+extern ClusterDBInfo * ClusterDBData;
10964+extern int ReplicateServerShmid;
10965+extern int ClusterDBShmid;
10966+extern bool PGR_Under_Replication_Server;
10967+extern int PGR_Replication_Timeout;
10968+extern int PGR_Lifecheck_Timeout;
10969+extern int PGR_Lifecheck_Interval;
10970+
10971+/* in backend/libpq/replicate_com.c */
10972+extern int PGR_Create_Socket_Connect(int * fdP, char * hostName , unsigned short portNumber);
10973+extern void PGR_Close_Sock(int * sock);
10974+extern int PGR_Create_Socket_Bind(int * fdP, char * hostName , unsigned short portNumber);
10975+extern int PGR_Create_Acception(int fd, int * sockP, char * hostName , unsigned short portNumber);
10976+extern int PGR_Free_Conf_Data(void);
10977+extern int PGR_Get_Conf_Data(char * dir , char * fname);
10978+extern void PGRset_recovery_packet_no(RecoveryPacket * packet, int packet_no);
10979+extern unsigned int PGRget_ip_by_name(char * host);
10980+extern int PGRget_time_value(char *str);
10981+
10982+extern void PGRwrite_log_file(FILE * fp, const char * fmt,...);
10983+extern void show_debug(const char * fmt,...);
10984+extern void show_error(const char * fmt,...);
10985+
10986+
10987+
10988+#endif /* REPLICATE_COM_H */
10989diff -aruN postgresql-8.2.4/src/include/storage/lmgr.h pgcluster-1.7.0rc7/src/include/storage/lmgr.h
10990--- postgresql-8.2.4/src/include/storage/lmgr.h 2006-08-18 18:09:13.000000000 +0200
10991+++ pgcluster-1.7.0rc7/src/include/storage/lmgr.h 2007-02-18 22:52:17.000000000 +0100
10992@@ -15,6 +15,7 @@
10993 #define LMGR_H
10994
10995 #include "storage/lock.h"
10996+#include "storage/bufmgr.h"
10997 #include "utils/rel.h"
10998
10999
11000@@ -69,4 +70,5 @@
11001 /* Knowledge about which locktags describe temp objects */
11002 extern bool LockTagIsTemp(const LOCKTAG *tag);
11003
11004+extern void XactLockTableWaitForCluster(TransactionId xid,Buffer buffer);
11005 #endif /* LMGR_H */
11006diff -aruN postgresql-8.2.4/src/include/storage/proc.h pgcluster-1.7.0rc7/src/include/storage/proc.h
11007--- postgresql-8.2.4/src/include/storage/proc.h 2006-10-04 02:30:10.000000000 +0200
11008+++ pgcluster-1.7.0rc7/src/include/storage/proc.h 2007-02-18 22:52:17.000000000 +0100
11009@@ -97,6 +97,9 @@
11010 SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS];
11011
11012 struct XidCache subxids; /* cache for subtransaction XIDs */
11013+#ifdef USE_REPLICATION
11014+ unsigned int replicationId; /* id for replication. */
11015+#endif
11016 };
11017
11018 /* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
11019diff -aruN postgresql-8.2.4/src/interfaces/libpq/Makefile pgcluster-1.7.0rc7/src/interfaces/libpq/Makefile
11020--- postgresql-8.2.4/src/interfaces/libpq/Makefile 2006-12-28 01:01:12.000000000 +0100
11021+++ pgcluster-1.7.0rc7/src/interfaces/libpq/Makefile 2007-02-18 22:52:17.000000000 +0100
11022@@ -33,7 +33,7 @@
11023
11024 OBJS= fe-auth.o fe-connect.o fe-exec.o fe-misc.o fe-print.o fe-lobj.o \
11025 fe-protocol2.o fe-protocol3.o pqexpbuffer.o pqsignal.o fe-secure.o \
11026- md5.o ip.o wchar.o encnames.o noblock.o pgstrcasecmp.o thread.o \
11027+ dllist.o md5.o ip.o wchar.o encnames.o noblock.o pgstrcasecmp.o thread.o \
11028 $(filter crypt.o getaddrinfo.o inet_aton.o open.o snprintf.o strerror.o strlcpy.o, $(LIBOBJS))
11029
11030 ifeq ($(PORTNAME), cygwin)
11031@@ -89,6 +89,9 @@
11032 encnames.c wchar.c : % : $(backend_src)/utils/mb/%
11033 rm -f $@ && $(LN_S) $< .
11034
11035+dllist.c : % : $(backend_src)/lib/dllist.c
11036+ rm -f $@ && $(LN_S) $< .
11037+
11038
11039 # We need several not-quite-identical variants of .DEF files to build libpq
11040 # DLLs for Windows. These are made from the single source file exports.txt.
11041@@ -169,7 +172,7 @@
11042 rm -f '$(DESTDIR)$(includedir)/libpq-fe.h' '$(DESTDIR)$(includedir_internal)/libpq-int.h' '$(DESTDIR)$(includedir_internal)/pqexpbuffer.h' '$(DESTDIR)$(datadir)/pg_service.conf.sample'
11043
11044 clean distclean: clean-lib
11045- rm -f $(OBJS) pg_config_paths.h crypt.c getaddrinfo.c inet_aton.c noblock.c open.c pgstrcasecmp.c snprintf.c strerror.c strlcpy.c thread.c md5.c ip.c encnames.c wchar.c pthread.h exports.list
11046+ rm -f $(OBJS) pg_config_paths.h crypt.c getaddrinfo.c inet_aton.c noblock.c open.c pgstrcasecmp.c snprintf.c strerror.c strlcpy.c thread.c md5.c ip.c encnames.c wchar.c pthread.h exports.list dllist.c
11047 rm -f pg_config_paths.h # Might be left over from a Win32 client-only build
11048
11049 maintainer-clean: distclean
11050diff -aruN postgresql-8.2.4/src/interfaces/libpq/fe-auth.c pgcluster-1.7.0rc7/src/interfaces/libpq/fe-auth.c
11051--- postgresql-8.2.4/src/interfaces/libpq/fe-auth.c 2006-10-04 02:30:12.000000000 +0200
11052+++ pgcluster-1.7.0rc7/src/interfaces/libpq/fe-auth.c 2007-02-18 22:52:17.000000000 +0100
11053@@ -51,6 +51,10 @@
11054 #include "fe-auth.h"
11055 #include "libpq/md5.h"
11056
11057+#ifdef USE_REPLICATION
11058+#include "replicate_com.h"
11059+bool PGR_Under_Replication_Server = false;
11060+#endif /* USE_REPLICATION */
11061
11062 #ifdef KRB5
11063 /*
11064@@ -412,6 +416,19 @@
11065 free(crypt_pwd);
11066 return STATUS_ERROR;
11067 }
11068+#ifdef USE_REPLICATION
11069+				if (PGR_Under_Replication_Server)
11070+				{
11071+					/*
11072+					 * Called from the replication server: the password was already
11073+					 * encrypted at the Cluster DB, so copy it through unchanged.
11074+					 * Copy at most size - 1 bytes so the result stays NUL-terminated.
11075+					 */
11076+					int size = 2 * (MD5_PASSWD_LEN + 1);
11077+					memset(crypt_pwd,0, size);
11078+					strncpy(crypt_pwd,password, size - 1);
11079+				}
11080+#endif /* USE_REPLICATION */
11081 break;
11082 }
11083 case AUTH_REQ_CRYPT:
11084diff -aruN postgresql-8.2.4/src/makefiles/Makefile.aix pgcluster-1.7.0rc7/src/makefiles/Makefile.aix
11085--- postgresql-8.2.4/src/makefiles/Makefile.aix 2006-09-19 17:36:08.000000000 +0200
11086+++ pgcluster-1.7.0rc7/src/makefiles/Makefile.aix 2007-02-18 22:52:17.000000000 +0100
11087@@ -44,3 +44,5 @@
11088 $(CC) $(LDFLAGS) $(LDFLAGS_SL) -o $@ $*.o -Wl,-bE:$*$(EXPSUFF) $(SHLIB_LINK)
11089
11090 sqlmansect = 7
11091+CFLAGS += -pthread
11092+LDFLAGS += -L/usr/lib/threads
11093diff -aruN postgresql-8.2.4/src/makefiles/Makefile.freebsd pgcluster-1.7.0rc7/src/makefiles/Makefile.freebsd
11094--- postgresql-8.2.4/src/makefiles/Makefile.freebsd 2006-04-19 18:32:08.000000000 +0200
11095+++ pgcluster-1.7.0rc7/src/makefiles/Makefile.freebsd 2007-02-18 22:52:17.000000000 +0100
11096@@ -28,3 +28,5 @@
11097 endif
11098
11099 sqlmansect = 7
11100+
11101+LIBS += -lc_r
11102diff -aruN postgresql-8.2.4/src/makefiles/Makefile.hpux pgcluster-1.7.0rc7/src/makefiles/Makefile.hpux
11103--- postgresql-8.2.4/src/makefiles/Makefile.hpux 2006-02-07 18:36:13.000000000 +0100
11104+++ pgcluster-1.7.0rc7/src/makefiles/Makefile.hpux 2007-02-18 22:52:17.000000000 +0100
11105@@ -10,6 +10,9 @@
11106 # correctly in the LP64 data model.
11107 LIBS := -lxnet $(LIBS)
11108
11109+# add thread lib for PGCluster
11110+LIBS := -lpthread $(LIBS)
11111+
11112 # Set up rpath so that the executables don't need SHLIB_PATH to be set.
11113 # (Note: --disable-rpath is a really bad idea on this platform...)
11114 ifeq ($(with_gnu_ld), yes)
11115diff -aruN postgresql-8.2.4/src/makefiles/Makefile.linux pgcluster-1.7.0rc7/src/makefiles/Makefile.linux
11116--- postgresql-8.2.4/src/makefiles/Makefile.linux 2005-12-09 22:19:36.000000000 +0100
11117+++ pgcluster-1.7.0rc7/src/makefiles/Makefile.linux 2007-02-18 22:52:17.000000000 +0100
11118@@ -14,3 +14,4 @@
11119 $(CC) -shared -o $@ $<
11120
11121 sqlmansect = 7
11122+LIBS += -lpthread
11123diff -aruN postgresql-8.2.4/src/makefiles/Makefile.netbsd pgcluster-1.7.0rc7/src/makefiles/Makefile.netbsd
11124--- postgresql-8.2.4/src/makefiles/Makefile.netbsd 2006-04-19 18:32:08.000000000 +0200
11125+++ pgcluster-1.7.0rc7/src/makefiles/Makefile.netbsd 2007-02-18 22:52:17.000000000 +0100
11126@@ -30,3 +30,4 @@
11127 endif
11128
11129 sqlmansect = 7
11130+LIBS += -lpthread
11131diff -aruN postgresql-8.2.4/src/makefiles/Makefile.openbsd pgcluster-1.7.0rc7/src/makefiles/Makefile.openbsd
11132--- postgresql-8.2.4/src/makefiles/Makefile.openbsd 2006-04-19 18:32:08.000000000 +0200
11133+++ pgcluster-1.7.0rc7/src/makefiles/Makefile.openbsd 2007-02-18 22:52:17.000000000 +0100
11134@@ -28,3 +28,4 @@
11135 endif
11136
11137 sqlmansect = 7
11138+LIBS += -lc_r
11139diff -aruN postgresql-8.2.4/src/makefiles/Makefile.solaris pgcluster-1.7.0rc7/src/makefiles/Makefile.solaris
11140--- postgresql-8.2.4/src/makefiles/Makefile.solaris 2005-12-09 22:19:36.000000000 +0100
11141+++ pgcluster-1.7.0rc7/src/makefiles/Makefile.solaris 2007-02-18 22:52:17.000000000 +0100
11142@@ -20,3 +20,4 @@
11143 $(LD) -G -Bdynamic -o $@ $<
11144
11145 sqlmansect = 5sql
11146+LIBS += -lpthread
11147diff -aruN postgresql-8.2.4/src/makefiles/Makefile.sunos4 pgcluster-1.7.0rc7/src/makefiles/Makefile.sunos4
11148--- postgresql-8.2.4/src/makefiles/Makefile.sunos4 2002-09-05 00:54:18.000000000 +0200
11149+++ pgcluster-1.7.0rc7/src/makefiles/Makefile.sunos4 2007-02-18 22:52:17.000000000 +0100
11150@@ -11,3 +11,4 @@
11151 $(LD) -assert pure-text -Bdynamic -o $@ $<
11152
11153 sqlmansect = 7
11154+LIBS += -lpthread
11155diff -aruN postgresql-8.2.4/src/pgcluster/Makefile pgcluster-1.7.0rc7/src/pgcluster/Makefile
11156--- postgresql-8.2.4/src/pgcluster/Makefile 1970-01-01 01:00:00.000000000 +0100
11157+++ pgcluster-1.7.0rc7/src/pgcluster/Makefile 2007-02-18 22:52:17.000000000 +0100
11158@@ -0,0 +1,17 @@
11159+#-------------------------------------------------------------------------
11160+# Recursive driver: runs the requested target in each directory of DIRS, in order.
11161+# Makefile for src/pgcluster (server programs)
11162+# Build-type targets stop at the first failing directory; clean-type targets keep going.
11163+#-------------------------------------------------------------------------
11164+
11165+subdir = src/pgcluster
11166+top_builddir = ../..
11167+include $(top_builddir)/src/Makefile.global
11168+
11169+DIRS := libpgc pgrp pglb tool
11170+
11171+all install installdirs uninstall depend distprep:
11172+ @for dir in $(DIRS); do $(MAKE) -C $$dir $@ || exit; done
11173+
11174+clean distclean maintainer-clean:
11175+ -@for dir in $(DIRS); do $(MAKE) -C $$dir $@; done
11176diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/Makefile pgcluster-1.7.0rc7/src/pgcluster/libpgc/Makefile
11177--- postgresql-8.2.4/src/pgcluster/libpgc/Makefile 1970-01-01 01:00:00.000000000 +0100
11178+++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/Makefile 2007-02-18 22:52:17.000000000 +0100
11179@@ -0,0 +1,29 @@
11180+#-------------------------------------------------------------------------
11181+#
11182+# Makefile--
11183+# Makefile for libpgc subsystem (common library for replication server)
11184+# Combines sem.o, show.o and signal.o into the single object SUBSYS.o.
11185+#-------------------------------------------------------------------------
11186+
11187+subdir = src/pgcluster/libpgc
11188+top_builddir = ../../..
11189+include $(top_builddir)/src/Makefile.global
11190+
11191+OBJS = sem.o show.o signal.o
11192+
11193+all: SUBSYS.o
11194+
11195+SUBSYS.o: $(OBJS)
11196+ $(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS)
11197+
11198+depend dep:
11199+ $(CC) -MM $(CFLAGS) *.c >depend
11200+
11201+distclean: clean
11202+
11203+clean:
11204+ rm -f SUBSYS.o $(OBJS)
11205+
11206+ifeq (depend,$(wildcard depend))
11207+include depend
11208+endif
11209diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/libpgc.h pgcluster-1.7.0rc7/src/pgcluster/libpgc/libpgc.h
11210--- postgresql-8.2.4/src/pgcluster/libpgc/libpgc.h 1970-01-01 01:00:00.000000000 +0100
11211+++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/libpgc.h 2007-02-18 22:52:17.000000000 +0100
11212@@ -0,0 +1,47 @@
11213+/*-------------------------------------------------------------------------
11214+ *
11215+ * libpgc.h
11216+ * external definitions of the functions shared by pgreplicate and pglb
11217+ *
11218+ * This should be the first file included by replicate modules.
11219+ *
11220+ *-------------------------------------------------------------------------
11221+ */
11222+#ifndef LIBPGC_H
11223+#define LIBPGC_H
11224+
11225+#include <stdio.h>
11226+
11227+/* character length of IP address */
11228+#define ADDRESS_LENGTH (24)
11229+
11230+/* logging file data tag in configuration file */
11231+#define LOG_INFO_TAG "Log_File_Info"
11232+#define FILE_NAME_TAG "File_Name"
11233+#define FILE_SIZE_TAG "File_Size"
11234+#define LOG_ROTATION_TAG "Rotate"
11235+
11236+typedef struct {
11237+ char file_name[256]; /* path of the log file, handed to PGRopen_log_file() */
11238+ FILE * fp;
11239+ int max_size; /* rotate once the file is larger than this many bytes; <= 0 disables the size check */
11240+ int rotation; /* number of rotated generations kept (file_name.1 .. file_name.N); 0 removes the file instead */
11241+} LogFileInf;
11242+
11243+extern LogFileInf * LogFileData;
11244+/* external definition of the function in sem.c */
11245+extern void PGRsem_unlock( int semid, short sem_num );
11246+extern void PGRsem_lock( int semid, short sem_num );
11247+
11248+/* external definition of the function in show.c */
11249+extern FILE * PGRopen_log_file(char * fname, int max_size, int rotation);
11250+extern void PGRclose_log_file(FILE * fp);
11251+extern void show_debug(const char * fmt,...);
11252+extern void show_error(const char * fmt,...);
11253+extern void PGRwrite_log_file(FILE * fp, const char * fmt,...);
11254+
11255+/* external definition of the function in signal.c */
11256+typedef void (*PGRsighandler)(int);
11257+extern PGRsighandler PGRsignal(int signo, PGRsighandler sighandler);
11258+
11259+#endif /* LIBPGC_H */
11260diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/sem.c pgcluster-1.7.0rc7/src/pgcluster/libpgc/sem.c
11261--- postgresql-8.2.4/src/pgcluster/libpgc/sem.c 1970-01-01 01:00:00.000000000 +0100
11262+++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/sem.c 2007-02-18 22:52:17.000000000 +0100
11263@@ -0,0 +1,67 @@
11264+/*--------------------------------------------------------------------
11265+ * FILE:
11266+ * sem.c
11267+ *
11268+ * NOTE:
11269+ * This file is composed of the functions called from the pgreplicate
11270+ * sources for semaphore control.
11271+ *
11272+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
11273+ *--------------------------------------------------------------------
11274+ */
11275+#include <stdio.h>
11276+#include <unistd.h>
11277+#include <sys/types.h>
11278+#include <errno.h>
11279+#include <sys/ipc.h>
11280+#include <sys/sem.h>
11281+#include <signal.h>
11282+
11283+extern void show_debug(const char * fmt,...);
11284+
11285+void PGRsem_unlock( int semid, short sem_num );
11286+void PGRsem_lock( int semid, short sem_num );
11287+
11288+#define PGR_SEM_UNLOCK_WAIT_MSEC (100)
11289+#define PGR_SEM_LOCK_WAIT_MSEC (500)
11290+
11291+void
11292+PGRsem_unlock( int semid, short sem_num )
11293+{
11294+	/* Release (+1) semaphore sem_num of set semid, retrying until semop() succeeds. */
11295+	int status = 0;
11296+	struct sembuf sops;
11297+
11298+	sops.sem_num = sem_num;
11299+	sops.sem_op = 1;
11300+	sops.sem_flg = 0;	/* blocking call; IPC_NOWAIT is left disabled */
11301+	do
11302+	{
11303+		status = semop(semid, &sops, 1);
11304+		/* EINTR: retry at once; any other failure: back off before retrying */
11305+		if ((status == -1) && (errno != EINTR))
11306+			/* usleep() takes microseconds, the constant is in milliseconds */
11307+			usleep(PGR_SEM_UNLOCK_WAIT_MSEC * 1000);
11308+	} while (status == -1);
11309+}
11310+
11311+void
11312+PGRsem_lock( int semid, short sem_num )
11313+{
11314+	/* Acquire (-1) semaphore sem_num of set semid, retrying until semop() succeeds. */
11315+	int status = 0;
11316+	struct sembuf sops;
11317+
11318+	sops.sem_num = sem_num;
11319+	sops.sem_op = -1;
11320+	sops.sem_flg = 0;	/* blocking call; IPC_NOWAIT is left disabled */
11321+	do
11322+	{
11323+		status = semop(semid, &sops, 1);
11324+		/* EINTR: retry at once; any other failure: back off before retrying */
11325+		if ((status == -1) && (errno != EINTR))
11326+			/* usleep() takes microseconds, the constant is in milliseconds */
11327+			usleep(PGR_SEM_LOCK_WAIT_MSEC * 1000);
11328+	} while (status == -1);
11329+}
11330+
11331diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/show.c pgcluster-1.7.0rc7/src/pgcluster/libpgc/show.c
11332--- postgresql-8.2.4/src/pgcluster/libpgc/show.c 1970-01-01 01:00:00.000000000 +0100
11333+++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/show.c 2007-02-18 22:52:17.000000000 +0100
11334@@ -0,0 +1,226 @@
11335+/*--------------------------------------------------------------------
11336+ * FILE:
11337+ * show.c
11338+ *
11339+ * NOTE:
11340+ * This file is composed of the logging and debug functions
11341+ *
11342+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
11343+ *--------------------------------------------------------------------
11344+ */
11345+#include <sys/time.h>
11346+#include <sys/types.h>
11347+#include <sys/stat.h>
11348+#include <stdio.h>
11349+#include <stdarg.h>
11350+#include <stdlib.h>
11351+#include <errno.h>
11352+#include <string.h>
11353+#include <time.h>
11354+#include <unistd.h>
11355+#include "libpgc.h"
11356+
11357+#define TIMESTAMP_SIZE 19 /* format `YYYY-MM-DD HH:MM:SS' */
11358+
11359+/*--------------------------------------
11360+ * PROTOTYPE DECLARATION
11361+ *--------------------------------------
11362+ */
11363+static char* get_current_timestamp(void);
11364+static int file_rotation(char * fname, int max_rotation);
11365+
11366+FILE * PGRopen_log_file(char * fname, int max_size, int rotation);
11367+void PGRclose_log_file(FILE * fp);
11368+void show_debug(const char * fmt,...);
11369+void show_error(const char * fmt,...);
11370+void PGRwrite_log_file(FILE * fp, const char * fmt,...);
11371+
11372+extern int Debug_Print;
11373+extern int Log_Print;
11374+
11375+LogFileInf * LogFileData = NULL;
11376+
11377+static char*
11378+get_current_timestamp(void)
11379+{
11380+	/* Format the local time as `YYYY-MM-DD HH:MM:SS' into a static buffer shared by all calls. */
11381+	static char stamp[TIMESTAMP_SIZE + 1];
11382+	time_t current = time(NULL);
11383+
11384+	strftime(stamp, sizeof(stamp),
11385+			"%Y-%m-%d %H:%M:%S", localtime(&current));
11386+	return stamp;
11387+}
11388+
11389+void
11390+show_debug(const char * fmt,...)
11391+{
11392+	/* When Debug_Print is set: print a timestamped DEBUG line to stdout and, if Log_Print is set and a log file is configured, append the message to it. */
11393+	va_list ap;
11394+	char *timestamp;
11395+	char buf[256];
11396+
11397+	if (Debug_Print)
11398+	{
11399+		timestamp = get_current_timestamp();
11400+		fprintf(stdout,"%s [%d] DEBUG:",timestamp, getpid());
11401+		va_start(ap,fmt);
11402+		vfprintf(stdout,fmt,ap);
11403+		va_end(ap);
11404+		fprintf(stdout,"\n");
11405+		fflush(stdout);
11406+		if ((Log_Print) && (LogFileData != NULL))
11407+		{
11408+			FILE * fp = PGRopen_log_file(LogFileData->file_name, LogFileData->max_size, LogFileData->rotation);
11409+			va_start(ap,fmt);
11410+			vsnprintf(buf,sizeof(buf),fmt,ap);
11411+			va_end(ap);
11412+			PGRwrite_log_file(fp, "%s", buf);	/* buf is already formatted: pass it as data, not as a format */
11413+			PGRclose_log_file(fp);
11414+		}
11415+	}
11416+}
11417+
11418+void
11419+show_error(const char * fmt,...)
11420+{
11421+	/* Print a timestamped ERROR line to stderr when Debug_Print is set; independently, append the message to the log file when Log_Print is set and one is configured. */
11422+	va_list ap;
11423+	char buf[256], *timestamp;
11424+
11425+	if (Debug_Print)
11426+	{
11427+		timestamp = get_current_timestamp();
11428+		fprintf(stderr,"%s [%d] ERROR:",timestamp, getpid());
11429+		va_start(ap,fmt);
11430+		vfprintf(stderr,fmt,ap);
11431+		va_end(ap);
11432+		fprintf(stderr,"\n");
11433+		fflush(stderr);
11434+	}
11435+	if ((Log_Print) && (LogFileData != NULL))
11436+	{
11437+		FILE * fp = PGRopen_log_file(LogFileData->file_name, LogFileData->max_size, LogFileData->rotation);
11438+		va_start(ap,fmt);
11439+		vsnprintf(buf,sizeof(buf),fmt,ap);
11440+		va_end(ap);
11441+		PGRwrite_log_file(fp, "%s", buf);	/* buf is already formatted: pass it as data, not as a format */
11442+		PGRclose_log_file(fp);
11443+	}
11444+}
11445+
11446+void
11447+PGRwrite_log_file(FILE * fp, const char * fmt,...)
11448+{
11449+ char buf[256]; /* formatted message; vsnprintf truncates anything longer */
11450+ char log[288]; /* ctime() text, then buf, then a newline */
11451+ char * p;
11452+ va_list ap;
11453+ time_t t;
11454+ /* Append one line, "<ctime text> <formatted message>\n", to fp; a NULL fp or a failing time() drops the message. */
11455+ if (fp == NULL)
11456+ {
11457+ return;
11458+ }
11459+ if (time(&t) < 0)
11460+ {
11461+ return;
11462+ }
11463+ snprintf(log,sizeof(log),"%s ",ctime(&t));
11464+ p = strchr(log,'\n'); /* ctime() text ends in a newline; turn it into a space */
11465+ if (p != NULL)
11466+ {
11467+ *p = ' ';
11468+ }
11469+ va_start(ap,fmt);
11470+ vsnprintf(buf,sizeof(buf),fmt,ap);
11471+ va_end(ap);
11472+ strcat(log,buf);
11473+ strcat(log,"\n");
11474+ if (fputs(log,fp) >= 0) /* flush only when the write itself succeeded */
11475+ {
11476+ fflush(fp);
11477+ }
11478+}
11479+
11480+FILE *
11481+PGRopen_log_file(char * fname, int max_size, int rotation)
11482+{
11483+	/*
11484+	 * Open fname for appending.  When a size limit is set (max_size > 0)
11485+	 * and the existing file is already larger than it, the file is rotated
11486+	 * first.  Returns NULL when fname is NULL, when rotation fails, or when
11487+	 * fopen() fails.
11488+	 */
11489+	struct stat info;
11490+	int oversized = 0;
11491+
11492+	if (fname == NULL)
11493+		return (FILE *)NULL;
11494+
11495+	/* a file that cannot be stat()ed is never rotated */
11496+	if ((max_size > 0) && (stat(fname,&info) == 0))
11497+		oversized = (info.st_size > max_size);
11498+
11499+	if (oversized && (file_rotation(fname, rotation) < 0))
11500+	{
11501+		/* rotation failed: report that instead of opening the oversized file */
11502+		return (FILE *)NULL;
11503+	}
11504+
11505+	return (fopen(fname,"a"));
11506+}
11507+
11508+void
11509+PGRclose_log_file(FILE * fp)
11510+{
11511+	/* Flush and close a log stream; a NULL fp is silently ignored. */
11512+	if (fp == NULL)
11513+		return;
11514+	fflush(fp);
11515+	fclose(fp);
11516+}
11517+
11518+static int
11519+file_rotation(char * fname, int max_rotation)
11520+{
11521+	/*
11522+	 * Age the rotated logs (fname.N-1 -> fname.N, highest N first), then move
11523+	 * the live file to fname.1; with max_rotation == 0 the live file is just
11524+	 * removed.  Returns 0 on success, a negative value on failure.
11525+	 */
11526+	char * func = "file_rotation()";
11527+	int i, rtn;
11528+	struct stat st;
11529+	char old_fname[256];
11530+	char new_fname[256];
11531+
11532+	if ((fname == NULL) || (max_rotation < 0))
11533+		return -1;
11534+
11535+	for ( i = max_rotation ; i > 1 ; i -- )
11536+	{
11537+		/* bounded: fname may be up to 255 chars, so "<fname>.<n>" could overrun a 256-byte buffer */
11538+		snprintf(old_fname,sizeof(old_fname),"%s.%d",fname,i-1);
11539+		rtn = stat(old_fname,&st);
11540+		if (rtn == 0)
11541+		{
11542+			snprintf(new_fname,sizeof(new_fname),"%s.%d",fname,i);
11543+			rtn = rename(old_fname, new_fname);
11544+			if (rtn < 0)
11545+			{
11546+				show_error("%s:rotate failed: (%s)",func,strerror(errno));
11547+				return rtn;
11548+			}
11549+		}
11550+	}
11551+	if (max_rotation > 0)
11552+	{
11553+		snprintf(new_fname,sizeof(new_fname),"%s.1",fname);
11554+		rtn = rename(fname, new_fname);
11555+	}
11556+	else
11557+		rtn = unlink(fname);
11558+	return rtn;
11559+}
11560+
11561diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/signal.c pgcluster-1.7.0rc7/src/pgcluster/libpgc/signal.c
11562--- postgresql-8.2.4/src/pgcluster/libpgc/signal.c 1970-01-01 01:00:00.000000000 +0100
11563+++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/signal.c 2007-02-18 22:52:17.000000000 +0100
11564@@ -0,0 +1,35 @@
11565+/*--------------------------------------------------------------------
11566+ * FILE:
11567+ * signal.c
11568+ *
11569+ * NOTE:
11570+ * This file is composed of the functions to set signal handler
11571+ *
11572+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
11573+ *--------------------------------------------------------------------
11574+ */
11575+
11576+#include <signal.h>
11577+#include "pg_config.h"
11578+#include "libpgc.h"
11579+
11580+/*
11581+ * Set up a signal handler
11582+ */
11583+PGRsighandler
11584+PGRsignal(int signo, PGRsighandler sighandler)
11585+{
11586+#if !defined(HAVE_POSIX_SIGNALS)
11587+	/* no sigaction(): fall back to signal() with the handler that was passed in */
11588+	return signal(signo, sighandler);
11589+#else
11590+	struct sigaction act, oact;
11591+
11592+	act.sa_handler = sighandler;
11593+	sigemptyset(&act.sa_mask);
11594+	act.sa_flags = 0;	/* no SA_RESTART: interrupted system calls fail with EINTR */
11595+	if (sigaction(signo, &act, &oact) < 0)
11596+		return SIG_ERR;
11597+	return oact.sa_handler;	/* previous handler, as signal() would return */
11598+#endif   /* !HAVE_POSIX_SIGNALS */
11599+}
11600diff -aruN postgresql-8.2.4/src/pgcluster/pglb/AUTHORS pgcluster-1.7.0rc7/src/pgcluster/pglb/AUTHORS
11601--- postgresql-8.2.4/src/pgcluster/pglb/AUTHORS 1970-01-01 01:00:00.000000000 +0100
11602+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/AUTHORS 2007-02-18 22:52:17.000000000 +0100
11603@@ -0,0 +1,4 @@
11604+Authors of pglb
11605+
11606+pglb was written by Atsushi Mitani.
11607+pglb is based on pg_pool which is written by Tatsuo Ishii.
11608diff -aruN postgresql-8.2.4/src/pgcluster/pglb/COPYING pgcluster-1.7.0rc7/src/pgcluster/pglb/COPYING
11609--- postgresql-8.2.4/src/pgcluster/pglb/COPYING 1970-01-01 01:00:00.000000000 +0100
11610+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/COPYING 2007-02-18 22:52:17.000000000 +0100
11611@@ -0,0 +1,14 @@
11612+Copyright (c) 2003-2006 Atsushi Mitani
11613+
11614+Permission to use, copy, modify, and distribute this software and
11615+its documentation for any purpose and without fee is hereby
11616+granted, provided that the above copyright notice appear in all
11617+copies and that both that copyright notice and this permission
11618+notice appear in supporting documentation, and that the name of the
11619+author not be used in advertising or publicity pertaining to
11620+distribution of the software without specific, written prior
11621+permission. The author makes no representations about the
11622+suitability of this software for any purpose. It is provided "as
11623+is" without express or implied warranty.
11624+
11625+Portions copyright (c) 2003-2006, Tatsuo Ishii
11626diff -aruN postgresql-8.2.4/src/pgcluster/pglb/Makefile pgcluster-1.7.0rc7/src/pgcluster/pglb/Makefile
11627--- postgresql-8.2.4/src/pgcluster/pglb/Makefile 1970-01-01 01:00:00.000000000 +0100
11628+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/Makefile 2007-02-18 22:52:17.000000000 +0100
11629@@ -0,0 +1,38 @@
11630+#-------------------------------------------------------------------------
11631+#
11632+# Makefile for src/pgcluster/pglb
11633+# NOTE(review): EXTRA_OBJS are linked into pglb but are not listed as its prerequisites; presumably they are built earlier -- confirm.
11634+#-------------------------------------------------------------------------
11635+
11636+subdir = src/pgcluster/pglb
11637+top_builddir = ../../..
11638+include $(top_builddir)/src/Makefile.global
11639+
11640+OBJS= child.o cluster_table.o load_balance.o main.o pool_auth.o \
11641+ pool_connection_pool.o pool_process_query.o pool_stream.o \
11642+ pool_params.o recovery.o socket.o lifecheck.o
11643+
11644+EXTRA_OBJS = $(top_builddir)/src/backend/libpq/replicate_com.o ../libpgc/SUBSYS.o
11645+
11646+CFLAGS += -DPRINT_DEBUG
11647+override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) -DBINDIR=\"$(bindir)\"
11648+
11649+all: pglb
11650+
11651+pglb: $(OBJS) $(libpq_builddir)/libpq.a
11652+ $(CC) $(CFLAGS) $(OBJS) $(EXTRA_OBJS) $(libpq) $(LDFLAGS) $(LIBS) -o $@
11653+
11654+install: all installdirs
11655+ $(INSTALL_PROGRAM) pglb$(X) $(DESTDIR)$(bindir)/pglb$(X)
11656+ $(INSTALL_DATA) pglb.conf.sample $(DESTDIR)$(datadir)/pglb.conf.sample
11657+
11658+installdirs:
11659+ $(mkinstalldirs) $(DESTDIR)$(bindir)
11660+ $(mkinstalldirs) $(DESTDIR)$(datadir)
11661+
11662+uninstall:
11663+ rm -f $(addprefix $(DESTDIR)$(bindir)/, pglb$(X))
11664+ rm -f $(DESTDIR)$(datadir)/pglb.conf.sample
11665+
11666+clean distclean maintainer-clean:
11667+ rm -f pglb$(X) $(OBJS)
11668diff -aruN postgresql-8.2.4/src/pgcluster/pglb/child.c pgcluster-1.7.0rc7/src/pgcluster/pglb/child.c
11669--- postgresql-8.2.4/src/pgcluster/pglb/child.c 1970-01-01 01:00:00.000000000 +0100
11670+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/child.c 2007-02-18 22:52:17.000000000 +0100
11671@@ -0,0 +1,1194 @@
11672+/*--------------------------------------------------------------------
11673+ * FILE:
11674+ * child.c
11675+ *
11676+ * NOTE:
11677+ * This file is composed of the functions to call with the source
11678+ * at child process of pglb.
11679+ *
11680+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
11681+ * Portions Copyright (c) 2003-2006, Tatsuo Ishii
11682+ *--------------------------------------------------------------------
11683+ */
11684+/*
11685+ * Permission to use, copy, modify, and distribute this software and
11686+ * its documentation for any purpose and without fee is hereby
11687+ * granted, provided that the above copyright notice appear in all
11688+ * copies and that both that copyright notice and this permission
11689+ * notice appear in supporting documentation, and that the name of the
11690+ * author not be used in advertising or publicity pertaining to
11691+ * distribution of the software without specific, written prior
11692+ * permission. The author makes no representations about the
11693+ * suitability of this software for any purpose. It is provided "as
11694+ * is" without express or implied warranty.
11695+ *
11696+*/
11697+#include "postgres.h"
11698+
11699+#include <stdio.h>
11700+#include <string.h>
11701+#include <unistd.h>
11702+#include <signal.h>
11703+#include <sys/wait.h>
11704+#include <sys/time.h>
11705+#include <ctype.h>
11706+#include <sys/types.h>
11707+#include <sys/stat.h>
11708+#include <sys/socket.h>
11709+#include <sys/ipc.h>
11710+#include <netdb.h>
11711+#include <netinet/in.h>
11712+#include <errno.h>
11713+#include <fcntl.h>
11714+#include <time.h>
11715+#include <sys/param.h>
11716+#include <arpa/inet.h>
11717+#include <sys/file.h>
11718+
11719+#ifdef HAVE_NETINET_TCP_H
11720+#include <netinet/tcp.h>
11721+#endif
11722+
11723+#ifdef HAVE_CRYPT_H
11724+#include <crypt.h>
11725+#endif
11726+
11727+#include "postgres_fe.h"
11728+#include "libpq/pqcomm.h"
11729+
11730+#include "replicate_com.h"
11731+#include "pglb.h"
11732+
11733+/*--------------------------------------
11734+ * GLOBAL VARIABLE DECLARATION
11735+ *--------------------------------------
11736+ */
11737+POOL_CONNECTION * Frontend = NULL;
11738+
11739+/*--------------------------------------
11740+ * PROTOTYPE DECLARATION
11741+ *--------------------------------------
11742+ */
11743+int PGRpre_fork_children(ClusterTbl * ptr);
11744+int PGRpre_fork_child(ClusterTbl * ptr);
11745+int PGRdo_child( int use_pool);
11746+int PGRcreate_child(ClusterTbl * cluster_p);
11747+pid_t PGRscan_child_tbl(ClusterTbl * cluster_p);
11748+void notice_backend_error(void);
11749+void do_pooling_child(int sig);
11750+int PGRset_status_to_child_tbl(pid_t pid, int status);
11751+int PGRadd_child_tbl(ClusterTbl * cluster_p, pid_t pid, int status);
11752+int PGRget_child_status(pid_t pid);
11753+void PGRreturn_connection_full_error(void);
11754+void PGRreturn_no_connection_error(void);
11755+void PGRquit_children_on_cluster(int rec_no);
11756+
11757+#ifdef NONE_BLOCK
11758+static void set_nonblock(int fd);
11759+#endif
11760+static void unset_nonblock(int fd);
11761+static POOL_CONNECTION *do_accept(int unix_fd, int inet_fd);
11762+static PGR_StartupPacket *read_startup_packet(POOL_CONNECTION *cp);
11763+static int send_startup_packet(POOL_CONNECTION_POOL_SLOT *cp);
11764+static void cancel_request(CancelPacket *sp, int secondary_backend);
11765+static POOL_CONNECTION_POOL *connect_backend(PGR_StartupPacket *sp, POOL_CONNECTION *frontend);
11766+static int send_params(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
11767+static void child_end(int sig);
11768+static void PGRreturn_with_error(char *msg);
11769+
11770+
11771+/*--------------------------------------------------------------------
11772+ * SYMBOL
11773+ *     PGRpre_fork_children()
11774+ * NOTES
11775+ *     Pre-fork the child processes for every cluster server in the
11776+ *     table.  The walk stops at the TBL_END sentinel or after
11777+ *     ClusterNum entries, whichever comes first.
11778+ * ARGS
11779+ *     ClusterTbl * ptr: pointer to the first cluster table entry (I)
11780+ * RETURN
11781+ *     OK: STATUS_OK
11782+ *     NG: STATUS_ERROR (ptr is NULL)
11783+ *--------------------------------------------------------------------
11784+ */
11785+int
11786+PGRpre_fork_children(ClusterTbl * ptr)
11787+{
11788+	int visited;
11789+
11790+	if (ptr == NULL)
11791+		return STATUS_ERROR;
11792+
11793+	/* per-entry results are not propagated: the walk always ends in STATUS_OK */
11794+	for (visited = 0; (ptr->useFlag != TBL_END) && (visited < ClusterNum); visited++, ptr++)
11795+	{
11796+		PGRpre_fork_child(ptr);
11797+	}
11798+
11799+	return STATUS_OK;
11800+}
11801+
11802+/*--------------------------------------------------------------------
11803+ * SYMBOL
11804+ *     PGRpre_fork_child()
11805+ * NOTES
11806+ *     Pre-fork max_connect * Max_Pool child processes for one
11807+ *     cluster server.
11808+ * ARGS
11809+ *     ClusterTbl * ptr: pointer to the cluster table entry (I)
11810+ * RETURN
11811+ *     OK: STATUS_OK
11812+ *     NG: STATUS_ERROR (ptr is NULL or is the TBL_END sentinel)
11813+ *--------------------------------------------------------------------
11814+ */
11815+int
11816+PGRpre_fork_child(ClusterTbl * ptr)
11817+{
11818+	int i;
11819+
11820+	if ((ptr == NULL) || (ptr->useFlag == TBL_END))
11821+	{
11822+		return STATUS_ERROR;
11823+	}
11824+	for ( i = 0 ; i < ptr->max_connect * Max_Pool ; i ++)
11825+	{
11826+		/*
11827+		 * The value returned by PGRcreate_child() is not used here.
11828+		 * NOTE(review): its failure convention is not visible -- confirm.
11829+		 */
11830+		(void) PGRcreate_child(ptr);
11831+	}
11832+	return STATUS_OK;
11833+}
11834+/*--------------------------------------------------------------------
11835+ * SYMBOL
11836+ * PGRdo_child()
11837+ * NOTES
11838+ * execute child process
11839+ * ARGS
11840+ * int use_pool: usage flag of connection pooling (I)
11841+ * RETURN
11842+ * OK: STATUS_OK
11843+ * NG: STATUS_ERROR
11844+ *--------------------------------------------------------------------
11845+ */
11846+int
11847+PGRdo_child( int use_pool)
11848+{
11849+ char * func = "PGRdo_child()";
11850+ pid_t pid = 0;
11851+ PGR_StartupPacket *sp = NULL;
11852+ POOL_CONNECTION *frontend = NULL;
11853+ POOL_CONNECTION_POOL *backend = NULL;
11854+ int status = 0;
11855+ int connection_reuse = 1;
11856+ int ssl_request = 0;
11857+ int count = 0;
11858+
11859+ pid = getpid();
11860+#ifdef PRINT_DEBUG
11861+ show_debug("%s:I am %d",func, pid);
11862+#endif
11863+
11864+ /* set up signal handlers */
11865+ PGRsignal(SIGALRM, SIG_DFL);
11866+ PGRsignal(SIGTERM, child_end);
11867+ PGRsignal(SIGHUP, child_end);
11868+ PGRsignal(SIGINT, child_end);
11869+ PGRsignal(SIGUSR1, SIG_IGN);
11870+ PGRsignal(SIGUSR2, SIG_IGN);
11871+
11872+#ifdef NONE_BLOCK
11873+ /* set listen fds to none block */
11874+ set_nonblock(Frontend_FD.unix_fd);
11875+ set_nonblock(Frontend_FD.inet_fd);
11876+#endif
11877+
11878+retry_accept:
11879+ /* perform accept() */
11880+ frontend = do_accept(Frontend_FD.unix_fd,Frontend_FD.inet_fd);
11881+ if (frontend == NULL)
11882+ {
11883+ /* accept() failed. return to the accept() loop */
11884+ PGRset_status_to_child_tbl(pid,TBL_FREE);
11885+ return STATUS_ERROR;
11886+ }
11887+
11888+ /* unset frontend fd tp none block */
11889+ unset_nonblock(frontend->fd);
11890+
11891+ /* read the startup packet */
11892+ sp = 0;
11893+retry_startup:
11894+ if (sp)
11895+ {
11896+ free(sp->startup_packet);
11897+ free(sp->database);
11898+ free(sp->user);
11899+ free(sp);
11900+ }
11901+
11902+ sp = read_startup_packet(frontend);
11903+ if (sp == NULL)
11904+ {
11905+ /* failed to read the startup packet. return to the
11906+ accept() loop */
11907+ pool_close(frontend);
11908+ PGRset_status_to_child_tbl(pid,TBL_FREE);
11909+ return STATUS_ERROR;
11910+ }
11911+ PGRset_status_to_child_tbl(pid,TBL_ACCEPT);
11912+
11913+ /* cancel request? */
11914+ if (sp->major == 1234 && sp->minor == 5678)
11915+ {
11916+ cancel_request((CancelPacket *)sp->startup_packet, 0);
11917+ pool_close(frontend);
11918+ return STATUS_ERROR;
11919+ }
11920+
11921+ /* SSL? */
11922+ if (sp->major == 1234 && sp->minor == 5679)
11923+ {
11924+ /* SSL not supported */
11925+#ifdef PRINT_DEBUG
11926+ show_debug("%s:SSLRequest: sent N; retry startup",func);
11927+#endif
11928+ if (ssl_request && use_pool)
11929+ {
11930+ pool_close(frontend);
11931+ return STATUS_ERROR;
11932+ }
11933+
11934+ /*
11935+ * say to the frontend "we do not suppport SSL"
11936+ * note that this is not a NOTICE response despite it's an 'N'!
11937+ */
11938+ pool_write_and_flush(frontend, "N", 1);
11939+ ssl_request = 1;
11940+ goto retry_startup;
11941+ }
11942+
11943+ /*
11944+ * Ok, negotiaton with frontend has been done. Let's go to the next step.
11945+ */
11946+ /*
11947+ * if there's no connection associated with user and database,
11948+ * we need to connect to the backend and send the startup packet.
11949+ */
11950+ count = 0;
11951+ if ((backend = pool_get_cp(sp->user, sp->database, sp->major)) == NULL)
11952+ {
11953+ connection_reuse = 0;
11954+
11955+ if ((backend = connect_backend(sp, frontend)) == NULL)
11956+ {
11957+ /*
11958+ PGRset_status_on_cluster_tbl(TBL_ERROR,CurrentCluster);
11959+ return STATUS_ERROR;
11960+ */
11961+ goto retry_accept;
11962+ }
11963+ }
11964+ else
11965+ {
11966+ /* reuse existing connection to backend */
11967+
11968+ if (pool_do_reauth(frontend, backend))
11969+ {
11970+ pool_close(frontend);
11971+ return STATUS_ERROR;
11972+ }
11973+
11974+ if (MAJOR(backend) == 3)
11975+ {
11976+ if (send_params(frontend, backend))
11977+ {
11978+ pool_close(frontend);
11979+ return STATUS_ERROR;
11980+ }
11981+ }
11982+
11983+ /* send ReadyForQuery to frontend */
11984+ pool_write(frontend, "Z", 1);
11985+
11986+ if (MAJOR(backend) == 3)
11987+ {
11988+ int len;
11989+ char tstate;
11990+
11991+ len = htonl(5);
11992+ pool_write(frontend, &len, sizeof(len));
11993+ tstate = TSTATE(backend);
11994+ pool_write(frontend, &tstate, 1);
11995+ }
11996+
11997+ if (pool_flush(frontend) < 0)
11998+ {
11999+ pool_close(frontend);
12000+ return STATUS_ERROR;
12001+ }
12002+
12003+ }
12004+
12005+ /* query process loop */
12006+ for (;;)
12007+ {
12008+ POOL_STATUS status;
12009+
12010+ status = pool_process_query(frontend, backend, 0);
12011+
12012+ switch (status)
12013+ {
12014+ /* client exits */
12015+ case POOL_END:
12016+ /* do not cache connection to template0, template1, regression */
12017+ if (!strcmp(sp->database, "template0") || !strcmp(sp->database, "template1") ||
12018+ !strcmp(sp->database, "regression") || use_pool == NOT_USE_CONNECTION_POOL)
12019+ {
12020+ pool_close(frontend);
12021+ pool_send_frontend_exits(backend);
12022+ pool_discard_cp(sp->user, sp->database, sp->major);
12023+ }
12024+ else
12025+ {
12026+ POOL_STATUS status1;
12027+
12028+ /* send reset request to backend */
12029+ status1 = pool_process_query(frontend, backend, 1);
12030+ pool_close(frontend);
12031+
12032+ /* if we detect errors on resetting connection, we need to discard
12033+ * this connection since it might be in unknown status
12034+ */
12035+ if (status1 != POOL_CONTINUE)
12036+ pool_discard_cp(sp->user, sp->database, sp->major);
12037+ else
12038+ pool_connection_pool_timer(backend);
12039+ }
12040+ break;
12041+
12042+ /* error occured. discard backend connection pool
12043+ and disconnect connection to the frontend */
12044+ case POOL_ERROR:
12045+ show_error("%s:do_child: exits with status 1 due to error",func);
12046+ break;
12047+
12048+ /* fatal error occured. just exit myself... */
12049+ case POOL_FATAL:
12050+ show_error("%s:do_child: fatal error occured",func);
12051+ notice_backend_error();
12052+ break;
12053+
12054+ /* not implemented yet */
12055+ case POOL_IDLE:
12056+ do_accept(Frontend_FD.unix_fd,Frontend_FD.inet_fd);
12057+#ifdef PRINT_DEBUG
12058+ show_debug("%s:accept while idle",func);
12059+#endif
12060+ break;
12061+
12062+ default:
12063+ break;
12064+ }
12065+
12066+ if (status != POOL_CONTINUE)
12067+ break;
12068+ }
12069+ if ((status == POOL_ERROR) ||
12070+ (status == POOL_FATAL))
12071+ {
12072+ PGRset_status_to_child_tbl(pid,TBL_FREE);
12073+ return STATUS_ERROR;
12074+ }
12075+ PGRset_status_to_child_tbl(pid,TBL_INIT);
12076+ return STATUS_OK;
12077+}
12078+
12079+/*--------------------------------------------------------------------
12080+ * SYMBOL
12081+ * PGRcreate_child()
12082+ * NOTES
12083+ * fork a child process for one cluster server; the child initialises its connection pool, registers its signal handlers (do_pooling_child for SIGUSR1) and then sleeps in pause()
12084+ * ARGS
12085+ * ClusterTbl * cluster_p: pointer of cluster server table (I)
12086+ * RETURN
12087+ * OK: pid of the forked child (parent side; the child never returns from this function)
12088+ * NG: STATUS_ERROR (cluster_p is NULL or fork() failed)
12089+ *--------------------------------------------------------------------
12090+ */
12091+int
12092+PGRcreate_child(ClusterTbl * cluster_p)
12093+{
12094+ char * func = "PGRcreate_child()";
12095+ pid_t pid,pgid;
12096+
12097+ if (cluster_p == NULL)
12098+ return STATUS_ERROR;
12099+
12100+#ifdef PRINT_DEBUG
12101+ show_debug("%s:create child [%d@%s]",func,cluster_p->port,cluster_p->hostName);
12102+#endif
12103+ PGRsignal(SIGCHLD,PGRrecreate_child);
12104+ pgid = getpgid((pid_t)0); /* remember the parent's process group; the child joins it below */
12105+ pid = fork();
12106+ if (pid < 0)
12107+ {
12108+ show_error("%s:fork() failed. (%s)",func,strerror(errno));
12109+ return STATUS_ERROR;
12110+ }
12111+ if (pid == 0)
12112+ {
12113+ CurrentCluster = cluster_p; /* child side: bind this process to the given cluster server */
12114+ if (pool_init_cp())
12115+ {
12116+ show_error("%s:pool_init_cp failed",func);
12117+ exit(1);
12118+ }
12119+ PGRsignal(SIGCHLD,PGRchild_wait);
12120+ PGRsignal(SIGTERM, child_end);
12121+ PGRsignal(SIGHUP, child_end);
12122+ PGRsignal(SIGINT, child_end);
12123+ PGRsignal(SIGUSR1,do_pooling_child);
12124+ setpgid((pid_t)0,pgid);
12125+ for (;;)
12126+ {
12127+ pause(); /* sleep until a signal handler has run */
12128+ PGRsignal(SIGUSR1,do_pooling_child); /* re-register the SIGUSR1 handler after every wake-up */
12129+ }
12130+#ifdef PRINT_DEBUG
12131+ show_debug("%s:create child end [%d@%s]",func,cluster_p->port,cluster_p->hostName);
12132+#endif
12133+ child_end(SIGTERM); /* NOTE(review): unreachable - the for(;;) loop above has no exit */
12134+ }
12135+ else
12136+ {
12137+ PGRadd_child_tbl(cluster_p,pid,TBL_INIT); /* parent side: record the new child as idle (return value is ignored) */
12138+ }
12139+ return pid;
12140+}
12141+
12142+/*--------------------------------------------------------------------
12143+ * SYMBOL
12144+ * PGRscan_child_tbl()
12145+ * NOTES
12146+ * get a child process id that is waiting for connection
12147+ * with the cluster server, and mark that child TBL_USE
12148+ * ARGS
12149+ * ClusterTbl * cluster_p: pointer of cluster server table (I)
12150+ * RETURN
12151+ * OK: child process id (always > 0)
12152+ * NG: 0 (no idle child for this cluster, or a table pointer is NULL)
12153+ *--------------------------------------------------------------------
12154+ */
12155+pid_t
12156+PGRscan_child_tbl(ClusterTbl * cluster_p)
12157+{
12158+ char * func = "PGRscan_child_tbl()";
12159+ ChildTbl * p;
12160+
12161+ if ( cluster_p == NULL)
12162+ {
12163+ show_error("%s:Cluster_Tbl is not initialize",func);
12164+ return 0; /* was STATUS_ERROR: a status code must not be handed back as a pid the caller may signal */
12165+ }
12166+ p = Child_Tbl;
12167+ if ( p == NULL)
12168+ {
12169+ show_error("%s:Child_Tbl is not initialize",func);
12170+ return 0; /* same documented "NG: 0" contract as the not-found case below */
12171+ }
12172+ while(p->useFlag != TBL_END)
12173+ {
12174+ if (p->pid <= 0) /* slot holds no live child */
12175+ {
12176+ p++;
12177+ continue;
12178+ }
12179+ if ((p->useFlag == TBL_INIT) &&
12180+ (p->rec_no == cluster_p->rec_no))
12181+ {
12182+ p->useFlag = TBL_USE; /* claim the idle child before returning it */
12183+ return (p->pid);
12184+ }
12185+ p++;
12186+ }
12187+ return 0;
12188+}
12189+
12190+/* record a backend connection error: mark this child TBL_ERROR and the current cluster TBL_ERROR_NOTICE (the SIGUSR2 notification to the parent is disabled - see the commented-out code below) */
12191+void
12192+notice_backend_error(void)
12193+{
12194+ pid_t pid = getpid();
12195+
12196+ PGRset_status_to_child_tbl(pid,TBL_ERROR);
12197+ PGRset_status_on_cluster_tbl(TBL_ERROR_NOTICE,CurrentCluster);
12198+
12199+ /*
12200+ kill(parent, SIGUSR2);
12201+ sleep(1);
12202+ */
12203+}
12204+
12205+
12206+/*
12207+ * SIGUSR1 handler of a pooling child (registered in PGRcreate_child): run PGRdo_child(USE_CONNECTION_POOL), release the cluster connection, and end the child process if PGRdo_child did not return STATUS_OK
12208+ */
12209+void
12210+do_pooling_child(int sig)
12211+{
12212+ char * func = "do_pooling_child()";
12213+ int rtn;
12214+ pid_t pid;
12215+
12216+ pid = getpid(); /* NOTE(review): pid is assigned but never read in this function */
12217+ rtn = PGRdo_child(USE_CONNECTION_POOL);
12218+ PGRrelease_connection(CurrentCluster); /* released whether or not PGRdo_child succeeded */
12219+ if (rtn != STATUS_OK)
12220+ {
12221+ show_error("%s:PGRdo_child failed",func);
12222+ child_end(SIGTERM); /* does not return: child_end() calls exit(0) */
12223+ }
12224+ return ;
12225+}
12226+
12227+/*
12228+ * set status in child process table: overwrite the useFlag of the first entry whose pid matches; returns STATUS_OK, or STATUS_ERROR when Child_Tbl is NULL or no entry has that pid
12229+ */
12230+int
12231+PGRset_status_to_child_tbl(pid_t pid, int status)
12232+{
12233+ char * func = "PGRset_status_to_child_tbl()";
12234+ ChildTbl * p;
12235+
12236+ p = Child_Tbl;
12237+ if ( p == NULL)
12238+ {
12239+ show_error("%s:Child_Tbl is not initialize",func);
12240+ return STATUS_ERROR;
12241+ }
12242+ while(p->useFlag != TBL_END) /* the table ends at the entry whose useFlag is TBL_END */
12243+ {
12244+ if (p->pid == pid)
12245+ {
12246+ p->useFlag = status;
12247+ return STATUS_OK;
12248+ }
12249+ p++;
12250+ }
12251+ return STATUS_ERROR;
12252+}
12253+
12254+/*
12255+ * add child process data in child process table: claim the first TBL_FREE or TBL_ERROR slot and store status, the cluster's rec_no and the pid in it; returns STATUS_OK, or STATUS_ERROR when a pointer is NULL or no slot is available
12256+ */
12257+int
12258+PGRadd_child_tbl(ClusterTbl * cluster_p, pid_t pid, int status)
12259+{
12260+ char * func = "PGRadd_child_tbl()";
12261+ ChildTbl * p;
12262+
12263+ p = Child_Tbl;
12264+ if ( cluster_p == NULL)
12265+ {
12266+ show_error("%s:Cluster_Tbl is not initialize",func);
12267+ return STATUS_ERROR;
12268+ }
12269+ if ( p == NULL)
12270+ {
12271+ show_error("%s:Child_Tbl is not initialize",func);
12272+ return STATUS_ERROR;
12273+ }
12274+ while(p->useFlag != TBL_END)
12275+ {
12276+ if ((p->useFlag == TBL_FREE) ||
12277+ (p->useFlag == TBL_ERROR)) /* slots of failed children are reused as well */
12278+ {
12279+ p->useFlag = status;
12280+ p->rec_no = cluster_p->rec_no;
12281+ p->pid = pid;
12282+ return STATUS_OK;
12283+ }
12284+ p++;
12285+ }
12286+ return STATUS_ERROR;
12287+}
12288+
12289+int /* return the useFlag of the child-table entry for pid; STATUS_ERROR when Child_Tbl is NULL or pid is not in the table */
12290+PGRget_child_status(pid_t pid)
12291+{
12292+ char * func = "PGRget_child_status()";
12293+ ChildTbl * p;
12294+
12295+ p = Child_Tbl;
12296+ if ( p == NULL)
12297+ {
12298+ show_error("%s:Child_Tbl is not initialize",func);
12299+ return STATUS_ERROR;
12300+ }
12301+
12302+ while (p->useFlag != TBL_END)
12303+ {
12304+ if (p->pid == pid)
12305+ {
12306+ return p->useFlag;
12307+ }
12308+ p++;
12309+ }
12310+ return STATUS_ERROR; /* NOTE(review): indistinguishable from a stored useFlag that happens to equal STATUS_ERROR - confirm the TBL_* values cannot collide */
12311+}
12312+
12313+void /* accept the pending frontend connection and answer it with a "backend connection is full" error */
12314+PGRreturn_connection_full_error(void)
12315+{
12316+ PGRreturn_with_error( "Sorry, backend connection is full\n");
12317+}
12318+
12319+void /* accept the pending frontend connection and answer it with a "no cluster available" error */
12320+PGRreturn_no_connection_error(void) {
12321+ PGRreturn_with_error("pglb could not connect to server: no cluster available.\n");
12322+}
12323+
12324+static void /* accept one pending frontend connection, read its startup packet, send 'E' followed by the NUL-terminated msg, then close the connection */
12325+PGRreturn_with_error (char *msg)
12326+{
12327+ PGR_StartupPacket *sp = NULL;
12328+ POOL_CONNECTION *frontend = NULL;
12329+
12330+
12331+ /* perform accept() */
12332+ frontend = do_accept(Frontend_FD.unix_fd,Frontend_FD.inet_fd);
12333+ if (frontend == NULL)
12334+ {
12335+ /* accept() failed. return to the accept() loop */
12336+ return ;
12337+ }
12338+ sp = read_startup_packet(frontend); /* the packet is read only so the client gets its reply after its request; the content is not used */
12339+ if (sp == NULL)
12340+ {
12341+ /* failed to read the startup packet. return to the
12342+ accept() loop */
12343+ pool_close(frontend);
12344+ return ;
12345+ }
12346+ pool_write_and_flush(frontend, "E", 1);
12347+ pool_write_and_flush(frontend, msg, strlen(msg)+1);
12348+ pool_close(frontend);
12349+ free(sp->startup_packet); free(sp); /* fix: both were leaked on every rejected connection. NOTE(review): sp->user / sp->database are still not freed because read_startup_packet() does not guarantee they are initialised for V3 packets */
12350+}
12351+
12352+void /* send SIGTERM to every live child attached to cluster record rec_no, reap it and free its child-table slot; SIGCHLD is ignored while this runs and the normal handler is restored afterwards */
12353+PGRquit_children_on_cluster(int rec_no)
12354+{
12355+ char * func = "PGRquit_children_on_cluster()";
12356+ ChildTbl * p;
12357+
12358+ if (Child_Tbl == NULL)
12359+ {
12360+ return;
12361+ }
12362+ PGRsignal(SIGCHLD,SIG_IGN);
12363+ p = Child_Tbl;
12364+ while(p->useFlag != TBL_END)
12365+ {
12366+ if ((p->rec_no == rec_no) && (p->pid > 0)) /* fix: never call kill() with pid 0 or a negative pid - that signals a whole process group, not one child */
12367+ {
12368+ if (kill (p->pid,SIGTERM) == -1)
12369+ {
12370+ show_error("%s:could not stop pid: %d (%s)",func,p->pid,strerror(errno));
12371+ break; /* fix: was "return", which left SIGCHLD set to SIG_IGN for the rest of the process lifetime */
12372+ }
12373+ PGRchild_wait(SIGTERM);
12374+ p->useFlag = TBL_FREE; /* was DATA_FREE; the child table uses the TBL_* states everywhere else in this file */
12375+ }
12376+ p++;
12377+ }
12378+ if (Use_Connection_Pool)
12379+ {
12380+ PGRsignal(SIGCHLD,PGRrecreate_child);
12381+ }
12382+ else
12383+ {
12384+ PGRsignal(SIGCHLD,PGRchild_wait);
12385+ }
12386+}
12387+
12388+/* -------------------------------------------------------------------
12389+ * private functions
12390+ * -------------------------------------------------------------------
12391+ */
12392+
12393+#ifdef NONE_BLOCK
12394+/*
12395+ * set non-block flag: add O_NONBLOCK to the file status flags of fd; the process is ended through child_end(SIGTERM) if fcntl() fails
12396+ */
12397+static void set_nonblock(int fd)
12398+{
12399+ char* func = "set_nonblock()";
12400+ int var;
12401+
12402+ /* set fd to non-blocking: read the current flags, then write them back with O_NONBLOCK added */
12403+ var = fcntl(fd, F_GETFL, 0);
12404+ if (var == -1)
12405+ {
12406+ show_error("%s:fcntl failed. %s", func,strerror(errno));
12407+ child_end(SIGTERM);
12408+ }
12409+ if (fcntl(fd, F_SETFL, var | O_NONBLOCK) == -1)
12410+ {
12411+ show_error("%s:fcntl failed. %s", func,strerror(errno));
12412+ child_end(SIGTERM);
12413+ }
12414+}
12415+#endif
12416+
12417+/*
12418+ * unset non-block flag: clear O_NONBLOCK in the file status flags of fd; the process is ended through child_end(SIGTERM) if fcntl() fails
12419+ */
12420+static void unset_nonblock(int fd)
12421+{
12422+ char * func = "unset_nonblock()";
12423+ int var;
12424+
12425+ /* set fd back to blocking: read the current flags, then write them back with O_NONBLOCK removed */
12426+ var = fcntl(fd, F_GETFL, 0);
12427+ if (var == -1)
12428+ {
12429+ show_error("%s,fcntl failed. %s", func,strerror(errno));
12430+ child_end(SIGTERM);
12431+ }
12432+ if (fcntl(fd, F_SETFL, var & ~O_NONBLOCK) == -1)
12433+ {
12434+ show_error("%s,fcntl failed. %s", func,strerror(errno));
12435+ child_end(SIGTERM);
12436+ }
12437+}
12438+
12439+
12440+/*
12441+* wait in select() for a connection on the UNIX or INET listening socket, accept() it and wrap the new fd in a POOL_CONNECTION; returns NULL when select/accept/setsockopt/pool_open fails or is interrupted
12442+*/
12443+static POOL_CONNECTION *do_accept(int unix_fd, int inet_fd)
12444+{
12445+ char * func = "do_accept()";
12446+ fd_set readmask;
12447+ int fds;
12448+ struct sockaddr addr;
12449+ socklen_t addrlen;
12450+ int fd = 0;
12451+ int afd;
12452+ int inet = 0;
12453+ POOL_CONNECTION *cp;
12454+#ifdef ACCEPT_PERFORMANCE
12455+ struct timeval now1, now2;
12456+ static long atime;
12457+ static int cnt;
12458+#endif
12459+
12460+ FD_ZERO(&readmask);
12461+ FD_SET(unix_fd, &readmask);
12462+ if (inet_fd) /* inet_fd == 0 is treated as "no INET socket" */
12463+ FD_SET(inet_fd, &readmask);
12464+
12465+ fds = select(Max(unix_fd, inet_fd)+1, &readmask, NULL, NULL, NULL); /* no timeout: blocks until a socket is readable or a signal arrives */
12466+ if (fds == -1)
12467+ {
12468+ if (errno == EAGAIN || errno == EINTR)
12469+ return NULL;
12470+
12471+ show_error("%s:select() failed. reason %s",func, strerror(errno));
12472+ return NULL;
12473+ }
12474+
12475+ if (fds == 0)
12476+ return NULL;
12477+
12478+ if (FD_ISSET(unix_fd, &readmask))
12479+ {
12480+ fd = unix_fd;
12481+ }
12482+
12483+ if (FD_ISSET(inet_fd, &readmask))
12484+ {
12485+ fd = inet_fd; /* the INET socket wins when both sockets are readable */
12486+ inet++;
12487+ }
12488+
12489+ /*
12490+ * Note that some SysV systems do not work here. For those
12491+ * systems, we need some locking mechanism for the fd.
12492+ */
12493+ addrlen = sizeof(addr);
12494+
12495+#ifdef ACCEPT_PERFORMANCE
12496+ gettimeofday(&now1,0);
12497+#endif
12498+ afd = accept(fd, &addr, &addrlen);
12499+ if (afd < 0)
12500+ {
12501+ /*
12502+ * "Resource temporarily unavailable" (EAGAIN or EWOULDBLOCK)
12503+ * can be silently ignored.
12504+ */
12505+ if (errno != EAGAIN && errno != EWOULDBLOCK)
12506+ show_error("%s:accept() failed. reason: %s",func, strerror(errno));
12507+ return NULL;
12508+ }
12509+#ifdef ACCEPT_PERFORMANCE
12510+ gettimeofday(&now2,0);
12511+ atime += (now2.tv_sec - now1.tv_sec)*1000000 + (now2.tv_usec - now1.tv_usec); /* accumulated accept() time in microseconds */
12512+ cnt++;
12513+ if (cnt % 100 == 0)
12514+ {
12515+ show_error("%s:cnt: %d atime: %ld",func, cnt, atime);
12516+ }
12517+#endif
12518+#ifdef PRINT_DEBUG
12519+ show_debug("%s:I am %d accept fd %d",func, getpid(), afd);
12520+#endif
12521+
12522+ /* set NODELAY and KEEPALIVE options if INET connection */
12523+ if (inet)
12524+ {
12525+ int on = 1;
12526+
12527+ if (setsockopt(afd, IPPROTO_TCP, TCP_NODELAY,
12528+ (char *) &on,
12529+ sizeof(on)) < 0)
12530+ {
12531+ show_error("%s:do_accept: setsockopt() failed: %s",func, strerror(errno));
12532+ close(afd);
12533+ return NULL;
12534+ }
12535+ if (setsockopt(afd, SOL_SOCKET, SO_KEEPALIVE,
12536+ (char *) &on,
12537+ sizeof(on)) < 0)
12538+ {
12539+ show_error("%s:do_accept: setsockopt() failed: %s", func,strerror(errno));
12540+ close(afd);
12541+ return NULL;
12542+ }
12543+ }
12544+
12545+ if ((cp = pool_open(afd)) == NULL)
12546+ {
12547+ close(afd); /* the accepted fd is closed here when pool_open() fails */
12548+ return NULL;
12549+ }
12550+ return cp;
12551+}
12552+
12553+/*
12554+* read startup packet: read the 4-byte length and the packet body from cp, decode the protocol version and extract user/database for V2 and V3 (empty strings for 1234 = cancel/SSL); returns a heap-allocated PGR_StartupPacket, or NULL on read error, bad length, unknown protocol, missing user, or out of memory
12555+*/
12556+static PGR_StartupPacket *read_startup_packet(POOL_CONNECTION *cp)
12557+{
12558+ char * func = "read_startup_packet()";
12559+ PGR_StartupPacket *sp;
12560+ PGR_StartupPacket_v2 *sp2;
12561+ int protov;
12562+ int len;
12563+ char *p;
12564+
12565+ sp = (PGR_StartupPacket *)calloc(1, sizeof(PGR_StartupPacket)); /* fix: zero-filled so user/database/startup_packet start as NULL instead of garbage */
12566+ if (!sp)
12567+ {
12568+ show_error("%s:read_startup_packet: out of memory",func);
12569+ return NULL;
12570+ }
12571+
12572+ /* read startup packet length */
12573+ if (pool_read(cp, &len, sizeof(len)))
12574+ {
12575+ free(sp);
12576+ return NULL;
12577+ }
12578+ len = ntohl(len);
12579+ len -= sizeof(len); /* the length word counts itself */
12580+
12581+ if (len < (int) sizeof(protov) || len > 10000) /* fix: the body must hold the version word; 10000 is the PostgreSQL server's own startup-packet limit and stops a client from forcing a huge allocation */
12582+ {
12583+ show_error("%s:read_startup_packet: incorrect packet length (%d)", func,len);
12584+ free(sp);
12585+ return NULL;
12586+ }
12587+
12588+ sp->startup_packet = calloc(len + sizeof(PGR_StartupPacket_v2), 1); /* fix: zero padding after the data keeps V3 strings NUL-terminated and lets a short V2 packet be read as a full V2 struct */
12589+ if (!sp->startup_packet)
12590+ {
12591+ show_error("%s:read_startup_packet: out of memory",func);
12592+ free(sp);
12593+ return NULL;
12594+ }
12595+
12596+ /* read startup packet */
12597+ if (pool_read(cp, sp->startup_packet, len))
12598+ {
12599+ free(sp->startup_packet); free(sp); /* fix: the packet buffer was leaked here */
12600+ return NULL;
12601+ }
12602+
12603+ sp->len = len; /* only the len received bytes are forwarded later; the padding is not part of the packet */
12604+ memcpy(&protov, sp->startup_packet, sizeof(protov));
12605+ sp->major = ntohl(protov)>>16;
12606+ sp->minor = ntohl(protov) & 0x0000ffff;
12607+ p = sp->startup_packet;
12608+
12609+ switch(sp->major)
12610+ {
12611+ case PROTO_MAJOR_V2: /* V2 */
12612+ sp2 = (PGR_StartupPacket_v2 *)(sp->startup_packet);
12613+
12614+ sp->database = calloc(SM_DATABASE+1, 1);
12615+ if (!sp->database)
12616+ {
12617+ show_error("%s:read_startup_packet: out of memory",func);
12618+ free(sp->user); free(sp->database); free(sp->startup_packet); free(sp); /* free(NULL) is a no-op, so one cleanup line serves every error path */
12619+ return NULL;
12620+ }
12621+ strncpy(sp->database, sp2->database, SM_DATABASE);
12622+
12623+ sp->user = calloc(SM_USER+1, 1);
12624+ if (!sp->user)
12625+ {
12626+ show_error("%s:read_startup_packet: out of memory",func);
12627+ free(sp->user); free(sp->database); free(sp->startup_packet); free(sp);
12628+ return NULL;
12629+ }
12630+ strncpy(sp->user, sp2->user, SM_USER);
12631+
12632+ break;
12633+
12634+ case PROTO_MAJOR_V3: /* V3 */
12635+ p += sizeof(int); /* skip protocol version info */
12636+
12637+ while (p < (char *) sp->startup_packet + len && *p) /* fix: stop at the end of the received data, not only at an empty name */
12638+ {
12639+ if (!strcmp("user", p))
12640+ {
12641+ p += (strlen(p) + 1);
12642+ sp->user = strdup(p);
12643+ if (!sp->user)
12644+ {
12645+ show_error("%s:read_startup_packet: out of memory",func);
12646+ free(sp->user); free(sp->database); free(sp->startup_packet); free(sp);
12647+ return NULL;
12648+ }
12649+ }
12650+ else if (!strcmp("database", p))
12651+ {
12652+ p += (strlen(p) + 1);
12653+ sp->database = strdup(p);
12654+ if (!sp->database)
12655+ {
12656+ show_error("%s:read_startup_packet: out of memory",func);
12657+ free(sp->user); free(sp->database); free(sp->startup_packet); free(sp);
12658+ return NULL;
12659+ }
12660+ }
12661+ p += (strlen(p) + 1); /* skip the value (or the name of an option we do not track) */
12662+ }
12663+ break;
12664+
12665+ case 1234: /* cancel or SSL request */
12666+ /* set dummy database, user info */
12667+ sp->database = calloc(1, 1);
12668+ if (!sp->database)
12669+ {
12670+ show_error("%s:read_startup_packet: out of memory",func);
12671+ free(sp->user); free(sp->database); free(sp->startup_packet); free(sp);
12672+ return NULL;
12673+ }
12674+ sp->user = calloc(1, 1);
12675+ if (!sp->user)
12676+ {
12677+ show_error("%s:read_startup_packet: out of memory",func);
12678+ free(sp->user); free(sp->database); free(sp->startup_packet); free(sp);
12679+ return NULL;
12680+ }
12681+ break;
12682+
12683+ default:
12684+ show_error("%s:read_startup_packet: invalid major no: %d",func, sp->major);
12685+ free(sp->user); free(sp->database); free(sp->startup_packet); free(sp); /* fix: the packet buffer was leaked for every unknown protocol version */
12686+ return NULL;
12687+ }
12688+ if (sp->user && !sp->database) sp->database = strdup(sp->user); /* fix: in protocol 3.0 "database" is optional and defaults to the user name */
12689+#ifdef PRINT_DEBUG
12690+ show_debug("%s:Protocol Major: %d Minor: %d database: %s user: %s",
12691+ func,sp->major, sp->minor, sp->database, sp->user);
12692+#endif
12693+ if (!sp->user || !sp->database) { show_error("%s:read_startup_packet: no user name in startup packet",func); free(sp->user); free(sp->database); free(sp->startup_packet); free(sp); return NULL; } /* fix: callers use both strings unconditionally */
12694+ return sp;
12695+}
12696+
12697+/*
12698+* send startup packet: write the saved startup packet of slot cp to its backend connection, preceded by a network-order length that includes the 4-byte length word itself; returns the result of pool_write_and_flush()
12699+*/
12700+static int send_startup_packet(POOL_CONNECTION_POOL_SLOT *cp)
12701+{
12702+ int len;
12703+
12704+ len = htonl(cp->sp->len + sizeof(len));
12705+ pool_write(cp->con, &len, sizeof(len)); /* the result of this write is not checked; only the flush below is */
12706+ return pool_write_and_flush(cp->con, cp->sp->startup_packet, cp->sp->len);
12707+}
12708+
12709+/*
12710+ * process cancel request: open a fresh connection to the current cluster's backend (UNIX socket when it is on this host, INET otherwise; secondary_backend selects the primary or secondary backend) and forward the CancelPacket to it
12711+ */
12712+static void cancel_request(CancelPacket *sp, int secondary_backend)
12713+{
12714+ char * func = "cancel_request()";
12715+ int len;
12716+ int fd;
12717+ POOL_CONNECTION *con;
12718+ char hostName[128];
12719+
12720+#ifdef PRINT_DEBUG
12721+ show_debug("%s:Cancel request received",func);
12722+#endif
12723+
12724+ if (CurrentCluster == NULL)
12725+ {
12726+ return;
12727+ }
12728+ if (gethostname(hostName,sizeof(hostName)) < 0)
12729+ {
12730+ show_error("%s:gethostname() failed. (%s)",func,strerror(errno));
12731+ return ;
12732+ }
12733+ if (secondary_backend)
12734+ {
12735+ if (PGRis_same_host(hostName,CurrentCluster->hostName))
12736+ fd = connect_unix_domain_socket(1);
12737+ else
12738+ fd = connect_inet_domain_socket(1);
12739+ }
12740+ else
12741+ {
12742+ if (PGRis_same_host(hostName,CurrentCluster->hostName))
12743+ fd = connect_unix_domain_socket(0);
12744+ else
12745+ fd = connect_inet_domain_socket(0);
12746+ }
12747+
12748+ if (fd < 0)
12749+ {
12750+ show_error("%s:Could not create socket for sending cancel request",func);
12751+ return;
12752+ }
12753+
12754+ con = pool_open(fd);
12755+ if (con == NULL)
12756+ { close(fd); return; } /* fix: the socket was leaked when pool_open() failed; do_accept() closes its fd in the same situation */
12757+
12758+ len = htonl(sizeof(len) + sizeof(CancelPacket)); /* the length word counts itself plus the cancel payload */
12759+ pool_write(con, &len, sizeof(len));
12760+
12761+ if (pool_write_and_flush(con, sp, sizeof(CancelPacket)) < 0)
12762+ show_error("%s:Could not send cancel request packet",func);
12763+ pool_close(con);
12764+}
12765+
12766+static POOL_CONNECTION_POOL *connect_backend(PGR_StartupPacket *sp, POOL_CONNECTION *frontend) /* create a new backend pool entry, send it the startup packet (both slots when replication is enabled) and authenticate; returns the pool entry, or NULL on failure - every failure path has already closed frontend */
12767+{
12768+ char * func ="connect_backend()";
12769+ POOL_CONNECTION_POOL *backend;
12770+
12771+ /* connect to the backend */
12772+ backend = pool_create_cp();
12773+ if (backend == NULL)
12774+ {
12775+ pool_send_error_message(frontend, sp->major, "XX000", "connection cache is full", "",
12776+ "increace max_pool", __FILE__, __LINE__);
12777+ pool_close(frontend);
12778+ return NULL;
12779+ }
12780+
12781+ /* mark this is a backend connection */
12782+ backend->slots[0]->con->isbackend = 1;
12783+ /*
12784+ * save startup packet info
12785+ */
12786+ backend->slots[0]->sp = sp;
12787+
12788+ if (pool_config_replication_enabled)
12789+ {
12790+ backend->slots[1]->con->isbackend = 1;
12791+ backend->slots[1]->con->issecondary_backend = 1;
12792+ /*
12793+ * save startup packet info
12794+ */
12795+ backend->slots[1]->sp = sp; /* both slots point at the same sp object */
12796+ }
12797+
12798+ /* send startup packet */
12799+ if (send_startup_packet(backend->slots[0]) < 0)
12800+ {
12801+ show_error("%s:do_child: fails to send startup packet to the backend",func);
12802+ pool_close(frontend); /* NOTE(review): unlike the auth-failure path below, the pool entry is not discarded here - confirm whether it should be */
12803+ return NULL;
12804+ }
12805+
12806+ /* send startup packet to the secondary backend */
12807+ if (pool_config_replication_enabled)
12808+ {
12809+ if (send_startup_packet(backend->slots[1]) < 0)
12810+ {
12811+ show_error("%s:do_child: fails to send startup packet to the secondary backend",func);
12812+ pool_close(frontend);
12813+ return NULL;
12814+ }
12815+ }
12816+
12817+ /*
12818+ * do authentication stuff
12819+ */
12820+ if (pool_do_auth(frontend, backend))
12821+ {
12822+ pool_close(frontend);
12823+ pool_discard_cp(sp->user, sp->database, sp->major);
12824+ return NULL;
12825+ }
12826+ return backend;
12827+}
12828+
12829+static int send_params(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend) /* send every parameter stored for the master backend to the frontend as an 'S' message, then flush; returns 0 on success, -1 if the flush fails */
12830+{
12831+ char * func = "send_params()";
12832+ int index;
12833+ char *name, *value;
12834+ int len, sendlen;
12835+
12836+ index = 0;
12837+ while (pool_get_param(&MASTER(backend)->params, index++, &name, &value) == 0) /* iterate until pool_get_param() reports no entry at this index */
12838+ {
12839+ pool_write(frontend, "S", 1);
12840+ len = sizeof(sendlen) + strlen(name) + 1 + strlen(value) + 1; /* length word + both NUL-terminated strings */
12841+ sendlen = htonl(len);
12842+ pool_write(frontend, &sendlen, sizeof(sendlen));
12843+ pool_write(frontend, name, strlen(name) + 1);
12844+ pool_write(frontend, value, strlen(value) + 1);
12845+ }
12846+
12847+ if (pool_flush(frontend))
12848+ {
12849+ show_error("%s:pool_send_params: pool_flush() failed",func);
12850+ return -1;
12851+ }
12852+ return 0;
12853+}
12854+
12855+/*
12856+ * ending function of child process: ignore further deliveries of sig, call pool_finish() and exit with status 0; registered for SIGTERM/SIGHUP/SIGINT and also called directly with SIGTERM
12857+ */
12858+static void
12859+child_end(int sig)
12860+{
12861+ PGRsignal(sig,SIG_IGN); /* avoid re-entering this function while it is shutting down */
12862+
12863+ pool_finish();
12864+ exit(0); /* status is 0 even when the caller is ending the child because of an error */
12865+}
12866diff -aruN postgresql-8.2.4/src/pgcluster/pglb/cluster_table.c pgcluster-1.7.0rc7/src/pgcluster/pglb/cluster_table.c
12867--- postgresql-8.2.4/src/pgcluster/pglb/cluster_table.c 1970-01-01 01:00:00.000000000 +0100
12868+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/cluster_table.c 2007-02-18 22:52:17.000000000 +0100
12869@@ -0,0 +1,343 @@
12870+/*--------------------------------------------------------------------
12871+ * FILE:
12872+ * cluster_table.c
12873+ *
12874+ * NOTE:
12875+ * This file is composed of the functions to use a cluster table.
12876+ *
12877+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
12878+ *--------------------------------------------------------------------
12879+ */
12880+/*
12881+ * Permission to use, copy, modify, and distribute this software and
12882+ * its documentation for any purpose and without fee is hereby
12883+ * granted, provided that the above copyright notice appear in all
12884+ * copies and that both that copyright notice and this permission
12885+ * notice appear in supporting documentation, and that the name of the
12886+ * author not be used in advertising or publicity pertaining to
12887+ * distribution of the software without specific, written prior
12888+ * permission. The author makes no representations about the
12889+ * suitability of this software for any purpose. It is provided "as
12890+ * is" without express or implied warranty.
12891+ *
12892+*/
12893+#include <stdio.h>
12894+#include <stdarg.h>
12895+#include <string.h>
12896+#include <stdlib.h>
12897+#include <unistd.h>
12898+#include <signal.h>
12899+#include <sys/wait.h>
12900+#include <sys/time.h>
12901+#include <ctype.h>
12902+#include <sys/types.h>
12903+#include <sys/stat.h>
12904+#include <sys/socket.h>
12905+#include <sys/ipc.h>
12906+#include <sys/shm.h>
12907+#include <sys/sem.h>
12908+#include <sys/msg.h>
12909+#include <netdb.h>
12910+#include <netinet/in.h>
12911+#include <errno.h>
12912+#include <fcntl.h>
12913+#include <time.h>
12914+#include <sys/param.h>
12915+#include <sys/select.h>
12916+#include <arpa/inet.h>
12917+#include <sys/file.h>
12918+
12919+#ifdef HAVE_NETINET_TCP_H
12920+#include <netinet/tcp.h>
12921+#endif
12922+
12923+#include "replicate_com.h"
12924+#include "pglb.h"
12925+
12926+
12927+/*--------------------------------------
12928+ * PROTOTYPE DECLARATION
12929+ *--------------------------------------
12930+ */
12931+int PGRis_cluster_alive(void) ;
12932+ClusterTbl * PGRscan_cluster(void);
12933+void PGRset_key_of_cluster(ClusterTbl * ptr, RecoveryPacket * packet);
12934+ClusterTbl * PGRadd_cluster_tbl (ClusterTbl * conf_data);
12935+ClusterTbl * PGRset_status_on_cluster_tbl (int status, ClusterTbl * ptr);
12936+ClusterTbl * PGRsearch_cluster_tbl(ClusterTbl * conf_data);
12937+
12938+static int set_cluster_tbl(ClusterTbl * ptr , ClusterTbl * conf_data);
12939+static ClusterTbl * search_free_cluster_tbl(void );
12940+static void write_cluster_status_file(ClusterTbl * ptr);
12941+
12942+int PGRis_cluster_alive(void) /* STATUS_OK when at least one cluster-table entry is TBL_USE or TBL_INIT, STATUS_ERROR otherwise (including when the table is not allocated) */
12943+{
12944+ ClusterTbl * ptr = NULL;
12945+ int use=0;
12946+ ptr = Cluster_Tbl;
12947+
12948+ PGRsem_lock(ClusterSemid,MAX_DB_SERVER);
12949+ while ((ptr != NULL) && (ptr->useFlag != TBL_END)) /* fix: guard against an unset table, as PGRscan_cluster() does */
12950+ {
12951+ if ((ptr->useFlag == TBL_USE) || (ptr->useFlag == TBL_INIT))
12952+ {
12953+ use++;
12954+ }
12955+ ptr++;
12956+ }
12957+ PGRsem_unlock(ClusterSemid,MAX_DB_SERVER);
12958+ return use==0 ? STATUS_ERROR : STATUS_OK;
12959+}
12960+
12961+ClusterTbl * /* pick a usable cluster server with spare capacity - the first idle one, else the one with the lowest use rate - increment its use_num and return it; NULL when none qualifies or the table is unset */
12962+PGRscan_cluster(void)
12963+{
12964+ char * func = "PGRscan_cluster";
12965+ ClusterTbl * ptr = NULL;
12966+ ClusterTbl * rtn = NULL;
12967+ int min_use_rate = 100; /* percent; every candidate has use_num < max_connect, so its rate is below 100 */
12968+ int use_rate = 0;
12969+ int cnt = 0;
12970+
12971+
12972+ ptr = Cluster_Tbl;
12973+ if (ptr == NULL)
12974+ {
12975+ show_error("%s:Cluster Table is not initialize",func);
12976+ return (ClusterTbl *)NULL;
12977+ }
12978+#ifdef PRINT_DEBUG
12979+ show_debug("%s:%d ClusterDB can be used",func,ClusterNum);
12980+#endif
12981+ PGRsem_lock(ClusterSemid,MAX_DB_SERVER);
12982+ while ((cnt <= ClusterNum) && (ptr->useFlag != TBL_END))
12983+ {
12984+#ifdef PRINT_DEBUG
12985+ show_debug("%s:%s [%d],useFlag->%d max->%d use_num->%d\n",
12986+ func, ptr->hostName,ptr->port,ptr->useFlag,ptr->max_connect,ptr->use_num);
12987+#endif
12988+ cnt ++;
12989+ if ((ptr->useFlag != TBL_USE) && (ptr->useFlag != TBL_INIT)) /* skip servers that are not usable */
12990+ {
12991+ ptr ++;
12992+ continue;
12993+ }
12994+ if (ptr->max_connect <= ptr->use_num) /* skip servers that are already full */
12995+ {
12996+ ptr ++;
12997+ continue;
12998+ }
12999+ if (ptr->use_num > 0)
13000+ {
13001+ use_rate = ptr->use_num * 100 / ptr->max_connect ;
13002+ }
13003+ else
13004+ {
13005+ use_rate = 0;
13006+ rtn = ptr; /* an idle server cannot be beaten: take it and stop scanning */
13007+ break;
13008+ }
13009+ if (min_use_rate > use_rate)
13010+ {
13011+ min_use_rate = use_rate;
13012+ rtn = ptr;
13013+ }
13014+ ptr ++;
13015+ }
13016+ if (rtn != NULL)
13017+ {
13018+ rtn->use_num ++; /* reserve one connection on the chosen server while the lock is held */
13019+ if (rtn->useFlag == TBL_INIT)
13020+ {
13021+ PGRset_status_on_cluster_tbl (TBL_USE,rtn);
13022+ }
13023+ }
13024+ PGRsem_unlock(ClusterSemid,MAX_DB_SERVER);
13025+ return rtn;
13026+}
13027+
13028+void /* clear *ptr and fill its key fields (hostName, max_connect, port) from a RecoveryPacket, converting the numeric fields with ntohs() */
13029+PGRset_key_of_cluster(ClusterTbl * ptr, RecoveryPacket * packet)
13030+{
13031+ int max_connect = 0;
13032+ int port = 0;
13033+
13034+ memset(ptr,0,sizeof(ClusterTbl));
13035+ memcpy(ptr->hostName,packet->hostName,sizeof(ptr->hostName));
13036+ max_connect = ntohs(packet->max_connect);
13037+ if (max_connect >= 0) /* NOTE(review): ntohs() yields an unsigned 16-bit value, so this is always true and DEFAULT_CONNECT_NUM is never applied */
13038+ {
13039+ ptr->max_connect = max_connect;
13040+ }
13041+ else
13042+ {
13043+ ptr->max_connect = DEFAULT_CONNECT_NUM;
13044+ }
13045+ port = ntohs(packet->port);
13046+ if ( port >= 0) /* NOTE(review): always true for the same reason, so DEFAULT_PORT is never applied */
13047+ {
13048+ ptr->port = port;
13049+ }
13050+ else
13051+ {
13052+ ptr->port = DEFAULT_PORT;
13053+ }
13054+}
13055+
13056+ClusterTbl * /* register a cluster server: if an entry with the same host/port is already TBL_USE or TBL_INIT, reset its counters and return it; otherwise fill a free slot from conf_data; NULL when no slot can be used */
13057+PGRadd_cluster_tbl (ClusterTbl * conf_data)
13058+{
13059+ char * func = "PGRadd_cluster_tbl()";
13060+ ClusterTbl * ptr;
13061+
13062+ ptr = PGRsearch_cluster_tbl(conf_data);
13063+ if ((ptr != NULL) &&
13064+ ((ptr->useFlag == TBL_USE ) || ((ptr->useFlag == TBL_INIT))))
13065+ {
13066+ ptr->max_connect = conf_data->max_connect; /* already registered and usable: refresh the limit and reset the usage counters */
13067+ ptr->use_num = 0;
13068+ ptr->rate = 0;
13069+ return ptr;
13070+ }
13071+ ptr = search_free_cluster_tbl();
13072+ if (ptr == (ClusterTbl *) NULL)
13073+ {
13074+ show_error("%s:no more free space in cluster table",func);
13075+ return (ClusterTbl *)NULL;
13076+ }
13077+ if (ClusterNum < Max_DB_Server)
13078+ {
13079+ set_cluster_tbl( ptr, conf_data);
13080+ return ptr;
13081+ }
13082+ return (ClusterTbl *)NULL; /* a free slot exists but the live-server count is already at Max_DB_Server (no message is logged) */
13083+}
13084+
13085+ClusterTbl * /* change the useFlag of a cluster entry; on an actual change, log it to the status file and adjust the ClusterNum counter; returns ptr unchanged (NULL is passed through) */
13086+PGRset_status_on_cluster_tbl (int status, ClusterTbl * ptr)
13087+{
13088+#ifdef PRINT_DEBUG
13089+ char * func = "PGRset_status_on_cluster_tbl()";
13090+#endif
13091+
13092+ if (ptr != (ClusterTbl*)NULL)
13093+ {
13094+ if (ptr->useFlag != status) /* nothing is logged or counted when the status does not change */
13095+ {
13096+#ifdef PRINT_DEBUG
13097+ show_debug("%s:host:%s port:%d max:%d use:%d status%d",
13098+ func, ptr->hostName,ptr->port,ptr->max_connect,ptr->useFlag,status);
13099+#endif
13100+ ptr->useFlag = status;
13101+ write_cluster_status_file(ptr);
13102+ if (status == TBL_INIT)
13103+ {
13104+ if (ClusterNum < Max_DB_Server)
13105+ ClusterNum ++ ;
13106+ }
13107+ else if (status != TBL_STOP) /* every other status, TBL_USE included, decrements the counter - NOTE(review): confirm that INIT -> USE is meant to decrement */
13108+ {
13109+ if (ClusterNum > 0)
13110+ ClusterNum -- ;
13111+ }
13112+ }
13113+ }
13114+ return ptr;
13115+}
13116+
13117+static void /* append one line describing the entry's current useFlag to the status log (StatusFp); statuses without a case below, e.g. TBL_STOP or TBL_ERROR_NOTICE, are not logged */
13118+write_cluster_status_file(ClusterTbl * ptr)
13119+{
13120+ switch( ptr->useFlag)
13121+ {
13122+ case TBL_FREE:
13123+ PGRwrite_log_file(StatusFp,"port(%d) host:%s free",
13124+ ptr->port,
13125+ ptr->hostName);
13126+ break;
13127+ case TBL_INIT:
13128+ PGRwrite_log_file(StatusFp,"port(%d) host:%s initialize",
13129+ ptr->port,
13130+ ptr->hostName);
13131+ break;
13132+ case TBL_USE:
13133+ PGRwrite_log_file(StatusFp,"port(%d) host:%s start use",
13134+ ptr->port,
13135+ ptr->hostName);
13136+ break;
13137+ case TBL_ERROR:
13138+ PGRwrite_log_file(StatusFp,"port(%d) host:%s error",
13139+ ptr->port,
13140+ ptr->hostName);
13141+ break;
13142+ case TBL_END:
13143+ PGRwrite_log_file(StatusFp,"port(%d) host:%s end",
13144+ ptr->port,
13145+ ptr->hostName);
13146+ break;
13147+ }
13148+}
13149+
13150+ClusterTbl * /* find the cluster-table entry whose hostName and port equal those of conf_data; NULL when not found. NOTE(review): Cluster_Tbl is not checked for NULL here */
13151+PGRsearch_cluster_tbl(ClusterTbl * conf_data)
13152+{
13153+ ClusterTbl *ptr;
13154+ int cnt = 0;
13155+ int rec_num = 0;
13156+
13157+ ptr = Cluster_Tbl;
13158+ while ((cnt <= ClusterNum) && (rec_num < Max_DB_Server)) /* stop once more than ClusterNum live entries were seen or the whole table was visited */
13159+ {
13160+ if (ptr->port > 0) /* entries with no port set are skipped */
13161+ {
13162+ if ((!strcmp(ptr->hostName,conf_data->hostName)) &&
13163+ (ptr->port == conf_data->port))
13164+ {
13165+ return ptr;
13166+ }
13167+ if ((ptr->useFlag == TBL_USE) || (ptr->useFlag == TBL_INIT))
13168+ {
13169+ cnt ++; /* cnt counts live (USE/INIT) entries, rec_num counts all visited slots */
13170+ }
13171+ }
13172+ ptr ++;
13173+ rec_num ++;
13174+ }
13175+ return (ClusterTbl *)NULL;
13176+}
13177+
13178+static int /* copy host, limits and counters from conf_data into the slot ptr and switch the slot to TBL_INIT; always returns STATUS_OK */
13179+set_cluster_tbl(ClusterTbl * ptr , ClusterTbl * conf_data)
13180+{
13181+ int rec_no;
13182+
13183+ rec_no = ptr->rec_no; /* NOTE(review): rec_no is read but never used; the slot's own rec_no is left untouched by the copies below */
13184+ memcpy(ptr->hostName,conf_data->hostName,sizeof(ptr->hostName));
13185+ ptr->max_connect = conf_data->max_connect;
13186+ ptr->port = conf_data->port;
13187+ ptr->use_num = conf_data->use_num;
13188+ ptr->rate = conf_data->rate;
13189+ PGRset_status_on_cluster_tbl (TBL_INIT, ptr); /* also logs the change and increments ClusterNum when the status actually changes */
13190+
13191+ return STATUS_OK;
13192+}
13193+
13194+static ClusterTbl * /* return the first slot whose useFlag is TBL_FREE or TBL_ERROR among the first min(ClusterNum + 1, Max_DB_Server) slots; NULL when there is none. NOTE(review): Cluster_Tbl is not checked for NULL here */
13195+search_free_cluster_tbl(void )
13196+{
13197+ ClusterTbl *ptr;
13198+ int cnt = 0;
13199+
13200+ ptr = Cluster_Tbl;
13201+ while ((cnt <= ClusterNum ) && (cnt < Max_DB_Server))
13202+ {
13203+ if ((ptr->useFlag == TBL_FREE) || (ptr->useFlag == TBL_ERROR)) /* slots of failed servers are reused as well */
13204+ {
13205+ return ptr;
13206+ }
13207+ cnt ++;
13208+ ptr ++;
13209+ }
13210+ return (ClusterTbl *)NULL;
13211+}
13212+
13213diff -aruN postgresql-8.2.4/src/pgcluster/pglb/lifecheck.c pgcluster-1.7.0rc7/src/pgcluster/pglb/lifecheck.c
13214--- postgresql-8.2.4/src/pgcluster/pglb/lifecheck.c 1970-01-01 01:00:00.000000000 +0100
13215+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/lifecheck.c 2007-03-01 16:27:15.000000000 +0100
13216@@ -0,0 +1,329 @@
13217+/*--------------------------------------------------------------------
13218+ * FILE:
13219+ * lifecheck.c
13220+ *
13221+ * NOTE:
13222+ * This file is composed of the functions to call with the source
13223+ * at pgreplicate for the lifecheck.
13224+ *
13225+ * Portions Copyright (c) 2003-2007, Atsushi Mitani
13226+ *--------------------------------------------------------------------
13227+ */
13228+#include "postgres.h"
13229+#include "postgres_fe.h"
13230+
13231+#include <pthread.h>
13232+#include <stdio.h>
13233+#include <stdarg.h>
13234+#include <sys/types.h>
13235+#include <fcntl.h>
13236+#include <errno.h>
13237+#include <ctype.h>
13238+#include <time.h>
13239+#include <sys/ipc.h>
13240+#include <sys/shm.h>
13241+#include <sys/sem.h>
13242+#include <sys/msg.h>
13243+#include <signal.h>
13244+
13245+#include "libpq-fe.h"
13246+#include "libpq-int.h"
13247+#include "fe-auth.h"
13248+
13249+#include <sys/socket.h>
13250+#include <unistd.h>
13251+#include <netdb.h>
13252+#include <arpa/inet.h>
13253+
13254+#ifdef HAVE_NETINET_TCP_H
13255+#include <netinet/tcp.h>
13256+#endif
13257+
13258+#ifdef HAVE_SYS_SELECT_H
13259+#include <sys/select.h>
13260+#endif
13261+
13262+
13263+#ifdef HAVE_CRYPT_H
13264+#include <crypt.h>
13265+#endif
13266+
13267+
13268+#ifdef MULTIBYTE
13269+#include "mb/pg_wchar.h"
13270+#endif
13271+
13272+#include "access/xact.h"
13273+#include "lib/dllist.h"
13274+#include "libpq/pqformat.h"
13275+#include "replicate_com.h"
13276+#include "pglb.h"
13277+
13278+#define PING_DB "template1"
13279+#define PING_QUERY "SELECT 1"
13280+
13281+static ClusterTbl * PGR_Cluster_DB_4_Lifecheck = (ClusterTbl*)NULL;
13282+
13283+/*--------------------------------------
13284+ * PROTOTYPE DECLARATION
13285+ *--------------------------------------
13286+ */
13287+int PGRlifecheck_main(int fork_wait_time);
13288+PGconn * PGRcreateConn( char * host, char * port,char * database, char * userName, char * password, char * md5Salt, char * cryptSalt );
13289+
13290+static bool is_started_loadbalance(void);
13291+static void set_timeout(SIGNAL_ARGS);
13292+static int lifecheck_loop(void);
13293+static int ping_cluster(PGconn * conn);
13294+static void set_cluster_status(ClusterTbl * host_ptr, int status);
13295+
13296+int
13297+PGRlifecheck_main(int fork_wait_time)
13298+{
13299+ bool started = false;
13300+ pid_t pgid = 0;
13301+ pid_t pid = 0;
13302+
13303+ pgid = getpgid(0);
13304+ pid = fork();
13305+ if (pid != 0)
13306+ {
13307+ return STATUS_OK;
13308+ }
13309+
13310+ /*
13311+ * in child process,
13312+ * call recovery module
13313+ */
13314+ setpgid(0,pgid);
13315+
13316+ PGRsignal(SIGHUP, PGRexit_subprocess);
13317+ PGRsignal(SIGTERM, PGRexit_subprocess);
13318+ PGRsignal(SIGINT, PGRexit_subprocess);
13319+ PGRsignal(SIGQUIT, PGRexit_subprocess);
13320+ PGRsignal(SIGALRM, set_timeout);
13321+
13322+ if (fork_wait_time > 0) {
13323+ sleep(fork_wait_time);
13324+ }
13325+
13326+ if (PGRuserName == NULL)
13327+ {
13328+ PGRuserName = getenv("LOGNAME");
13329+ if (PGRuserName == NULL)
13330+ {
13331+ PGRuserName = getenv("USER");
13332+ if (PGRuserName == NULL)
13333+ PGRuserName = "postgres";
13334+ }
13335+ }
13336+
13337+ for (;;)
13338+ {
13339+ started = is_started_loadbalance();
13340+ if (!started)
13341+ {
13342+ /* wait next lifecheck as interval */
13343+ sleep(PGR_Lifecheck_Interval);
13344+ continue;
13345+ }
13346+
13347+ /* life check to all cluster dbs */
13348+ lifecheck_loop();
13349+
13350+ /* wait next lifecheck as interval */
13351+ sleep(PGR_Lifecheck_Interval);
13352+ }
13353+ return STATUS_OK;
13354+}
13355+
13356+static bool
13357+is_started_loadbalance(void)
13358+{
13359+ ClusterTbl * host_ptr = (ClusterTbl*)NULL;
13360+
13361+ host_ptr = Cluster_Tbl;
13362+ if (host_ptr == NULL)
13363+ {
13364+ return false;
13365+ }
13366+ while(host_ptr->useFlag != TBL_END)
13367+ {
13368+ if (host_ptr->useFlag == TBL_USE)
13369+ {
13370+ return true;
13371+ }
13372+ host_ptr ++;
13373+ }
13374+ return false;
13375+}
13376+
13377+static void
13378+set_timeout(SIGNAL_ARGS)
13379+{
13380+ if (PGR_Cluster_DB_4_Lifecheck != NULL)
13381+ {
13382+ set_cluster_status( PGR_Cluster_DB_4_Lifecheck, TBL_ERROR);
13383+ }
13384+ PGRsignal(SIGALRM, set_timeout);
13385+}
13386+
13387+static int
13388+lifecheck_loop(void)
13389+{
13390+ ClusterTbl * host_ptr = (ClusterTbl*)NULL;
13391+ char port[8];
13392+ char * host = NULL;
13393+ PGconn * conn = NULL;
13394+
13395+ host_ptr = Cluster_Tbl;
13396+ if (host_ptr == NULL)
13397+ {
13398+ return STATUS_ERROR;
13399+ }
13400+ alarm(0);
13401+ while(host_ptr->useFlag != TBL_END)
13402+ {
13403+ /*
13404+ * check the status of the cluster DB
13405+ */
13406+ if ((host_ptr->useFlag != TBL_USE) || (host_ptr->useFlag != TBL_INIT))
13407+ {
13408+ host_ptr ++;
13409+ continue;
13410+ }
13411+ snprintf(port,sizeof(port),"%d", host_ptr->port);
13412+ host = (char *)(host_ptr->hostName);
13413+ /* set host data */
13414+ PGR_Cluster_DB_4_Lifecheck = host_ptr;
13415+
13416+ /* set alarm as lifecheck timeout */
13417+ alarm(PGR_Lifecheck_Timeout);
13418+
13419+ /* connect DB */
13420+ conn = PGRcreateConn(host,port, PING_DB ,PGRuserName,"","","");
13421+ if ((conn != NULL) &&
13422+ (ping_cluster(conn) == STATUS_OK))
13423+ {
13424+ set_cluster_status(host_ptr,TBL_USE);
13425+ }
13426+ else
13427+ {
13428+ set_cluster_status(host_ptr,TBL_ERROR);
13429+ }
13430+ /* reset alarm */
13431+ alarm(0);
13432+
13433+ PQfinish(conn);
13434+ conn = NULL;
13435+ host_ptr ++;
13436+ }
13437+
13438+ return STATUS_OK;
13439+}
13440+
13441+static int
13442+ping_cluster(PGconn * conn)
13443+{
13444+ int status = 0;
13445+ PGresult * res = (PGresult *)NULL;
13446+
13447+ res = PQexec(conn, PING_QUERY );
13448+
13449+ status = PQresultStatus(res);
13450+ if (res != NULL)
13451+ {
13452+ PQclear(res);
13453+ }
13454+ if ((status == PGRES_NONFATAL_ERROR ) ||
13455+ (status == PGRES_FATAL_ERROR ))
13456+ {
13457+ return STATUS_ERROR;
13458+ }
13459+ return STATUS_OK;
13460+}
13461+
13462+PGconn *
13463+PGRcreateConn( char * host, char * port,char * database, char * userName, char * password, char * md5Salt, char * cryptSalt )
13464+{
13465+ int cnt = 0;
13466+ PGconn * conn = NULL;
13467+ char pwd[256];
13468+
13469+ memset(pwd,0,sizeof(pwd));
13470+ if (*password != '\0')
13471+ {
13472+ if ((strncmp(password,"md5",3) == 0) && (md5Salt != NULL))
13473+ {
13474+ sprintf(pwd,"%s(%d)(%d)(%d)(%d)",password,
13475+ *md5Salt,*(md5Salt+1),*(md5Salt+2),*(md5Salt+3));
13476+ }
13477+ else
13478+ {
13479+ strncpy(pwd,password,sizeof(pwd));
13480+ }
13481+ }
13482+ conn = PQsetdbLogin(host, port, NULL, NULL, database, userName, pwd);
13483+ /* check to see that the backend Connection was successfully made */
13484+ cnt = 0;
13485+ while (PQstatus(conn) == CONNECTION_BAD)
13486+ {
13487+ if (conn != NULL)
13488+ {
13489+ PQfinish(conn);
13490+ conn = NULL;
13491+ }
13492+ conn = PQsetdbLogin(host, port, NULL, NULL, database, userName, pwd);
13493+ if (cnt > PGLB_CONNECT_RETRY_TIME )
13494+ {
13495+ if (conn != NULL)
13496+ {
13497+ PQfinish(conn);
13498+ conn = NULL;
13499+ }
13500+ return (PGconn *)NULL;
13501+ }
13502+
13503+ if(PQstatus(conn) == CONNECTION_BAD && h_errno==2)
13504+ {
13505+ usleep(PGR_SEND_WAIT_MSEC);
13506+ cnt ++;
13507+ }
13508+ else if(!strncasecmp(PQerrorMessage(conn),"FATAL: Sorry, too many clients already",30) ||
13509+ !strncasecmp(PQerrorMessage(conn),"FATAL: Non-superuser connection limit",30) )
13510+ {
13511+ usleep(PGR_SEND_WAIT_MSEC);
13512+ cnt ++;
13513+ }
13514+ else if(!strncasecmp(PQerrorMessage(conn),"FATAL: The database system is starting up",40) )
13515+ {
13516+ usleep(PGR_SEND_WAIT_MSEC);
13517+ }
13518+ else
13519+ {
13520+ usleep(PGR_SEND_WAIT_MSEC);
13521+ cnt ++;
13522+ }
13523+ }
13524+ return conn;
13525+}
13526+
13527+static void
13528+set_cluster_status(ClusterTbl * host_ptr, int status)
13529+{
13530+ if (host_ptr == NULL)
13531+ return;
13532+ if (status == TBL_ERROR)
13533+ {
13534+ host_ptr->retry_count ++;
13535+ if (host_ptr->retry_count > PGLB_CONNECT_RETRY_TIME )
13536+ {
13537+ PGRset_status_on_cluster_tbl(status, host_ptr);
13538+ }
13539+ }
13540+ else
13541+ {
13542+ host_ptr->retry_count = 0;
13543+ PGRset_status_on_cluster_tbl(status, host_ptr);
13544+ }
13545+}
13546diff -aruN postgresql-8.2.4/src/pgcluster/pglb/load_balance.c pgcluster-1.7.0rc7/src/pgcluster/pglb/load_balance.c
13547--- postgresql-8.2.4/src/pgcluster/pglb/load_balance.c 1970-01-01 01:00:00.000000000 +0100
13548+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/load_balance.c 2007-02-18 22:52:17.000000000 +0100
13549@@ -0,0 +1,252 @@
13550+/*--------------------------------------------------------------------
13551+ * FILE:
13552+ * load_balance.c
13553+ *
13554+ * NOTE:
13555+ * This file is composed of the functions of load balance modules
13556+ * with connection pooling or not
13557+ *
13558+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
13559+ *--------------------------------------------------------------------
13560+ */
13561+/*
13562+ * Permission to use, copy, modify, and distribute this software and
13563+ * its documentation for any purpose and without fee is hereby
13564+ * granted, provided that the above copyright notice appear in all
13565+ * copies and that both that copyright notice and this permission
13566+ * notice appear in supporting documentation, and that the name of the
13567+ * author not be used in advertising or publicity pertaining to
13568+ * distribution of the software without specific, written prior
13569+ * permission. The author makes no representations about the
13570+ * suitability of this software for any purpose. It is provided "as
13571+ * is" without express or implied warranty.
13572+ *
13573+*/
13574+#include "postgres.h"
13575+#include <stdio.h>
13576+#include <stdlib.h>
13577+#include <string.h>
13578+#include <unistd.h>
13579+#include <signal.h>
13580+#include <sys/wait.h>
13581+#include <sys/time.h>
13582+#include <ctype.h>
13583+#include <sys/types.h>
13584+#include <sys/stat.h>
13585+#include <sys/socket.h>
13586+#include <sys/ipc.h>
13587+#include <sys/sem.h>
13588+#include <netdb.h>
13589+#include <netinet/in.h>
13590+#include <errno.h>
13591+#include <fcntl.h>
13592+#include <time.h>
13593+#include <sys/param.h>
13594+#include <sys/select.h>
13595+#include <arpa/inet.h>
13596+#include <sys/file.h>
13597+
13598+#ifdef HAVE_NETINET_TCP_H
13599+#include <netinet/tcp.h>
13600+#endif
13601+
13602+#include "replicate_com.h"
13603+#include "pglb.h"
13604+
13605+/*--------------------------------------
13606+ * PROTOTYPE DECLARATION
13607+ *--------------------------------------
13608+ */
13609+int PGRload_balance(void);
13610+int PGRload_balance_with_pool(void);
13611+char PGRis_connection_full(ClusterTbl * ptr);
13612+void PGRrelease_connection(ClusterTbl * ptr);
13613+void PGRchild_wait(int sig);
13614+
13615+/*--------------------------------------------------------------------
13616+ * SYMBOL
13617+ * PGRload_balance()
13618+ * NOTES
13619+ * load balance module that normal connection is used
13620+ * ARGS
13621+ * void
13622+ * RETURN
13623+ * OK: STATUS_OK
13624+ * NG: STATUS_ERROR
13625+ *--------------------------------------------------------------------
13626+ */
13627+int
13628+PGRload_balance(void)
13629+{
13630+ char * func = "PGRload_balance()";
13631+ pid_t pid,pgid;
13632+ int count;
13633+ int status;
13634+ ClusterTbl * cluster_p = NULL;
13635+
13636+ PGRsignal(SIGCHLD, PGRchild_wait);
13637+ /* get the least locaded cluster server info */
13638+ cluster_p = PGRscan_cluster();
13639+ count = 0;
13640+ while (cluster_p == NULL )
13641+ {
13642+ if ( count > PGLB_CONNECT_RETRY_TIME)
13643+ {
13644+ show_error("%s:no cluster available",func);
13645+ return STATUS_ERROR;
13646+ }
13647+ cluster_p = PGRscan_cluster();
13648+ count ++;
13649+ }
13650+
13651+ pgid = getpgid((pid_t)0);
13652+ pid = fork();
13653+ if (pid < 0)
13654+ {
13655+ show_error("%s:fork() failed. (%s)",func,strerror(errno));
13656+ exit(1);
13657+ }
13658+ if (pid == 0)
13659+ {
13660+ setpgid((pid_t)0,pgid);
13661+ CurrentCluster = cluster_p;
13662+
13663+ if (pool_init_cp())
13664+ {
13665+ show_error("%s:pool_init_cp failed",func);
13666+ exit(1);
13667+ }
13668+ PGRsem_lock(ClusterSemid,cluster_p->rec_no);
13669+ if (PGRget_child_status(getpid()) == STATUS_ERROR)
13670+ {
13671+ PGRadd_child_tbl(cluster_p, getpid(), TBL_USE);
13672+ }
13673+ PGRsem_unlock(ClusterSemid,cluster_p->rec_no);
13674+ PGRdo_child(NOT_USE_CONNECTION_POOL );
13675+ PGRrelease_connection(cluster_p);
13676+ PGRset_status_to_child_tbl(getpid(), TBL_FREE);
13677+ exit(0);
13678+ }
13679+ else if (pid > 0)
13680+ {
13681+ PGRsem_lock(ClusterSemid,cluster_p->rec_no);
13682+ if (PGRget_child_status(pid) == STATUS_ERROR)
13683+ {
13684+ PGRadd_child_tbl(cluster_p, pid, TBL_USE);
13685+ }
13686+ PGRsem_unlock(ClusterSemid,cluster_p->rec_no);
13687+ status = PGRget_child_status(pid);
13688+ while (status == TBL_USE)
13689+ {
13690+ status = PGRget_child_status(pid);
13691+ usleep(20);
13692+ }
13693+ return STATUS_OK;
13694+ }
13695+ else
13696+ {
13697+ return STATUS_ERROR;
13698+ }
13699+}
13700+
13701+/*--------------------------------------------------------------------
13702+ * SYMBOL
13703+ * PGRload_balance_with_pool()
13704+ * NOTES
13705+ * load balance module that connection pooling system is used
13706+ * ARGS
13707+ * void
13708+ * RETURN
13709+ * OK: STATUS_OK
13710+ * NG: STATUS_ERROR
13711+ *--------------------------------------------------------------------
13712+ */
13713+int
13714+PGRload_balance_with_pool(void)
13715+{
13716+ char * func = "PGRload_balance_with_pool()";
13717+ int count;
13718+ pid_t pid;
13719+ ClusterTbl * cluster_p = NULL;
13720+ int status = TBL_USE;
13721+
13722+ /* get the least locaded cluster server info */
13723+ cluster_p = PGRscan_cluster();
13724+ count = 0;
13725+ while (cluster_p == NULL )
13726+ {
13727+ if ( count > PGLB_CONNECT_RETRY_TIME)
13728+ {
13729+ show_error("%s:no cluster available",func);
13730+ PGRreturn_no_connection_error();
13731+ return STATUS_ERROR;
13732+ }
13733+ cluster_p = PGRscan_cluster();
13734+ count ++;
13735+ }
13736+ pid = PGRscan_child_tbl(cluster_p);
13737+ if ((pid == 0) || (pid == STATUS_ERROR))
13738+ {
13739+ show_error("%s:no child process available",func);
13740+ return STATUS_ERROR;
13741+ }
13742+ kill(pid,SIGUSR1);
13743+
13744+ status = PGRget_child_status(pid);
13745+ while (status == TBL_USE)
13746+ {
13747+ status = PGRget_child_status(pid);
13748+ usleep(20);
13749+ }
13750+
13751+ return STATUS_OK;
13752+
13753+}
13754+
13755+char
13756+PGRis_connection_full(ClusterTbl * ptr)
13757+{
13758+ char rtn = 1;
13759+
13760+ if (ptr == NULL)
13761+ {
13762+ return rtn;
13763+ }
13764+ PGRsem_lock(ClusterSemid,ptr->rec_no);
13765+ if (ptr->max_connect > ptr->use_num)
13766+ {
13767+ rtn = 0;
13768+ }
13769+ PGRsem_unlock(ClusterSemid,ptr->rec_no);
13770+ return rtn;
13771+}
13772+
13773+void
13774+PGRrelease_connection(ClusterTbl * ptr)
13775+{
13776+ if (ptr == NULL)
13777+ {
13778+ return;
13779+ }
13780+ PGRsem_lock(ClusterSemid,MAX_DB_SERVER);
13781+ if (ptr->use_num > 0)
13782+ {
13783+ ptr->use_num --;
13784+ }
13785+ PGRsem_unlock(ClusterSemid,MAX_DB_SERVER);
13786+}
13787+
13788+void
13789+PGRchild_wait(int sig)
13790+{
13791+ pid_t pid = 0;
13792+ int ret = 0;
13793+
13794+ do {
13795+ pid = waitpid(-1,&ret,WNOHANG);
13796+ if ((pid <= 0) && (WTERMSIG(ret) > 0))
13797+ {
13798+ pid = 1;
13799+ }
13800+ } while(pid > 0);
13801+}
13802diff -aruN postgresql-8.2.4/src/pgcluster/pglb/main.c pgcluster-1.7.0rc7/src/pgcluster/pglb/main.c
13803--- postgresql-8.2.4/src/pgcluster/pglb/main.c 1970-01-01 01:00:00.000000000 +0100
13804+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/main.c 2007-03-01 16:27:15.000000000 +0100
13805@@ -0,0 +1,1137 @@
13806+/*--------------------------------------------------------------------
13807+ * FILE:
13808+ * main.c
13809+ *
13810+ * NOTE:
13811+ * This file is composed of the main function of pglb.
13812+ *
13813+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
13814+ *--------------------------------------------------------------------
13815+ */
13816+/*
13817+ * Permission to use, copy, modify, and distribute this software and
13818+ * its documentation for any purpose and without fee is hereby
13819+ * granted, provided that the above copyright notice appear in all
13820+ * copies and that both that copyright notice and this permission
13821+ * notice appear in supporting documentation, and that the name of the
13822+ * author not be used in advertising or publicity pertaining to
13823+ * distribution of the software without specific, written prior
13824+ * permission. The author makes no representations about the
13825+ * suitability of this software for any purpose. It is provided "as
13826+ * is" without express or implied warranty.
13827+ *
13828+*/
13829+#include "postgres.h"
13830+#include <stdio.h>
13831+#include <stdarg.h>
13832+#include <string.h>
13833+#include <stdlib.h>
13834+#include <unistd.h>
13835+#include <signal.h>
13836+#include <sys/wait.h>
13837+#include <sys/time.h>
13838+#include <sys/types.h>
13839+#include <sys/stat.h>
13840+#include <sys/socket.h>
13841+#include <sys/ipc.h>
13842+#include <sys/shm.h>
13843+#include <sys/sem.h>
13844+#include <sys/msg.h>
13845+#include <netdb.h>
13846+#include <errno.h>
13847+#include <fcntl.h>
13848+#include <time.h>
13849+#include <sys/param.h>
13850+#include <sys/select.h>
13851+#include <netinet/in.h>
13852+#include <arpa/inet.h>
13853+#include <sys/file.h>
13854+#include <arpa/inet.h>
13855+
13856+#ifdef HAVE_NETINET_TCP_H
13857+#include <netinet/tcp.h>
13858+#endif
13859+
13860+#ifdef HAVE_CRYPT_H
13861+#include <crypt.h>
13862+#endif
13863+
13864+#ifdef HAVE_GETOPT_H
13865+#include <getopt.h>
13866+#endif
13867+
13868+#include "replicate_com.h"
13869+#include "pglb.h"
13870+
13871+
13872+
13873+#define IPC_NMAXSEM (32)
13874+/*--------------------------------------
13875+ * GLOBAL VARIABLE DECLARATION
13876+ *--------------------------------------
13877+ */
13878+/* for replicate_com.h */
13879+ConfDataType * ConfData_Top = (ConfDataType *)NULL;
13880+ConfDataType * ConfData_End = (ConfDataType *)NULL;
13881+int MapTableShmid = -1;
13882+int LifeCheckStartShmid = -1;
13883+char * LifeCheckStartFlag = NULL;
13884+int LifeCheckTimeOut = 10;
13885+FILE * StatusFp = (FILE *)NULL;
13886+char * PGRStatusFileName = NULL;
13887+char * PGRLogFileName = NULL;
13888+char * PGRuserName = NULL;
13889+int Log_Print = 0;
13890+int Debug_Print = 0;
13891+
13892+char * ResolvedName = NULL;
13893+int Recv_Port_Number = 0;
13894+int Recovery_Port_Number = 0;
13895+uint16_t LifeCheck_Port_Number = 0;
13896+int Use_Connection_Pool = 0;
13897+int Max_Pool = 1;
13898+int Connection_Life_Time = 0;
13899+int Max_DB_Server = 0;
13900+int MaxBackends = 0;
13901+ClusterTbl * Cluster_Tbl = (ClusterTbl *)NULL;
13902+int ClusterNum = 0;
13903+int ClusterShmid = 0;
13904+int ClusterSemid = 0;
13905+ChildTbl * Child_Tbl = (ChildTbl *)NULL;
13906+int ChildShmid = 0;
13907+char * PGR_Data_Path = NULL;
13908+char * PGR_Write_Path = NULL;
13909+char * Backend_Socket_Dir = NULL;
13910+FrontSocket Frontend_FD;
13911+ClusterTbl * CurrentCluster = NULL;
13912+int PGR_Lifecheck_Timeout = 3;
13913+int PGR_Lifecheck_Interval = 15;
13914+
13915+int fork_wait_time = 0;
13916+
13917+extern char *optarg;
13918+
13919+/*--------------------------------------
13920+ * PROTOTYPE DECLARATION
13921+ *--------------------------------------
13922+ */
13923+static int init_pglb(char * path);
13924+static void pglb_exit(int signal_args);
13925+static void load_balance_main(void);
13926+static void daemonize(void);
13927+static void write_pid_file(void);
13928+static void stop_pglb(void);
13929+static int is_exist_pid_file(void);
13930+static ClusterTbl * scan_cluster_by_pid(pid_t pid);
13931+static void usage(void);
13932+static void close_child(int signal_args);
13933+
13934+void PGRrecreate_child(int signal_args);
13935+void PGRexit_subprocess(int sig);
13936+
13937+/*--------------------------------------------------------------------
13938+ * SYMBOL
13939+ * init_pglb()
13940+ * NOTES
13941+ * Reading of the setup file
13942+ * and the initialization of the memory area.
13943+ * ARGS
13944+ * char * path: path of the setup file (I)
13945+ * RETURN
13946+ * OK: STATUS_OK
13947+ * NG: STATUS_ERROR
13948+ *--------------------------------------------------------------------
13949+ */
13950+static int
13951+init_pglb(char * path)
13952+{
13953+ char * func = "init_pglb()";
13954+
13955+ ConfDataType * conf;
13956+ ClusterTbl cluster_tbl[MAX_DB_SERVER];
13957+ int size = 0;
13958+ int rec_no = 0;
13959+ int i;
13960+ int max_connect = 0;
13961+ union semun sem_arg;
13962+ char fname[256];
13963+
13964+ /*
13965+ * read configuration file
13966+ */
13967+ if (path == NULL)
13968+ {
13969+ path = ".";
13970+ }
13971+ if (PGR_Get_Conf_Data(path,PGLB_CONF_FILE) != STATUS_OK)
13972+ {
13973+ show_error("%s:PGR_Get_Conf_Data failed",func);
13974+ return STATUS_ERROR;
13975+ }
13976+
13977+ size = sizeof(LogFileInf);
13978+ LogFileData = (LogFileInf *) malloc(size);
13979+ if (LogFileData == NULL)
13980+ {
13981+ show_error("%s:malloc() failed. reason: %s", func,strerror(errno));
13982+ return STATUS_ERROR;
13983+ }
13984+ memset(LogFileData,0,size);
13985+
13986+ /* cluster db status file open */
13987+ if (PGRStatusFileName == NULL)
13988+ {
13989+ snprintf(fname,sizeof(fname),"%s/%s",PGR_Write_Path,PGLB_STATUS_FILE);
13990+ }
13991+ else
13992+ {
13993+ memcpy(fname,PGRStatusFileName,sizeof(fname));
13994+ }
13995+ StatusFp = fopen(fname, "a");
13996+ if (StatusFp == NULL)
13997+ {
13998+ show_error("%s:open() %s file failed. (%s)",
13999+ func,fname, strerror(errno));
14000+ exit(1);
14001+ }
14002+
14003+ Backend_Socket_Dir = malloc(128);
14004+ if (Backend_Socket_Dir == NULL)
14005+ {
14006+ show_error("%s:malloc() failed. (%s)",func,strerror(errno));
14007+ return STATUS_ERROR;
14008+ }
14009+ memset(Backend_Socket_Dir,0,128);
14010+ /* set initiarize data */
14011+ strcpy(Backend_Socket_Dir,"/tmp");
14012+ Max_Pool = 1;
14013+ Connection_Life_Time = 0;
14014+ Use_Connection_Pool = 0;
14015+
14016+ conf = ConfData_Top;
14017+ while (conf != (ConfDataType *)NULL)
14018+ {
14019+ /* get cluster db servers name */
14020+ if (!strcmp(conf->table,CLUSTER_SERVER_TAG))
14021+ {
14022+ rec_no = conf->rec_no;
14023+ if (!strcmp(conf->key,HOST_NAME_TAG))
14024+ {
14025+ memcpy(cluster_tbl[rec_no].hostName,conf->value,sizeof(cluster_tbl[rec_no].hostName));
14026+ conf = (ConfDataType*)conf->next;
14027+ continue;
14028+ }
14029+ if (!strcmp(conf->key,PORT_TAG))
14030+ {
14031+ cluster_tbl[rec_no].port = atoi(conf->value);
14032+ conf = (ConfDataType*)conf->next;
14033+ continue;
14034+ }
14035+ if (!strcmp(conf->key,MAX_CONNECT_TAG))
14036+ {
14037+ cluster_tbl[rec_no].max_connect = atoi(conf->value);
14038+ conf = (ConfDataType*)conf->next;
14039+ continue;
14040+ }
14041+ }
14042+ /* get logging file data */
14043+ else if (!strcmp(conf->table, LOG_INFO_TAG))
14044+ {
14045+ if (!strcmp(conf->key, FILE_NAME_TAG))
14046+ {
14047+ strncpy(LogFileData->file_name, conf->value ,sizeof(LogFileData->file_name));
14048+ LogFileData->fp = NULL;
14049+ conf = (ConfDataType*)conf->next;
14050+ continue;
14051+ }
14052+ if (!strcmp(conf->key, FILE_SIZE_TAG))
14053+ {
14054+ int i,len;
14055+ char * ptr;
14056+ int unit = 1;
14057+ len = strlen(conf->value);
14058+ ptr = conf->value;
14059+ for (i = 0; i < len ; i ++,ptr++)
14060+ {
14061+ if ((! isdigit(*ptr)) && (! isspace(*ptr)))
14062+ {
14063+ switch (*ptr)
14064+ {
14065+ case 'K':
14066+ case 'k':
14067+ unit = 1024;
14068+ break;
14069+ case 'M':
14070+ case 'm':
14071+ unit = 1024*1024;
14072+ break;
14073+ case 'G':
14074+ case 'g':
14075+ unit = 1024*1024*1024;
14076+ break;
14077+ }
14078+ *ptr = '\0';
14079+ break;
14080+ }
14081+ }
14082+ LogFileData->max_size = atoi(conf->value) * unit;
14083+ conf = (ConfDataType*)conf->next;
14084+ continue;
14085+ }
14086+ if (!strcmp(conf->key, LOG_ROTATION_TAG))
14087+ {
14088+ LogFileData->rotation = atoi(conf->value);
14089+ conf = (ConfDataType*)conf->next;
14090+ continue;
14091+ }
14092+ }
14093+ else
14094+ {
14095+ if (!strcmp(conf->key,HOST_NAME_TAG))
14096+ {
14097+ int ip;
14098+ ip=PGRget_ip_by_name(conf->value);
14099+ if (ResolvedName == NULL)
14100+ {
14101+ ResolvedName = malloc(ADDRESS_LENGTH);
14102+ }
14103+ if (ResolvedName == NULL)
14104+ {
14105+ continue;
14106+ }
14107+ else
14108+ {
14109+ memset(ResolvedName,0,ADDRESS_LENGTH);
14110+ }
14111+
14112+ sprintf(ResolvedName,
14113+ "%d.%d.%d.%d",
14114+ (ip ) & 0xff ,
14115+ (ip >> 8) & 0xff ,
14116+ (ip >> 16) & 0xff ,
14117+ (ip >> 24) & 0xff );
14118+ conf = (ConfDataType*)conf->next;
14119+ continue;
14120+ }
14121+ /* get port number for receive querys */
14122+ else if (!strcmp(conf->key,RECV_PORT_TAG))
14123+ {
14124+ Recv_Port_Number = atoi(conf->value);
14125+ conf = (ConfDataType*)conf->next;
14126+ continue;
14127+ }
14128+ /* get port number for recovery session */
14129+ else if (!strcmp(conf->key,RECOVERY_PORT_TAG))
14130+ {
14131+ Recovery_Port_Number = atoi(conf->value);
14132+ conf = (ConfDataType*)conf->next;
14133+ continue;
14134+ }
14135+ else if (!strcmp(conf->key,MAX_CLUSTER_TAG))
14136+ {
14137+ Max_DB_Server = atoi(conf->value);
14138+ conf = (ConfDataType*)conf->next;
14139+ continue;
14140+ }
14141+ else if (!strcmp(conf->key,USE_CONNECTION_POOL_TAG))
14142+ {
14143+ if (!strcmp(conf->value,"yes"))
14144+ {
14145+ Use_Connection_Pool = 1;
14146+ }
14147+ conf = (ConfDataType*)conf->next;
14148+ continue;
14149+ }
14150+ else if (!strcmp(conf->key,MAX_POOL_TAG))
14151+ {
14152+ Max_Pool = atoi(conf->value);
14153+ if (Max_Pool < 0)
14154+ Max_Pool = 1;
14155+ conf = (ConfDataType*)conf->next;
14156+ continue;
14157+ }
14158+ else if (!strcmp(conf->key,CONNECTION_LIFE_TIME))
14159+ {
14160+ Connection_Life_Time = atoi(conf->value);
14161+ if (Connection_Life_Time < 0)
14162+ Connection_Life_Time = 0;
14163+ conf = (ConfDataType*)conf->next;
14164+ continue;
14165+ }
14166+ else if (!strcmp(conf->key,BACKEND_SOCKET_DIR_TAG))
14167+ {
14168+ strncpy(Backend_Socket_Dir,conf->value,128);
14169+ conf = (ConfDataType*)conf->next;
14170+ continue;
14171+ }
14172+ else if (!STRCMP(conf->key,LIFECHECK_TIMEOUT_TAG))
14173+ {
14174+ /* get lifecheck timeout */
14175+ PGR_Lifecheck_Timeout = PGRget_time_value(conf->value);
14176+ if ((PGR_Lifecheck_Timeout < 1) || (PGR_Lifecheck_Timeout > 3600))
14177+ {
14178+ show_error("%s is out of range. It should be between 1sec-1hr.\n",LIFECHECK_TIMEOUT_TAG);
14179+ return STATUS_ERROR;
14180+ }
14181+ conf = (ConfDataType*)conf->next;
14182+ continue;
14183+ }
14184+ else if (!STRCMP(conf->key,LIFECHECK_INTERVAL_TAG))
14185+ {
14186+ /* get lifecheck interval */
14187+ PGR_Lifecheck_Interval = PGRget_time_value(conf->value);
14188+ if ((PGR_Lifecheck_Interval < 1) || (PGR_Lifecheck_Interval > 3600))
14189+ {
14190+ show_error("%s is out of range. It should between 1sec-1hr.\n",LIFECHECK_INTERVAL_TAG);
14191+ return STATUS_ERROR;
14192+ }
14193+ conf = (ConfDataType*)conf->next;
14194+ continue;
14195+ }
14196+ }
14197+ conf = (ConfDataType*)conf->next;
14198+ }
14199+ if (Max_DB_Server <= 0)
14200+ {
14201+ show_error("%s:Max_DB_Server is wrong value. %s/%s file should be broken",func, path, PGLB_CONF_FILE);
14202+ exit(1);
14203+ }
14204+ /* shared memory allocation for cluster table */
14205+ size = sizeof(ClusterTbl) * Max_DB_Server;
14206+
14207+ ClusterShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
14208+ if (ClusterShmid < 0)
14209+ {
14210+ show_error("%s:ClusterShm shmget() failed. (%s)", func,strerror(errno));
14211+ return STATUS_ERROR;
14212+ }
14213+ Cluster_Tbl = (ClusterTbl *)shmat(ClusterShmid,0,0);
14214+ if (Cluster_Tbl == (ClusterTbl *)-1)
14215+ {
14216+ show_error("%s:shmat() failed. (%s)", func,strerror(errno));
14217+ return STATUS_ERROR;
14218+ }
14219+ memset(Cluster_Tbl,0,size);
14220+
14221+ if ((ClusterSemid = semget(IPC_PRIVATE,MAX_DB_SERVER+1,IPC_CREAT | IPC_EXCL | 0600)) < 0)
14222+ {
14223+ show_error("%s:semget() failed. (%s)",func,strerror(errno));
14224+ return STATUS_ERROR;
14225+ }
14226+ for ( i = 0 ; i <= MAX_DB_SERVER ; i ++)
14227+ {
14228+ semctl(ClusterSemid, i, GETVAL, sem_arg);
14229+ sem_arg.val = 1;
14230+ semctl(ClusterSemid, i, SETVAL, sem_arg);
14231+ }
14232+ ClusterNum = 0;
14233+ /* set cluster db server name into cluster db server table */
14234+ for ( i = 0 ; i < Max_DB_Server ; i ++)
14235+ {
14236+ (Cluster_Tbl + i)->rec_no = i;
14237+ }
14238+ (Cluster_Tbl + i)->useFlag = TBL_END;
14239+ max_connect = 0;
14240+ for ( i = 0 ; i <= rec_no ; i ++)
14241+ {
14242+ cluster_tbl[i].use_num = 0;
14243+ cluster_tbl[i].rate = 0;
14244+ if (cluster_tbl[i].max_connect < 0)
14245+ {
14246+ cluster_tbl[i].max_connect = 0;
14247+ }
14248+ if (max_connect < cluster_tbl[i].max_connect)
14249+ {
14250+ max_connect = cluster_tbl[i].max_connect;
14251+ }
14252+ PGRadd_cluster_tbl(&cluster_tbl[i]);
14253+ }
14254+
14255+ /* shared memory allocation for children table */
14256+ size = sizeof(ChildTbl) * (Max_DB_Server + 1) * max_connect * Max_Pool;
14257+#ifdef PRINT_DEBUG
14258+ show_debug("%s:Child_Tbl size is[%d]",func,size);
14259+#endif
14260+
14261+ ChildShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
14262+ if (ChildShmid < 0)
14263+ {
14264+ show_error("%s:ChildShm shmget() failed. (%s)",func, strerror(errno));
14265+ return STATUS_ERROR;
14266+ }
14267+ Child_Tbl = (ChildTbl *)shmat(ChildShmid,0,0);
14268+ if (Child_Tbl == (ChildTbl *)-1)
14269+ {
14270+ show_error("%s:shmat() failed. (%s)", func,strerror(errno));
14271+ return STATUS_ERROR;
14272+ }
14273+ memset(Child_Tbl, 0, size);
14274+ (Child_Tbl + ( Max_DB_Server * max_connect * Max_Pool) -1)->useFlag = TBL_END;
14275+
14276+ PGR_Free_Conf_Data();
14277+
14278+ return STATUS_OK;
14279+}
14280+
14281+/*--------------------------------------------------------------------
14282+ * SYMBOL
14283+ * pglb_exit()
14284+ * NOTES
14285+ * Closing of pglb process
14286+ * ARGS
14287+ * int signal_args: signal number (I)
14288+ * RETURN
14289+ * none
14290+ *--------------------------------------------------------------------
14291+ */
14292+static void
14293+pglb_exit(int signal_args)
14294+{
14295+ char fname[256];
14296+ int rtn;
14297+
14298+ Child_Tbl->useFlag = TBL_END;
14299+ PGRsignal(SIGCHLD,SIG_IGN);
14300+ PGRsignal(signal_args,SIG_IGN);
14301+ kill (0,signal_args);
14302+ while (wait(NULL) > 0 )
14303+ ;
14304+
14305+ if (ClusterShmid > 0)
14306+ {
14307+ rtn = shmdt((char *)Cluster_Tbl);
14308+ shmctl(ClusterShmid,IPC_RMID,(struct shmid_ds *)NULL);
14309+ ClusterShmid = 0;
14310+ Cluster_Tbl = NULL;
14311+ }
14312+ if (ChildShmid > 0)
14313+ {
14314+ rtn = shmdt((char *)Child_Tbl);
14315+ shmctl(ChildShmid,IPC_RMID,(struct shmid_ds *)NULL);
14316+ ChildShmid = 0;
14317+ Child_Tbl = NULL;
14318+ }
14319+ if (ClusterSemid > 0)
14320+ {
14321+ semctl(ClusterSemid, 0, IPC_RMID);
14322+ ClusterSemid = 0;
14323+ }
14324+
14325+ if (StatusFp != NULL)
14326+ {
14327+ fflush(StatusFp);
14328+ fclose(StatusFp);
14329+ }
14330+ if (Frontend_FD.unix_fd != 0)
14331+ {
14332+ close(Frontend_FD.unix_fd);
14333+ Frontend_FD.unix_fd = 0;
14334+ snprintf(fname, sizeof(fname), "%s/.s.PGSQL.%d", Backend_Socket_Dir,Recv_Port_Number);
14335+ unlink(fname);
14336+ }
14337+ if (Frontend_FD.inet_fd != 0)
14338+ {
14339+ close(Frontend_FD.inet_fd);
14340+ Frontend_FD.inet_fd = 0;
14341+ }
14342+ /*
14343+ PGRsyn_quit();
14344+ */
14345+ snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGLB_PID_FILE);
14346+ unlink(fname);
14347+
14348+ if (ResolvedName != NULL)
14349+ {
14350+ free(ResolvedName);
14351+ ResolvedName = NULL;
14352+ }
14353+ exit(0);
14354+}
14355+
14356+/*--------------------------------------------------------------------
14357+ * SYMBOL
14358+ * load_balance_main()
14359+ * NOTES
14360+ * Main loop of the load balancer: opens the unix and inet listening sockets, optionally pre-forks pooled children, then serves select() events forever
14361+ * ARGS
14362+ * void
14363+ * RETURN
14364+ * none (the for(;;) loop has no exit; setup failures end the process through pglb_exit())
14365+ *--------------------------------------------------------------------
14366+ */
14367+static void
14368+load_balance_main(void)
14369+{
14370+ char * func = "load_balance_main()";
14371+ int status;
14372+ int rtn;
14373+ int count = 0; /* consecutive load balance failures */
14374+
14375+ Frontend_FD.unix_fd = PGRcreate_unix_domain_socket(Backend_Socket_Dir, Recv_Port_Number);
14376+ if (Frontend_FD.unix_fd < 0)
14377+ {
14378+ show_error("%s:PGRcreate_unix_domain_socket failed",func);
14379+ pglb_exit(SIGTERM);
14380+ }
14381+ Frontend_FD.inet_fd = PGRcreate_recv_socket(ResolvedName, Recv_Port_Number);
14382+ if (Frontend_FD.inet_fd < 0)
14383+ {
14384+ show_error("%s:PGRcreate_recv_socket failed",func);
14385+ pglb_exit(SIGTERM);
14386+ }
14387+ if (Use_Connection_Pool)
14388+ {
14389+ PGRsignal(SIGCHLD,PGRrecreate_child); /* pooled children are re-created when they exit */
14390+ rtn = PGRpre_fork_children(Cluster_Tbl);
14391+ if (rtn != STATUS_OK)
14392+ {
14393+ show_error("%s:PGRpre_fork_children failed",func);
14394+ pglb_exit(SIGTERM);
14395+ }
14396+ }
14397+
14398+ for (;;)
14399+ {
14400+ fd_set rmask;
14401+ struct timeval timeout;
14402+
14403+ timeout.tv_sec = 60; /* re-armed on every pass of the loop */
14404+ timeout.tv_usec = 0;
14405+
14406+ /*
14407+ * Wait for something to happen.
14408+ */
14409+ FD_ZERO(&rmask);
14410+ FD_SET(Frontend_FD.unix_fd,&rmask);
14411+ if(Frontend_FD.inet_fd)
14412+ FD_SET(Frontend_FD.inet_fd,&rmask);
14413+ rtn = select(Max(Frontend_FD.unix_fd, Frontend_FD.inet_fd) + 1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
14414+ if( rtn > 0) /* a timeout (0) or an error (<0) simply starts the next pass */
14415+ {
14416+ if(PGRis_cluster_alive() == STATUS_ERROR) {
14417+ show_error("%s:all clusters were dead.",func);
14418+ PGRreturn_no_connection_error();
14419+ count=0;
14420+ }
14421+ else
14422+ {
14423+ if (Use_Connection_Pool)
14424+ {
14425+ status = PGRload_balance_with_pool();
14426+ }
14427+ else
14428+ {
14429+ status = PGRload_balance();
14430+ }
14431+ if (status != STATUS_OK)
14432+ {
14433+ show_error("%s:load balance process failed",func);
14434+ if ( count > PGLB_CONNECT_RETRY_TIME) /* too many failures in a row: report "connection full" and restart the count */
14435+ {
14436+ show_error("%s:no cluster available",func);
14437+ PGRreturn_connection_full_error();
14438+ count = 0;
14439+ }
14440+ count ++;
14441+ }
14442+ else
14443+ {
14444+ count = 0;
14445+ }
14446+ }
14447+ }
14448+ }
14449+}
14450+
14451+/*--------------------------------------------------------------------
14452+ * SYMBOL
14453+ * daemonize()
14454+ * NOTES
14455+ * Daemonize this process: fork (the parent exits), start a new session when setsid() is available, and point stdin/stdout/stderr at /dev/null
14456+ * ARGS
14457+ * void
14458+ * RETURN
14459+ * none (only the child returns; fork() or setsid() failure ends the process with status 1)
14460+ *--------------------------------------------------------------------
14461+ */
14462+static void
14463+daemonize(void)
14464+{
14465+ char * func = "daemonize()";
14466+ int i;
14467+ pid_t pid;
14468+
14469+ pid = fork();
14470+ if (pid == (pid_t) -1)
14471+ {
14472+ show_error("%s:fork() failed. (%s)",func, strerror(errno));
14473+ exit(1);
14474+ }
14475+ else if (pid > 0)
14476+ exit(0); /* parent: only the child carries on as the daemon */
14477+
14478+#ifdef HAVE_SETSID
14479+ if (setsid() < 0)
14480+ {
14481+ show_error("%s:setsid() failed. (%s)", func,strerror(errno));
14482+ exit(1);
14483+ }
14484+#endif
14485+
14486+ i = open("/dev/null", O_RDWR);
14487+ if (i < 0)
14488+ return; /* no /dev/null: keep the inherited stdio instead of calling dup2()/close() with -1 */
14489+ dup2(i, 0);
14490+ dup2(i, 1);
14491+ dup2(i, 2);
14492+ if (i > 2) /* open() returns 0..2 when a standard fd was closed at startup; closing it would undo the redirect */
14493+ close(i);
14494+}
14495+
14496+
14497+/*--------------------------------------------------------------------
14498+ * SYMBOL
14499+ * write_pid_file()
14500+ * NOTES
14501+ * Record the id of this process in <PGR_Write_Path>/<PGLB_PID_FILE>.
14502+ * stop_pglb() reads the file back to find the process to terminate.
14503+ * ARGS
14504+ * void
14505+ * RETURN
14506+ * none (the process exits with status 1 when the file cannot be opened or closed)
14507+ *--------------------------------------------------------------------
14508+ */
14509+static void
14510+write_pid_file(void)
14511+{
14512+ char * func = "write_pid_file()";
14513+ char pid_path[256];
14514+ char pid_text[128];
14515+ FILE * pid_fp;
14516+ int pid_len;
14517+
14518+ /* The pid file lives in the work directory. */
14519+ snprintf(pid_path, sizeof(pid_path), "%s/%s", PGR_Write_Path, PGLB_PID_FILE);
14520+ if ((pid_fp = fopen(pid_path, "w")) == NULL)
14521+ {
14522+ show_error("%s:open() %s file failed. (%s)", func, pid_path, strerror(errno));
14523+ exit(1);
14524+ }
14525+ /* The pid is stored as plain decimal text without a newline. */
14526+ pid_len = snprintf(pid_text, sizeof(pid_text), "%d", getpid());
14527+ fwrite(pid_text, pid_len, 1, pid_fp);
14528+ if (fclose(pid_fp) != 0)
14529+ {
14530+ show_error("%s:fwrite() %s file failed. (%s)", func, pid_path, strerror(errno));
14531+ exit(1);
14532+ }
14533+}
14534+
14535+
14536+/*--------------------------------------------------------------------
14537+ * SYMBOL
14538+ * stop_pglb()
14539+ * NOTES
14540+ * Stop the running pglb: read its pid from <PGR_Write_Path>/<PGLB_PID_FILE> and send it SIGTERM
14541+ * ARGS
14542+ * void
14543+ * RETURN
14544+ * none (exits with status 1 if the pid file is unreadable, holds no valid pid, or kill() fails)
14545+ *--------------------------------------------------------------------
14546+ */
14547+static void
14548+stop_pglb(void)
14549+{
14550+ char * func = "stop_pglb()";
14551+ FILE *fd;
14552+ char fname[256];
14553+ char pidbuf[128];
14554+ pid_t pid;
14555+
14556+ if (PGR_Write_Path == NULL)
14557+ PGR_Write_Path = ".";
14558+ snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGLB_PID_FILE);
14559+ fd = fopen(fname, "r");
14560+ if (!fd)
14561+ {
14562+ show_error("%s:open() %s file failed. (%s)",
14563+ func,fname, strerror(errno));
14564+ exit(1);
14565+ }
14566+ memset(pidbuf,0,sizeof(pidbuf));
14567+ fread(pidbuf, 1, sizeof(pidbuf) - 1, fd); /* byte-wise read that always leaves the final NUL in place */
14568+ fclose(fd);
14569+ pid = atoi(pidbuf);
14570+ if (pid <= 0)
14571+ errno = EINVAL; /* kill() with pid <= 0 targets process groups (our own for 0), never a single pglb */
14572+ if ((pid <= 0) || (kill (pid,SIGTERM) == -1))
14573+ {
14574+ show_error("%s:could not stop pid: %d (%s)",func,pid,strerror(errno));
14575+ exit(1);
14576+ }
14577+}
14578+
14579+
14580+/*--------------------------------------------------------------------
14581+ * SYMBOL
14582+ * is_exist_pid_file()
14583+ * NOTES
14584+ * Tell whether <PGR_Write_Path>/<PGLB_PID_FILE> can be stat()ed.
14585+ * ARGS
14586+ * void
14587+ * RETURN
14588+ * 1: the pid file exists
14589+ * 0: the pid file does not exist (or cannot be examined)
14590+ *--------------------------------------------------------------------
14591+ */
14592+static int
14593+is_exist_pid_file(void)
14594+{
14595+ struct stat pid_stat;
14596+ char pid_path[256];
14597+ int found;
14598+
14599+ snprintf(pid_path, sizeof(pid_path), "%s/%s", PGR_Write_Path, PGLB_PID_FILE);
14600+
14601+ /*
14602+ * stat() returns 0 only when the path could be examined.
14603+ * Every failure, whatever its cause, is reported to the
14604+ * caller as "no pid file".
14605+ */
14606+ found = (stat(pid_path, &pid_stat) == 0) ? 1 : 0;
14607+
14608+ return found;
14609+}
14610+
14611+
14612+/*--------------------------------------------------------------------
14613+ * SYMBOL
14614+ * PGRrecreate_child()
14615+ * NOTES
14616+ * Signal handler: reaps every terminated child without blocking and creates a new child for the cluster each one belonged to
14617+ * ARGS
14618+ * int signal_args: signal number (expecting the SIGCHLD)
14619+ * RETURN
14620+ * none
14621+ *--------------------------------------------------------------------
14622+ */
14623+void
14624+PGRrecreate_child(int signal_args)
14625+{
14626+ pid_t pid = 0;
14627+ int status;
14628+ ClusterTbl * cluster_p;
14629+
14630+ReWait:
14631+
14632+ errno = 0;
14633+#ifdef HAVE_WAITPID
14634+ while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
14635+ {
14636+#else
14637+ while ((pid = wait3(&status, WNOHANG, NULL)) > 0)
14638+ {
14639+#endif
14640+ cluster_p = scan_cluster_by_pid(pid); /* NULL for an unknown pid - NOTE(review): confirm PGRcreate_child() accepts NULL */
14641+ pid = PGRcreate_child(cluster_p); /* pid is reused for the result; the loop test assigns it again */
14642+ }
14643+ if ((pid < 0) && (errno == EINTR)) /* the wait call was interrupted: try again */
14644+ goto ReWait;
14645+}
14646+
14647+/*--------------------------------------------------------------------
14648+ * SYMBOL
14649+ * close_child()
14650+ * NOTES
14651+ * Signal handler: marks the first cluster flagged TBL_ERROR_NOTICE as TBL_ERROR and terminates every child attached to its rec_no
14652+ * ARGS
14653+ * int signal_args: signal number (expecting the SIGUSR2)
14654+ * RETURN
14655+ * none
14656+ *--------------------------------------------------------------------
14657+ */
14658+static void
14659+close_child(int signal_args)
14660+{
14661+ char * func = "close_child()";
14662+ ChildTbl * child;
14663+ ClusterTbl * cluster;
14664+ int rec_no = -1; /* stays negative when no cluster is flagged */
14665+
14666+ if (( Cluster_Tbl == NULL) || (Child_Tbl == NULL))
14667+ {
14668+ show_error("%s:Cluster_Tbl or Child_Tbl is not initialize",func);
14669+ return ;
14670+ }
14671+ cluster = Cluster_Tbl;
14672+ while(cluster->useFlag != TBL_END)
14673+ {
14674+ if (cluster->useFlag == TBL_ERROR_NOTICE)
14675+ {
14676+ rec_no = cluster->rec_no;
14677+ PGRset_status_on_cluster_tbl(TBL_ERROR,cluster);
14678+ break; /* only the first flagged cluster is handled per signal */
14679+ }
14680+ cluster++;
14681+ }
14682+ if (rec_no < 0)
14683+ {
14684+ return; /* NOTE(review): this return and the one below skip the PGRsignal() re-arm at the end - confirm that is intended */
14685+ }
14686+ child = Child_Tbl;
14687+ while(child->useFlag != TBL_END)
14688+ {
14689+ if (child->rec_no == rec_no)
14690+ {
14691+ if (kill (child->pid,SIGTERM) == -1)
14692+ {
14693+ show_error("%s:could not stop pid: %d (%s)",func,child->pid,strerror(errno));
14694+ return;
14695+ }
14696+ PGRchild_wait(signal_args);
14697+ child->useFlag = DATA_FREE;
14698+ }
14699+ child++;
14700+ }
14701+ PGRsignal(SIGUSR2, close_child); /* install the handler again for the next SIGUSR2 */
14702+}
14703+
14704+/*--------------------------------------------------------------------
14705+ * SYMBOL
14706+ * scan_cluster_by_pid()
14707+ * NOTES
14708+ * Map a child process id to the cluster table record that shares the child's rec_no
14709+ * ARGS
14710+ * pid_t pid: child process id (I)
14711+ * RETURN
14712+ * OK: pointer of cluster table
14713+ * NG: NULL
14714+ *--------------------------------------------------------------------
14715+ */
14716+static ClusterTbl *
14717+scan_cluster_by_pid(pid_t pid)
14718+{
14719+ char * func = "scan_cluster_by_pid()";
14720+ ChildTbl * owner;
14721+ ClusterTbl * entry;
14722+ int idx;
14723+
14724+ if (Child_Tbl == NULL)
14725+ {
14726+ show_error("%s:Child Table is not initialize",func);
14727+ return NULL;
14728+ }
14729+ if (Cluster_Tbl == NULL)
14730+ {
14731+ show_error("%s:Cluster Table is not initialize",func);
14732+ return NULL;
14733+ }
14734+
14735+ /* Step 1: walk the child table up to its TBL_END marker, */
14736+ /* looking for the record that carries this pid. */
14737+ for (owner = Child_Tbl; owner->useFlag != TBL_END; owner++)
14738+ {
14739+ if (owner->pid == pid)
14740+ {
14741+ break;
14742+ }
14743+ }
14744+ if (owner->useFlag == TBL_END)
14745+ {
14746+ show_error("%s:pid:%d not found in child table",func,pid);
14747+ return NULL;
14748+ }
14749+
14750+ /* Step 2: look for the cluster record with the same rec_no. */
14751+ /* The walk stops at the TBL_END marker or after ClusterNum */
14752+ /* records, whichever comes first. */
14753+ entry = Cluster_Tbl;
14754+ for (idx = 0; (entry->useFlag != TBL_END) && (idx < ClusterNum); idx++, entry++)
14755+ {
14756+ if (entry->rec_no == owner->rec_no)
14757+ {
14758+ return entry;
14759+ }
14760+ }
14761+ return NULL;
14762+}
14763+
14764+/*--------------------------------------------------------------------
14765+ * SYMBOL
14766+ * usage()
14767+ * NOTES
14768+ * Print the pglb version and command line help on stderr; the synopsis lists every option that main() passes to getopt()
14769+ * ARGS
14770+ * void
14771+ * RETURN
14772+ * none
14773+ *--------------------------------------------------------------------
14774+ */
14775+static void
14776+usage(void)
14777+{
14778+ char * path;
14779+
14780+ path = getenv("PGDATA"); /* same default that main() uses for the config path */
14781+ if (path == NULL)
14782+ path = ".";
14783+ fprintf(stderr,"pglb version [%s]\n",PGLB_VERSION);
14784+ fprintf(stderr,"A load balancer for PostgreSQL\n\n");
14785+ fprintf(stderr,"usage: pglb [-D path_of_config_file] [-W path_of_work_files] [-U user_name] [-w fork_wait_time] [-l][-n][-v][-h][stop | restart]\n");
14786+ fprintf(stderr," config file default path: %s/%s\n",path, PGLB_CONF_FILE);
14787+ fprintf(stderr," -l: print error logs in the log file.\n");
14788+ fprintf(stderr," -n: don't run in daemon mode.\n");
14789+ fprintf(stderr," -v: debug mode. need '-n' flag\n");
14790+ fprintf(stderr," -h: print this help\n");
14791+ fprintf(stderr," stop: stop pglb\n");
14792+ fprintf(stderr," restart: restart pglb\n");
14793+}
14794+
14795+/*--------------------------------------------------------------------
14796+ * SYMBOL
14797+ * main()
14798+ * NOTES
14799+ * Entry point of pglb: installs the signal handlers, parses the options, handles stop/restart, daemonizes, writes the pid file and starts the recovery, lifecheck and load balance modules
14800+ * ARGS
14801+ * int argc: number of parameter
14802+ * char ** argv: value of parameter
14803+ * RETURN
14804+ * STATUS_OK (normally not reached: the process ends through exit(); status 1 reports a usage or startup error)
14805+ *--------------------------------------------------------------------
14806+ */
14807+int
14808+main(int argc, char ** argv)
14809+{
14810+ int opt = 0;
14811+ char * r_path = NULL;
14812+ char * w_path = NULL;
14813+ int detach = 1;
14814+
14815+ PGRsignal(SIGHUP, pglb_exit);
14816+ PGRsignal(SIGINT, pglb_exit);
14817+ PGRsignal(SIGQUIT, pglb_exit);
14818+ PGRsignal(SIGTERM, pglb_exit);
14819+ PGRsignal(SIGALRM, SIG_IGN); /* ignored */
14820+ PGRsignal(SIGPIPE, SIG_IGN); /* ignored */
14821+ PGRsignal(SIGTTIN, SIG_IGN); /* ignored */
14822+ PGRsignal(SIGTTOU, SIG_IGN); /* ignored */
14823+ PGRsignal(SIGCHLD,PGRchild_wait);
14824+ PGRsignal(SIGUSR1, SIG_IGN); /* ignored */
14825+ PGRsignal(SIGUSR2, close_child); /* close child process */
14826+ r_path = getenv("PGDATA");
14827+ if (r_path == NULL)
14828+ r_path = ".";
14829+
14830+ while ((opt = getopt(argc, argv, "U:D:W:w:lvnh")) != -1)
14831+ {
14832+ switch (opt)
14833+ {
14834+ case 'U':
14835+ if (!optarg)
14836+ {
14837+ usage();
14838+ exit(1);
14839+ }
14840+ PGRuserName = strdup(optarg);
14841+ break;
14842+ case 'D':
14843+ if (!optarg)
14844+ {
14845+ usage();
14846+ exit(1);
14847+ }
14848+ r_path = optarg;
14849+ break;
14850+ case 'W':
14851+ if (!optarg)
14852+ {
14853+ usage();
14854+ exit(1);
14855+ }
14856+ w_path = optarg;
14857+ break;
14858+ case 'w':
14859+ fork_wait_time = atoi(optarg);
14860+ if (fork_wait_time < 0)
14861+ fork_wait_time = 0;
14862+ break;
14863+ case 'l':
14864+ Log_Print = 1;
14865+ break;
14866+ case 'v':
14867+ Debug_Print = 1;
14868+ break;
14869+ case 'n':
14870+ detach = 0;
14871+ break;
14872+ case 'h':
14873+ usage();
14874+ exit(0);
14875+ break;
14876+ default:
14877+ usage();
14878+ exit(1);
14879+ }
14880+ }
14881+ PGR_Data_Path = r_path;
14882+ if (w_path == NULL)
14883+ {
14884+ PGR_Write_Path = PGR_Data_Path; /* work files default to the config directory */
14885+ }
14886+ else
14887+ {
14888+ PGR_Write_Path = w_path;
14889+ }
14890+
14891+ if (optind == (argc-1) &&
14892+ ((!strcmp(argv[optind],"stop")) ||
14893+ (!strcmp(argv[optind],"restart"))))
14894+ {
14895+ stop_pglb(); /* NOTE(review): on restart the old process unlinks the pid file asynchronously in pglb_exit() */
14896+ if (!strcmp(argv[optind],"stop"))
14897+ {
14898+ exit(0);
14899+ }
14900+ }
14901+ else if (optind == argc)
14902+ {
14903+ if (is_exist_pid_file())
14904+ {
14905+ fprintf(stderr,"pid file %s/%s found. is another pglb running?\n", PGR_Write_Path, PGLB_PID_FILE);
14906+ exit(1);
14907+ }
14908+ }
14909+ else if (optind < argc)
14910+ {
14911+ usage();
14912+ exit(1);
14913+ }
14914+
14915+ if (detach)
14916+ {
14917+ daemonize();
14918+ }
14919+ write_pid_file(); /* written after daemonize() so the file holds the daemon's pid */
14920+
14921+ if (init_pglb(PGR_Data_Path) != STATUS_OK)
14922+ {
14923+ exit(1); /* initialization failed: report it with a non-zero status */
14924+ }
14925+
14926+ /* call recovery process */
14927+ PGRrecovery_main(fork_wait_time);
14928+
14929+ /* call lifecheck process */
14930+ PGRlifecheck_main(fork_wait_time);
14931+
14932+ /* start loadbalance module */
14933+ load_balance_main();
14934+ pglb_exit(0);
14935+ return STATUS_OK;
14936+}
14937+
14938+void
14939+PGRexit_subprocess(int sig)
14940+{ /* exported entry to the file-local pglb_exit() shutdown path; does not return */
14941+ pglb_exit(sig);
14942+}
14943diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pglb.conf.sample pgcluster-1.7.0rc7/src/pgcluster/pglb/pglb.conf.sample
14944--- postgresql-8.2.4/src/pgcluster/pglb/pglb.conf.sample 1970-01-01 01:00:00.000000000 +0100
14945+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pglb.conf.sample 2007-02-18 22:52:17.000000000 +0100
14946@@ -0,0 +1,73 @@
14947+#============================================================
14948+# Load Balance Server configuration file
14949+#-------------------------------------------------------------
14950+# file: pglb.conf
14951+#-------------------------------------------------------------
14952+# This file controls:
14953+# o which hosts are DB cluster servers
14954+# o which port is used to connect to each DB cluster server
14955+# o how many connections are allowed on each DB server
14956+#============================================================
14957+#-------------------------------------------------------------
14958+# set cluster DB server information
14959+# o Host_Name : Hostname of Cluster
14960+# Please write a host name by FQDN or IP address.
14961+# o Port : Connection port for postmaster
14962+# o Max_Connect : Maximum number of connections to postmaster
14963+#-------------------------------------------------------------
14964+#<Cluster_Server_Info>
14965+# <Host_Name> master.pgcluster.org </Host_Name>
14966+# <Port> 5432 </Port>
14967+# <Max_Connect> 32 </Max_Connect>
14968+#</Cluster_Server_Info>
14969+#<Cluster_Server_Info>
14970+# <Host_Name> post2.pgcluster.org </Host_Name>
14971+# <Port> 5432 </Port>
14972+# <Max_Connect> 32 </Max_Connect>
14973+#</Cluster_Server_Info>
14974+#<Cluster_Server_Info>
14975+# <Host_Name> post3.pgcluster.org </Host_Name>
14976+# <Port> 5432 </Port>
14977+# <Max_Connect> 32 </Max_Connect>
14978+#</Cluster_Server_Info>
14979+#-------------------------------------------------------------
14980+# set Load Balance server information
14981+# o Host_Name : The host name of this load balance server
14982+# Please write a host name by FQDN or IP address.
14983+# o Backend_Socket_Dir : Unix domain socket path for the backend
14984+# o Receive_Port : Connection port from client
14985+# o Recovery_Port : Connection port for recovery process
14986+# o Max_Cluster_Num : Maximum number of cluster DB servers
14987+# o Use_Connection_Pooling : Use connection pool [yes/no]
14988+# o LifeCheck_Timeout : Timeout of the lifecheck response
14989+# o LifeCheck_Interval : Interval time of the lifecheck
14990+# (range 1s - 1h)
14991+# 10s -- 10 seconds
14992+# 10min -- 10 minutes
14993+# 1h -- 1 hour
14994+#-------------------------------------------------------------
14995+<Host_Name> loadbalancer.pgcluster.org </Host_Name>
14996+<Backend_Socket_Dir> /tmp </Backend_Socket_Dir>
14997+<Receive_Port> 5432 </Receive_Port>
14998+<Recovery_Port> 6001 </Recovery_Port>
14999+<Max_Cluster_Num> 128 </Max_Cluster_Num>
15000+<Use_Connection_Pooling> no </Use_Connection_Pooling>
15001+<LifeCheck_Timeout> 3s </LifeCheck_Timeout>
15002+<LifeCheck_Interval> 15s </LifeCheck_Interval>
15003+#-------------------------------------------------------------
15004+# Log file setup
15005+#
15006+# o File_Name : Log file name with full path
15007+# o File_Size : Maximum size of each log file
15008+# Please specify a number and an optional unit (K or M)
15009+# 10 -- 10 Byte
15010+# 10K -- 10 KByte
15011+# 10M -- 10 MByte
15012+# o Rotate : Rotation times
15013+# If specified 0, old versions are removed.
15014+#-------------------------------------------------------------
15015+<Log_File_Info>
15016+ <File_Name> /tmp/pglb.log </File_Name>
15017+ <File_Size> 1M </File_Size>
15018+ <Rotate> 3 </Rotate>
15019+</Log_File_Info>
15020diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pglb.h pgcluster-1.7.0rc7/src/pgcluster/pglb/pglb.h
15021--- postgresql-8.2.4/src/pgcluster/pglb/pglb.h 1970-01-01 01:00:00.000000000 +0100
15022+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pglb.h 2007-03-01 16:27:49.000000000 +0100
15023@@ -0,0 +1,472 @@
15024+/*--------------------------------------------------------------------
15025+ * FILE:
15026+ * pglb.h
15027+ *
15028+ * Portions Copyright (c) 2003-2006 Atsushi Mitani
15029+ *--------------------------------------------------------------------
15030+ */
15031+#ifndef PGLB_H
15032+#define PGLB_H
15033+
15034+#define PGLB_VERSION "1.7.0rc7"
15035+
15036+#include "../libpgc/libpgc.h"
15037+
15038+/*
15039+ * from pool.h
15040+ */
15041+
15042+/*
15043+ * define this if you do not want to issue RESET ALL at each new
15044+ * connection. Also you need to define this for 7.1 or prior
15045+ * PostgreSQL since they do not support RESET ALL
15046+ */
15047+#undef NO_RESET_ALL
15048+
15049+/* undef this if you have problems with non blocking accept() */
15050+#define NONE_BLOCK
15051+
15052+#define POOLMAXPATHLEN 8192
15053+
15054+/* configuration file name */
15055+#define POOL_CONF_FILE_NAME "pgpool.conf"
15056+
15057+/* pid file directory */
15058+#define DEFAULT_LOGDIR "/tmp"
15059+
15060+/* Unix domain socket directory */
15061+#define DEFAULT_SOCKET_DIR "/tmp"
15062+
15063+/* pid file name */
15064+#define PID_FILE_NAME "pgpool.pid"
15065+
15066+/* strict mode comment in SQL */
15067+#define STRICT_MODE_STR "/*STRICT*/"
15068+#define STRICT_MODE(s) (strncasecmp((s), STRICT_MODE_STR, strlen(STRICT_MODE_STR)) == 0)
15069+
15070+typedef enum {
15071+ POOL_CONTINUE = 0,
15072+ POOL_IDLE,
15073+ POOL_END,
15074+ POOL_ERROR,
15075+ POOL_FATAL
15076+} POOL_STATUS;
15077+
15078+/* protocol major version numbers */
15079+#define PROTO_MAJOR_V2 2
15080+#define PROTO_MAJOR_V3 3
15081+
15082+/*
15083+ * startup packet definitions (v2) stolen from PostgreSQL
15084+ */
15085+#define SM_DATABASE 64
15086+#define SM_USER 32
15087+#define SM_OPTIONS 64
15088+#define SM_UNUSED 64
15089+#define SM_TTY 64
15090+
15091+typedef struct PGR_StartupPacket_v2
15092+{
15093+ int protoVersion; /* Protocol version */
15094+ char database[SM_DATABASE]; /* Database name */
15095+ char user[SM_USER]; /* User name */
15096+ char options[SM_OPTIONS]; /* Optional additional args */
15097+ char unused[SM_UNUSED]; /* Unused */
15098+ char tty[SM_TTY]; /* Tty for debug output */
15099+} PGR_StartupPacket_v2;
15100+
15101+/* startup packet info */
15102+typedef struct
15103+{
15104+ char *startup_packet; /* raw startup packet without packet length (malloced area) */
15105+ int len; /* raw startup packet length */
15106+ int major; /* protocol major version */
15107+ int minor; /* protocol minor version */
15108+ char *database; /* database name in startup_packet (malloced area) */
15109+ char *user; /* user name in startup_packet (malloced area) */
15110+} PGR_StartupPacket;
15111+
15112+typedef struct CancelPacket
15113+{
15114+ int protoVersion; /* Protocol version */
15115+ int pid; /* backend process id */
15116+ int key; /* cancel key */
15117+} CancelPacket;
15118+
15119+/*
15120+ * configuration parameters
15121+ */
15122+typedef struct {
15123+ int inetdomain; /* should we make an INET domain socket too? */
15124+ int port; /* port # to bind */
15125+ char *socket_dir; /* pgpool socket directory */
15126+ char *backend_host_name; /* backend host name */
15127+ int backend_port; /* backend port # */
15128+ char *secondary_backend_host_name; /* secondary backend host name */
15129+ int secondary_backend_port; /* secondary backend port # */
15130+ int num_init_children; /* # of children initially pre-forked */
15131+ int child_life_time; /* if idle for this many seconds, child exits */
15132+ int connection_life_time; /* if idle for this many seconds, connection closes */
15133+ int max_pool; /* max # of connection pools per child */
15134+ char *logdir; /* logging directory */
15135+ char *backend_socket_dir; /* Unix domain socket directory for the PostgreSQL server */
15136+ int replication_mode; /* replication mode */
15137+ int replication_strict; /* if non 0, wait for completion of the
15138+ query sent to master to avoid deadlock */
15139+ /*
15140+ * if secondary does not respond within this many milliseconds, abort this session.
15141+ * this is not compatible with replication_strict = 1. 0 means no timeout.
15142+ */
15143+ int replication_timeout;
15144+
15145+ int load_balance_mode; /* load balance mode */
15146+
15147+ /* the following do not exist in the configuration file */
15148+ char *current_backend_host_name; /* current backend host name */
15149+ int current_backend_port; /* current backend port # */
15150+ int replication_enabled; /* replication mode enabled */
15151+
15152+ int replication_stop_on_mismatch; /* if there's a data mismatch between master and secondary
15153+ * start degeneration to stop replication mode
15154+ */
15155+} POOL_CONFIG;
15156+
15157+#define MAX_PASSWORD_SIZE (1024)
15158+
15159+typedef struct {
15160+ int num; /* number of entries */
15161+ char **names; /* parameter names */
15162+ char **values; /* values */
15163+} ParamStatus;
15164+
15165+/*
15166+ * stream connection structure
15167+ */
15168+typedef struct {
15169+ int fd; /* fd for connection */
15170+ FILE *write_fd; /* stream write connection */
15171+
15172+ char *hp; /* pending data buffer head address */
15173+ int po; /* pending data offset */
15174+ int bufsz; /* pending data buffer size */
15175+ int len; /* pending data length */
15176+
15177+ char *sbuf; /* buffer for pool_read_string */
15178+ int sbufsz; /* its size in bytes */
15179+
15180+ char *buf2; /* buffer for pool_read2 */
15181+ int bufsz2; /* its size in bytes */
15182+
15183+ int isbackend; /* this connection is for backend if non 0 */
15184+ int issecondary_backend; /* this connection is for secondary backend if non 0 */
15185+
15186+ char tstate; /* transaction state (V3 only) */
15187+
15188+ /*
15189+ * the following are remembered so that an authenticated connection can be re-used
15190+ */
15191+ int auth_kind; /* 3: clear text password, 4: crypt password, 5: md5 password */
15192+ int pwd_size; /* password (sent back from frontend) size in host order */
15193+ char password[MAX_PASSWORD_SIZE]; /* password (sent back from frontend) */
15194+ char salt[4]; /* password salt */
15195+
15196+ /*
15197+ * the following are used to remember the current session parameter status.
15198+ * a re-used connection will need them (V3 only)
15199+ */
15200+ ParamStatus params;
15201+
15202+ int no_forward; /* if non 0, do not write to frontend */
15203+
15204+} POOL_CONNECTION;
15205+
15206+/*
15207+ * connection pool structure
15208+ */
15209+typedef struct {
15210+ PGR_StartupPacket *sp; /* startup packet info */
15211+ int pid; /* backend pid */
15212+ int key; /* cancel key */
15213+ POOL_CONNECTION *con;
15214+ time_t closetime; /* absolute time in second when the connection closed
15215+ * if 0, that means the connection is under use.
15216+ */
15217+} POOL_CONNECTION_POOL_SLOT;
15218+
15219+#define MAX_CONNECTION_SLOTS 2
15220+
15221+typedef struct {
15222+ int num; /* number of slots */
15223+ POOL_CONNECTION_POOL_SLOT *slots[MAX_CONNECTION_SLOTS];
15224+} POOL_CONNECTION_POOL;
15225+
15226+#define MASTER_CONNECTION(p) ((p)->slots[0])
15227+#define SECONDARY_CONNECTION(p) ((p)->slots[1])
15228+#define MASTER(p) MASTER_CONNECTION(p)->con
15229+#define SECONDARY(p) SECONDARY_CONNECTION(p)->con
15230+#define MAJOR(p) MASTER_CONNECTION(p)->sp->major
15231+#define TSTATE(p) MASTER(p)->tstate
15232+
15233+#define Max(x, y) ((x) > (y) ? (x) : (y))
15234+#define Min(x, y) ((x) < (y) ? (x) : (y))
15235+
15236+/*
15237+ * pglb
15238+ */
15239+
15240+typedef struct {
15241+ int useFlag;
15242+ int sock;
15243+}SocketTbl;
15244+
15245+typedef struct {
15246+ int useFlag;
15247+ char hostName[HOSTNAME_MAX_LENGTH];
15248+ unsigned short port;
15249+ short max_connect;
15250+ int use_num;
15251+ int rate;
15252+ int rec_no;
15253+ int retry_count;
15254+}ClusterTbl;
15255+
15256+typedef struct {
15257+ long mtype;
15258+ char mdata[1];
15259+}MsgData;
15260+
15261+typedef struct {
15262+ int useFlag;
15263+ int rec_no;
15264+ pid_t pid;
15265+}ChildTbl;
15266+
15267+#define UNIX_DOMAIN_FD (0)
15268+#define INET_DOMAIN_FD (1)
15269+typedef struct {
15270+ int unix_fd;
15271+ int inet_fd;
15272+}FrontSocket;
15273+
15274+#define pool_config_inetdomain (0)
15275+#define pool_config_replication_mode (0)
15276+#define pool_config_replication_strict (0)
15277+#define pool_config_replication_timeout (0)
15278+#define pool_config_replication_enabled (0)
15279+#define pool_config_load_balance_mode (0)
15280+#define pool_config_replication_stop_on_mismatch (0)
15281+#define pool_config_port (Recv_Port_Number)
15282+#define pool_config_socket_dir (Backend_Socket_Dir)
15283+#define pool_config_backend_host_name (CurrentCluster->hostName)
15284+#define pool_config_backend_port (CurrentCluster->port)
15285+#define pool_config_secondary_backend_host_name (CurrentCluster->hostName)
15286+#define pool_config_secondary_backend_port (CurrentCluster->port)
15287+#define pool_config_num_init_children (CurrentCluster->max_connect)
15288+#define pool_config_child_life_time (Connection_Life_Time)
15289+#define pool_config_connection_life_time (Connection_Life_Time)
15290+#define pool_config_max_pool (Max_Pool)
15291+#define pool_config_logdir "./"
15292+#define pool_config_backend_socket_dir (Backend_Socket_Dir)
15293+#define pool_config_current_backend_host_name (CurrentCluster->hostName)
15294+#define pool_config_current_backend_port (CurrentCluster->port)
15295+#define REPLICATION (0)
15296+#define IN_LOAD_BALANCE (0)
15297+
15298+/*
15299+ * for pglb
15300+ */
15301+#define MAX_DB_SERVER (32)
15302+#define PGLB_MAX_SOCKET_QUEUE (10000)
15303+#define CLUSTER_TBL_SHM_KEY (1010)
15304+#define PGLB_CONNECT_RETRY_TIME (3)
15305+#define DEFAULT_CONNECT_NUM (32)
15306+#define DEFAULT_PORT (5432)
15307+#define BUF_SIZE (16384)
15308+#define TBL_FREE (0)
15309+#define TBL_INIT (1)
15310+#define TBL_USE (2)
15311+#define TBL_STOP (3)
15312+#define TBL_ACCEPT (10)
15313+#define TBL_ERROR_NOTICE (98)
15314+#define TBL_ERROR (99)
15315+#define TBL_END (-1)
15316+#define STATUS_OK (0)
15317+#define STATUS_ERROR (-1)
15318+#ifdef RECOVERY_PREPARE_REQ
15319+#define ADD_DB RECOVERY_PREPARE_REQ
15320+#else
15321+#define ADD_DB (1)
15322+#endif
15323+#ifdef RECOVERY_PGDATA_ANS
15324+#define STOP_DB RECOVERY_PGDATA_ANS
15325+#else
15326+#define STOP_DB (3)
15327+#endif
15328+#ifdef RECOVERY_FINISH
15329+#define START_DB RECOVERY_FINISH
15330+#else
15331+#define START_DB (9)
15332+#endif
15333+#define DELETE_DB (99)
15334+#define QUERY_TERMINATE (0x00)
15335+#define RESPONSE_TERMINATE (0x5a)
15336+#define PGLB_CONF_FILE "pglb.conf"
15337+#define PGLB_PID_FILE "pglb.pid"
15338+#define PGLB_STATUS_FILE "pglb.sts"
15339+#define PGLB_LOG_FILE "pglb.log"
15340+#define CLUSTER_SERVER_TAG "Cluster_Server_Info"
15341+#define MAX_CONNECT_TAG "Max_Connect"
15342+#define RECOVERY_PORT_TAG "Recovery_Port"
15343+#define RECV_PORT_TAG "Receive_Port"
15344+#define MAX_CLUSTER_TAG "Max_Cluster_Num"
15345+#define USE_CONNECTION_POOL_TAG "Use_Connection_Pooling"
15346+#define MAX_POOL_TAG "Max_Pool_Each_Server"
15347+#define BACKEND_SOCKET_DIR_TAG "Backend_Socket_Dir"
15348+#define CONNECTION_LIFE_TIME "Connection_Life_Time"
15349+#define NOT_USE_CONNECTION_POOL (0)
15350+#define USE_CONNECTION_POOL (1)
15351+
15352+#define PGR_SEND_RETRY_CNT (100)
15353+#define PGR_SEND_WAIT_MSEC (500)
15354+#define PGR_RECV_RETRY_CNT (100)
15355+#define PGR_RECV_WAIT_MSEC (500)
15356+
15357+extern int Recv_Port_Number;
15358+extern int Recovery_Port_Number;
15359+extern uint16_t LifeCheck_Port_Number;
15360+extern int Use_Connection_Pool;
15361+extern int Max_Pool;
15362+extern int Connection_Life_Time;
15363+extern int Msg_Id;
15364+extern ClusterTbl * Cluster_Tbl;
15365+extern int Max_DB_Server;
15366+extern int MaxBackends;
15367+extern char * Backend_Socket_Dir;
15368+extern int ClusterShmid;
15369+extern int ClusterSemid;
15370+extern int ChildShmid;
15371+extern int ClusterNum;
15372+extern ChildTbl * Child_Tbl;
15373+extern char * PGR_Data_Path;
15374+extern char * PGR_Write_Path;
15375+extern char * Backend_Socket_Dir;
15376+extern FrontSocket Frontend_FD;
15377+extern FILE * StatusFp;
15378+extern char * ResolvedName;
15379+extern char * PGRuserName;
15380+
15381+/* for child.c */
15382+extern POOL_CONNECTION * Frontend;
15383+extern ClusterTbl * CurrentCluster;
15384+
15385+extern char * Function;
15386+
15387+extern POOL_CONNECTION_POOL *pool_connection_pool; /* connection pool */
15388+
15389+/* extern of main.c */
15390+extern void PGRrecreate_child(int signal_args);
15391+extern void PGRexit_subprocess(int sig);
15392+
15393+/* extern of child.c */
15394+extern int PGRpre_fork_children(ClusterTbl * ptr);
15395+extern int PGRpre_fork_child(ClusterTbl * ptr);
15396+extern int PGRdo_child( int use_pool);
15397+extern int PGRcreate_child(ClusterTbl * cluster_p);
15398+extern pid_t PGRscan_child_tbl(ClusterTbl * cluster_p);
15399+extern void notice_backend_error(void);
15400+extern void do_pooling_child(int sig);
15401+extern int PGRset_status_to_child_tbl(pid_t pid, int status);
15402+extern int PGRadd_child_tbl(ClusterTbl * cluster_p, pid_t pid, int status);
15403+extern int PGRget_child_status(pid_t pid);
15404+extern void PGRreturn_connection_full_error(void);
15405+extern void PGRreturn_no_connection_error(void);
15406+extern void PGRquit_children_on_cluster(int rec_no);
15407+
15408+/* extern of cluster_table.c */
15409+extern int PGRis_cluster_alive(void) ;
15410+extern ClusterTbl * PGRscan_cluster(void);
15411+extern void PGRset_key_of_cluster(ClusterTbl * ptr, RecoveryPacket * packet);
15412+extern ClusterTbl * PGRadd_cluster_tbl (ClusterTbl * conf_data);
15413+extern ClusterTbl * PGRset_status_on_cluster_tbl (int status, ClusterTbl * ptr);
15414+extern ClusterTbl * PGRsearch_cluster_tbl(ClusterTbl * conf_data);
15415+
15416+/* extern of load_balance.c */
15417+extern int PGRload_balance(void);
15418+extern int PGRload_balance_with_pool(void);
15419+extern char PGRis_connection_full(ClusterTbl * ptr);
15420+extern void PGRuse_connection(ClusterTbl * ptr);
15421+extern void PGRrelease_connection(ClusterTbl * ptr);
15422+extern void PGRchild_wait(int sig);
15423+
15424+/* extern of recovery.c */
15425+extern void PGRrecovery_main(int fork_wait_fime);
15426+
15427+/* extern of socket.c */
15428+extern int PGRcreate_unix_domain_socket(char * sock_dir, unsigned short port);
15429+extern int PGRcreate_recv_socket(char * hostName , unsigned short portNumber);
15430+extern int PGRcreate_acception(int fd, char * hostName , unsigned short portNumber);
15431+extern void PGRclose_sock(int * sock);
15432+extern int PGRread_byte(int sock,char * buf,int len, int flag);
15433+extern int PGRcreate_cluster_socket( int * sock, ClusterTbl * ptr );
15434+
15435+/* extern of pool_auth.c */
15436+extern int pool_do_auth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp);
15437+extern int pool_do_reauth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp);
15438+extern int pool_read_message_length(POOL_CONNECTION_POOL *cp);
15439+extern signed char pool_read_kind(POOL_CONNECTION_POOL *cp);
15440+
15441+/* extern of pool_connection_pool.c */
15442+extern int pool_init_cp(void);
15443+extern POOL_CONNECTION_POOL *pool_get_cp(char *user, char *database, int protoMajor);
15444+extern void pool_discard_cp(char *user, char *database, int protoMajor);
15445+extern POOL_CONNECTION_POOL *pool_create_cp(void);
15446+extern void pool_connection_pool_timer(POOL_CONNECTION_POOL *backend);
15447+extern void pool_backend_timer_handler(int sig);
15448+extern int connect_inet_domain_socket(int secondary_backend);
15449+extern int connect_unix_domain_socket(int secondary_backend);
15450+extern char PGRis_same_host(char * host1, char * host2);
15451+extern void pool_finish(void);
15452+
15453+/* extern of pool_process_query.c */
15454+extern POOL_STATUS pool_process_query(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, int connection_reuse);
15455+extern POOL_STATUS ErrorResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
15456+extern void pool_enable_timeout();
15457+extern void pool_disable_timeout();
15458+extern int pool_check_fd(POOL_CONNECTION *cp, int notimeout);
15459+extern void pool_send_frontend_exits(POOL_CONNECTION_POOL *backend);
15460+extern POOL_STATUS SimpleForwardToFrontend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
15461+extern POOL_STATUS SimpleForwardToBackend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
15462+extern POOL_STATUS ParameterStatus(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
15463+extern void pool_send_error_message(POOL_CONNECTION *frontend, int protoMajor, char *code, char *message, char *detail, char *hint, char *file, int line);
15464+
15465+/* extern of pool_params.c */
15466+extern int pool_init_params(ParamStatus *params);
15467+extern void pool_discard_params(ParamStatus *params);
15468+extern char *pool_find_name(ParamStatus *params, char *name, int *pos);
15469+extern int pool_get_param(ParamStatus *params, int index, char **name, char **value);
15470+extern int pool_add_param(ParamStatus *params, char *name, char *value);
15471+extern void pool_param_debug_print(ParamStatus *params);
15472+
15473+/* extern of pool_stream.c */
15474+extern POOL_CONNECTION *pool_open(int fd);
15475+extern void pool_close(POOL_CONNECTION *cp);
15476+extern int pool_read(POOL_CONNECTION *cp, void *buf, int len);
15477+extern char *pool_read2(POOL_CONNECTION *cp, int len);
15478+extern int pool_write(POOL_CONNECTION *cp, void *buf, int len);
15479+extern int pool_flush(POOL_CONNECTION *cp);
15480+extern int pool_write_and_flush(POOL_CONNECTION *cp, void *buf, int len);
15481+extern char *pool_read_string(POOL_CONNECTION *cp, int *len, int line);
15482+
15483+/*
15484+ * external prototype in show.c
15485+ */
15486+extern void show_error(const char * fmt,...);
15487+extern void show_debug(const char * fmt,...);
15488+extern void PGRwrite_log_file(FILE * fp, const char * fmt,...);
15489+
15490+/*
15491+ * external prototype in lifecheck.c
15492+ */
15493+extern int PGRlifecheck_main(int fork_wait_time);
15494+
15495+#endif /* PGLB_H */
15496diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pool_auth.c pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_auth.c
15497--- postgresql-8.2.4/src/pgcluster/pglb/pool_auth.c 1970-01-01 01:00:00.000000000 +0100
15498+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_auth.c 2007-02-18 22:52:17.000000000 +0100
15499@@ -0,0 +1,959 @@
15500+/*--------------------------------------------------------------------
15501+ * FILE:
15502+ * pool_auth.c
15503+ *
15504+ * NOTE:
15505+ * authenticaton stuff
15506+ *
15507+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
15508+ * Portions Copyright (c) 2003-2006, Tatsuo Ishii
15509+ *--------------------------------------------------------------------
15510+ */
15511+/*
15512+ * Permission to use, copy, modify, and distribute this software and
15513+ * its documentation for any purpose and without fee is hereby
15514+ * granted, provided that the above copyright notice appear in all
15515+ * copies and that both that copyright notice and this permission
15516+ * notice appear in supporting documentation, and that the name of the
15517+ * author not be used in advertising or publicity pertaining to
15518+ * distribution of the software without specific, written prior
15519+ * permission. The author makes no representations about the
15520+ * suitability of this software for any purpose. It is provided "as
15521+ * is" without express or implied warranty.
15522+ *
15523+*/
15524+#include <sys/types.h>
15525+#include <netinet/in.h>
15526+#include <sys/param.h>
15527+#include <arpa/inet.h>
15528+#include <errno.h>
15529+#include <string.h>
15530+#include <sys/time.h>
15531+#include <stdio.h>
15532+#include "replicate_com.h"
15533+#include "pglb.h"
15534+
15535+int pool_do_auth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp); /* authenticate a new frontend against the backend(s) */
15536+int pool_do_reauth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp); /* re-authenticate a frontend on a reused connection */
15537+int pool_read_message_length(POOL_CONNECTION_POOL *cp); /* read the 4-byte V3 message length */
15538+signed char pool_read_kind(POOL_CONNECTION_POOL *cp); /* read the 1-byte message kind */
15539+
15540+static POOL_STATUS pool_send_auth_ok(POOL_CONNECTION *frontend, int pid, int key, int protoMajor); /* send auth OK (V2) and backend key data */
15541+static int do_clear_text_password(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor); /* auth request code 3 */
15542+static int do_crypt(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor); /* auth request code 4 */
15543+static int do_md5(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor); /* auth request code 5 */
15544+
15545+/*
15546+* Authenticate a new frontend against the backend(s) in cp and relay the outcome to the frontend. Returns 0 on success, non-zero (-1) on failure.
15547+*/
15548+int pool_do_auth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp)
15549+{
15550+ char * func = "pool_do_auth()";
15551+ int status;
15552+ signed char kind;
15553+ int pid, pid1; /* pid first holds the auth request code, later the backend pid; pid1/key1 are the secondary's values */
15554+ int key, key1;
15555+ int protoMajor;
15556+ int length;
15557+
15558+ protoMajor = MAJOR(cp);
15559+
15560+ kind = pool_read_kind(cp);
15561+ if (kind < 0)
15562+ {
15563+ return -1;
15564+ }
15565+
15566+ /* error response? */
15567+ if (kind == 'E')
15568+ {
15569+ /* we assume an error response at this stage is most likely a
15570+ * protocol version mismatch (v3 frontend vs. v2 backend). So we throw
15571+ * a V2 protocol error response in the hope that the v3 frontend
15572+ * will negotiate again using the v2 protocol.
15573+ */
15574+ show_error("%s:pool_do_auth: maybe protocol version mismatch (current version %d)",func, protoMajor);
15575+ ErrorResponse(frontend, cp);
15576+ return -1;
15577+ }
15578+ else if (kind != 'R')
15579+ {
15580+ show_error("%s:pool_do_auth: expect \"R\" got %c",func, kind);
15581+ return -1;
15582+ }
15583+
15584+ /*
15585+ * message length (v3 only) */
15586+ if (protoMajor == PROTO_MAJOR_V3 && pool_read_message_length(cp) < 0)
15587+ {
15588+ return -1;
15589+ }
15590+
15591+ /*
15592+ * read authentication request kind.
15593+ *
15594+ * 0: authentication ok
15595+ * 1: kerberos v4
15596+ * 2: kerberos v5
15597+ * 3: clear text password
15598+ * 4: crypt password
15599+ * 5: md5 password
15600+ * 6: scm credential
15601+ *
15602+ * in replication mode, we only support kind = 0, 3. this is because the "salt"
15603+ * cannot be replicated among master and secondary.
15604+ * in non replication mode, we support kind = 0, 3, 4, 5
15605+ */
15606+
15607+ status = pool_read(MASTER(cp), &pid, sizeof(pid)); /* pid temporarily holds the auth request code */
15608+ if (status < 0)
15609+ {
15610+ show_error("%s:pool_do_auth: read authentication kind failed",func);
15611+ return -1;
15612+ }
15613+
15614+ if (REPLICATION)
15615+ {
15616+ status = pool_read(SECONDARY(cp), &pid1, sizeof(pid1)); /* consumed from the stream; not compared with the master's code */
15617+
15618+ if (status < 0)
15619+ {
15620+ show_error("%s:pool_do_auth: read authentication kind from secondary failed",func);
15621+ return -1;
15622+ }
15623+ }
15624+
15625+ pid = ntohl(pid);
15626+
15627+ /* trust? */
15628+ if (pid == 0)
15629+ {
15630+ if (protoMajor == PROTO_MAJOR_V3)
15631+ {
15632+ int msglen;
15633+
15634+ pool_write(frontend, "R", 1); /* 'R', length 8, code 0: authentication ok */
15635+ msglen = htonl(8);
15636+ pool_write(frontend, &msglen, sizeof(msglen));
15637+ msglen = htonl(0);
15638+ if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
15639+ {
15640+ return -1;
15641+ }
15642+ }
15643+ MASTER(cp)->auth_kind = 0;
15644+ }
15645+
15646+ /* clear text password authentication? */
15647+ else if (pid == 3)
15648+ {
15649+#ifdef PRINT_DEBUG
15650+ show_debug("%s:trying clear text password authentication",func);
15651+#endif
15652+
15653+ pid = do_clear_text_password(MASTER(cp), frontend, 0, protoMajor);
15654+
15655+ if (pid >= 0 && REPLICATION)
15656+ {
15657+ pid = do_clear_text_password(SECONDARY(cp), frontend, 0, protoMajor);
15658+ }
15659+ }
15660+
15661+ /* crypt authentication? */
15662+ else if (pid == 4)
15663+ {
15664+#ifdef PRINT_DEBUG
15665+ show_debug("%s:trying crypt authentication",func);
15666+#endif
15667+
15668+ pid = do_crypt(MASTER(cp), frontend, 0, protoMajor);
15669+
15670+ if (pid >= 0 && REPLICATION)
15671+ {
15672+ pid = do_crypt(SECONDARY(cp), frontend, 0, protoMajor);
15673+ }
15674+ }
15675+
15676+ /* md5 authentication? */
15677+ else if (pid == 5)
15678+ {
15679+#ifdef PRINT_DEBUG
15680+ show_debug("%s:trying md5 authentication",func);
15681+#endif
15682+
15683+ pid = do_md5(MASTER(cp), frontend, 0, protoMajor);
15684+
15685+ if (pid >= 0 && REPLICATION)
15686+ {
15687+ pid = do_md5(SECONDARY(cp), frontend, 0, protoMajor);
15688+ }
15689+ }
15690+
15691+ if (pid != 0) /* also catches the request codes not handled above (1, 2, 6) */
15692+ {
15693+ show_error("%s:pool_do_auth: backend does not return authenticaton ok",func);
15694+ return -1;
15695+ }
15696+
15697+ /*
15698+ * authentication ok. now read pid and secret key from the
15699+ * backend
15700+ */
15701+ kind = pool_read_kind(cp);
15702+ if (kind < 0)
15703+ {
15704+ return -1;
15705+ }
15706+
15707+ /* error response? */
15708+ if (kind == 'E')
15709+ {
15710+ if (protoMajor == PROTO_MAJOR_V2)
15711+ ErrorResponse(frontend, cp);
15712+ else
15713+ SimpleForwardToFrontend(kind, frontend, cp);
15714+ return -1;
15715+ }
15716+ else if (kind != 'K')
15717+ {
15718+ if (protoMajor == PROTO_MAJOR_V3)
15719+ {
15720+ /* process parameter status messages until another kind arrives */
15721+ while (kind == 'S')
15722+ {
15723+ if (ParameterStatus(frontend, cp) != POOL_CONTINUE)
15724+ return -1;
15725+
15726+ pool_flush(frontend);
15727+
15728+ kind = pool_read_kind(cp);
15729+ if (kind < 0)
15730+ {
15731+ show_error("%s:pool_do_auth: failed to read kind while processing ParamterStatus",func);
15732+ return -1;
15733+ }
15734+ }
15735+ }
15736+ else
15737+ {
15738+ show_error("%s:pool_do_auth: expect \"K\" got %c",func, kind);
15739+ return -1;
15740+ }
15741+ }
15742+
15743+ /*
15744+ * message length (V3 only): must be 12 for BackendKeyData
15745+ */
15746+ if (protoMajor == PROTO_MAJOR_V3 && (length = pool_read_message_length(cp)) != 12)
15747+ {
15748+ show_error("%s:pool_do_auth: invalid messages length(%d) for BackendKeyData",func, length);
15749+ return -1;
15750+ }
15751+
15752+ /*
15753+ * OK, read pid and secret key (stored and forwarded exactly as read, without ntohl)
15754+ */
15755+
15756+ /* pid */
15757+ pool_read(MASTER(cp), &pid, sizeof(pid));
15758+ MASTER_CONNECTION(cp)->pid = pid;
15759+
15760+ /* key */
15761+ pool_read(MASTER(cp), &key, sizeof(key));
15762+ MASTER_CONNECTION(cp)->key = key;
15763+
15764+ if (REPLICATION)
15765+ {
15766+ pool_read(SECONDARY(cp), &pid1, sizeof(pid1));
15767+ SECONDARY_CONNECTION(cp)->pid = pid1; /* keep the secondary's own pid, not the master's */
15768+
15769+ /* key */
15770+ pool_read(SECONDARY(cp), &key1, sizeof(key1));
15771+ SECONDARY_CONNECTION(cp)->key = key1; /* keep the secondary's own key, not the master's */
15772+ }
15773+
15774+ return (pool_send_auth_ok(frontend, pid, key, protoMajor)); /* the frontend is given the master's pid and key */
15775+}
15776+
15777+/*
15778+* Re-authenticate a frontend on a reused (pooled) connection using the method saved in MASTER(cp)->auth_kind. Returns 0 on success, non-zero otherwise.
15779+*/
15780+int pool_do_reauth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp)
15781+{
15782+ char * func = "pool_do_reauth()";
15783+ int status;
15784+ int protoMajor;
15785+
15786+ protoMajor = MAJOR(cp);
15787+
15788+ switch(MASTER(cp)->auth_kind) /* only the master connection is consulted here */
15789+ {
15790+ case 0:
15791+ /* trust: nothing to verify */
15792+ status = 0;
15793+ break;
15794+
15795+ case 3:
15796+ /* clear text password */
15797+ status = do_clear_text_password(MASTER(cp), frontend, 1, protoMajor);
15798+ break;
15799+
15800+ case 4:
15801+ /* crypt password */
15802+ status = do_crypt(MASTER(cp), frontend, 1, protoMajor);
15803+ break;
15804+
15805+ case 5:
15806+ /* md5 password */
15807+ status = do_md5(MASTER(cp), frontend, 1, protoMajor);
15808+ break;
15809+
15810+ default:
15811+ show_error("%s: unknown authentication request code %d",
15812+ func,MASTER(cp)->auth_kind);
15813+ return -1;
15814+ }
15815+
15816+ if (status == 0)
15817+ {
15818+ if (protoMajor == PROTO_MAJOR_V3)
15819+ {
15820+ int msglen;
15821+
15822+ pool_write(frontend, "R", 1); /* 'R', length 8, code 0: authentication ok */
15823+ msglen = htonl(8);
15824+ pool_write(frontend, &msglen, sizeof(msglen));
15825+ msglen = htonl(0);
15826+ if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
15827+ {
15828+ return -1;
15829+ }
15830+ }
15831+ }
15832+ else
15833+ {
15834+#ifdef PRINT_DEBUG
15835+ show_debug("%s: authentication failed",func);
15836+#endif
15837+ return -1;
15838+ }
15839+
15840+ return (pool_send_auth_ok(frontend, MASTER_CONNECTION(cp)->pid, MASTER_CONNECTION(cp)->key, protoMajor) != POOL_CONTINUE); /* replays the pid/key saved on the master connection */
15841+}
15842+
15843+/*
15844+* Send "Authentication OK" (V2 only) followed by the backend key data ('K', pid, key) to the frontend. Returns 0 on success, -1 if a flush fails.
15845+*/
15846+static POOL_STATUS pool_send_auth_ok(POOL_CONNECTION *frontend, int pid, int key, int protoMajor)
15847+{
15848+ char kind;
15849+ int len;
15850+
15851+ if (protoMajor == PROTO_MAJOR_V2)
15852+ {
15853+ /* return "Authentication OK" to the frontend: 'R' followed by a 4-byte zero */
15854+ kind = 'R';
15855+ pool_write(frontend, &kind, 1);
15856+ len = htonl(0);
15857+ if (pool_write_and_flush(frontend, &len, sizeof(len)) < 0)
15858+ {
15859+ return -1;
15860+ }
15861+ }
15862+
15863+ /* send backend key data */
15864+ kind = 'K';
15865+ pool_write(frontend, &kind, 1);
15866+ if (protoMajor == PROTO_MAJOR_V3)
15867+ {
15868+ len = htonl(12); /* V3 adds a length word: 4 (length) + 4 (pid) + 4 (key) */
15869+ pool_write(frontend, &len, sizeof(len));
15870+ }
15871+ pool_write(frontend, &pid, sizeof(pid)); /* pid and key are written as passed in, with no byte-order conversion */
15872+ if (pool_write_and_flush(frontend, &key, sizeof(key)) < 0)
15873+ {
15874+ return -1;
15875+ }
15876+
15877+ return 0;
15878+}
15879+
15880+/*
15881+ * Perform clear text password authentication. Returns 0 when the backend accepts the password (or, on reauth, when it equals the saved one), -1 on error, otherwise the auth code exactly as read from the backend.
15882+ */
15883+static int do_clear_text_password(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor)
15884+{
15885+ char * func = "do_clear_text_password()";
15886+ static int size; /* static: the values read during the master call are reused when called for the secondary */
15887+ static char password[MAX_PASSWORD_SIZE];
15888+ char response;
15889+ int kind;
15890+ int len;
15891+
15892+ /* master? only then is the frontend asked for the password */
15893+ if (!backend->issecondary_backend)
15894+ {
15895+ pool_write(frontend, "R", 1); /* authentication */
15896+ if (protoMajor == PROTO_MAJOR_V3)
15897+ {
15898+ len = htonl(8);
15899+ pool_write(frontend, &len, sizeof(len));
15900+ }
15901+ kind = htonl(3); /* clear text password authentication */
15902+ pool_write_and_flush(frontend, &kind, sizeof(kind)); /* indicating clear text password authentication */
15903+
15904+ /* read password packet */
15905+ if (protoMajor == PROTO_MAJOR_V2)
15906+ {
15907+ if (pool_read(frontend, &size, sizeof(size)))
15908+ {
15909+ show_error("%s: failed to read password packet size",func);
15910+ return -1;
15911+ }
15912+ }
15913+ else
15914+ {
15915+ char k;
15916+
15917+ if (pool_read(frontend, &k, sizeof(k)))
15918+ {
15919+ show_error("%s: failed to read password packet \"p\"",func);
15920+ return -1;
15921+ }
15922+ if (k != 'p')
15923+ {
15924+ show_error("%s:packet does not start with \"p\"",func);
15925+ return -1;
15926+ }
15927+ if (pool_read(frontend, &size, sizeof(size)))
15928+ {
15929+ show_error("%s: failed to read password packet size",func);
15930+ return -1;
15931+ }
15932+ }
15933+
15934+ if ((ntohl(size) - 4) > sizeof(password)) /* size is kept in network order; the payload is size minus the 4-byte length word */
15935+ {
15936+ show_error("%s: password is too long (size: %d)",func, ntohl(size) - 4);
15937+ return -1;
15938+ }
15939+
15940+ if (pool_read(frontend, password, ntohl(size) - 4))
15941+ {
15942+ show_error("%s: failed to read password (size: %d)",func, ntohl(size) - 4);
15943+ return -1;
15944+ }
15945+ }
15946+
15947+ /* connection reusing? compare with the saved password instead of asking the backend */
15948+ if (reauth)
15949+ {
15950+ if ((ntohl(size) - 4) != backend->pwd_size)
15951+ {
15952+#ifdef PRINT_DEBUG
15953+ show_debug("%s; password size does not match in re-authetication",func);
15954+#endif
15955+ return -1;
15956+ }
15957+
15958+ if (memcmp(password, backend->password, backend->pwd_size) != 0)
15959+ {
15960+#ifdef PRINT_DEBUG
15961+ show_debug("%s; password does not match in re-authetication",func);
15962+#endif
15963+ return -1;
15964+ }
15965+
15966+ return 0;
15967+ }
15968+
15969+ /* send password packet to backend */
15970+ if (protoMajor == PROTO_MAJOR_V3)
15971+ pool_write(backend, "p", 1);
15972+ pool_write(backend, &size, sizeof(size)); /* forwarded in the same network byte order it was received in */
15973+ pool_write_and_flush(backend, password, ntohl(size) -4);
15974+ if (pool_read(backend, &response, sizeof(response)))
15975+ {
15976+ show_error("%s: failed to read authentication response",func);
15977+ return -1;
15978+ }
15979+
15980+ if (response != 'R')
15981+ {
15982+#ifdef PRINT_DEBUG
15983+ show_debug("%s: backend does not return R while processing clear text password authentication",func);
15984+#endif
15985+ return -1;
15986+ }
15987+
15988+ if (protoMajor == PROTO_MAJOR_V3)
15989+ {
15990+ if (pool_read(backend, &len, sizeof(len)))
15991+ {
15992+ show_error("%s: failed to read authentication packet size",func);
15993+ return -1;
15994+ }
15995+
15996+ if (ntohl(len) != 8)
15997+ {
15998+ show_error("%s: incorrect authentication packet size (%d)",func, ntohl(len));
15999+ return -1;
16000+ }
16001+ }
16002+
16003+ /* expect to read "Authentication OK" response. kind should be 0... */
16004+ if (pool_read(backend, &kind, sizeof(kind)))
16005+ {
16006+#ifdef PRINT_DEBUG
16007+ show_debug("%s: failed to read Authentication OK response",func);
16008+#endif
16009+ return -1;
16010+ }
16011+
16012+ /* if authenticated, save info for later re-authentication */
16013+ if (!reauth && kind == 0)
16014+ {
16015+ if (!backend->issecondary_backend && protoMajor == PROTO_MAJOR_V3) /* tell the frontend only once, from the master call */
16016+ {
16017+ int msglen;
16018+
16019+ pool_write(frontend, "R", 1);
16020+ msglen = htonl(8);
16021+ pool_write(frontend, &msglen, sizeof(msglen));
16022+ msglen = htonl(0);
16023+ if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
16024+ {
16025+ return -1;
16026+ }
16027+ }
16028+
16029+ backend->auth_kind = 3;
16030+ backend->pwd_size = ntohl(size) - 4;
16031+ memcpy(backend->password, password, backend->pwd_size);
16032+ }
16033+ return kind; /* returned as read from the backend (not passed through ntohl); 0 means authenticated */
16034+}
16035+
16036+/*
16037+ * Perform crypt authentication. Returns 0 when the backend accepts the password (or, on reauth, when it equals the saved one), -1 on error, otherwise the auth code exactly as read from the backend.
16038+ */
16039+static int do_crypt(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor)
16040+{
16041+ char * func = "do_crypt()";
16042+ char salt[2];
16043+ static int size; /* static: the values read during the master call are reused when called for the secondary */
16044+ static char password[MAX_PASSWORD_SIZE];
16045+ char response;
16046+ int kind;
16047+ int len;
16048+
16049+ if (!reauth)
16050+ {
16051+ /* read salt from the backend */
16052+ if (pool_read(backend, salt, sizeof(salt)))
16053+ {
16054+ show_error("%s: failed to read salt",func);
16055+ return -1;
16056+ }
16057+ }
16058+ else
16059+ {
16060+ memcpy(salt, backend->salt, sizeof(salt)); /* replay the salt saved at first authentication */
16061+ }
16062+
16063+ /* master? only then is the frontend asked for the password */
16064+ if (!backend->issecondary_backend)
16065+ {
16066+ pool_write(frontend, "R", 1); /* authentication */
16067+ if (protoMajor == PROTO_MAJOR_V3)
16068+ {
16069+ len = htonl(10); /* 4 (length) + 4 (auth code) + 2 (salt) */
16070+ pool_write(frontend, &len, sizeof(len));
16071+ }
16072+ kind = htonl(4); /* crypt authentication */
16073+ pool_write(frontend, &kind, sizeof(kind)); /* indicating crypt authentication */
16074+ pool_write_and_flush(frontend, salt, sizeof(salt)); /* salt */
16075+
16076+ /* read password packet */
16077+ if (protoMajor == PROTO_MAJOR_V2)
16078+ {
16079+ if (pool_read(frontend, &size, sizeof(size)))
16080+ {
16081+ show_error("%s: failed to read password packet size",func);
16082+ return -1;
16083+ }
16084+ }
16085+ else
16086+ {
16087+ char k;
16088+
16089+ if (pool_read(frontend, &k, sizeof(k)))
16090+ {
16091+ show_error("%s: failed to read password packet",func);
16092+ return -1;
16093+ }
16094+ if (k != 'p')
16095+ {
16096+ show_error("%s: password packet does not start with \"p\"",func);
16097+ return -1;
16098+ }
16099+ if (pool_read(frontend, &size, sizeof(size)))
16100+ {
16101+ show_error("%s: failed to read password packet size",func);
16102+ return -1;
16103+ }
16104+ }
16105+
16106+ if ((ntohl(size) - 4) > sizeof(password)) /* size is kept in network order; the payload is size minus the 4-byte length word */
16107+ {
16108+ show_error("%s: password is too long(size: %d)", func,ntohl(size) - 4);
16109+ return -1;
16110+ }
16111+
16112+ if (pool_read(frontend, password, ntohl(size) - 4))
16113+ {
16114+ show_error("%s: failed to read password (size: %d)", func,ntohl(size) - 4);
16115+ return -1;
16116+ }
16117+ }
16118+
16119+ /* connection reusing? compare with the saved password instead of asking the backend */
16120+ if (reauth)
16121+ {
16122+#ifdef PRINT_DEBUG
16123+ show_debug("%s:size: %d saved_size: %d",func, (ntohl(size) - 4), backend->pwd_size);
16124+#endif
16125+ if ((ntohl(size) - 4) != backend->pwd_size)
16126+ {
16127+#ifdef PRINT_DEBUG
16128+ show_debug("%s: password size does not match in re-authetication",func);
16129+#endif
16130+ return -1;
16131+ }
16132+
16133+ if (memcmp(password, backend->password, backend->pwd_size) != 0)
16134+ {
16135+#ifdef PRINT_DEBUG
16136+ show_debug("%s: password does not match in re-authetication",func);
16137+#endif
16138+ return -1;
16139+ }
16140+
16141+ return 0;
16142+ }
16143+
16144+ /* send password packet to backend */
16145+ if (protoMajor == PROTO_MAJOR_V3)
16146+ pool_write(backend, "p", 1);
16147+ pool_write(backend, &size, sizeof(size)); /* forwarded in the same network byte order it was received in */
16148+ pool_write_and_flush(backend, password, ntohl(size) -4);
16149+ if (pool_read(backend, &response, sizeof(response)))
16150+ {
16151+ show_error("%s: failed to read authentication response",func);
16152+ return -1;
16153+ }
16154+
16155+ if (response != 'R')
16156+ {
16157+#ifdef PRINT_DEBUG
16158+ show_debug("%s: backend does not return R while processing crypt authentication(%02x) secondary: %d",func, response, backend->issecondary_backend);
16159+#endif
16160+ return -1;
16161+ }
16162+
16163+ if (protoMajor == PROTO_MAJOR_V3)
16164+ {
16165+ if (pool_read(backend, &len, sizeof(len)))
16166+ {
16167+ show_error("%s: failed to read authentication packet size",func);
16168+ return -1;
16169+ }
16170+
16171+ if (ntohl(len) != 8)
16172+ {
16173+ show_error("%s: incorrect authentication packet size (%d)",func, ntohl(len));
16174+ return -1;
16175+ }
16176+ }
16177+
16178+ /* expect to read "Authentication OK" response. kind should be 0... */
16179+ if (pool_read(backend, &kind, sizeof(kind)))
16180+ {
16181+#ifdef PRINT_DEBUG
16182+ show_debug("%s: failed to read Authentication OK response",func);
16183+#endif
16184+ return -1;
16185+ }
16186+
16187+ /* if authenticated, save info for later re-authentication */
16188+ if (!reauth && kind == 0)
16189+ {
16190+ if (!backend->issecondary_backend && protoMajor == PROTO_MAJOR_V3) /* tell the frontend only once, from the master call (as do_clear_text_password does) */
16191+ {
16192+ int msglen;
16193+
16194+ pool_write(frontend, "R", 1);
16195+ msglen = htonl(8);
16196+ pool_write(frontend, &msglen, sizeof(msglen));
16197+ msglen = htonl(0);
16198+ if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
16199+ {
16200+ return -1;
16201+ }
16202+ }
16203+ backend->auth_kind = 4;
16204+ backend->pwd_size = ntohl(size) - 4;
16205+ memcpy(backend->password, password, backend->pwd_size);
16206+ memcpy(backend->salt, salt, sizeof(salt));
16207+ }
16208+ return kind; /* returned as read from the backend (not passed through ntohl); 0 means authenticated */
16209+}
16210+
16211+/*
16212+ * Perform MD5 authentication. Returns 0 when the backend accepts the password (or, on reauth, when it equals the saved one), -1 on error, otherwise the auth code exactly as read from the backend.
16213+ */
16214+static int do_md5(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor)
16215+{
16216+ char * func = "do_md5()";
16217+ char salt[4];
16218+ static int size; /* static: the values read during the master call are reused when called for the secondary */
16219+ static char password[MAX_PASSWORD_SIZE];
16220+ char response;
16221+ int kind;
16222+ int len;
16223+
16224+ if (!reauth)
16225+ {
16226+ /* read salt from the backend */
16227+ if (pool_read(backend, salt, sizeof(salt)))
16228+ {
16229+ show_error("%s: failed to read salt",func);
16230+ return -1;
16231+ }
16232+ }
16233+ else
16234+ {
16235+ memcpy(salt, backend->salt, sizeof(salt)); /* replay the salt saved at first authentication */
16236+ }
16237+
16238+ /* master? only then is the frontend asked for the password */
16239+ if (!backend->issecondary_backend)
16240+ {
16241+ pool_write(frontend, "R", 1); /* authentication */
16242+ if (protoMajor == PROTO_MAJOR_V3)
16243+ {
16244+ len = htonl(12); /* 4 (length) + 4 (auth code) + 4 (salt) */
16245+ pool_write(frontend, &len, sizeof(len));
16246+ }
16247+ kind = htonl(5);
16248+ pool_write(frontend, &kind, sizeof(kind)); /* indicating MD5 */
16249+ pool_write_and_flush(frontend, salt, sizeof(salt)); /* salt */
16250+
16251+ /* read password packet */
16252+ if (protoMajor == PROTO_MAJOR_V2)
16253+ {
16254+ if (pool_read(frontend, &size, sizeof(size)))
16255+ {
16256+ show_error("%s: failed to read password packet size",func);
16257+ return -1;
16258+ }
16259+ }
16260+ else
16261+ {
16262+ char k;
16263+
16264+ if (pool_read(frontend, &k, sizeof(k)))
16265+ {
16266+ show_error("%s: failed to read password packet \"p\"",func);
16267+ return -1;
16268+ }
16269+ if (k != 'p')
16270+ {
16271+ show_error("%s: password packet does not start with \"p\"",func);
16272+ return -1;
16273+ }
16274+ if (pool_read(frontend, &size, sizeof(size)))
16275+ {
16276+ show_error("%s: failed to read password packet size",func);
16277+ return -1;
16278+ }
16279+ }
16280+
16281+ if ((ntohl(size) - 4) > sizeof(password)) /* size is kept in network order; the payload is size minus the 4-byte length word */
16282+ {
16283+ show_error("%s: password is too long(size: %d)",func, ntohl(size) - 4);
16284+ return -1;
16285+ }
16286+
16287+ if (pool_read(frontend, password, ntohl(size) - 4))
16288+ {
16289+ show_error("%s: failed to read password (size: %d)",func, ntohl(size) - 4);
16290+ return -1;
16291+ }
16292+ }
16293+
16294+ /* connection reusing? compare with the saved password instead of asking the backend */
16295+ if (reauth)
16296+ {
16297+ if ((ntohl(size) - 4) != backend->pwd_size)
16298+ {
16299+#ifdef PRINT_DEBUG
16300+ show_debug("%s; password size does not match in re-authetication",func);
16301+#endif
16302+ return -1;
16303+ }
16304+
16305+ if (memcmp(password, backend->password, backend->pwd_size) != 0)
16306+ {
16307+#ifdef PRINT_DEBUG
16308+ show_debug("%s; password does not match in re-authetication",func);
16309+#endif
16310+ return -1;
16311+ }
16312+
16313+ return 0;
16314+ }
16315+
16316+ /* send password packet to backend */
16317+ if (protoMajor == PROTO_MAJOR_V3)
16318+ pool_write(backend, "p", 1);
16319+ pool_write(backend, &size, sizeof(size)); /* forwarded in the same network byte order it was received in */
16320+ pool_write_and_flush(backend, password, ntohl(size) -4);
16321+ if (pool_read(backend, &response, sizeof(response)))
16322+ {
16323+ show_error("%s: failed to read authentication response",func);
16324+ return -1;
16325+ }
16326+
16327+ if (response != 'R')
16328+ {
16329+#ifdef PRINT_DEBUG
16330+ show_debug("%s: backend does not return R while processing MD5 authentication %c", func,response);
16331+#endif
16332+ return -1;
16333+ }
16334+
16335+ if (protoMajor == PROTO_MAJOR_V3)
16336+ {
16337+ if (pool_read(backend, &len, sizeof(len)))
16338+ {
16339+ show_error("%s: failed to read authentication packet size",func);
16340+ return -1;
16341+ }
16342+
16343+ if (ntohl(len) != 8)
16344+ {
16345+ show_error("%s: incorrect authentication packet size (%d)",func, ntohl(len));
16346+ return -1;
16347+ }
16348+ }
16349+
16350+ /* expect to read "Authentication OK" response. kind should be 0... */
16351+ if (pool_read(backend, &kind, sizeof(kind)))
16352+ {
16353+#ifdef PRINT_DEBUG
16354+ show_debug("%s: failed to read Authentication OK response",func);
16355+#endif
16356+ return -1;
16357+ }
16358+
16359+ /* if authenticated, save info for later re-authentication */
16360+ if (!reauth && kind == 0)
16361+ {
16362+ if (!backend->issecondary_backend && protoMajor == PROTO_MAJOR_V3) /* tell the frontend only once, from the master call (as do_clear_text_password does) */
16363+ {
16364+ int msglen;
16365+
16366+ pool_write(frontend, "R", 1);
16367+ msglen = htonl(8);
16368+ pool_write(frontend, &msglen, sizeof(msglen));
16369+ msglen = htonl(0);
16370+ if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
16371+ {
16372+ return -1;
16373+ }
16374+ }
16375+ backend->auth_kind = 5;
16376+ backend->pwd_size = ntohl(size) - 4;
16377+ memcpy(backend->password, password, backend->pwd_size);
16378+ memcpy(backend->salt, salt, sizeof(salt));
16379+ }
16380+ return kind; /* returned as read from the backend (not passed through ntohl); 0 means authenticated */
16381+}
16382+
16383+/*
16384+ * Read the 4-byte message length (V3 only) from the master and, in replication mode, from the secondary too. Returns the length in host byte order, or -1 on read error, mismatch between backends, or negative length.
16385+ */
16386+int pool_read_message_length(POOL_CONNECTION_POOL *cp)
16387+{
16388+ char * func = "pool_read_message_length()";
16389+ int status;
16390+ int length, length1; /* length: master, length1: secondary */
16391+
16392+ status = pool_read(MASTER(cp), &length, sizeof(length));
16393+ if (status < 0)
16394+ {
16395+ show_error("%s: error while reading message length",func);
16396+ return -1;
16397+ }
16398+ length = ntohl(length);
16399+
16400+ if (REPLICATION)
16401+ {
16402+ status = pool_read(SECONDARY(cp), &length1, sizeof(length1));
16403+ if (status < 0)
16404+ {
16405+ show_error("%s: error while reading message length from secondary backend",func);
16406+ return -1;
16407+ }
16408+ length1 = ntohl(length1);
16409+
16410+ if (length != length1) /* both backends must report the same length */
16411+ {
16412+ show_error("%s: length does not match between backends master(%d) secondary(%d)",
16413+ func,length, length1);
16414+ return -1;
16415+ }
16416+ }
16417+
16418+ if (length < 0)
16419+ {
16420+ show_error("%s:read_message_length: invalid message length (%d)", func, length);
16421+ return -1;
16422+ }
16423+
16424+ return length;
16425+}
16426+
16427+signed char pool_read_kind(POOL_CONNECTION_POOL *cp) /* read the 1-byte message kind from the master (and the secondary in replication mode); returns it, or -1 on read error or mismatch */
16428+{
16429+ char * func = "pool_read_kind()";
16430+ int status;
16431+ char kind, kind1; /* kind: master, kind1: secondary */
16432+
16433+ status = pool_read(MASTER(cp), &kind, sizeof(kind));
16434+ if (status < 0)
16435+ {
16436+ show_error("%s:read_message_kind: error while reading message kind",func);
16437+ return -1;
16438+ }
16439+
16440+ if (REPLICATION)
16441+ {
16442+ status = pool_read(SECONDARY(cp), &kind1, sizeof(kind1));
16443+ if (status < 0)
16444+ {
16445+ show_error("%s: error while reading message kind from secondary backend",func);
16446+ return -1;
16447+ }
16448+
16449+ if (kind != kind1) /* both backends must send the same message kind */
16450+ {
16451+ show_error("%s: kind does not match between backends master(%d) secondary(%d)",
16452+ func, kind, kind1);
16453+ return -1;
16454+ }
16455+ }
16456+
16457+ return kind;
16458+}
16459diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pool_connection_pool.c pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_connection_pool.c
16460--- postgresql-8.2.4/src/pgcluster/pglb/pool_connection_pool.c 1970-01-01 01:00:00.000000000 +0100
16461+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_connection_pool.c 2007-02-18 22:52:17.000000000 +0100
16462@@ -0,0 +1,535 @@
16463+/*--------------------------------------------------------------------
16464+ * FILE:
16465+ * pool_connection_pool.c
16466+ *
16467+ * NOTE:
16468+ * connection pool stuff
16469+ *
16470+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
16471+ * Portions Copyright (c) 2003-2006, Tatsuo Ishii
16472+ *--------------------------------------------------------------------
16473+ */
16474+/*
16475+ * Permission to use, copy, modify, and distribute this software and
16476+ * its documentation for any purpose and without fee is hereby
16477+ * granted, provided that the above copyright notice appear in all
16478+ * copies and that both that copyright notice and this permission
16479+ * notice appear in supporting documentation, and that the name of the
16480+ * author not be used in advertising or publicity pertaining to
16481+ * distribution of the software without specific, written prior
16482+ * permission. The author makes no representations about the
16483+ * suitability of this software for any purpose. It is provided "as
16484+ * is" without express or implied warranty.
16485+ *
16486+ */
16487+#include "postgres.h"
16488+#include <sys/types.h>
16489+#include <sys/socket.h>
16490+#include <sys/time.h>
16491+#include <sys/un.h>
16492+#include <arpa/inet.h>
16493+#include <netdb.h>
16494+#include <stdio.h>
16495+#include <stdlib.h>
16496+#include <errno.h>
16497+#include <signal.h>
16498+#include <string.h>
16499+#include <unistd.h>
16500+#include <time.h>
16501+
16502+#ifdef HAVE_NETINET_TCP_H
16503+#include <netinet/tcp.h>
16504+#endif
16505+
16506+#include "replicate_com.h"
16507+#include "pglb.h"
16508+
16509+POOL_CONNECTION_POOL *pool_connection_pool; /* connection pool */
16510+
16511+int pool_init_cp(void);
16512+POOL_CONNECTION_POOL *pool_get_cp(char *user, char *database, int protoMajor);
16513+void pool_discard_cp(char *user, char *database, int protoMajor);
16514+POOL_CONNECTION_POOL *pool_create_cp(void);
16515+void pool_connection_pool_timer(POOL_CONNECTION_POOL *backend);
16516+void pool_backend_timer_handler(int sig);
16517+int connect_inet_domain_socket(int secondary_backend);
16518+int connect_unix_domain_socket(int secondary_backend);
16519+char PGRis_same_host(char * host1, char * host2);
16520+void pool_finish(void);
16521+
16522+
16523+static POOL_CONNECTION_POOL_SLOT *create_cp(POOL_CONNECTION_POOL_SLOT *cp, int secondary_backend);
16524+static POOL_CONNECTION_POOL *new_connection(POOL_CONNECTION_POOL *p);
16525+
16526+
16527+
16528+/*
16529+* initialize connection pools. this should be called once at the startup.
16530+*/
16531+int pool_init_cp(void)
16532+{
16533+ char * func = "pool_init_cp()";
16534+ pool_connection_pool = (POOL_CONNECTION_POOL *)malloc(sizeof(POOL_CONNECTION_POOL)*Max_Pool);
16535+ if (pool_connection_pool == NULL)
16536+ {
16537+ show_error("%s: malloc() failed[%s]",func,strerror(errno));
16538+ return -1;
16539+ }
16540+ memset(pool_connection_pool, 0, sizeof(POOL_CONNECTION_POOL)*Max_Pool);
16541+
16542+ return 0;
16543+}
16544+
16545+/*
16546+* find connection by user and database
16547+*/
16548+POOL_CONNECTION_POOL *pool_get_cp(char *user, char *database, int protoMajor)
16549+{
16550+ char * func = "pool_get_cp()";
16551+ int i;
16552+
16553+ POOL_CONNECTION_POOL *p = pool_connection_pool;
16554+
16555+ if (p == NULL)
16556+ {
16557+ show_error("%s: pool_connection_pool is not initialized",func);
16558+ return NULL;
16559+ }
16560+
16561+ for (i=0;i<Max_Pool;i++)
16562+ {
16563+ if (MASTER_CONNECTION(p) &&
16564+ MASTER_CONNECTION(p)->sp->major == protoMajor &&
16565+ MASTER_CONNECTION(p)->sp->user != NULL &&
16566+ strcmp(MASTER_CONNECTION(p)->sp->user, user) == 0 &&
16567+ strcmp(MASTER_CONNECTION(p)->sp->database, database) == 0)
16568+ {
16569+ /* mark this connection is under use */
16570+ MASTER_CONNECTION(p)->closetime = 0;
16571+ return p;
16572+ }
16573+ p++;
16574+ }
16575+ return NULL;
16576+}
16577+
16578+/*
16579+ * disconnect and release a connection to the database
16580+ */
16581+void pool_discard_cp(char *user, char *database, int protoMajor)
16582+{
16583+ char * func = "pool_discard_cp()";
16584+ POOL_CONNECTION_POOL *p = pool_get_cp(user, database, protoMajor);
16585+
16586+ if (p == NULL)
16587+ {
16588+ show_error("%s: cannot get connection pool for user %s datbase %s", func,user, database);
16589+ return;
16590+ }
16591+
16592+ free(MASTER_CONNECTION(p)->sp->user);
16593+ free(MASTER_CONNECTION(p)->sp->database);
16594+ free(MASTER_CONNECTION(p)->sp->startup_packet);
16595+ pool_close(MASTER_CONNECTION(p)->con);
16596+
16597+ memset(p, 0, sizeof(POOL_CONNECTION_POOL));
16598+}
16599+
16600+
16601+/*
16602+* create a connection pool by user and database
16603+*/
16604+POOL_CONNECTION_POOL *pool_create_cp(void)
16605+{
16606+ char * func = "pool_create_cp()";
16607+ int i;
16608+ time_t closetime;
16609+ POOL_CONNECTION_POOL *oldestp;
16610+
16611+ POOL_CONNECTION_POOL *p = pool_connection_pool;
16612+
16613+ if (p == NULL)
16614+ {
16615+ show_error("%s: pool_connection_pool is not initialized",func);
16616+ return NULL;
16617+ }
16618+
16619+ for (i=0; i<Max_Pool; i++)
16620+ {
16621+ if (MASTER_CONNECTION(p) == NULL)
16622+ return new_connection(p);
16623+ p++;
16624+ }
16625+
16626+#ifdef PRINT_DEBUG
16627+ show_debug("%s:no empty connection slot was found",func);
16628+#endif
16629+
16630+ /*
16631+ * no empty connection slot was found. look for the oldest connection and discard it.
16632+ */
16633+ oldestp = p = pool_connection_pool;
16634+ closetime = MASTER_CONNECTION(p)->closetime;
16635+ for (i=0; i<Max_Pool; i++)
16636+ {
16637+#ifdef PRINT_DEBUG
16638+ show_debug("%s:user: %s database: %s closetime: %d",
16639+ func,
16640+ MASTER_CONNECTION(p)->sp->user,
16641+ MASTER_CONNECTION(p)->sp->database,
16642+ MASTER_CONNECTION(p)->closetime);
16643+#endif
16644+ if (MASTER_CONNECTION(p)->closetime < closetime)
16645+ {
16646+ closetime = MASTER_CONNECTION(p)->closetime;
16647+ oldestp = p;
16648+ }
16649+ p++;
16650+ }
16651+
16652+ p = oldestp;
16653+ pool_send_frontend_exits(p);
16654+
16655+#ifdef PRINT_DEBUG
16656+ show_debug("%s:discarding old %d th connection. user: %s database: %s",
16657+ func,
16658+ oldestp - pool_connection_pool,
16659+ MASTER_CONNECTION(p)->sp->user,
16660+ MASTER_CONNECTION(p)->sp->database);
16661+#endif
16662+
16663+ free(MASTER_CONNECTION(p)->sp->user);
16664+ free(MASTER_CONNECTION(p)->sp->database);
16665+ free(MASTER_CONNECTION(p)->sp->startup_packet);
16666+ pool_close(MASTER_CONNECTION(p)->con);
16667+
16668+ memset(p, 0, sizeof(POOL_CONNECTION_POOL));
16669+
16670+ return new_connection(p);
16671+}
16672+
16673+/*
16674+ * set backend connection close timer
16675+ */
16676+void pool_connection_pool_timer(POOL_CONNECTION_POOL *backend)
16677+{
16678+#ifdef PRINT_DEBUG
16679+ char * func = "pool_connection_pool_timer()";
16680+#endif
16681+ POOL_CONNECTION_POOL *p = pool_connection_pool;
16682+ int i;
16683+
16684+#ifdef PRINT_DEBUG
16685+ show_debug("%s:pool_connection_pool_timer: called",func);
16686+#endif
16687+
16688+ MASTER_CONNECTION(backend)->closetime = time(NULL); /* set connection close time */
16689+
16690+ if (Connection_Life_Time == 0)
16691+ return;
16692+
16693+ /* look for any other timeout */
16694+ for (i=0;i<Max_Pool;i++, p++)
16695+ {
16696+ if (!MASTER_CONNECTION(p))
16697+ continue;
16698+ if (MASTER_CONNECTION(p)->sp->user == NULL)
16699+ continue;
16700+
16701+ if (p != backend && MASTER_CONNECTION(p)->closetime)
16702+ return;
16703+ }
16704+
16705+ /* no other timer found. set my timer */
16706+#ifdef PRINT_DEBUG
16707+ show_debug("%s: set alarm after %d seconds",func, Connection_Life_Time);
16708+#endif
16709+ signal(SIGALRM, pool_backend_timer_handler);
16710+ alarm(Connection_Life_Time);
16711+}
16712+
16713+/*
16714+ * backend connection close timer handler
16715+ */
16716+void pool_backend_timer_handler(int sig)
16717+{
16718+#define TMINTMAX 0x7fffffff
16719+
16720+#ifdef PRINT_DEBUG
16721+ char * func = "pool_backend_timer_handler()";
16722+#endif
16723+ POOL_CONNECTION_POOL *p = pool_connection_pool;
16724+ int i;
16725+ time_t now;
16726+ time_t nearest = TMINTMAX;
16727+
16728+ now = time(NULL);
16729+
16730+#ifdef PRINT_DEBUG
16731+ show_debug("%s:called at %d", func,now);
16732+#endif
16733+
16734+ for (i=0;i<Max_Pool;i++, p++)
16735+ {
16736+ if (!MASTER_CONNECTION(p))
16737+ continue;
16738+ if (MASTER_CONNECTION(p)->sp->user == NULL)
16739+ continue;
16740+
16741+ /* timer expire? */
16742+ if (MASTER_CONNECTION(p)->closetime)
16743+ {
16744+#ifdef PRINT_DEBUG
16745+ show_debug("%s: expire time: %d",
16746+ func,
16747+ MASTER_CONNECTION(p)->closetime+Connection_Life_Time);
16748+#endif
16749+
16750+ if (now >= (MASTER_CONNECTION(p)->closetime+Connection_Life_Time))
16751+ {
16752+ /* discard expired connection */
16753+#ifdef PRINT_DEBUG
16754+ show_debug("%s: expires user %s database %s", func, MASTER_CONNECTION(p)->sp->user, MASTER_CONNECTION(p)->sp->database);
16755+#endif
16756+
16757+ pool_send_frontend_exits(p);
16758+
16759+ free(MASTER_CONNECTION(p)->sp->user);
16760+ free(MASTER_CONNECTION(p)->sp->database);
16761+ free(MASTER_CONNECTION(p)->sp->startup_packet);
16762+ pool_close(MASTER_CONNECTION(p)->con);
16763+
16764+ memset(p, 0, sizeof(POOL_CONNECTION_POOL));
16765+ }
16766+ else
16767+ {
16768+ /* look for nearest timer */
16769+ if (MASTER_CONNECTION(p)->closetime < nearest)
16770+ nearest = MASTER_CONNECTION(p)->closetime;
16771+ }
16772+ }
16773+ }
16774+
16775+ /* any remaining timer */
16776+ if (nearest != TMINTMAX)
16777+ {
16778+ nearest = Connection_Life_Time - (now - nearest);
16779+ if (nearest <= 0)
16780+ nearest = 1;
16781+ signal(SIGALRM, pool_backend_timer_handler);
16782+ alarm(nearest);
16783+ }
16784+}
16785+
16786+int connect_inet_domain_socket(int secondary_backend)
16787+{
16788+ char * func = "connect_inet_domain_socket()";
16789+ int fd;
16790+ int len;
16791+ int on = 1;
16792+ struct sockaddr_in addr;
16793+ struct hostent *hp;
16794+
16795+ fd = socket(AF_INET, SOCK_STREAM, 0);
16796+ if (fd < 0)
16797+ {
16798+ show_error("%s: socket() failed: %s",func, strerror(errno));
16799+ return -1;
16800+ }
16801+
16802+ /* set nodelay */
16803+ if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
16804+ (char *) &on,
16805+ sizeof(on)) < 0)
16806+ {
16807+ show_error("%s: setsockopt() failed: %s", func, strerror(errno));
16808+ close(fd);
16809+ return -1;
16810+ }
16811+
16812+ memset((char *) &addr, 0, sizeof(addr));
16813+ ((struct sockaddr *)&addr)->sa_family = AF_INET;
16814+
16815+ addr.sin_port = htons(CurrentCluster->port);
16816+ len = sizeof(struct sockaddr_in);
16817+
16818+ hp = gethostbyname(CurrentCluster->hostName);
16819+
16820+ if ((hp == NULL) || (hp->h_addrtype != AF_INET))
16821+ {
16822+ show_error("%s: gethostbyname() failed: %s host: %s",func, strerror(errno), CurrentCluster->hostName);
16823+ close(fd);
16824+ return -1;
16825+ }
16826+ memmove((char *) &(addr.sin_addr),
16827+ (char *) hp->h_addr,
16828+ hp->h_length);
16829+
16830+ if (connect(fd, (struct sockaddr *)&addr, len) < 0)
16831+ {
16832+ show_error("%s: connect() failed: %s",func,strerror(errno));
16833+ close(fd);
16834+ return -1;
16835+ }
16836+ return fd;
16837+}
16838+
16839+int connect_unix_domain_socket(int secondary_backend)
16840+{
16841+ char * func = "connect_unix_domain_socket()";
16842+ struct sockaddr_un addr;
16843+ int fd;
16844+ int len;
16845+ int port;
16846+
16847+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
16848+ if (fd == -1)
16849+ {
16850+ show_error("%s: setsockopt() failed: %s", func,strerror(errno));
16851+ return -1;
16852+ }
16853+
16854+ port = CurrentCluster->port;
16855+ memset((char *) &addr, 0, sizeof(addr));
16856+ ((struct sockaddr *)&addr)->sa_family = AF_UNIX;
16857+ snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/.s.PGSQL.%d",
16858+ Backend_Socket_Dir,
16859+ CurrentCluster->port);
16860+#ifdef PRINT_DEBUG
16861+ show_debug("%s:postmaster Unix domain socket: %s",func, addr.sun_path);
16862+#endif
16863+
16864+ len = sizeof(struct sockaddr_un);
16865+
16866+ if (connect(fd, (struct sockaddr *)&addr, len) < 0)
16867+ {
16868+ show_error("%s: connect() failed: %s",func, strerror(errno));
16869+ close(fd);
16870+ return -1;
16871+ }
16872+#ifdef PRINT_DEBUG
16873+ show_debug("%s:connected to postmaster Unix domain socket: %s fd: %d", func,addr.sun_path, fd);
16874+#endif
16875+ return fd;
16876+}
16877+
16878+static POOL_CONNECTION_POOL_SLOT *create_cp(POOL_CONNECTION_POOL_SLOT *cp, int secondary_backend)
16879+{
16880+ char * func = "create_cp()";
16881+ int fd;
16882+ char hostName[HOSTNAME_MAX_LENGTH];
16883+
16884+ if (gethostname(hostName,sizeof(hostName)) < 0)
16885+ {
16886+ show_error("%s:gethostname() failed. (%s)",func,strerror(errno));
16887+ return NULL;
16888+ }
16889+ if (PGRis_same_host(hostName,CurrentCluster->hostName) == 1)
16890+ {
16891+#ifdef PRINT_DEBUG
16892+ show_debug("%s:[%s] [%s] is same",func,hostName,CurrentCluster->hostName);
16893+#endif
16894+ fd = connect_unix_domain_socket(secondary_backend);
16895+ }
16896+ else
16897+ {
16898+ fd = connect_inet_domain_socket(secondary_backend);
16899+ }
16900+
16901+ if (fd < 0)
16902+ {
16903+ /* fatal error, notice to parent and exit */
16904+ notice_backend_error();
16905+ exit(1);
16906+ }
16907+
16908+ cp->con = pool_open(fd);
16909+ cp->closetime = 0;
16910+ return cp;
16911+}
16912+
16913+static POOL_CONNECTION_POOL *new_connection(POOL_CONNECTION_POOL *p)
16914+{
16915+ char * func = "new_connection()";
16916+ /* create master connection */
16917+ MASTER_CONNECTION(p) = malloc(sizeof(POOL_CONNECTION_POOL_SLOT));
16918+ if (MASTER_CONNECTION(p) == NULL)
16919+ {
16920+ show_error("%s: malloc() failed [%s]",func,strerror(errno));
16921+ return NULL;
16922+ }
16923+ create_cp(MASTER_CONNECTION(p), 0);
16924+
16925+ /* initialize Paramter Status save structure */
16926+ if (pool_init_params(&MASTER(p)->params))
16927+ {
16928+ return NULL;
16929+ }
16930+ p->num = 1; /* number of slots */
16931+
16932+ return p;
16933+}
16934+
/*
 * Tell whether two host names resolve to the same address according to
 * PGRget_ip_by_name().
 *
 * Returns 1 when both names yield the same value, 0 when they differ or
 * when either name is NULL.
 */
char PGRis_same_host(char * host1, char * host2)
{
	unsigned int first_ip;
	unsigned int second_ip;

	if (host1 == NULL || host2 == NULL)
		return 0;

	first_ip = PGRget_ip_by_name(host1);
	second_ip = PGRget_ip_by_name(host2);

	return (first_ip == second_ip) ? 1 : 0;
}
16951+
/*
 * Tear down the connection pool table.
 *
 * NOTE(review): this function is currently a no-op.  The original body
 * started with an unconditional "return;", which made everything after
 * it unreachable; that behaviour is preserved.  The disabled teardown is
 * kept under "#if 0" so it no longer produces unreachable-code and
 * unused-variable diagnostics.  It now also skips empty pool entries,
 * which the original loop would have dereferenced.  Whether the pool
 * should really be released here needs to be decided by the maintainers
 * before the block is re-enabled.
 */
void pool_finish(void)
{
#if 0
	char	   *func = "pool_finish()";
	int			i;
	POOL_CONNECTION_POOL *p = pool_connection_pool;

	if (p == NULL)
	{
		show_error("%s:pool_connection_pool is not initialized",func);
		return;
	}

	for (i = 0; i < Max_Pool; i++, p++)
	{
		/* empty entries have no slot to close */
		if (MASTER_CONNECTION(p) != NULL &&
			MASTER_CONNECTION(p)->con != NULL)
		{
			pool_close(MASTER_CONNECTION(p)->con);
			MASTER_CONNECTION(p)->con = NULL;
		}
		memset(p, 0, sizeof(POOL_CONNECTION_POOL));
	}

	free((char *) pool_connection_pool);
	pool_connection_pool = NULL;
#endif
}
16997+
16998diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pool_params.c pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_params.c
16999--- postgresql-8.2.4/src/pgcluster/pglb/pool_params.c 1970-01-01 01:00:00.000000000 +0100
17000+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_params.c 2007-02-18 22:52:17.000000000 +0100
17001@@ -0,0 +1,184 @@
17002+/*--------------------------------------------------------------------
17003+ * FILE:
17004+ * pool_params.c
17005+ *
17006+ * NOTE:
17007+ * connection pool stuff
17008+ *
17009+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
17010+ * Portions Copyright (c) 2003-2006, Tatsuo Ishii
17011+ *--------------------------------------------------------------------
17012+ */
17013+/*
17014+ * Permission to use, copy, modify, and distribute this software and
17015+ * its documentation for any purpose and without fee is hereby
17016+ * granted, provided that the above copyright notice appear in all
17017+ * copies and that both that copyright notice and this permission
17018+ * notice appear in supporting documentation, and that the name of the
17019+ * author not be used in advertising or publicity pertaining to
17020+ * distribution of the software without specific, written prior
17021+ * permission. The author makes no representations about the
17022+ * suitability of this software for any purpose. It is provided "as
17023+ * is" without express or implied warranty.
17024+ *
17025+ */
17026+
17027+#include <stdio.h>
17028+#include <sys/time.h>
17029+#include <time.h>
17030+#include <stdlib.h>
17031+#include <string.h>
17032+
17033+#ifdef HAVE_NETINET_TCP_H
17034+#include <netinet/tcp.h>
17035+#endif
17036+
17037+#include "replicate_com.h"
17038+#include "pglb.h"
17039+
17040+#define MAX_PARAM_ITEMS 128
17041+
17042+int pool_init_params(ParamStatus *params);
17043+void pool_discard_params(ParamStatus *params);
17044+char *pool_find_name(ParamStatus *params, char *name, int *pos);
17045+int pool_get_param(ParamStatus *params, int index, char **name, char **value);
17046+int pool_add_param(ParamStatus *params, char *name, char *value);
17047+void pool_param_debug_print(ParamStatus *params);
17048+
17049+/*
17050+ * initialize parameter structure
17051+ */
17052+int pool_init_params(ParamStatus *params)
17053+{
17054+ char * func = "pool_init_params()";
17055+
17056+ params->num = 0;
17057+ params->names = malloc(MAX_PARAM_ITEMS*sizeof(char *));
17058+ if (params->names == NULL)
17059+ {
17060+ show_error("%s: cannot allocate memory",func);
17061+ return -1;
17062+ }
17063+ params->values = malloc(MAX_PARAM_ITEMS*sizeof(char *));
17064+ if (params->values == NULL)
17065+ {
17066+ show_error("%s: cannot allocate memory",func);
17067+ return -1;
17068+ }
17069+ return 0;
17070+}
17071+
17072+/*
17073+ * discard parameter structure
17074+ */
17075+void pool_discard_params(ParamStatus *params)
17076+{
17077+ int i;
17078+
17079+ for (i=0;i<params->num;i++)
17080+ {
17081+ free(params->names[i]);
17082+ free(params->values[i]);
17083+ }
17084+ free(params->names);
17085+ free(params->values);
17086+}
17087+
17088+/*
17089+ * find param value by name. if found, its value is returned
17090+ * also, pos is set
17091+ * if not found, NULL is returned
17092+ */
17093+char *pool_find_name(ParamStatus *params, char *name, int *pos)
17094+{
17095+ int i;
17096+
17097+ for (i=0;i<params->num;i++)
17098+ {
17099+ if (!strcmp(name, params->names[i]))
17100+ {
17101+ *pos = i;
17102+ return params->values[i];
17103+ }
17104+ }
17105+ return NULL;
17106+}
17107+
17108+/*
17109+ * return name and value by index.
17110+ */
17111+int pool_get_param(ParamStatus *params, int index, char **name, char **value)
17112+{
17113+ if (index < 0 || index >= params->num)
17114+ return -1;
17115+
17116+ *name = params->names[index];
17117+ *value = params->values[index];
17118+
17119+ return 0;
17120+}
17121+
17122+/*
17123+ * add or replace name/value pair
17124+ */
17125+int pool_add_param(ParamStatus *params, char *name, char *value)
17126+{
17127+ char * func = "pool_add_param()";
17128+ int pos;
17129+
17130+ if (pool_find_name(params, name, &pos))
17131+ {
17132+ /* name already exists */
17133+ if (strlen(params->values[pos]) < strlen(value))
17134+ {
17135+ params->values[pos] = realloc(params->values[pos], strlen(value) + 1);
17136+ if (params->values[pos] == NULL)
17137+ {
17138+ show_error("%s: cannot allocate memory",func);
17139+ return -1;
17140+ }
17141+ }
17142+ strcpy(params->values[pos], value);
17143+ }
17144+ else
17145+ {
17146+ int num;
17147+
17148+ /* add name/value pair */
17149+ if (params->num >= MAX_PARAM_ITEMS)
17150+ {
17151+ show_error("%s: no more room for num",func);
17152+ return -1;
17153+ }
17154+ num = params->num;
17155+ params->names[num] = strdup(name);
17156+ if (params->names[num] == NULL)
17157+ {
17158+ show_error("%s: cannot allocate memory",func);
17159+ return -1;
17160+ }
17161+ params->values[num] = strdup(value);
17162+ if (params->values[num] == NULL)
17163+ {
17164+ show_error("%s: cannot allocate memory",func);
17165+ return -1;
17166+ }
17167+ params->num++;
17168+ }
17169+ return 0;
17170+}
17171+
17172+void pool_param_debug_print(ParamStatus *params)
17173+{
17174+#ifdef PRINT_DEBUG
17175+ char * func = "pool_param_debug_print()";
17176+#endif
17177+ int i;
17178+
17179+ for (i=0;i<params->num;i++)
17180+ {
17181+#ifdef PRINT_DEBUG
17182+ show_debug("%s: No.%d: name: %s value: %s",func, i, params->names[i], params->values[i]);
17183+#endif
17184+ }
17185+}
17186diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pool_process_query.c pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_process_query.c
17187--- postgresql-8.2.4/src/pgcluster/pglb/pool_process_query.c 1970-01-01 01:00:00.000000000 +0100
17188+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_process_query.c 2007-02-18 22:52:17.000000000 +0100
17189@@ -0,0 +1,2100 @@
17190+/*--------------------------------------------------------------------
17191+ * FILE:
17192+ * pool_process_query.c
17193+ *
17194+ * NOTE:
17195+ * query processing stuff
17196+ *
17197+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
17198+ * Portions Copyright (c) 2003-2006, Tatsuo Ishii
17199+ *--------------------------------------------------------------------
17200+ */
17201+/*
17202+ * Permission to use, copy, modify, and distribute this software and
17203+ * its documentation for any purpose and without fee is hereby
17204+ * granted, provided that the above copyright notice appear in all
17205+ * copies and that both that copyright notice and this permission
17206+ * notice appear in supporting documentation, and that the name of the
17207+ * author not be used in advertising or publicity pertaining to
17208+ * distribution of the software without specific, written prior
17209+ * permission. The author makes no representations about the
17210+ * suitability of this software for any purpose. It is provided "as
17211+ * is" without express or implied warranty.
17212+ *
17213+*/
17214+#include <errno.h>
17215+#include <sys/types.h>
17216+#include <sys/time.h>
17217+#include <arpa/inet.h>
17218+#include <stdlib.h>
17219+#include <unistd.h>
17220+#include <string.h>
17221+#include <netinet/in.h>
17222+
17223+#include "postgres_fe.h"
17224+#include "libpq/pqcomm.h"
17225+
17226+#include "replicate_com.h"
17227+#include "pglb.h"
17228+
17229+POOL_STATUS pool_process_query(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, int connection_reuse);
17230+POOL_STATUS ErrorResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17231+void pool_enable_timeout(void);
17232+void pool_disable_timeout(void);
17233+int pool_check_fd(POOL_CONNECTION *cp, int notimeout);
17234+void pool_send_frontend_exits(POOL_CONNECTION_POOL *backend);
17235+POOL_STATUS SimpleForwardToFrontend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17236+POOL_STATUS SimpleForwardToBackend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17237+POOL_STATUS ParameterStatus(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17238+void pool_send_error_message(POOL_CONNECTION *frontend, int protoMajor, char *code, char *message, char *detail, char *hint, char *file, int line);
17239+
17240+
17241+static POOL_STATUS Query(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, char *query);
17242+static POOL_STATUS ReadyForQuery(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, int send_ready);
17243+static POOL_STATUS CompleteCommandResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17244+static int RowDescription(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17245+static POOL_STATUS AsciiRow(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, short num_fields);
17246+static POOL_STATUS BinaryRow(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, short num_fields);
17247+static POOL_STATUS CursorResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17248+static POOL_STATUS NoticeResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17249+static POOL_STATUS CopyInResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17250+static POOL_STATUS CopyOutResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17251+static POOL_STATUS CopyDataRows(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, int copyin);
17252+static POOL_STATUS EmptyQueryResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17253+static POOL_STATUS NotificationResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17254+static POOL_STATUS FunctionCall(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17255+static POOL_STATUS FunctionResultResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17256+static POOL_STATUS ProcessFrontendResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17257+static int synchronize(POOL_CONNECTION *cp);
17258+static void process_reporting(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
17259+static int reset_backend(POOL_CONNECTION_POOL *backend, int qcnt);
17260+static int load_balance_enabled(POOL_CONNECTION_POOL *backend, char *sql);
17261+static void start_load_balance(POOL_CONNECTION_POOL *backend);
17262+static void end_load_balance(POOL_CONNECTION_POOL *backend);
17263+
17264+static POOL_CONNECTION_POOL_SLOT *slots[MAX_CONNECTION_SLOTS];
17265+
17266+POOL_STATUS pool_process_query(POOL_CONNECTION *frontend,
17267+ POOL_CONNECTION_POOL *backend,
17268+ int connection_reuse)
17269+{
17270+ char * func = "pool_process_query()";
17271+ char kind, kind1; /* packet kind (backend) */
17272+ char fkind; /* packet kind (frontend) */
17273+ short num_fields = 0;
17274+ fd_set readmask;
17275+ fd_set writemask;
17276+ fd_set exceptmask;
17277+ int fds;
17278+ POOL_STATUS status;
17279+ int state; /* 0: ok to issue commands 1: waiting for "ready for query" response */
17280+ int qcnt;
17281+
17282+ frontend->no_forward = connection_reuse;
17283+ qcnt = 0;
17284+ state = 0;
17285+
17286+ for (;;)
17287+ {
17288+ kind = kind1 = 0;
17289+ fkind = 0;
17290+
17291+ if (state == 0 && connection_reuse)
17292+ {
17293+ int st = 0;
17294+
17295+ /* send query for resetting connection such as "ROLLBACK" "RESET ALL"... */
17296+ st = reset_backend(backend, qcnt);
17297+
17298+ if (st < 0) /* error? */
17299+ return POOL_END;
17300+
17301+ else if (st == 0) /* no query issued? */
17302+ {
17303+ qcnt++;
17304+ continue;
17305+ }
17306+
17307+ else if (st == 1) /* more query remains */
17308+ {
17309+ state = 1;
17310+ qcnt++;
17311+ continue;
17312+ }
17313+
17314+ else if (st == 2) /* no more qury */
17315+ {
17316+ frontend->no_forward = 0;
17317+ return POOL_CONTINUE;
17318+ }
17319+
17320+ }
17321+
17322+ if ((!REPLICATION && MASTER(backend)->len == 0 && frontend->len == 0) ||
17323+ (REPLICATION && MASTER(backend)->len == 0 &&
17324+ SECONDARY(backend)->len == 0
17325+ && frontend->len == 0))
17326+ {
17327+
17328+ struct timeval timeout;
17329+
17330+ timeout.tv_sec = 1;
17331+ timeout.tv_usec = 0;
17332+
17333+ FD_ZERO(&readmask);
17334+ FD_ZERO(&writemask);
17335+ FD_ZERO(&exceptmask);
17336+ if (!connection_reuse)
17337+ FD_SET(frontend->fd, &readmask);
17338+ FD_SET(MASTER(backend)->fd, &readmask);
17339+ if (REPLICATION)
17340+ FD_SET(SECONDARY(backend)->fd, &readmask);
17341+ if (!connection_reuse)
17342+ FD_SET(frontend->fd, &exceptmask);
17343+ FD_SET(MASTER(backend)->fd, &exceptmask);
17344+
17345+ if (connection_reuse)
17346+ {
17347+ if (REPLICATION)
17348+ fds = select(Max(SECONDARY(backend)->fd, MASTER(backend)->fd) + 1,
17349+ &readmask, &writemask, &exceptmask, NULL);
17350+ else
17351+ fds = select(MASTER(backend)->fd+1, &readmask, &writemask, &exceptmask, NULL);
17352+ }
17353+ else
17354+ {
17355+ if (REPLICATION)
17356+ fds = select(Max(SECONDARY(backend)->fd,
17357+ Max(frontend->fd, MASTER(backend)->fd)+1),
17358+ &readmask, &writemask, &exceptmask, NULL);
17359+ else
17360+ fds = select(Max(frontend->fd, MASTER(backend)->fd)+1,
17361+ &readmask, &writemask, &exceptmask, NULL);
17362+ }
17363+
17364+ if (fds == -1)
17365+ {
17366+ if (errno == EINTR)
17367+ continue;
17368+
17369+ show_error("%s:select() failed. reason: %s",func, strerror(errno));
17370+ return POOL_ERROR;
17371+ }
17372+
17373+ if (fds == 0)
17374+ {
17375+ return POOL_CONTINUE;
17376+ }
17377+
17378+ if (FD_ISSET(MASTER(backend)->fd, &readmask))
17379+ {
17380+ pool_read(MASTER(backend), &kind, 1);
17381+#ifdef PRINT_DEBUG
17382+ show_debug("%s:read kind from backend %c", func,kind);
17383+#endif
17384+ }
17385+
17386+ if (REPLICATION && FD_ISSET(SECONDARY(backend)->fd, &readmask))
17387+ {
17388+ pool_read(SECONDARY(backend), &kind1, 1);
17389+#ifdef PRINT_DEBUG
17390+ show_debug("%s:read kind from secondary backend %c", func,kind1);
17391+#endif
17392+ }
17393+
17394+ if (!connection_reuse && FD_ISSET(frontend->fd, &exceptmask))
17395+ {
17396+ return POOL_END;
17397+ }
17398+ if (FD_ISSET(MASTER(backend)->fd, &exceptmask))
17399+ {
17400+ return POOL_ERROR;
17401+ }
17402+
17403+ if (!connection_reuse && FD_ISSET(frontend->fd, &readmask))
17404+ {
17405+ status = ProcessFrontendResponse(frontend, backend);
17406+ if (status != POOL_CONTINUE)
17407+ return status;
17408+
17409+ continue;
17410+ }
17411+ }
17412+ else
17413+ {
17414+ if (MASTER(backend)->len > 0)
17415+ {
17416+ pool_read(MASTER(backend), &kind, 1);
17417+ if (REPLICATION)
17418+ {
17419+ pool_read(SECONDARY(backend), &kind1, 1);
17420+ if (kind == '\0' || kind != kind1)
17421+ {
17422+ show_error("%s: kind does not match between backends master(%c) secondary(%c)",
17423+ func, kind, kind1);
17424+ pool_send_error_message(frontend, MAJOR(backend), "XX000",
17425+ "kind mismatch between backends", "",
17426+ "check data consistency between master and secondary", __FILE__, __LINE__);
17427+
17428+ if (pool_config_replication_stop_on_mismatch)
17429+ return POOL_FATAL;
17430+ else
17431+ return POOL_ERROR;
17432+ }
17433+ }
17434+#ifdef PRINT_DEBUG
17435+ show_debug("%s:read kind from backend pending data %c len: %d po: %d", func, kind, MASTER(backend)->len, MASTER(backend)->po);
17436+#endif
17437+ }
17438+ if (frontend->len > 0)
17439+ {
17440+ status = ProcessFrontendResponse(frontend, backend);
17441+ if (status != POOL_CONTINUE)
17442+ return status;
17443+
17444+ continue;
17445+ }
17446+ }
17447+
17448+ /* this is the synchronous point */
17449+ if (REPLICATION)
17450+ {
17451+ if (kind == 0)
17452+ {
17453+ pool_read(MASTER(backend), &kind, 1);
17454+ }
17455+ if (kind1 == 0)
17456+ {
17457+ pool_read(SECONDARY(backend), &kind1, 1);
17458+ }
17459+ if (kind == '\0' || kind != kind1)
17460+ {
17461+ show_error("%s: kind does not match between backends master(%c) secondary(%c)",
17462+ func, kind, kind1);
17463+ pool_send_error_message(frontend, MAJOR(backend), "XX000",
17464+ "kind mismatch between backends", "",
17465+ "check data consistency between master and secondary", __FILE__, __LINE__);
17466+
17467+ if (pool_config_replication_stop_on_mismatch)
17468+ return POOL_FATAL;
17469+ else
17470+ return POOL_ERROR;
17471+ }
17472+ }
17473+
17474+ /*
17475+ * Prrocess backend Response
17476+ */
17477+
17478+ if (MAJOR(backend) == PROTO_MAJOR_V3)
17479+ {
17480+ switch (kind)
17481+ {
17482+ case 'G':
17483+ /* CopyIn response */
17484+ status = CopyInResponse(frontend, backend);
17485+ break;
17486+ case 'S':
17487+ /* Parameter Status */
17488+ status = ParameterStatus(frontend, backend);
17489+ break;
17490+ case 'Z':
17491+ /* Ready for query */
17492+ status = ReadyForQuery(frontend, backend, 1);
17493+ break;
17494+ default:
17495+ status = SimpleForwardToFrontend(kind, frontend, backend);
17496+ break;
17497+ }
17498+ }
17499+ else
17500+ {
17501+ switch (kind)
17502+ {
17503+ case 'A':
17504+ /* Notification response */
17505+ status = NotificationResponse(frontend, backend);
17506+ break;
17507+
17508+ case 'B':
17509+ /* BinaryRow */
17510+ status = BinaryRow(frontend, backend, num_fields);
17511+ break;
17512+
17513+ case 'C':
17514+ /* Complete command response */
17515+ status = CompleteCommandResponse(frontend, backend);
17516+ break;
17517+
17518+ case 'D':
17519+ /* AsciiRow */
17520+ status = AsciiRow(frontend, backend, num_fields);
17521+ break;
17522+
17523+ case 'E':
17524+ /* Error Response */
17525+ status = ErrorResponse(frontend, backend);
17526+ break;
17527+
17528+ case 'G':
17529+ /* CopyIn Response */
17530+ status = CopyInResponse(frontend, backend);
17531+ break;
17532+
17533+ case 'H':
17534+ /* CopyOut Response */
17535+ status = CopyOutResponse(frontend, backend);
17536+ break;
17537+
17538+ case 'I':
17539+ /* Empty Query Response */
17540+ status = EmptyQueryResponse(frontend, backend);
17541+ break;
17542+
17543+ case 'N':
17544+ /* Notice Response */
17545+ status = NoticeResponse(frontend, backend);
17546+ break;
17547+
17548+ case 'P':
17549+ /* CursorResponse */
17550+ status = CursorResponse(frontend, backend);
17551+ break;
17552+
17553+ case 'T':
17554+ /* RowDescription */
17555+ status = RowDescription(frontend, backend);
17556+ if (status < 0)
17557+ return POOL_ERROR;
17558+
17559+ num_fields = status;
17560+ status = POOL_CONTINUE;
17561+ break;
17562+
17563+ case 'V':
17564+ /* FunctionResultResponse and FunctionVoidResponse */
17565+ status = FunctionResultResponse(frontend, backend);
17566+ break;
17567+
17568+ case 'Z':
17569+ /* Ready for query */
17570+ status = ReadyForQuery(frontend, backend, 1);
17571+ break;
17572+
17573+ default:
17574+ show_error("%s:Unknown message type %c(%02x)",func, kind, kind);
17575+ exit(1);
17576+ }
17577+ }
17578+
17579+ if (status != POOL_CONTINUE)
17580+ return status;
17581+
17582+ if (kind == 'Z' && frontend->no_forward && state == 1)
17583+ {
17584+ state = 0;
17585+ }
17586+
17587+ }
17588+ return POOL_CONTINUE;
17589+}
17590+
17591+static POOL_STATUS Query(POOL_CONNECTION *frontend,
17592+ POOL_CONNECTION_POOL *backend, char *query)
17593+{ /* Forward one 'Q' message to the master and, under REPLICATION, to the secondary. Uses `query` when non-NULL, otherwise reads the text from `frontend`. Returns POOL_CONTINUE on success, POOL_END on a read/write failure. */
17594+#ifdef PRINT_DEBUG
17595+ char * func = "Query()";
17596+#endif
17597+ char *string;
17598+ int len;
17599+ static char *sq = "show pool_status";
17600+
17601+ if (query == NULL)
17602+ {
17603+ /* read actual query */
17604+ if (MAJOR(backend) == PROTO_MAJOR_V3)
17605+ {
17606+ if (pool_read(frontend, &len, sizeof(len)) < 0)
17607+ return POOL_END;
17608+ len = ntohl(len) - 4; /* the length word is sent as payload + 4 (see sendlen below); keep only the payload size */
17609+ string = pool_read2(frontend, len);
17610+ }
17611+ else
17612+ string = pool_read_string(frontend, &len, 0);
17613+
17614+ if (string == NULL)
17615+ return POOL_END;
17616+ }
17617+ else
17618+ {
17619+ len = strlen(query)+1; /* +1 so the terminating NUL is forwarded as well */
17620+ string = query;
17621+ }
17622+
17623+#ifdef PRINT_DEBUG
17624+ show_debug("%s: %s", func,string);
17625+#endif
17626+
17627+ /* process status reporting? (case-insensitive prefix match on "show pool_status"; answered locally, nothing is sent to the backends) */
17628+ if (strncasecmp(sq, string, strlen(sq)) == 0)
17629+ {
17630+#ifdef PRINT_DEBUG
17631+ show_debug("%s:process reporting",func);
17632+#endif
17633+ process_reporting(frontend, backend);
17634+ return POOL_CONTINUE;
17635+ }
17636+
17637+ /* load balance trick */
17638+ if (load_balance_enabled(backend, string))
17639+ start_load_balance(backend);
17640+
17641+ /* forward the query to the backend */
17642+ pool_write(MASTER(backend), "Q", 1);
17643+
17644+ if (MAJOR(backend) == PROTO_MAJOR_V3)
17645+ {
17646+ int sendlen = htonl(len + 4); /* re-add the 4 bytes of the length word itself, in network byte order */
17647+ pool_write(MASTER(backend), &sendlen, sizeof(sendlen));
17648+ }
17649+
17650+ if (pool_write_and_flush(MASTER(backend), string, len) < 0)
17651+ {
17652+ return POOL_END;
17653+ }
17654+
17655+ if (REPLICATION)
17656+ {
17657+ /* in "strict mode" we need to wait for master completing the query */
17658+ if (pool_config_replication_strict || STRICT_MODE(string))
17659+ if (synchronize(MASTER(backend)))
17660+ return POOL_END;
17661+
17662+ pool_write(SECONDARY(backend), "Q", 1); /* same message, now sent to the secondary */
17663+ if (MAJOR(backend) == PROTO_MAJOR_V3)
17664+ {
17665+ int sendlen = htonl(len + 4);
17666+ pool_write(SECONDARY(backend), &sendlen, sizeof(sendlen));
17667+ }
17668+
17669+ if (pool_write_and_flush(SECONDARY(backend), string, len) < 0)
17670+ {
17671+ return POOL_END;
17672+ }
17673+ }
17674+ return POOL_CONTINUE;
17675+}
17676+
17677+static POOL_STATUS ReadyForQuery(POOL_CONNECTION *frontend,
17678+ POOL_CONNECTION_POOL *backend, int send_ready)
17679+{ /* Handle a 'Z' message: optionally relay it to the frontend (send_ready != 0), end load-balance mode, then process the next frontend message. Returns POOL_END on I/O failure, otherwise the status of ProcessFrontendResponse(). */
17680+#ifdef PRINT_DEBUG
17681+ char * func = "ReadyForQuery()";
17682+#endif
17683+
17684+ pool_flush(frontend); /* push out anything already queued for the frontend; the result is not checked here */
17685+
17686+ if (send_ready)
17687+ {
17688+ pool_write(frontend, "Z", 1);
17689+
17690+ if (MAJOR(backend) == PROTO_MAJOR_V3)
17691+ {
17692+ int len;
17693+ signed char state;
17694+
17695+ if ((len = pool_read_message_length(backend)) < 0)
17696+ return POOL_END;
17697+
17698+#ifdef PRINT_DEBUG
17699+ show_debug("%s: message length: %d", func, len);
17700+#endif
17701+
17702+ len = htonl(len); /* back to network byte order before relaying */
17703+ pool_write(frontend, &len, sizeof(len));
17704+
17705+ state = pool_read_kind(backend); /* one byte carrying the transaction state; a negative value signals failure */
17706+ if (state < 0)
17707+ return POOL_END;
17708+
17709+ /* set transaction state */
17710+#ifdef PRINT_DEBUG
17711+ show_debug("%s: transaction state: %c", func, state);
17712+#endif
17713+ MASTER(backend)->tstate = state;
17714+ if (REPLICATION)
17715+ SECONDARY(backend)->tstate = state;
17716+
17717+ pool_write(frontend, &state, 1);
17718+ }
17719+
17720+ if (pool_flush(frontend))
17721+ return POOL_END;
17722+ }
17723+
17724+ /* end load balance mode */
17725+ if (IN_LOAD_BALANCE)
17726+ end_load_balance(backend);
17727+
17728+ return ProcessFrontendResponse(frontend, backend); /* the backend is ready, so handle the next frontend message now */
17729+}
17730+
17731+static POOL_STATUS CompleteCommandResponse(POOL_CONNECTION *frontend,
17732+ POOL_CONNECTION_POOL *backend)
17733+{ /* Relay a 'C' message: read the command tag from the master (and from the secondary under REPLICATION) and forward the master's tag to the frontend. Returns POOL_CONTINUE, or POOL_END on I/O failure. */
17734+ char * func = "CompleteCommandResponse()";
17735+ char *string, *string1;
17736+ int len, len1;
17737+
17738+ /* read command tag */
17739+ string = pool_read_string(MASTER(backend), &len, 0);
17740+ if (string == NULL)
17741+ return POOL_END;
17742+
17743+ if (REPLICATION)
17744+ {
17745+ string1 = pool_read_string(SECONDARY(backend), &len1, 0);
17746+ if (string1 == NULL)
17747+ return POOL_END;
17748+
17749+ if (len != len1) /* a tag length mismatch is only logged; processing continues with the master's tag */
17750+ {
17751+ show_error("%s: message length does not match between master(%d \"%s\",) and secondary(%d \"%s\",)",
17752+ func, len, string, len1, string1);
17753+ }
17754+ }
17755+
17756+ /* forward to the frontend */
17757+ pool_write(frontend, "C", 1);
17758+#ifdef PRINT_DEBUG
17759+ show_debug("%s: string: \"%s\"",func, string);
17760+#endif
17761+ if (pool_write(frontend, string, len) < 0)
17762+ {
17763+ return POOL_END;
17764+ }
17765+ return POOL_CONTINUE;
17766+}
17767+
17768+static int RowDescription(POOL_CONNECTION *frontend,
17769+ POOL_CONNECTION_POOL *backend)
17770+{ /* Relay a 'T' message: for each field forward the master's name, type oid, size and modifier to the frontend, cross-checking the secondary under REPLICATION. Returns the field count in host byte order, or a POOL_* code on failure. */
17771+ char * func = "RowDescription()";
17772+ short num_fields, num_fields1;
17773+ int oid, mod;
17774+ int oid1, mod1;
17775+ short size, size1;
17776+ char *string, *string1;
17777+ int len, len1;
17778+ int i;
17779+
17780+ /* # of fields (could be 0) */
17781+ pool_read(MASTER(backend), &num_fields, sizeof(short));
17782+ if (REPLICATION)
17783+ {
17784+ pool_read(SECONDARY(backend), &num_fields1, sizeof(short));
17785+ if (num_fields != num_fields1)
17786+ {
17787+ show_error("%s: num_fields deos not match between backends master(%d) and secondary(%d)",
17788+ func, num_fields, num_fields1);
17789+ return POOL_FATAL; /* NOTE(review): the visible caller treats only negative returns as errors; confirm the POOL_* codes returned here are negative */
17790+ }
17791+ }
17792+
17793+ /* forward it to the frontend */
17794+ pool_write(frontend, "T", 1);
17795+ pool_write(frontend, &num_fields, sizeof(short));
17796+
17797+ num_fields = ntohs(num_fields); /* host byte order from here on: loop bound and return value */
17798+ for (i = 0;i<num_fields;i++)
17799+ {
17800+ /* field name */
17801+ string = pool_read_string(MASTER(backend), &len, 0);
17802+ if (string == NULL)
17803+ return POOL_END;
17804+
17805+ if (REPLICATION)
17806+ {
17807+ string1 = pool_read_string(SECONDARY(backend), &len1, 0);
17808+ if (string1 == NULL) /* fixed: check the secondary's result (was re-testing the master's `string`) */
17809+ return POOL_END;
17810+ if (len != len1)
17811+ {
17812+ show_error("%s: field length deos not match between backends master(%d) and secondary(%d)",
17813+ func, len, len1); /* fixed: these lengths are already host-order counts, so log them without ntohl() */
17814+ return POOL_FATAL;
17815+ }
17816+ }
17817+
17818+ pool_write(frontend, string, len);
17819+
17820+ /* oid */
17821+ pool_read(MASTER(backend), &oid, sizeof(int));
17822+ if (REPLICATION)
17823+ {
17824+ pool_read(SECONDARY(backend), &oid1, sizeof(int));
17825+
17826+ /* we do not regard oid mismatch as fatal */
17827+ if (oid != oid1)
17828+ {
17829+ show_error("%s: field oid deos not match between backends master(%d) and secondary(%d)",
17830+ func, ntohl(oid), ntohl(oid1));
17831+ }
17832+ }
17833+ pool_write(frontend, &oid, sizeof(int));
17834+
17835+ /* size */
17836+ pool_read(MASTER(backend), &size, sizeof(short));
17837+ if (REPLICATION)
17838+ {
17839+ pool_read(SECONDARY(backend), &size1, sizeof(short));
17840+ if (size != size1) /* fixed: compare master against secondary (was `size1 != size1`, which is never true) */
17841+ {
17842+ show_error("%s: field size deos not match between backends master(%d) and secondary(%d)",
17843+ func, ntohs(size), ntohs(size1));
17844+ return POOL_FATAL;
17845+ }
17846+ }
17847+#ifdef PRINT_DEBUG
17848+ show_debug("%s: field size:%d", func, ntohs(size));
17849+#endif
17850+ pool_write(frontend, &size, sizeof(short));
17851+
17852+ /* modifier (a mismatch is only logged, like the oid) */
17853+ pool_read(MASTER(backend), &mod, sizeof(int));
17854+ if (REPLICATION)
17855+ {
17856+ pool_read(SECONDARY(backend), &mod1, sizeof(int));
17857+ if (mod != mod1)
17858+ {
17859+ show_error("%s: modifier deos not match between backends master(%d) and secondary(%d)",
17860+ func, ntohl(mod), ntohl(mod1));
17861+ }
17862+ }
17863+ pool_write(frontend, &mod, sizeof(int));
17864+ }
17865+
17866+ return num_fields;
17867+}
17868+
17869+static POOL_STATUS AsciiRow(POOL_CONNECTION *frontend,
17870+ POOL_CONNECTION_POOL *backend,
17871+ short num_fields)
17872+{ /* Relay a 'D' row: forward the master's NULL map and every non-NULL field to the frontend; under REPLICATION the secondary's copy is read and discarded. Returns POOL_CONTINUE, or POOL_END on I/O failure. */
17873+ char * func = "AsciiRow()";
17874+ static char nullmap[8192], nullmap1[8192];
17875+ int nbytes;
17876+ int i;
17877+ unsigned char mask;
17878+ int size, size1;
17879+ char *buf;
17880+ char msgbuf[1024];
17881+
17882+ pool_write(frontend, "D", 1);
17883+
17884+ nbytes = (num_fields + 7)/8; /* one bit per field, rounded up to whole bytes */
17885+
17886+ if (nbytes <= 0)
17887+ return POOL_CONTINUE;
17888+
17889+ /* NULL map (fixed: a failed read from the master is now detected, as it already was for the secondary) */
17890+ if (pool_read(MASTER(backend), nullmap, nbytes) < 0 ||
17891+ pool_write(frontend, nullmap, nbytes) < 0)
17892+ return POOL_END;
17893+
17894+ if (REPLICATION)
17895+ {
17896+ if (pool_read(SECONDARY(backend), nullmap1, nbytes) < 0)
17897+ return POOL_END;
17898+
17899+ if (memcmp(nullmap, nullmap1, nbytes))
17900+ {
17901+ /* XXX: NULLMAP maybe different among
17902+ backends. If we were a paranoid, we have to treat
17903+ this as a fatal error. However in the real world
17904+ we'd better to adapt this situation. Just throw a
17905+ log... */
17906+ show_error("%s: NULLMAP differ between master and secondary",func);
17907+ }
17908+ }
17909+
17910+ mask = 0;
17911+
17912+ for (i = 0;i<num_fields;i++)
17913+ {
17914+ if (mask == 0) /* restart at the top bit for the first field and after every 8th field */
17915+ mask = 0x80;
17916+
17917+ /* NOT NULL? */
17918+ if (mask & nullmap[i/8])
17919+ {
17920+ /* field size */
17921+ if (pool_read(MASTER(backend), &size, sizeof(int)) < 0)
17922+ return POOL_END;
17923+ }
17924+
17925+ if (REPLICATION && (mask & nullmap1[i/8]))
17926+ {
17927+ /* XXX: field size maybe different among
17928+ backends. If we were a paranoid, we have to treat
17929+ this as a fatal error. However in the real world
17930+ we'd better to adapt this situation. Just throw a
17931+ log... */
17932+
17933+ if (pool_read(SECONDARY(backend), &size1, sizeof(int)) < 0)
17934+ return POOL_END;
17935+
17936+ if (size != size1)
17937+ show_error("%s: %d th field size does not match between master(%d) and secondary(%d)",
17938+ func, i, ntohl(size), ntohl(size1));
17939+ size1 = ntohl(size1) - 4; /* payload bytes: the size word counts its own 4 bytes */
17940+ }
17941+
17942+ buf = NULL;
17943+
17944+ if (mask & nullmap[i/8])
17945+ {
17946+ /* forward to frontend */
17947+ pool_write(frontend, &size, sizeof(int));
17948+ size = ntohl(size) - 4;
17949+
17950+ /* read and send actual data only when size > 0 */
17951+ if (size > 0)
17952+ {
17953+ buf = pool_read2(MASTER(backend), size);
17954+ if (buf == NULL)
17955+ return POOL_END;
17956+ }
17957+ }
17958+
17959+ if (REPLICATION && (mask & nullmap1[i/8]) && size1 > 0) /* fixed: test the null bit first so size1 is never read before it has been set */
17960+ {
17961+ /* read and discard secondary data */
17962+ if (pool_read2(SECONDARY(backend), size1) == NULL)
17963+ return POOL_END;
17964+ }
17965+
17966+ if (buf)
17967+ {
17968+ pool_write(frontend, buf, size);
17969+ snprintf(msgbuf, sizeof(msgbuf), "%.*s", size, buf); /* fixed: buf is length-counted, so bound the read with a precision instead of a bare %s */
17970+#ifdef PRINT_DEBUG
17971+ show_debug("%s: len: %d data: %s", func, size, msgbuf);
17972+#endif
17973+ }
17974+
17975+ mask >>= 1;
17976+ }
17977+
17978+ return POOL_CONTINUE;
17979+}
17980+
17981+static POOL_STATUS BinaryRow(POOL_CONNECTION *frontend,
17982+ POOL_CONNECTION_POOL *backend,
17983+ short num_fields)
17984+{ /* Relay a 'B' row: forward the master's NULL map and every non-NULL field to the frontend; under REPLICATION the secondary's copy is read and discarded. Returns POOL_CONTINUE, or POOL_END on I/O failure. */
17985+ char * func = "BinaryRow()";
17986+ static char nullmap[8192], nullmap1[8192];
17987+ int nbytes;
17988+ int i;
17989+ unsigned char mask;
17990+ int size, size1;
17991+ char *buf;
17992+
17993+ pool_write(frontend, "B", 1);
17994+
17995+ nbytes = (num_fields + 7)/8; /* one bit per field, rounded up to whole bytes */
17996+
17997+ if (nbytes <= 0)
17998+ return POOL_CONTINUE;
17999+
18000+ /* NULL map (fixed: a failed read from the master is now detected, as it already was for the secondary) */
18001+ if (pool_read(MASTER(backend), nullmap, nbytes) < 0 ||
18002+ pool_write(frontend, nullmap, nbytes) < 0)
18003+ return POOL_END;
18004+
18005+ if (REPLICATION)
18006+ {
18007+ if (pool_read(SECONDARY(backend), nullmap1, nbytes) < 0)
18008+ return POOL_END;
18009+
18010+ if (memcmp(nullmap, nullmap1, nbytes))
18011+ {
18012+ /* XXX: NULLMAP maybe different among
18013+ backends. If we were a paranoid, we have to treat
18014+ this as a fatal error. However in the real world
18015+ we'd better to adapt this situation. Just throw a
18016+ log... */
18017+ show_error("%s: NULLMAP differ between master and secondary",func);
18018+ }
18019+ }
18020+
18021+ mask = 0;
18022+
18023+ for (i = 0;i<num_fields;i++)
18024+ {
18025+ if (mask == 0) /* restart at the top bit for the first field and after every 8th field */
18026+ mask = 0x80;
18027+
18028+ /* NOT NULL? */
18029+ if (mask & nullmap[i/8])
18030+ {
18031+ /* field size */
18032+ if (pool_read(MASTER(backend), &size, sizeof(int)) < 0)
18033+ return POOL_END;
18034+ }
18035+
18036+ if (REPLICATION && (mask & nullmap1[i/8]))
18037+ {
18038+ /* XXX: field size maybe different among
18039+ backends. If we were a paranoid, we have to treat
18040+ this as a fatal error. However in the real world
18041+ we'd better to adapt this situation. Just throw a
18042+ log... */
18043+
18044+ if (pool_read(SECONDARY(backend), &size1, sizeof(int)) < 0)
18045+ return POOL_END;
18046+
18047+ if (size != size1)
18048+ show_error("%s: %d th field size does not match between master(%d) and secondary(%d)",
18049+ func, i, ntohl(size), ntohl(size1));
18050+ size1 = ntohl(size1) - 4; /* payload bytes: the size word counts its own 4 bytes */
18051+ }
18052+
18053+ buf = NULL;
18054+
18055+ if (mask & nullmap[i/8])
18056+ {
18057+ /* forward to frontend */
18058+ pool_write(frontend, &size, sizeof(int));
18059+ size = ntohl(size) - 4;
18060+
18061+ /* read and send actual data only when size > 0 */
18062+ if (size > 0)
18063+ {
18064+ buf = pool_read2(MASTER(backend), size);
18065+ if (buf == NULL)
18066+ return POOL_END;
18067+ }
18068+ }
18069+
18070+ if (REPLICATION && (mask & nullmap1[i/8]) && size1 > 0) /* fixed: test the null bit first so size1 is never read before it has been set */
18071+ {
18072+ /* read and discard secondary data */
18073+ if (pool_read2(SECONDARY(backend), size1) == NULL)
18074+ return POOL_END;
18075+ }
18076+
18077+ if (buf)
18078+ pool_write(frontend, buf, size);
18079+
18080+ mask >>= 1;
18081+ }
18082+ return POOL_CONTINUE;
18083+}
18084+
18085+static POOL_STATUS CursorResponse(POOL_CONNECTION *frontend,
18086+ POOL_CONNECTION_POOL *backend)
18087+{ /* Relay a 'P' message: read the cursor name from the master (and the secondary under REPLICATION) and forward the master's name to the frontend. Returns POOL_CONTINUE, or POOL_END on I/O failure or a name-length mismatch. */
18088+ char * func = "CursorResponse()";
18089+ char *string, *string1;
18090+ int len, len1;
18091+
18092+ /* read cursor name */
18093+ string = pool_read_string(MASTER(backend), &len, 0);
18094+ if (string == NULL)
18095+ return POOL_END;
18096+ if (REPLICATION)
18097+ {
18098+ string1 = pool_read_string(SECONDARY(backend), &len1, 0);
18099+ if (string1 == NULL)
18100+ return POOL_END;
18101+ if (len != len1) /* unlike CompleteCommandResponse(), a length mismatch here ends the session */
18102+ {
18103+ show_error("%s: length does not match between master(%d) and secondary(%d)",
18104+ func, len, len1);
18105+ show_error("%s: master(%s) secondary(%s)", func, string, string1);
18106+ return POOL_END;
18107+ }
18108+ }
18109+
18110+ /* forward to the frontend */
18111+ pool_write(frontend, "P", 1);
18112+ if (pool_write(frontend, string, len) < 0)
18113+ {
18114+ return POOL_END;
18115+ }
18116+ return POOL_CONTINUE;
18117+}
18118+
18119+POOL_STATUS ErrorResponse(POOL_CONNECTION *frontend,
18120+ POOL_CONNECTION_POOL *backend)
18121+{ /* Relay an 'E' message: read the error text from the master (and drain the secondary's copy under REPLICATION), then forward the master's text to the frontend and flush. Returns POOL_CONTINUE, or POOL_END on I/O failure. */
18122+ char *string, *string1;
18123+ int len, len1;
18124+
18125+ /* read error message */
18126+ string = pool_read_string(MASTER(backend), &len, 0);
18127+ if (string == NULL)
18128+ return POOL_END;
18129+ if (REPLICATION)
18130+ {
18131+ string1 = pool_read_string(SECONDARY(backend), &len1, 0); /* fixed: use separate variables so the master's message and length are not overwritten (same pattern as NoticeResponse()) */
18132+ if (string1 == NULL)
18133+ return POOL_END;
18134+ }
18135+
18136+ /* forward to the frontend */
18137+ pool_write(frontend, "E", 1);
18138+ if (pool_write_and_flush(frontend, string, len) < 0)
18139+ return POOL_END;
18140+
18141+ return POOL_CONTINUE;
18142+}
18143+
18144+static POOL_STATUS NoticeResponse(POOL_CONNECTION *frontend,
18145+ POOL_CONNECTION_POOL *backend)
18146+{ /* Relay an 'N' message: forward the master's notice text to the frontend and flush. Returns POOL_CONTINUE, or POOL_END on I/O failure. */
18147+ char *string, *string1;
18148+ int len, len1;
18149+
18150+ /* read notice message */
18151+ string = pool_read_string(MASTER(backend), &len, 0);
18152+ if (string == NULL)
18153+ return POOL_END;
18154+ if (REPLICATION)
18155+ {
18156+ string1 = pool_read_string(SECONDARY(backend), &len1, 0); /* the secondary's copy is consumed but not used afterwards */
18157+ if (string1 == NULL)
18158+ return POOL_END;
18159+ }
18160+
18161+ /* forward to the frontend */
18162+ pool_write(frontend, "N", 1);
18163+ if (pool_write_and_flush(frontend, string, len) < 0)
18164+ {
18165+ return POOL_END;
18166+ }
18167+ return POOL_CONTINUE;
18168+}
18169+
18170+static POOL_STATUS CopyInResponse(POOL_CONNECTION *frontend,
18171+ POOL_CONNECTION_POOL *backend)
18172+{ /* Relay a 'G' message to the frontend, then pump the copy data via CopyDataRows() with copyin = 1. Returns POOL_END if the relay fails, otherwise the status of CopyDataRows(). */
18173+ POOL_STATUS status;
18174+
18175+ /* forward to the frontend */
18176+ if (MAJOR(backend) == PROTO_MAJOR_V3)
18177+ {
18178+ if (SimpleForwardToFrontend('G', frontend, backend) != POOL_CONTINUE)
18179+ return POOL_END;
18180+ if (pool_flush(frontend) != POOL_CONTINUE)
18181+ return POOL_END;
18182+ }
18183+ else
18184+ if (pool_write_and_flush(frontend, "G", 1) < 0) /* non-V3 path: only the kind byte is written */
18185+ return POOL_END;
18186+
18187+ status = CopyDataRows(frontend, backend, 1);
18188+ return status;
18189+}
18190+
18191+static POOL_STATUS CopyOutResponse(POOL_CONNECTION *frontend,
18192+ POOL_CONNECTION_POOL *backend)
18193+{ /* Relay an 'H' message to the frontend, then pump the copy data via CopyDataRows() with copyin = 0. Returns POOL_END if the relay fails, otherwise the status of CopyDataRows(). */
18194+ POOL_STATUS status;
18195+
18196+ /* forward to the frontend */
18197+ if (MAJOR(backend) == PROTO_MAJOR_V3)
18198+ {
18199+ if (SimpleForwardToFrontend('H', frontend, backend) != POOL_CONTINUE)
18200+ return POOL_END;
18201+ if (pool_flush(frontend) != POOL_CONTINUE)
18202+ return POOL_END;
18203+ }
18204+ else
18205+ if (pool_write_and_flush(frontend, "H", 1) < 0) /* non-V3 path: only the kind byte is written */
18206+ return POOL_END;
18207+
18208+ status = CopyDataRows(frontend, backend, 0);
18209+ return status;
18210+}
18211+
18212+static POOL_STATUS CopyDataRows(POOL_CONNECTION *frontend,
18213+ POOL_CONNECTION_POOL *backend, int copyin)
18214+{ /* Pump COPY data until the stream ends: frontend -> backend(s) when copyin is non-zero, master -> frontend otherwise; the receiving side is flushed at the end. Returns POOL_CONTINUE, or POOL_END on I/O failure. */
18215+#ifdef PRINT_DEBUG
18216+ char * func = "CopyDataRows()";
18217+#endif
18218+ char *string;
18219+ int len, len1;
18220+
18221+#ifdef PRINT_DEBUG
18222+ int i = 0;
18223+ char *buf;
18224+#endif
18225+
18226+ for (;;)
18227+ {
18228+ if (copyin)
18229+ {
18230+ if (MAJOR(backend) == PROTO_MAJOR_V3)
18231+ {
18232+ char kind;
18233+ POOL_STATUS status;
18234+
18235+ if (pool_read(frontend, &kind, 1) < 0)
18236+ return POOL_END;
18237+
18238+ status = SimpleForwardToBackend(kind, frontend, backend);
18239+ if (status == POOL_END)
18240+ return status;
18241+
18242+ /* CopyData? ('d' keeps the loop going; any other kind ends the copy) */
18243+ if (kind == 'd')
18244+ continue;
18245+ else
18246+ break;
18247+ }
18248+ else
18249+ {
18250+ string = pool_read_string(frontend, &len, 1);
18251+ if (string == NULL)
18252+ return POOL_END;
18253+ }
18254+ }
18255+ else
18256+ {
18257+ /* CopyOut */
18258+ if (MAJOR(backend) == PROTO_MAJOR_V3)
18259+ {
18260+ signed char kind;
18261+ POOL_STATUS status;
18262+
18263+ if ((kind = pool_read_kind(backend)) < 0)
18264+ return POOL_END;
18265+
18266+ status = SimpleForwardToFrontend(kind, frontend, backend);
18267+ if (status == POOL_END)
18268+ return status;
18269+
18270+ /* CopyData? ('d' keeps the loop going; any other kind ends the copy) */
18271+ if (kind == 'd')
18272+ continue;
18273+ else
18274+ break;
18275+ }
18276+ else
18277+ {
18278+ string = pool_read_string(MASTER(backend), &len, 1);
18279+ if (string != NULL && REPLICATION && pool_read_string(SECONDARY(backend), &len1, 1) == NULL)
18280+ string = NULL; /* fixed: a failure on either backend now ends the copy, and the master's line (not the secondary's) is what gets forwarded */
18281+ }
18282+ }
18283+
18284+ if (string == NULL)
18285+ return POOL_END;
18286+
18287+#ifdef PRINT_DEBUG
18288+ buf = malloc(len + 1);
18289+ if (buf == NULL)
18290+ {
18291+ show_error("CopyDataRows: malloc failed: %s", strerror(errno));
18292+ return POOL_END;
18293+ }
18294+ strncpy(buf, string, len);
18295+ buf[len] = '\0';
18296+ show_debug("%s: copy line %d %d bytes :%s:",func, i++, len, buf);
18297+ free(buf);
18298+#endif
18299+
18300+ if (copyin)
18301+ {
18302+ pool_write(MASTER(backend), string, len);
18303+ if (REPLICATION)
18304+ pool_write(SECONDARY(backend), string, len);
18305+ }
18306+ else
18307+ pool_write(frontend, string, len);
18308+
18309+ if (len == PROTO_MAJOR_V3) /* NOTE(review): this looks like a length test for the 3-byte terminator below and only works if PROTO_MAJOR_V3 is 3; confirm and consider a literal */
18310+ {
18311+ /* end of copy? */
18312+ if (string[0] == '\\' &&
18313+ string[1] == '.' &&
18314+ string[2] == '\n')
18315+ {
18316+ break;
18317+ }
18318+ }
18319+ }
18320+
18321+ if (copyin)
18322+ {
18323+ if (pool_flush(MASTER(backend)) <0)
18324+ return POOL_END;
18325+ if (REPLICATION)
18326+ {
18327+ if (pool_flush(SECONDARY(backend)) <0)
18328+ return POOL_END;
18329+ }
18330+ }
18331+ else
18332+ if (pool_flush(frontend) <0)
18333+ return POOL_END;
18334+
18335+ return POOL_CONTINUE;
18336+}
18337+
18338+static POOL_STATUS EmptyQueryResponse(POOL_CONNECTION *frontend,
18339+ POOL_CONNECTION_POOL *backend)
18340+{ /* Relay an 'I' message: consume its single trailing byte from the master (and the secondary under REPLICATION) and send 'I' plus one NUL byte to the frontend. Returns POOL_CONTINUE, or POOL_END on I/O failure. */
18341+ char c;
18342+
18343+ if (pool_read(MASTER(backend), &c, sizeof(c)) < 0)
18344+ return POOL_END;
18345+
18346+ if (REPLICATION)
18347+ {
18348+ if (pool_read(SECONDARY(backend), &c, sizeof(c)) < 0)
18349+ return POOL_END;
18350+ }
18351+
18352+ pool_write(frontend, "I", 1);
18353+ return (pool_write_and_flush(frontend, "", 1) < 0) ? POOL_END : POOL_CONTINUE; /* fixed: map the int result to a POOL_STATUS, as the sibling handlers do */
18354+}
18355+
18356+static POOL_STATUS NotificationResponse(POOL_CONNECTION *frontend,
18357+ POOL_CONNECTION_POOL *backend)
18358+{ /* Relay an 'A' message: forward the master's pid and condition string to the frontend; the secondary's copy is read and discarded under REPLICATION. Returns POOL_CONTINUE, POOL_ERROR if a pid read fails, or POOL_END on any other I/O failure. */
18359+ int pid, pid1;
18360+ char *condition, *condition1;
18361+ int len, len1;
18362+
18363+ pool_write(frontend, "A", 1);
18364+
18365+ if (pool_read(MASTER(backend), &pid, sizeof(pid)) < 0)
18366+ return POOL_ERROR;
18367+
18368+ if (REPLICATION)
18369+ {
18370+ if (pool_read(SECONDARY(backend), &pid1, sizeof(pid1)) < 0)
18371+ return POOL_ERROR;
18372+ }
18373+
18374+ condition = pool_read_string(MASTER(backend), &len, 0);
18375+ if (condition == NULL)
18376+ return POOL_END;
18377+ if (REPLICATION)
18378+ {
18379+ condition1 = pool_read_string(SECONDARY(backend), &len1, 0);
18380+ if (condition1 == NULL)
18381+ return POOL_END;
18382+ }
18383+
18384+ pool_write(frontend, &pid, sizeof(pid)); /* forwarded exactly as read; no byte-order conversion is applied */
18385+
18386+ return (pool_write_and_flush(frontend, condition, len) < 0) ? POOL_END : POOL_CONTINUE; /* fixed: map the int result to a POOL_STATUS, as the sibling handlers do */
18387+}
18388+
18389+static POOL_STATUS FunctionCall(POOL_CONNECTION *frontend,
18390+ POOL_CONNECTION_POOL *backend)
18391+{ /* Relay an 'F' message from the frontend to the master (and the secondary under REPLICATION): 2 dummy bytes, function oid, argument count, then each argument's length and value. Returns POOL_CONTINUE, or POOL_ERROR on I/O failure. */
18392+ char dummy[2];
18393+ int oid;
18394+ int argn;
18395+ int i;
18396+
18397+ pool_write(MASTER(backend), "F", 1);
18398+ if (REPLICATION)
18399+ pool_write(SECONDARY(backend), "F", 1);
18400+
18401+ /* dummy */
18402+ if (pool_read(frontend, dummy, sizeof(dummy)) < 0)
18403+ return POOL_ERROR;
18404+ pool_write(MASTER(backend), dummy, sizeof(dummy));
18405+ if (REPLICATION)
18406+ pool_write(SECONDARY(backend), dummy, sizeof(dummy));
18407+
18408+ /* function object id */
18409+ if (pool_read(frontend, &oid, sizeof(oid)) < 0)
18410+ return POOL_ERROR;
18411+
18412+ pool_write(MASTER(backend), &oid, sizeof(oid));
18413+ if (REPLICATION)
18414+ pool_write(SECONDARY(backend), &oid, sizeof(oid));
18415+
18416+ /* number of arguments */
18417+ if (pool_read(frontend, &argn, sizeof(argn)) < 0)
18418+ return POOL_ERROR;
18419+ pool_write(MASTER(backend), &argn, sizeof(argn));
18420+ if (REPLICATION)
18421+ pool_write(SECONDARY(backend), &argn, sizeof(argn));
18422+
18423+ argn = ntohl(argn); /* forwarded as received above; converted to host order only for the loop bound */
18424+
18425+ for (i=0;i<argn;i++)
18426+ {
18427+ int len;
18428+ char *arg;
18429+
18430+ /* length of each argument in bytes */
18431+ if (pool_read(frontend, &len, sizeof(len)) < 0)
18432+ return POOL_ERROR;
18433+
18434+ pool_write(MASTER(backend), &len, sizeof(len));
18435+ if (REPLICATION)
18436+ pool_write(SECONDARY(backend), &len, sizeof(len));
18437+
18438+ len = ntohl(len); /* NOTE(review): len comes from the frontend and is not range-checked before pool_read2() */
18439+
18440+ /* argument value itself */
18441+ if ((arg = pool_read2(frontend, len)) == NULL)
18442+ return POOL_ERROR;
18443+ pool_write(MASTER(backend), arg, len);
18444+ if (REPLICATION)
18445+ pool_write(SECONDARY(backend), arg, len);
18446+ }
18447+
18448+ if (pool_flush(MASTER(backend)))
18449+ return POOL_ERROR;
18450+ if (REPLICATION)
18451+ if (pool_flush(SECONDARY(backend)))
18452+ return POOL_ERROR;
18453+ return POOL_CONTINUE;
18454+}
18455+
18456+static POOL_STATUS FunctionResultResponse(POOL_CONNECTION *frontend,
18457+ POOL_CONNECTION_POOL *backend)
18458+{ /* Relay a 'V' message: forward the result marker and, when it is 'G', the master's length and value, then a closing "0"; the secondary's copy is drained under REPLICATION. Returns POOL_CONTINUE, or POOL_ERROR on I/O failure. */
18459+ char dummy;
18460+ int len, len1;
18461+ char *result;
18462+
18463+ pool_write(frontend, "V", 1);
18464+
18465+ if (pool_read(MASTER(backend), &dummy, 1) < 0)
18466+ return POOL_ERROR;
18467+ if (REPLICATION)
18468+ if (pool_read(SECONDARY(backend), &dummy, 1) < 0) /* NOTE(review): this overwrites the master's marker byte; both backends are presumably expected to send the same one */
18469+ return POOL_ERROR;
18470+
18471+ pool_write(frontend, &dummy, 1);
18472+
18473+ /* non empty result? */
18474+ if (dummy == 'G')
18475+ {
18476+ /* length of result in bytes */
18477+ if (pool_read(MASTER(backend), &len, sizeof(len)) < 0)
18478+ return POOL_ERROR;
18479+ if (REPLICATION)
18480+ if (pool_read(SECONDARY(backend), &len1, sizeof(len1)) < 0) /* fixed: keep the secondary's length separate so the master's is not overwritten */
18481+ return POOL_ERROR;
18482+
18483+ pool_write(frontend, &len, sizeof(len));
18484+
18485+ len = ntohl(len);
18486+
18487+ /* result value itself */
18488+ if ((result = pool_read2(MASTER(backend), len)) == NULL)
18489+ return POOL_ERROR;
18490+ if (REPLICATION)
18491+ if (pool_read2(SECONDARY(backend), ntohl(len1)) == NULL) /* fixed: drain the secondary by its own length instead of reading it into the master's result buffer */
18492+ return POOL_ERROR;
18493+
18494+ pool_write(frontend, result, len);
18495+ }
18496+
18497+ /* unused ('0') */
18498+ if (pool_read(MASTER(backend), &dummy, 1) < 0)
18499+ return POOL_ERROR;
18500+ if (REPLICATION)
18501+ if (pool_read(SECONDARY(backend), &dummy, 1) < 0)
18502+ return POOL_ERROR;
18503+
18504+ pool_write(frontend, "0", 1);
18505+
18506+ return pool_flush(frontend) ? POOL_ERROR : POOL_CONTINUE; /* fixed: map the int result to a POOL_STATUS instead of returning it raw */
18507+}
18508+
18509+static POOL_STATUS ProcessFrontendResponse(POOL_CONNECTION *frontend,
18510+ POOL_CONNECTION_POOL *backend)
18511+{ /* Read one message kind byte from the frontend and dispatch it: 'X' ends the session, 'Q' goes to Query(), anything else is forwarded (V3) or handled as a V2 function call. Returns the handler's status, POOL_END on read failure, or POOL_ERROR for an unknown kind. */
18512+ char * func = "ProcessFrontendResponse()";
18513+ char fkind;
18514+ POOL_STATUS status;
18515+
18516+ if (frontend->len <= 0 && frontend->no_forward != 0) /* nothing pending on the frontend while no_forward is set: do not attempt a read */
18517+ return POOL_CONTINUE;
18518+
18519+ if (pool_read(frontend, &fkind, 1) < 0)
18520+ {
18521+ show_error("%s: failed to read kind",func);
18522+ return POOL_END;
18523+ }
18524+
18525+#ifdef PRINT_DEBUG
18526+ show_debug("%s:read kind from frontend %c(%02x)", func, fkind, fkind);
18527+#endif
18528+
18529+ switch (fkind)
18530+ {
18531+ case 'X':
18532+ if (MAJOR(backend) == PROTO_MAJOR_V3)
18533+ {
18534+ int len;
18535+ pool_read(frontend, &len, sizeof(len)); /* consume the length word; its value and the read result are ignored */
18536+ }
18537+ status = POOL_END;
18538+ break;
18539+
18540+ case 'Q':
18541+ status = Query(frontend, backend, NULL); /* NULL: Query() reads the text from the frontend itself */
18542+ break;
18543+
18544+ default:
18545+ if (MAJOR(backend) == PROTO_MAJOR_V3)
18546+ {
18547+ status = SimpleForwardToBackend(fkind, frontend, backend);
18548+ if (pool_flush(MASTER(backend))) /* a flush failure overrides the forwarding status */
18549+ status = POOL_ERROR;
18550+ if (REPLICATION)
18551+ if (pool_flush(SECONDARY(backend)))
18552+ status = POOL_ERROR;
18553+ }
18554+ else if (MAJOR(backend) == PROTO_MAJOR_V2 && fkind == 'F')
18555+ status = FunctionCall(frontend, backend);
18556+ else
18557+ {
18558+ show_error("%s: unknown message type %c(%02x)", func, fkind, fkind);
18559+ status = POOL_ERROR;
18560+ }
18561+ break;
18562+ }
18563+
18564+ return status;
18565+}
18566+
18567+static int timeoutmsec; /* 0 means no read timeout; only tested for zero / non-zero in pool_check_fd() */
18568+/*
18569+ * enable read timeout: copy pool_config_replication_timeout into timeoutmsec
18570+ */
18571+void pool_enable_timeout(void)
18572+{
18573+ timeoutmsec = pool_config_replication_timeout;
18574+}
18575+
18576+/*
18577+ * disable read timeout: with timeoutmsec at 0, pool_check_fd() blocks without a deadline
18578+ */
18579+void pool_disable_timeout(void)
18580+{
18581+ timeoutmsec = 0;
18582+}
18583+
18584+/*
18585+ * wait until read data is ready on cp, with no timeout (passes notimeout = 1); returns pool_check_fd()'s result: 0 when ready, -1 on failure
18586+ */
18587+static int synchronize(POOL_CONNECTION *cp)
18588+{
18589+ return pool_check_fd(cp, 1);
18590+}
18591+
18592+/*
18593+ * wait until read data is ready on cp->fd; returns 0 when readable, -1 on select() failure, exception or timeout
18594+ * if notimeout is non 0 (or the read timeout is disabled), wait forever; otherwise wait pool_config_replication_timeout milliseconds.
18595+ */
18596+int pool_check_fd(POOL_CONNECTION *cp, int notimeout)
18597+{
18598+ char * func = "pool_check_fd()";
18599+ fd_set readmask;
18600+ fd_set exceptmask;
18601+ int fd;
18602+ int fds;
18603+ struct timeval timeout;
18604+ struct timeval *tp;
18605+
18606+ fd = cp->fd;
18607+
18608+ for (;;)
18609+ {
18610+ FD_ZERO(&readmask);
18611+ FD_ZERO(&exceptmask);
18612+ FD_SET(fd, &readmask);
18613+ FD_SET(fd, &exceptmask);
18614+
18615+ if (notimeout || timeoutmsec == 0)
18616+ tp = NULL;
18617+ else
18618+ {
18619+ timeout.tv_sec = pool_config_replication_timeout / 1000; /* fixed: split the milliseconds so tv_usec stays below one second */
18620+ timeout.tv_usec = (pool_config_replication_timeout % 1000) * 1000;
18621+ tp = &timeout;
18622+ }
18623+
18624+ fds = select(fd+1, &readmask, NULL, &exceptmask, tp);
18625+
18626+ if (fds == -1)
18627+ {
18628+ if (errno == EAGAIN || errno == EINTR) /* transient failure: rebuild the fd sets and timeout, then retry */
18629+ continue;
18630+
18631+ show_error("%s: select() failed. reason %s",func, strerror(errno));
18632+ break;
18633+ }
18634+
18635+ if (FD_ISSET(fd, &exceptmask))
18636+ {
18637+ show_error("%s: exception occurred",func);
18638+ break;
18639+ }
18640+
18641+ if (fds == 0) /* timed out; only reachable when tp is non-NULL */
18642+ {
18643+ show_error("%s: data is not ready tp->tv_sec %ld tp->tp_usec %ld", func, (long) tp->tv_sec, (long) tp->tv_usec); /* fixed: timeval fields are not int, so cast and print with %ld */
18644+ break;
18645+ }
18646+ return 0;
18647+ }
18648+ return -1;
18649+}
18650+/* Reply to the frontend with a three-column result set (item, value, description) listing the pool configuration. */
18651+static void process_reporting(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend)
18652+{
18653+ static char *cursorname = "blank";
18654+ static short num_fields = 3;
18655+ static char *field_names[] = {"item", "value", "description"};
18656+ static int oid = 0;
18657+ static short fsize = -1;
18658+ static int mod = 0;
18659+ short n;
18660+ int i;
18661+ short s;
18662+ int len;
18663+ short colnum;
18664+
18665+ static char nullmap[2] = {0xff, 0xff}; /* bitmap written after "D" in V2 rows; all bits set */
18666+ int nbytes = (num_fields + 7)/8; /* bitmap size: one bit per field, rounded up to whole bytes */
18667+
18668+#define MAXVALLEN 512
18669+
18670+ typedef struct {
18671+ char *name;
18672+ char value[MAXVALLEN+1];
18673+ char *desc;
18674+ } POOL_REPORT_STATUS;
18675+
18676+#define MAXITEMS 128
18677+
18678+ POOL_REPORT_STATUS status[MAXITEMS];
18679+
18680+ short nrows;
18681+ int size;
18682+ int hsize;
18683+
18684+ i = 0;
18685+
18686+ status[i].name = "inetdomain";
18687+ snprintf(status[i].value, MAXVALLEN, "%d", pool_config_inetdomain);
18688+ status[i].desc = "1 if accepting TCP/IP connection";
18689+ i++;
18690+
18691+ status[i].name = "port";
18692+ snprintf(status[i].value, MAXVALLEN, "%d", pool_config_port);
18693+ status[i].desc = "pgpool accepting port number";
18694+ i++;
18695+
18696+ status[i].name = "socket_dir";
18697+ snprintf(status[i].value, MAXVALLEN, "%s", pool_config_socket_dir);
18698+ status[i].desc = "pgpool socket directory";
18699+ i++;
18700+
18701+ status[i].name = "backend_host_name";
18702+ snprintf(status[i].value, MAXVALLEN, "%s", pool_config_backend_host_name);
18703+ status[i].desc = "master backend host name";
18704+ i++;
18705+
18706+ status[i].name = "backend_port";
18707+ snprintf(status[i].value, MAXVALLEN, "%d", pool_config_backend_port);
18708+ status[i].desc = "master backend port number";
18709+ i++;
18710+
18711+ status[i].name = "secondary_backend_host_name";
18712+ snprintf(status[i].value, MAXVALLEN, "%s", pool_config_secondary_backend_host_name);
18713+ status[i].desc = "secondary backend host name";
18714+ i++;
18715+
18716+ status[i].name = "secondary_backend_port";
18717+ snprintf(status[i].value, MAXVALLEN, "%d", pool_config_secondary_backend_port);
18718+ status[i].desc = "secondary backend port number";
18719+ i++;
18720+
18721+ status[i].name = "num_init_children";
18722+ snprintf(status[i].value, MAXVALLEN, "%d", pool_config_num_init_children);
18723+ status[i].desc = "# of children initially pre-forked";
18724+ i++;
18725+
18726+ status[i].name = "child_life_time";
18727+ snprintf(status[i].value, MAXVALLEN, "%d", pool_config_child_life_time);
18728+ status[i].desc = "if idle for this seconds, child exits (not implemented yet)";
18729+ i++;
18730+
18731+ status[i].name = "connection_life_time";
18732+ snprintf(status[i].value, MAXVALLEN, "%d", pool_config_connection_life_time);
18733+ status[i].desc = "if idle for this seconds, connection closes";
18734+ i++;
18735+
18736+ status[i].name = "max_pool";
18737+ snprintf(status[i].value, MAXVALLEN, "%d", pool_config_max_pool);
18738+ status[i].desc = "max # of connection pool per child";
18739+ i++;
18740+
18741+ status[i].name = "logdir";
18742+ snprintf(status[i].value, MAXVALLEN, "%s", pool_config_logdir);
18743+ status[i].desc = "logging directory";
18744+ i++;
18745+
18746+ status[i].name = "backend_socket_dir";
18747+ snprintf(status[i].value, MAXVALLEN, "%s", pool_config_backend_socket_dir);
18748+ status[i].desc = "Unix domain socket directory for the PostgreSQL server";
18749+ i++;
18750+
18751+ status[i].name = "replication_mode";
18752+ snprintf(status[i].value, MAXVALLEN, "%d", pool_config_replication_mode);
18753+ status[i].desc = "non 0 if operating in replication mode";
18754+ i++;
18755+
18756+ status[i].name = "replication_strict";
18757+ snprintf(status[i].value, MAXVALLEN, "%d", pool_config_replication_strict);
18758+ status[i].desc = "non 0 if operating in strict mode";
18759+ i++;
18760+
18761+ status[i].name = "replication_timeout";
18762+ snprintf(status[i].value, MAXVALLEN, "%d", pool_config_replication_timeout);
18763+ status[i].desc = "if secondary does not respond in this milli seconds, abort the session";
18764+ i++;
18765+
18766+ status[i].name = "current_backend_host_name";
18767+ snprintf(status[i].value, MAXVALLEN, "%s", pool_config_current_backend_host_name);
18768+ status[i].desc = "current master host name";
18769+ i++;
18770+
18771+ status[i].name = "current_backend_port";
18772+ snprintf(status[i].value, MAXVALLEN, "%d", pool_config_current_backend_port);
18773+ status[i].desc = "current master port #";
18774+ i++;
18775+
18776+ status[i].name = "replication_enabled";
18777+ snprintf(status[i].value, MAXVALLEN, "%d", pool_config_replication_enabled);
18778+ status[i].desc = "non 0 if actually operating in replication mode";
18779+ i++;
18780+
18781+ status[i].name = "load_balance_mode";
18782+ snprintf(status[i].value, MAXVALLEN, "%d", pool_config_load_balance_mode);
18783+ status[i].desc = "non 0 if operating in load balancing mode";
18784+ i++;
18785+
18786+ status[i].name = "replication_stop_on_mismatch";
18787+ snprintf(status[i].value, MAXVALLEN, "%d", pool_config_replication_stop_on_mismatch);
18788+ status[i].desc = "stop replication mode on fatal error";
18789+ i++;
18790+
18791+ nrows = i; /* number of status entries filled in above */
18792+
18793+ if (MAJOR(backend) == PROTO_MAJOR_V2)
18794+ {
18795+ /* cursor response */
18796+ pool_write(frontend, "P", 1);
18797+ pool_write(frontend, cursorname, strlen(cursorname)+1);
18798+ }
18799+
18800+ /* row description */
18801+ pool_write(frontend, "T", 1);
18802+
18803+ if (MAJOR(backend) == PROTO_MAJOR_V3)
18804+ {
18805+ len = sizeof(num_fields) + sizeof(len); /* message length starts with the length word and the field count */
18806+
18807+ for (i=0;i<num_fields;i++)
18808+ {
18809+ char *f = field_names[i];
18810+ len += strlen(f)+1;
18811+ len += sizeof(oid);
18812+ len += sizeof(colnum);
18813+ len += sizeof(oid);
18814+ len += sizeof(s);
18815+ len += sizeof(mod);
18816+ len += sizeof(s);
18817+ }
18818+
18819+ len = htonl(len);
18820+ pool_write(frontend, &len, sizeof(len));
18821+ }
18822+
18823+ n = htons(num_fields);
18824+ pool_write(frontend, &n, sizeof(short));
18825+
18826+ for (i=0;i<num_fields;i++)
18827+ {
18828+ char *f = field_names[i];
18829+
18830+ pool_write(frontend, f, strlen(f)+1); /* field name */
18831+
18832+ if (MAJOR(backend) == PROTO_MAJOR_V3)
18833+ {
18834+ pool_write(frontend, &oid, sizeof(oid)); /* table oid */
18835+ colnum = htons(i);
18836+ pool_write(frontend, &colnum, sizeof(colnum)); /* column number */
18837+ }
18838+
18839+ pool_write(frontend, &oid, sizeof(oid)); /* data type oid */
18840+ s = htons(fsize);
18841+ pool_write(frontend, &s, sizeof(fsize)); /* field size */
18842+ pool_write(frontend, &mod, sizeof(mod)); /* modifier */
18843+
18844+ if (MAJOR(backend) == PROTO_MAJOR_V3)
18845+ {
18846+ s = htons(0);
18847+ pool_write(frontend, &s, sizeof(fsize)); /* field format (text) */
18848+ }
18849+ }
18850+ pool_flush(frontend);
18851+
18852+ if (MAJOR(backend) == PROTO_MAJOR_V2)
18853+ {
18854+ /* ascii row */
18855+ for (i=0;i<nrows;i++)
18856+ {
18857+ pool_write(frontend, "D", 1);
18858+ pool_write_and_flush(frontend, nullmap, nbytes);
18859+
18860+ size = strlen(status[i].name);
18861+ hsize = htonl(size+4); /* length prefix counts its own 4 bytes plus the data */
18862+ pool_write(frontend, &hsize, sizeof(hsize));
18863+ pool_write(frontend, status[i].name, size);
18864+
18865+ size = strlen(status[i].value);
18866+ hsize = htonl(size+4);
18867+ pool_write(frontend, &hsize, sizeof(hsize));
18868+ pool_write(frontend, status[i].value, size);
18869+
18870+ size = strlen(status[i].desc);
18871+ hsize = htonl(size+4);
18872+ pool_write(frontend, &hsize, sizeof(hsize));
18873+ pool_write(frontend, status[i].desc, size);
18874+ }
18875+ }
18876+ else
18877+ {
18878+ /* data row */
18879+ for (i=0;i<nrows;i++)
18880+ {
18881+ pool_write(frontend, "D", 1);
18882+ len = sizeof(len) + sizeof(nrows); /* length word + a short for the column count (nrows only supplies the size) */
18883+ len += sizeof(int) + strlen(status[i].name);
18884+ len += sizeof(int) + strlen(status[i].value);
18885+ len += sizeof(int) + strlen(status[i].desc);
18886+ len = htonl(len);
18887+ pool_write(frontend, &len, sizeof(len));
18888+ s = htons(3);
18889+ pool_write(frontend, &s, sizeof(s));
18890+
18891+ len = htonl(strlen(status[i].name));
18892+ pool_write(frontend, &len, sizeof(len));
18893+ pool_write(frontend, status[i].name, strlen(status[i].name));
18894+
18895+ len = htonl(strlen(status[i].value));
18896+ pool_write(frontend, &len, sizeof(len));
18897+ pool_write(frontend, status[i].value, strlen(status[i].value));
18898+
18899+ len = htonl(strlen(status[i].desc));
18900+ pool_write(frontend, &len, sizeof(len));
18901+ pool_write(frontend, status[i].desc, strlen(status[i].desc));
18902+ }
18903+ }
18904+
18905+ /* complete command response */
18906+ pool_write(frontend, "C", 1);
18907+ if (MAJOR(backend) == PROTO_MAJOR_V3)
18908+ {
18909+ len = htonl(sizeof(len) + strlen("SELECT")+1);
18910+ pool_write(frontend, &len, sizeof(len));
18911+ }
18912+ pool_write(frontend, "SELECT", strlen("SELECT")+1);
18913+
18914+ /* ready for query */
18915+ pool_write(frontend, "Z", 1);
18916+ if (MAJOR(backend) == PROTO_MAJOR_V3)
18917+ {
18918+ len = htonl(sizeof(len) + 1);
18919+ pool_write(frontend, &len, sizeof(len));
18920+ pool_write(frontend, "I", 1);
18921+ }
18922+
18923+ pool_flush(frontend);
18924+}
18925+/* Tell the master backend (and, when replicating, the secondary) that the frontend exits by sending an "X" message. */
18926+void pool_send_frontend_exits(POOL_CONNECTION_POOL *backend)
18927+{
18928+ int len;
18929+
18930+ pool_write(MASTER(backend), "X", 1);
18931+
18932+ if (MAJOR(backend) == PROTO_MAJOR_V3)
18933+ {
18934+ len = htonl(4); /* V3 messages carry a length word; 4 = the word itself, no payload */
18935+ pool_write(MASTER(backend), &len, sizeof(len));
18936+ }
18937+
18938+ /*
18939+ * XXX we cannot call pool_flush() here since backend may already
18940+ * close the socket and pool_flush() automatically invokes fail
18941+ * over handler. This could happen in copy command (remember the
18942+ * famous "lost synchronization with server, resetting
18943+ * connection" message)
18944+ */
18945+ fflush(MASTER(backend)->write_fd);
18946+
18947+ if (REPLICATION)
18948+ {
18949+ pool_write(SECONDARY(backend), "X", 1);
18950+ if (MAJOR(backend) == PROTO_MAJOR_V3)
18951+ {
18952+ len = htonl(4);
18953+ pool_write(SECONDARY(backend), &len, sizeof(len)); /* the length word belongs to the secondary's "X" message */
18954+ }
18955+ fflush(SECONDARY(backend)->write_fd);
18956+ }
18957+}
18958+
18959+/*
18960+ * -------------------------------------------------------
18961+ * V3 functions
18962+ * -------------------------------------------------------
18963+ */
18964+POOL_STATUS SimpleForwardToFrontend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend)
18965+{
18966+ char * func = "SimpleForwardToFrontend()";
18967+ int len, len1;
18968+ char *p;
18969+ int status;
18970+ /* Relay one length-prefixed message of type "kind" from the master backend to the frontend; the secondary's copy is read and dropped. */
18971+ pool_write(frontend, &kind, 1);
18972+
18973+ status = pool_read(MASTER(backend), &len, sizeof(len));
18974+ if (status < 0)
18975+ {
18976+ show_error("%s: error while reading message length",func);
18977+ return POOL_END;
18978+ }
18979+
18980+ if (REPLICATION)
18981+ {
18982+ status = pool_read(SECONDARY(backend), &len1, sizeof(len1));
18983+ if (status < 0)
18984+ {
18985+ show_error("%s: error while reading message length from secondary backend",func);
18986+ return POOL_END;
18987+ }
18988+
18989+ if (len != len1)
18990+ {
18991+ show_error("%s: length does not match between backends master(%d) secondary(%d) kind:(%c)",
18992+ func, ntohl(len), ntohl(len1), kind); /* mismatch is only logged; forwarding continues */
18993+ }
18994+ }
18995+
18996+ pool_write(frontend, &len, sizeof(len)); /* len is still in network byte order here */
18997+
18998+ len = ntohl(len);
18999+ len -= 4; /* payload size: the length word counts itself */
19000+
19001+ p = pool_read2(MASTER(backend), len);
19002+ if (p == NULL)
19003+ return POOL_END;
19004+
19005+ if (REPLICATION)
19006+ {
19007+ len1 = ntohl(len1);
19008+ len1 -= 4;
19009+ if (pool_read2(SECONDARY(backend), len1) == NULL) /* consume the secondary's payload; only the master's is forwarded */
19010+ return POOL_END;
19011+ }
19012+
19013+ return pool_write(frontend, p, len);
19014+}
19015+
19016+POOL_STATUS SimpleForwardToBackend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend)
19017+{
19018+ int len;
19019+ int sendlen;
19020+ char *p;
19021+ /* Relay one length-prefixed message of type "kind" from the frontend to the master backend and, when replicating, to the secondary. */
19022+ if (pool_write(MASTER(backend), &kind, 1))
19023+ return POOL_END;
19024+ if (REPLICATION)
19025+ if (pool_write(SECONDARY(backend), &kind, 1))
19026+ return POOL_END;
19027+
19028+ if (pool_read(frontend, &sendlen, sizeof(sendlen)))
19029+ {
19030+ return POOL_END;
19031+ }
19032+
19033+ len = ntohl(sendlen) - 4; /* payload size; sendlen stays in network byte order for forwarding */
19034+
19035+ p = pool_read2(frontend, len);
19036+ if (p == NULL)
19037+ return POOL_END;
19038+
19039+ if (pool_write(MASTER(backend), &sendlen, sizeof(sendlen)))
19040+ return POOL_END;
19041+ if (pool_write(MASTER(backend), p, len))
19042+ return POOL_END;
19043+
19044+ if (REPLICATION)
19045+ {
19046+ if (pool_write(SECONDARY(backend), &sendlen, sizeof(sendlen)))
19047+ return POOL_END;
19048+ if (pool_write(SECONDARY(backend), p, len))
19049+ return POOL_END;
19050+ }
19051+
19052+ return POOL_CONTINUE; /* nothing is flushed here; data stays in the write streams */
19053+}
19054+
19055+POOL_STATUS ParameterStatus(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend)
19056+{
19057+#ifdef PRINT_DEBUG
19058+ char * func = "ParameterStatus()";
19059+#endif
19060+ int len;
19061+ int sendlen;
19062+ char *p;
19063+ char *name;
19064+ char *value;
19065+ /* Forward an "S" message to the frontend and record its name/value pair in the master connection's params. */
19066+ pool_write(frontend, "S", 1);
19067+
19068+ len = pool_read_message_length(backend);
19069+ if (len < 0)
19070+ {
19071+ return POOL_END;
19072+ }
19073+
19074+ sendlen = htonl(len);
19075+ pool_write(frontend, &sendlen, sizeof(sendlen));
19076+
19077+ len -= 4; /* payload size: the length word counts itself */
19078+
19079+ p = pool_read2(MASTER(backend), len);
19080+ if (p == NULL)
19081+ return POOL_END;
19082+
19083+ name = p; /* payload is two consecutive NUL-terminated strings: name, then value */
19084+ value = p + strlen(name) + 1;
19085+
19086+#ifdef PRINT_DEBUG
19087+ show_debug("%s:name: %s value: %s",func, name, value);
19088+#endif
19089+
19090+ pool_add_param(&MASTER(backend)->params, name, value);
19091+
19092+#ifdef PRINT_DEBUG
19093+ pool_param_debug_print(&MASTER(backend)->params);
19094+#endif
19095+
19096+ if (REPLICATION)
19097+ if (pool_read2(SECONDARY(backend), len) == NULL) /* consume the secondary's payload; it is not forwarded */
19098+ return POOL_END;
19099+
19100+ return pool_write(frontend, p, len);
19101+
19102+}
19103+
19104+/*
19105+ * reset backend status by issuing the qcnt-th reset query. return values are:
19106+ * 0: no query was issued 1: a query was issued 2: no more queries remain -1: error
19107+ */
19108+static int reset_backend(POOL_CONNECTION_POOL *backend, int qcnt)
19109+{
19110+#ifdef NO_RESET_ALL
19111+ static char *queries[] = {"ABORT"};
19112+#else
19113+ static char *queries[] = {"ABORT", "RESET ALL"};
19114+#endif
19115+
19116+ char *query;
19117+ int qn = sizeof(queries)/sizeof(char *); /* number of reset queries */
19118+
19119+ /* for PGCluster */
19120+ if (!Use_Connection_Pool)
19121+ return 2;
19122+
19123+ if (qcnt >= qn)
19124+ return 2;
19125+
19126+ query = queries[qcnt];
19127+
19128+ /* if transaction state is idle, we don't need to issue ABORT */
19129+ if (TSTATE(backend) == 'I' && !strcmp("ABORT", query))
19130+ return 0;
19131+
19132+ if (Query(NULL, backend, query) != POOL_CONTINUE) /* no frontend is passed for reset queries */
19133+ return -1;
19134+
19135+ return 1;
19136+}
19137+
19138+/*
19139+ * tell whether the statement in sql may be load balanced: returns 1 if so, otherwise 0
19140+ */
19141+static int load_balance_enabled(POOL_CONNECTION_POOL *backend, char *sql)
19142+{
19143+ if (!pool_config_load_balance_mode || !REPLICATION)
19144+ return 0; /* needs both the load balance option and replication */
19145+ if (MAJOR(backend) != PROTO_MAJOR_V3 || TSTATE(backend) != 'I')
19146+ return 0; /* only V3 sessions whose transaction state is idle */
19147+
19148+ /* only statements that start with SELECT (any case) qualify */
19149+ return strncasecmp(sql, "SELECT", 6) == 0;
19150+}
19151+
19152+/*
19153+ * start load balance mode: save the connection slots and put a randomly chosen one into slot 0
19154+ */
19155+static void start_load_balance(POOL_CONNECTION_POOL *backend)
19156+{
19157+#ifdef PRINT_DEBUG
19158+ char * func = "start_load_balance()";
19159+#endif
19160+ int i;
19161+ int master;
19162+
19163+ /* save backend connection slots */
19164+ for (i=0;i<backend->num;i++)
19165+ {
19166+ slots[i] = backend->slots[i]; /* NOTE(review): slots[] is declared outside this function; presumably holds at least backend->num entries - confirm */
19167+ }
19168+
19169+ /* temporary turn off replication mode */
19170+ /*REPLICATION = 0; */
19171+
19172+ /* choose a master in random manner */
19173+ master = random() % backend->num;
19174+ backend->slots[0] = slots[master]; /* only slot 0 is replaced; end_load_balance() copies the saved slots back */
19175+#ifdef PRINT_DEBUG
19176+ show_debug("%s: selected master is %d", func,master);
19177+#endif
19178+
19179+ /* start load balancing */
19180+ /*in_load_balance = 1;*/
19181+}
19182+
19183+/*
19184+ * finish load balance mode: copy the saved connection slots back into backend
19185+ */
19186+static void end_load_balance(POOL_CONNECTION_POOL *backend)
19187+{
19188+ int i;
19189+
19190+ /* restore backend connection slots */
19191+ for (i=0;i<backend->num;i++)
19192+ {
19193+ backend->slots[i] = slots[i]; /* slots[] was filled by start_load_balance() */
19194+ }
19195+
19196+ /* turn on replication mode */
19197+ /* REPLICATION = 1; */
19198+
19199+ /*in_load_balance = 0;*/
19200+#ifdef PRINT_DEBUG
19201+ show_debug("end_load_balance: end load balance mode");
19202+#endif
19203+}
19204+
19205+/*
19206+ * send error message to frontend (V2: "E" + message text; V3: "E" + length + S/C/M/[D]/[H]/F/L fields). detail and hint may be NULL or empty.
19207+ */
19208+void pool_send_error_message(POOL_CONNECTION *frontend, int protoMajor,
19209+ char *code,
19210+ char *message,
19211+ char *detail,
19212+ char *hint,
19213+ char *file,
19214+ int line)
19215+{
19216+#define MAXDATA 1024
19217+#define MAXMSGBUF 128
19218+ char * func = "pool_send_error_message()";
19219+
19220+ if (protoMajor == PROTO_MAJOR_V2)
19221+ {
19222+ pool_write(frontend, "E", 1);
19223+ pool_write_and_flush(frontend, message, strlen(message)+1);
19224+ }
19225+ else if (protoMajor == PROTO_MAJOR_V3)
19226+ {
19227+ char data[MAXDATA];
19228+ /* each of the (at most 7) fields is formatted straight into data[] and capped at MAXMSGBUF bytes, so MAXDATA cannot overflow */
19229+ int len;
19230+ int thislen;
19231+ int sendlen;
19232+
19233+ len = 0;
19234+
19235+ pool_write(frontend, "E", 1);
19236+
19237+ /* error level */
19238+ snprintf(data + len, MAXMSGBUF, "SERROR");
19239+ thislen = strlen(data + len); /* bytes actually stored; snprintf()'s return value ignores truncation */
19240+ len += thislen + 1;
19241+
19242+ /* code */
19243+ snprintf(data + len, MAXMSGBUF, "C%s", code);
19244+ thislen = strlen(data + len);
19245+ len += thislen + 1;
19246+
19247+ /* message */
19248+ snprintf(data + len, MAXMSGBUF, "M%s", message);
19249+ thislen = strlen(data + len);
19250+ len += thislen + 1;
19251+
19252+ /* detail */
19253+ if (detail != NULL && *detail != '\0')
19254+ {
19255+ snprintf(data + len, MAXMSGBUF, "D%s", detail);
19256+ thislen = strlen(data + len);
19257+ len += thislen + 1;
19258+ }
19259+
19260+ /* hint */
19261+ if (hint != NULL && *hint != '\0')
19262+ {
19263+ snprintf(data + len, MAXMSGBUF, "H%s", hint);
19264+ thislen = strlen(data + len);
19265+ len += thislen + 1;
19266+ }
19267+
19268+ /* file */
19269+ snprintf(data + len, MAXMSGBUF, "F%s", file);
19270+ thislen = strlen(data + len);
19271+ len += thislen + 1;
19272+
19273+ /* line */
19274+ snprintf(data + len, MAXMSGBUF, "L%d", line);
19275+ thislen = strlen(data + len);
19276+ len += thislen + 1;
19277+
19278+ /* stop null */
19279+ *(data + len) = '\0'; /* store the terminator first, then count it */
19280+ len++;
19281+
19282+ sendlen = len;
19283+ len = htonl(len + 4);
19284+ pool_write(frontend, &len, sizeof(len));
19285+ pool_write_and_flush(frontend, data, sendlen);
19286+ }
19287+ else
19288+ show_error("%s: unknown protocol major %d",func, protoMajor);
19289+}
19290diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pool_stream.c pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_stream.c
19291--- postgresql-8.2.4/src/pgcluster/pglb/pool_stream.c 1970-01-01 01:00:00.000000000 +0100
19292+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_stream.c 2007-02-18 22:52:17.000000000 +0100
19293@@ -0,0 +1,584 @@
19294+/*--------------------------------------------------------------------
19295+ * FILE:
19296+ * pool_stream.c
19297+ *
19298+ * NOTE:
19299+ * stream I/O modules
19300+ *
19301+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
19302+ * Portions Copyright (c) 2003-2006, Tatsuo Ishii
19303+ *--------------------------------------------------------------------
19304+ */
19305+/*
19306+* Permission to use, copy, modify, and distribute this software and
19307+* its documentation for any purpose and without fee is hereby
19308+* granted, provided that the above copyright notice appear in all
19309+* copies and that both that copyright notice and this permission
19310+* notice appear in supporting documentation, and that the name of the
19311+* author not be used in advertising or publicity pertaining to
19312+* distribution of the software without specific, written prior
19313+* permission. The author makes no representations about the
19314+* suitability of this software for any purpose. It is provided "as
19315+* is" without express or implied warranty.
19316+*/
19317+
19318+#include <stdio.h>
19319+#include <stdlib.h>
19320+#include <string.h>
19321+#include <errno.h>
19322+#include <sys/types.h>
19323+#include <unistd.h>
19324+#include <sys/time.h>
19325+
19326+#include "postgres_fe.h"
19327+#include "libpq/pqcomm.h"
19328+#include "replicate_com.h"
19329+#include "pglb.h"
19330+
19331+#define READBUFSZ 1024
19332+
19333+POOL_CONNECTION *pool_open(int fd);
19334+void pool_close(POOL_CONNECTION *cp);
19335+int pool_read(POOL_CONNECTION *cp, void *buf, int len);
19336+char *pool_read2(POOL_CONNECTION *cp, int len);
19337+int pool_write(POOL_CONNECTION *cp, void *buf, int len);
19338+int pool_flush(POOL_CONNECTION *cp);
19339+int pool_write_and_flush(POOL_CONNECTION *cp, void *buf, int len);
19340+char *pool_read_string(POOL_CONNECTION *cp, int *len, int line);
19341+
19342+static int mystrlen(char *str, int upper, int *flag);
19343+static int mystrlinelen(char *str, int upper, int *flag);
19344+static int save_pending_data(POOL_CONNECTION *cp, void *data, int len);
19345+static int consume_pending_data(POOL_CONNECTION *cp, void *data, int len);
19346+
19347+
19348+/*
19349+* wrap socket fd in a zero-initialised POOL_CONNECTION with a stdio write stream and a pending-data buffer.
19350+* returns POOL_CONNECTION on success otherwise NULL.
19351+*/
19352+POOL_CONNECTION *pool_open(int fd)
19353+{
19354+ POOL_CONNECTION *cp;
19355+
19356+ cp = (POOL_CONNECTION *)malloc(sizeof(POOL_CONNECTION));
19357+ if (cp == NULL)
19358+ {
19359+ show_error("pool_open: malloc failed: %s", strerror(errno));
19360+ return NULL;
19361+ }
19362+
19363+ memset(cp, 0, sizeof(*cp));
19364+
19365+ cp->write_fd = fdopen(fd, "w");
19366+ if (cp->write_fd == NULL)
19367+ {
19368+ show_error("pool_open: fdopen failed: %s",strerror(errno));
19369+ free(cp);
19370+ return NULL;
19371+ }
19372+
19373+ /* initialize pending data buffer */
19374+ cp->hp = malloc(READBUFSZ);
19375+ if (cp->hp == NULL)
19376+ {
19377+ show_error("pool_open: malloc failed"); /* NOTE(review): cp and cp->write_fd are not released on this path */
19378+ return NULL;
19379+ }
19380+ cp->bufsz = READBUFSZ;
19381+ cp->po = 0;
19382+ cp->len = 0;
19383+ cp->sbuf = NULL; /* string buffer, allocated on first use by pool_read_string() */
19384+ cp->sbufsz = 0;
19385+ cp->buf2 = NULL; /* buffer handed out by pool_read2(), grown on demand */
19386+ cp->bufsz2 = 0;
19387+
19388+ cp->fd = fd;
19389+ return cp;
19390+}
19391+
19392+/*
19393+* close read/write file descriptors and free cp together with all buffers it owns.
19394+*/
19395+void pool_close(POOL_CONNECTION *cp)
19396+{
19397+ close(cp->fd); /* NOTE(review): closed before the stdio stream that wraps the same fd; unflushed output is presumably dropped - confirm intent */
19398+ fclose(cp->write_fd);
19399+ free(cp->hp);
19400+ if (cp->sbuf)
19401+ free(cp->sbuf);
19402+ if (cp->buf2)
19403+ free(cp->buf2);
19404+ pool_discard_params(&cp->params);
19405+ free(cp); /* cp must not be used after this call */
19406+}
19407+
19408+/*
19409+* read len bytes from cp into buf, taking pending data first; surplus bytes read from the socket are saved as pending data.
19410+* returns 0 on success otherwise -1 (a failure on a backend connection exits the process instead).
19411+*/
19412+int pool_read(POOL_CONNECTION *cp, void *buf, int len)
19413+{
19414+ static char readbuf[READBUFSZ];
19415+
19416+ int consume_size;
19417+ int readlen;
19418+
19419+ consume_size = consume_pending_data(cp, buf, len);
19420+ len -= consume_size;
19421+ buf += consume_size; /* arithmetic on void * (GNU C extension) */
19422+
19423+ while (len > 0)
19424+ {
19425+ if (cp->issecondary_backend)
19426+ {
19427+ if (pool_check_fd(cp, 0)) /* non-zero means the secondary's data is not available */
19428+ {
19429+ show_error("pool_read: secondary data is not ready. abort this session");
19430+ exit(1);
19431+ }
19432+ }
19433+
19434+ readlen = read(cp->fd, readbuf, READBUFSZ); /* may return more than len bytes; the excess is saved below */
19435+ if (readlen == -1)
19436+ {
19437+ show_error("pool_read: read failed (%s)", strerror(errno));
19438+
19439+ if (cp->isbackend)
19440+ {
19441+ /* fatal error, notice to parent and exit */
19442+ notice_backend_error();
19443+ exit(1);
19444+ }
19445+ else
19446+ {
19447+ return -1;
19448+ }
19449+ }
19450+ else if (readlen == 0)
19451+ {
19452+ show_error("pool_read: EOF encountered");
19453+
19454+ if (cp->isbackend)
19455+ {
19456+ /* fatal error, notice to parent and exit */
19457+ notice_backend_error();
19458+ exit(1);
19459+ }
19460+ else
19461+ {
19462+ /*
19463+ * if backend offers authentication method, frontend could close connection
19464+ */
19465+ return -1;
19466+ }
19467+ }
19468+
19469+ if (len < readlen)
19470+ {
19471+ /* overrun. we need to save remaining data to pending buffer */
19472+ if (save_pending_data(cp, readbuf+len, readlen-len))
19473+ return -1;
19474+ memmove(buf, readbuf, len);
19475+ break;
19476+ }
19477+
19478+ memmove(buf, readbuf, readlen);
19479+ buf += readlen;
19480+ len -= readlen;
19481+ }
19482+
19483+ return 0;
19484+}
19485+
19486+/*
19487+* read exactly len bytes from cp into cp->buf2 (grown as needed), taking pending data first.
19488+* returns buffer address on success otherwise NULL; the buffer is reused by the next call on cp.
19489+*/
19490+char *pool_read2(POOL_CONNECTION *cp, int len)
19491+{
19492+ char *buf;
19493+ int req_size;
19494+ int alloc_size;
19495+ int consume_size;
19496+ int readlen;
19497+
19498+ req_size = cp->len + len;
19499+ /* also allocate for an empty request on a fresh connection, so success is never reported as NULL */
19500+ if (req_size > cp->bufsz2 || (req_size == 0 && cp->buf2 == NULL))
19501+ {
19502+ alloc_size = ((req_size+1)/READBUFSZ+1)*READBUFSZ;
19503+ cp->buf2 = realloc(cp->buf2, alloc_size);
19504+ if (cp->buf2 == NULL)
19505+ {
19506+ show_error("pool_read2: failed to realloc");
19507+ exit(1);
19508+ }
19509+ cp->bufsz2 = alloc_size;
19510+ }
19511+
19512+ buf = cp->buf2;
19513+
19514+ consume_size = consume_pending_data(cp, buf, len);
19515+ len -= consume_size;
19516+ buf += consume_size;
19517+
19518+ while (len > 0)
19519+ {
19520+ if (cp->issecondary_backend)
19521+ {
19522+ if (pool_check_fd(cp, 0))
19523+ {
19524+ show_error("pool_read2: secondary data is not ready. abort this session");
19525+ exit(1);
19526+ }
19527+ }
19528+
19529+ readlen = read(cp->fd, buf, len); /* never asks for more than is still missing, so nothing needs saving as pending */
19530+ if (readlen == -1)
19531+ {
19532+ show_error("pool_read2: read failed (%s)", strerror(errno));
19533+
19534+ if (cp->isbackend)
19535+ {
19536+ /* fatal error, notice to parent and exit */
19537+ notice_backend_error();
19538+ exit(1);
19539+ }
19540+ else
19541+ {
19542+ return NULL;
19543+ }
19544+ }
19545+ else if (readlen == 0)
19546+ {
19547+ show_error("pool_read2: EOF encountered");
19548+
19549+ if (cp->isbackend)
19550+ {
19551+ /* fatal error, notice to parent and exit */
19552+ notice_backend_error();
19553+ exit(1);
19554+ }
19555+ else
19556+ {
19557+ /*
19558+ * if backend offers authentication method, frontend could close connection
19559+ */
19560+ return NULL;
19561+ }
19562+ }
19563+
19564+ buf += readlen;
19565+ len -= readlen;
19566+ }
19567+
19568+ return cp->buf2;
19569+}
19570+
19571+/*
19572+* hand len bytes from buf to cp's stdio write stream; nothing is written when cp->no_forward is set.
19573+* always returns 0 (the result of fwrite() is not examined).
19574+*/
19575+int pool_write(POOL_CONNECTION *cp, void *buf, int len)
19576+{
19577+ if (cp->no_forward)
19578+ return 0;
19579+ fwrite(buf, len, 1, cp->write_fd);
19580+ return 0;
19581+}
19582+
19583+/*
19584+* flush write buffer. returns 0 on success, -1 on failure (a failure on a backend connection exits the process instead).
19585+*/
19586+int pool_flush(POOL_CONNECTION *cp)
19587+{
19588+ if (fflush(cp->write_fd) != 0)
19589+ {
19590+ show_error("pool_flush: fflush failed (%s)", strerror(errno));
19591+
19592+ if (cp->isbackend)
19593+ {
19594+ notice_backend_error(); /* report the backend failure before exiting */
19595+ exit(1);
19596+ }
19597+ else
19598+ {
19599+ return -1;
19600+ }
19601+ }
19602+ return 0;
19603+}
19604+
19605+/*
19606+* pool_write() followed by pool_flush(); returns -1 if the write reports failure, otherwise the flush result
19607+*/
19608+int pool_write_and_flush(POOL_CONNECTION *cp, void *buf, int len)
19609+{
19610+ int rc = pool_write(cp, buf, len);
19611+
19612+ return (rc != 0) ? -1 : pool_flush(cp);
19613+}
19614+
19615+/*
19616+ * read a string until EOF or NULL is encountered.
19617+ * if line is not 0, read until new line is encountered. returns cp->sbuf and stores the byte count in *len; returns NULL on allocation failure, read error or EOF from read().
19618+*/
19619+char *pool_read_string(POOL_CONNECTION *cp, int *len, int line)
19620+{
19621+ int readp;
19622+ int readsize;
19623+ int readlen;
19624+ int strlength;
19625+ int flag;
19626+ int consume_size;
19627+
19628+#ifdef DEBUG
19629+ static char pbuf[READBUFSZ];
19630+#endif
19631+
19632+ *len = 0;
19633+ readp = 0; /* offset in cp->sbuf where the next read() stores its data */
19634+
19635+ /* initialize read buffer */
19636+ if (cp->sbufsz == 0)
19637+ {
19638+ cp->sbuf = malloc(READBUFSZ);
19639+ if (cp->sbuf == NULL)
19640+ {
19641+ show_error("pool_read_string: malloc failed");
19642+ return NULL;
19643+ }
19644+ cp->sbufsz = READBUFSZ;
19645+ *cp->sbuf = '\0';
19646+ }
19647+
19648+ /* any pending data? */
19649+ if (cp->len)
19650+ {
19651+ if (line)
19652+ strlength = mystrlinelen(cp->hp+cp->po, cp->len, &flag);
19653+ else
19654+ strlength = mystrlen(cp->hp+cp->po, cp->len, &flag);
19655+
19656+ /* buffer is too small? */
19657+ if ((strlength + 1) > cp->sbufsz)
19658+ {
19659+ cp->sbufsz = ((strlength+1)/READBUFSZ+1)*READBUFSZ;
19660+ cp->sbuf = realloc(cp->sbuf, cp->sbufsz);
19661+ if (cp->sbuf == NULL)
19662+ {
19663+ show_error("pool_read_string: realloc failed");
19664+ return NULL;
19665+ }
19666+ }
19667+
19668+ /* consume pending and save to read string buffer */
19669+ consume_size = consume_pending_data(cp, cp->sbuf, strlength);
19670+
19671+ *len = strlength;
19672+
19673+ /* is the string null terminated? */
19674+ if (consume_size == strlength && !flag)
19675+ {
19676+ /* not null or line terminated.
19677+ * we need to read more since we have not encountered NULL or new line yet
19678+ */
19679+ readsize = cp->sbufsz - strlength;
19680+ readp = strlength;
19681+ }
19682+ else
19683+ {
19684+#ifdef PRINT_DEBUG
19685+ show_debug("pool_read_string: read all from pending data. po:%d len:%d",
19686+ cp->po, cp->len);
19687+#endif
19688+ return cp->sbuf;
19689+ }
19690+ } else
19691+ {
19692+ readsize = cp->sbufsz;
19693+ }
19694+
19695+
19696+ for (;;)
19697+ {
19698+ readlen = read(cp->fd, cp->sbuf+readp, readsize);
19699+ if (readlen == -1)
19700+ {
19701+ show_error("pool_read_string: read() failed. reason:%s", strerror(errno));
19702+
19703+ if (cp->isbackend)
19704+ {
19705+ notice_backend_error();
19706+ exit(1);
19707+ }
19708+ else
19709+ {
19710+ return NULL;
19711+ }
19712+ }
19713+
19714+ if (readlen == 0) /* EOF before a terminator was seen */
19715+ return NULL;
19716+
19717+ /* check overrun */
19718+ if (line)
19719+ strlength = mystrlinelen(cp->sbuf+readp, readlen, &flag);
19720+ else
19721+ strlength = mystrlen(cp->sbuf+readp, readlen, &flag);
19722+
19723+ if (strlength < readlen)
19724+ {
19725+ save_pending_data(cp, cp->sbuf+readp+strlength, readlen-strlength); /* bytes after the terminator are kept for the next read */
19726+ *len += strlength;
19727+#ifdef PRINT_DEBUG
19728+ show_debug("pool_read_string: total result %d with pending data po:%d len:%d", *len, cp->po, cp->len);
19729+#endif
19730+ return cp->sbuf;
19731+ }
19732+
19733+ *len += readlen;
19734+
19735+ /* encountered null or newline? */
19736+ if (flag)
19737+ {
19738+ /* ok we have read all data */
19739+#ifdef PRINT_DEBUG
19740+ show_debug("pool_read_string: total result %d ", *len);
19741+#endif
19742+ break;
19743+ }
19744+
19745+ readp += readlen;
19746+ readsize = READBUFSZ;
19747+
19748+ if ((*len+readsize) > cp->sbufsz) /* make room for the next READBUFSZ-sized read */
19749+ {
19750+ cp->sbufsz += READBUFSZ;
19751+
19752+ cp->sbuf = realloc(cp->sbuf, cp->sbufsz);
19753+ if (cp->sbuf == NULL)
19754+ {
19755+ show_error("pool_read_string: realloc failed");
19756+ return NULL;
19757+ }
19758+ }
19759+ }
19760+ return cp->sbuf;
19761+}
19762+
19763+/*
19764+ * counts the bytes of str up to and including the first \0, scanning at most upper bytes.
19765+ * *flag is set to 1 when a \0 was found within that range, otherwise to 0.
19766+ * examples (flag argument omitted):
19767+ * mystrlen("abc", 2) returns 2
19768+ * mystrlen("abc", 3) returns 3
19769+ * mystrlen("abc", 4) returns 4
19770+ * mystrlen("abc", 5) returns 4
19771+ */
19772+static int mystrlen(char *str, int upper, int *flag)
19773+{
19774+ int pos = 0;
19775+
19776+ *flag = 0;
19777+
19778+ while (pos < upper)
19779+ {
19780+ /* the byte is counted before it is tested, so a terminating \0 is included */
19781+ if (str[pos++] == '\0')
19782+ {
19783+ *flag = 1;
19784+ break;
19785+ }
19786+ }
19787+ return pos;
19788+}
19789+
19790+/*
19791+ * counts the bytes of str up to and including the first \n or \0, scanning at most upper bytes.
19792+ * *flag is set to 1 when such a terminator was found within that range, otherwise to 0.
19793+ * examples (flag argument omitted):
19794+ * mystrlinelen("abc", 2) returns 2
19795+ * mystrlinelen("abc", 3) returns 3
19796+ * mystrlinelen("abc", 4) returns 4
19797+ * mystrlinelen("abc", 5) returns 4
19798+ * mystrlinelen("abcd\nefg", 4) returns 4
19799+ * mystrlinelen("abcd\nefg", 5) returns 5
19800+ * mystrlinelen("abcd\nefg", 6) returns 5
19801+ */
19802+static int mystrlinelen(char *str, int upper, int *flag)
19803+{
19804+ int pos = 0;
19805+
19806+ *flag = 0;
19807+
19808+ while (pos < upper)
19809+ {
19810+ char c = str[pos++]; /* counted before it is tested, so the terminator is included */
19811+ if (c == '\0' || c == '\n')
19812+ {
19813+ *flag = 1;
19814+ break;
19815+ }
19816+ }
19817+ return pos;
19818+}
19819+
19820+/*
19821+ * save pending data: append len bytes from data to cp's pending buffer, growing it if needed. returns 0 on success, -1 if realloc fails.
19822+ */
19823+static int save_pending_data(POOL_CONNECTION *cp, void *data, int len)
19824+{
19825+ int reqlen;
19826+ size_t realloc_size;
19827+ char *p;
19828+
19829+ /* to be safe */
19830+ if (cp->len == 0)
19831+ cp->po = 0;
19832+
19833+ reqlen = cp->po + cp->len + len; /* bytes needed from the start of the buffer: consumed offset + pending + new */
19834+
19835+ /* pending buffer is enough? */
19836+ if (reqlen > cp->bufsz)
19837+ {
19838+ /* too small, enlarge it */
19839+ realloc_size = (reqlen/READBUFSZ+1)*READBUFSZ;
19840+ p = realloc(cp->hp, realloc_size);
19841+ if (p == NULL)
19842+ {
19843+ show_error("save_pending_data: realloc failed");
19844+ return -1;
19845+ }
19846+
19847+ cp->bufsz = realloc_size;
19848+ cp->hp = p;
19849+ }
19850+
19851+ memmove(cp->hp + cp->po + cp->len, data, len); /* append right after the bytes already pending */
19852+ cp->len += len;
19853+
19854+ return 0;
19855+}
19856+
19857+/*
19858+ * consume pending data. returns actually consumed data length.
19859+ */
19860+static int consume_pending_data(POOL_CONNECTION *cp, void *data, int len)
19861+{
19862+ int consume_size;
19863+
19864+ if (cp->len <= 0)
19865+ return 0;
19866+
19867+ consume_size = Min(len, cp->len);
19868+ memmove(data, cp->hp + cp->po, consume_size);
19869+ cp->len -= consume_size;
19870+
19871+ if (cp->len <= 0)
19872+ cp->po = 0;
19873+ else
19874+ cp->po += consume_size;
19875+
19876+ return consume_size;
19877+}
19878diff -aruN postgresql-8.2.4/src/pgcluster/pglb/recovery.c pgcluster-1.7.0rc7/src/pgcluster/pglb/recovery.c
19879--- postgresql-8.2.4/src/pgcluster/pglb/recovery.c 1970-01-01 01:00:00.000000000 +0100
19880+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/recovery.c 2007-02-18 22:52:17.000000000 +0100
19881@@ -0,0 +1,262 @@
19882+/*--------------------------------------------------------------------
19883+ * FILE:
19884+ * recovery.c
19885+ *
19886+ * NOTE:
19887+ * This file is composed of the functions to call with the source
19888+ * at pglb for the recovery.
19889+ *
19890+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
19891+ *--------------------------------------------------------------------
19892+ */
19893+/*
19894+ * Permission to use, copy, modify, and distribute this software and
19895+ * its documentation for any purpose and without fee is hereby
19896+ * granted, provided that the above copyright notice appear in all
19897+ * copies and that both that copyright notice and this permission
19898+ * notice appear in supporting documentation, and that the name of the
19899+ * author not be used in advertising or publicity pertaining to
19900+ * distribution of the software without specific, written prior
19901+ * permission. The author makes no representations about the
19902+ * suitability of this software for any purpose. It is provided "as
19903+ * is" without express or implied warranty.
19904+ *
19905+*/
19906+#include <stdio.h>
19907+#include <string.h>
19908+#include <stdlib.h>
19909+#include <unistd.h>
19910+#include <signal.h>
19911+#include <sys/wait.h>
19912+#include <ctype.h>
19913+#include <sys/types.h>
19914+#include <sys/stat.h>
19915+#include <sys/socket.h>
19916+#include <sys/ipc.h>
19917+#include <sys/msg.h>
19918+#include <netdb.h>
19919+#include <netinet/in.h>
19920+#include <errno.h>
19921+#include <fcntl.h>
19922+#include <time.h>
19923+#include <sys/param.h>
19924+#include <arpa/inet.h>
19925+#include <sys/file.h>
19926+
19927+#ifdef HAVE_NETINET_TCP_H
19928+#include <netinet/tcp.h>
19929+#endif
19930+
19931+#ifdef HAVE_SYS_SELECT_H
19932+#include <sys/select.h>
19933+#endif
19934+#include "replicate_com.h"
19935+#include "pglb.h"
19936+
19937+
19938+/*--------------------------------------
19939+ * PROTOTYPE DECLARATION
19940+ *--------------------------------------
19941+ */
19942+void PGRrecovery_main(int fork_wait_time);
19943+
19944+static int set_recovery(RecoveryPacket *packet);
19945+static int receive_recovery(int fd);
19946+
19947+
19948+/*--------------------------------------------------------------------
19949+ * SYMBOL
19950+ * PGRrecovery_main()
19951+ * NOTES
19952+ * main module of recovery function
19953+ * ARGS
19954+ * void
19955+ * RETURN
19956+ * none
19957+ *--------------------------------------------------------------------
19958+ */
19959+void
19960+PGRrecovery_main(int fork_wait_time)
19961+{
19962+ char * func = "PGRrecovery_main()";
19963+ int fd = -1;
19964+ int rtn;
19965+ pid_t pgid = 0;
19966+ pid_t pid = 0;
19967+
19968+ pgid = getpgid(0);
19969+ pid = fork();
19970+ if (pid != 0)
19971+ {
19972+ return;
19973+ }
19974+
19975+ PGRsignal(SIGCHLD, SIG_DFL);
19976+ PGRsignal(SIGHUP, PGRexit_subprocess);
19977+ PGRsignal(SIGINT, PGRexit_subprocess);
19978+ PGRsignal(SIGQUIT, PGRexit_subprocess);
19979+ PGRsignal(SIGTERM, PGRexit_subprocess);
19980+ PGRsignal(SIGPIPE, SIG_IGN);
19981+ /*
19982+ * in child process,
19983+ * call recovery module
19984+ */
19985+ setpgid(0,pgid);
19986+
19987+ if (fork_wait_time > 0) {
19988+#ifdef PRINT_DEBUG
19989+ show_debug("recovery process: wait fork(): pid = %d", getpid());
19990+#endif
19991+ sleep(fork_wait_time);
19992+ }
19993+
19994+ fd = PGRcreate_recv_socket(ResolvedName, Recovery_Port_Number);
19995+ if (fd < 0)
19996+ {
19997+ show_error("%s:PGRcreate_recv_socket failed",func);
19998+ exit(1);
19999+ }
20000+
20001+ for (;;)
20002+ {
20003+ fd_set rmask;
20004+ struct timeval timeout;
20005+
20006+ timeout.tv_sec = 60;
20007+ timeout.tv_usec = 0;
20008+
20009+ /*
20010+ * Wait for something to happen.
20011+ */
20012+ FD_ZERO(&rmask);
20013+ FD_SET(fd,&rmask);
20014+ rtn = select(fd+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
20015+ if (rtn && FD_ISSET(fd, &rmask))
20016+ {
20017+ receive_recovery(fd);
20018+ }
20019+ }
20020+}
20021+
20022+/*--------------------------------------------------------------------
20023+ * SYMBOL
20024+ * set_recovery()
20025+ * NOTES
20026+ * check a recovery request from replication server
20027+ * ARGS
20028+ * void
20029+ * RETURN
20030+ * none
20031+ *--------------------------------------------------------------------
20032+ */
20033+static int
20034+set_recovery(RecoveryPacket *packet)
20035+{
20036+#ifdef PRINT_DEBUG
20037+ char * func = "set_recovery()";
20038+#endif
20039+ int status = STATUS_OK;
20040+ ClusterTbl key;
20041+ ClusterTbl * ptr;
20042+
20043+ PGRset_key_of_cluster(&key,packet);
20044+#ifdef PRINT_DEBUG
20045+ show_debug("%s:received no:%d",func, ntohs(packet->packet_no));
20046+#endif
20047+ switch (ntohs(packet->packet_no))
20048+ {
20049+ case RECOVERY_PREPARE_REQ:
20050+ /* add cluster db */
20051+#ifdef PRINT_DEBUG
20052+ show_debug("%s:add_db host:%s port:%d max:%d",
20053+ func, packet->hostName,ntohs(packet->port),ntohs(packet->max_connect));
20054+#endif
20055+ ptr = PGRsearch_cluster_tbl(&key);
20056+ if (ptr == NULL)
20057+ {
20058+ ptr = PGRadd_cluster_tbl(&key);
20059+ }
20060+ if (ptr != NULL)
20061+ {
20062+ PGRset_status_on_cluster_tbl(TBL_STOP,ptr);
20063+ if (Use_Connection_Pool)
20064+ {
20065+ signal(SIGCHLD,PGRrecreate_child);
20066+ status = PGRpre_fork_child(ptr);
20067+ }
20068+ }
20069+ break;
20070+ case RECOVERY_FINISH:
20071+ /* start cluster db */
20072+ ptr = PGRsearch_cluster_tbl(&key);
20073+ if (ptr != NULL)
20074+ {
20075+#ifdef PRINT_DEBUG
20076+ show_debug("%s:start_db host:%s port:%d max:%d",
20077+ func,packet->hostName,ntohs(packet->port),ntohs(packet->max_connect));
20078+#endif
20079+ PGRset_status_on_cluster_tbl(TBL_INIT,ptr);
20080+ }
20081+ break;
20082+ case RECOVERY_PGDATA_ANS:
20083+ /* stop cluster db */
20084+ ptr = PGRsearch_cluster_tbl(&key);
20085+ if (ptr != NULL)
20086+ {
20087+#ifdef PRINT_DEBUG
20088+ show_debug("%s:stop_db host:%s port:%d max:%d",
20089+ func, packet->hostName,ntohs(packet->port),ntohs(packet->max_connect));
20090+#endif
20091+ PGRset_status_on_cluster_tbl(TBL_STOP,ptr);
20092+ }
20093+ break;
20094+ case RECOVERY_ERROR:
20095+ /* delete cluster db */
20096+ ptr = PGRsearch_cluster_tbl(&key);
20097+ if (ptr != NULL)
20098+ {
20099+ PGRset_status_on_cluster_tbl(TBL_FREE,ptr);
20100+ if (Use_Connection_Pool)
20101+ {
20102+ PGRquit_children_on_cluster(ptr->rec_no);
20103+ }
20104+ }
20105+ break;
20106+ /* cluster db has error */
20107+ case RECOVERY_ERROR_CONNECTION:
20108+ /* set error cluster db */
20109+ ptr = PGRsearch_cluster_tbl(&key);
20110+ if (ptr != NULL)
20111+ {
20112+ PGRset_status_on_cluster_tbl(TBL_ERROR,ptr);
20113+ if (Use_Connection_Pool)
20114+ {
20115+ PGRquit_children_on_cluster(ptr->rec_no);
20116+ }
20117+ }
20118+ break;
20119+ }
20120+ return STATUS_OK;
20121+}
20122+
20123+static int
20124+receive_recovery(int fd)
20125+{
20126+ int status = STATUS_ERROR;
20127+ int r_size = -1;
20128+ int recv_sock = -1;
20129+ RecoveryPacket packet;
20130+
20131+ recv_sock = PGRcreate_acception(fd,ResolvedName,Recovery_Port_Number);
20132+ if (recv_sock >= 0 )
20133+ {
20134+ memset(&packet,0, sizeof(RecoveryPacket));
20135+ r_size = PGRread_byte(recv_sock,(char *)&packet,sizeof(RecoveryPacket),MSG_WAITALL);
20136+ if ( r_size == sizeof(RecoveryPacket) )
20137+ {
20138+ status = set_recovery(&packet);
20139+ }
20140+ }
20141+ PGRclose_sock(&recv_sock);
20142+ return status;
20143+}
20144diff -aruN postgresql-8.2.4/src/pgcluster/pglb/socket.c pgcluster-1.7.0rc7/src/pgcluster/pglb/socket.c
20145--- postgresql-8.2.4/src/pgcluster/pglb/socket.c 1970-01-01 01:00:00.000000000 +0100
20146+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/socket.c 2007-02-18 22:52:17.000000000 +0100
20147@@ -0,0 +1,395 @@
20148+/*--------------------------------------------------------------------
20149+ * FILE:
20150+ * socket.c
20151+ *
20152+ * NOTE:
20153+ * This file is composed of the communication modules
20154+ *
20155+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
20156+ *--------------------------------------------------------------------
20157+ */
20158+/*
20159+ * Permission to use, copy, modify, and distribute this software and
20160+ * its documentation for any purpose and without fee is hereby
20161+ * granted, provided that the above copyright notice appear in all
20162+ * copies and that both that copyright notice and this permission
20163+ * notice appear in supporting documentation, and that the name of the
20164+ * author not be used in advertising or publicity pertaining to
20165+ * distribution of the software without specific, written prior
20166+ * permission. The author makes no representations about the
20167+ * suitability of this software for any purpose. It is provided "as
20168+ * is" without express or implied warranty.
20169+ *
20170+*/
20171+#include "postgres.h"
20172+#include <stdio.h>
20173+#include <string.h>
20174+#include <stdlib.h>
20175+#include <unistd.h>
20176+#include <sys/wait.h>
20177+#include <ctype.h>
20178+#include <sys/types.h>
20179+#include <sys/stat.h>
20180+#include <sys/socket.h>
20181+#include <sys/un.h>
20182+#include <sys/ipc.h>
20183+#include <netdb.h>
20184+#include <errno.h>
20185+#include <fcntl.h>
20186+#include <time.h>
20187+#include <sys/param.h>
20188+#include <sys/file.h>
20189+#include <netinet/in.h>
20190+#include <arpa/inet.h>
20191+
20192+#ifdef HAVE_SYS_SELECT_H
20193+#include <sys/select.h>
20194+#endif
20195+
20196+#ifdef HAVE_NETINET_TCP_H
20197+#include <netinet/tcp.h>
20198+#endif
20199+
20200+#include "replicate_com.h"
20201+#include "pglb.h"
20202+
20203+
20204+/*--------------------------------------
20205+ * PROTOTYPE DECLARATION
20206+ *--------------------------------------
20207+ */
20208+int PGRcreate_unix_domain_socket(char * sock_dir, unsigned short port);
20209+int PGRcreate_recv_socket(char * hostName , unsigned short portNumber);
20210+int PGRcreate_acception(int fd, char * hostName , unsigned short portNumber);
20211+void PGRclose_sock(int * sock);
20212+int PGRread_byte(int sock,char * buf,int len, int flag);
20213+int PGRcreate_cluster_socket( int * sock, ClusterTbl * ptr );
20214+
20215+static int create_send_socket(int * fdP, char * hostName , unsigned short portNumber);
20216+
20217+
20218+/*
20219+* create UNIX domain socket
20220+*/
20221+int
20222+PGRcreate_unix_domain_socket(char * sock_dir, unsigned short port)
20223+{
20224+ char * func = "PGRcreate_unix_domain_socket()";
20225+ struct sockaddr_un addr;
20226+ int fd;
20227+ int status;
20228+ int len;
20229+
20230+ /* set unix domain socket path */
20231+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
20232+ if (fd == -1)
20233+ {
20234+ show_error("%s:Failed to create UNIX domain socket. reason: %s",func, strerror(errno));
20235+ return -1;
20236+ }
20237+ memset((char *) &addr, 0, sizeof(addr));
20238+ ((struct sockaddr *)&addr)->sa_family = AF_UNIX;
20239+ snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/.s.PGSQL.%d",sock_dir,port);
20240+ len = sizeof(struct sockaddr_un);
20241+ status = bind(fd, (struct sockaddr *)&addr, len);
20242+ if (status == -1)
20243+ {
20244+ show_error("%s: bind() failed. reason: %s", func, strerror(errno));
20245+ return -1;
20246+ }
20247+
20248+ if (chmod(addr.sun_path, 0777) == -1)
20249+ {
20250+ show_error("%s: chmod() failed. reason: %s", func, strerror(errno));
20251+ return -1;
20252+ }
20253+
20254+ status = listen(fd, PGLB_MAX_SOCKET_QUEUE);
20255+ if (status < 0)
20256+ {
20257+ show_error("%s: listen() failed. reason: %s", func, strerror(errno));
20258+ return -1;
20259+ }
20260+ return fd;
20261+}
20262+
20263+int
20264+PGRcreate_recv_socket(char * hostName , unsigned short portNumber)
20265+{
20266+ char * func = "PGRcreate_recv_socket()";
20267+ int fd,err;
20268+ size_t len = 0;
20269+ struct sockaddr_in addr;
20270+ int one = 1;
20271+
20272+ if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
20273+ {
20274+ show_error("%s: socket() failed. (%s)", func, strerror(errno));
20275+ return -1;
20276+ }
20277+ if ((setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one))) == -1)
20278+ {
20279+ PGRclose_sock(&fd);
20280+ show_error("%s: setsockopt() failed. (%s)",func, strerror(errno));
20281+ return -1;
20282+ }
20283+ addr.sin_family = AF_INET;
20284+ if ((hostName == NULL) || (hostName[0] == '\0'))
20285+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
20286+ else
20287+ {
20288+ struct hostent *hp;
20289+
20290+ hp = gethostbyname(hostName);
20291+ if ((hp == NULL) || (hp->h_addrtype != AF_INET))
20292+ {
20293+ PGRclose_sock(&fd);
20294+ return -1;
20295+ }
20296+ memmove((char *) &(addr.sin_addr), (char *) hp->h_addr, hp->h_length);
20297+ }
20298+
20299+ addr.sin_port = htons(portNumber);
20300+ len = sizeof(struct sockaddr_in);
20301+
20302+ err = bind(fd, (struct sockaddr *) & addr, len);
20303+ if (err < 0)
20304+ {
20305+ PGRclose_sock(&fd);
20306+ show_error("%s: bind() failed. (%s)",func, strerror(errno));
20307+ return -1;
20308+ }
20309+ err = listen(fd, PGLB_MAX_SOCKET_QUEUE);
20310+ if (err < 0)
20311+ {
20312+ PGRclose_sock(&fd);
20313+ show_error("%s: listen() failed. (%s)", func, strerror(errno));
20314+ return -1;
20315+ }
20316+ return fd;
20317+}
20318+
20319+int
20320+PGRcreate_acception(int fd, char * hostName , unsigned short portNumber)
20321+{
20322+ char * func = "PGRcreate_acception()";
20323+ int sock;
20324+ struct sockaddr addr;
20325+ size_t len = 0;
20326+ int one = 1;
20327+ int count;
20328+
20329+ len = sizeof(struct sockaddr);
20330+ count = 0;
20331+ while ((sock = accept(fd,&addr,&len)) < 0)
20332+ {
20333+ show_error("%s:accept error",func);
20334+ PGRclose_sock(&fd);
20335+ if ( count > PGLB_CONNECT_RETRY_TIME)
20336+ {
20337+ return -1;
20338+ }
20339+ fd = PGRcreate_recv_socket(hostName , portNumber);
20340+ count ++;
20341+ }
20342+
20343+ count = 0;
20344+ while (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) < 0)
20345+ {
20346+ show_error("%s: setsockopt TCP_NODELAY error (%s)",func, strerror(errno));
20347+ if ( count > PGLB_CONNECT_RETRY_TIME)
20348+ {
20349+ return -1;
20350+ }
20351+ count ++;
20352+ }
20353+ count = 0;
20354+ while (setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one)) < 0)
20355+ {
20356+ show_error("%s:setsockopt SO_KEEPALIVE error (%s)",func,strerror(errno));
20357+ if ( count > PGLB_CONNECT_RETRY_TIME)
20358+ {
20359+ return -1;
20360+ }
20361+ count ++;
20362+ }
20363+
20364+ return sock;
20365+}
20366+
/*
 * Close the descriptor *sock refers to and mark it as closed (-1).
 *
 * A NULL pointer or an already invalid (negative) descriptor is ignored.
 * errno is left exactly as it was on entry, because callers log
 * strerror(errno) of an earlier failure after calling this function.
 */
void
PGRclose_sock(int * sock)
{
	int saved_errno = errno;

	if (sock == NULL)
	{
		return;
	}
	if (*sock >= 0)
	{
		close(*sock);
	}
	*sock = -1;
	errno = saved_errno;
}
20373+
/*
 * Receive up to len bytes from sock into buf with a single recv() call
 * (repeated only when interrupted by a signal).
 *
 * flag is passed to recv() unchanged; the function itself does not loop
 * until len bytes have arrived, so callers that need a complete message
 * have to pass MSG_WAITALL or handle short reads.
 *
 * Returns
 *   > 0  number of bytes received
 *     0  no data available (EAGAIN / EWOULDBLOCK)
 *    -1  peer closed the connection, connection reset, or other error.
 *        On ECONNRESET the socket is closed here as well.
 */
int
PGRread_byte(int sock,char * buf,int len, int flag)
{
	char * func = "PGRread_byte()";
	int r;

	for (;;)
	{
		r = recv(sock, buf, len, flag);
		if (r > 0)
		{
			return r;
		}
		if (r == 0)
		{
			/* orderly shutdown by the peer and nothing was read */
			return -1;
		}

		if (errno == EINTR)
		{
			continue;
		}
#ifdef EAGAIN
		if (errno == EAGAIN)
		{
			return 0;
		}
#endif
#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
		if (errno == EWOULDBLOCK)
		{
			show_error("%s:no data (%s)",func,strerror(errno));
			return 0;
		}
#endif
#ifdef ECONNRESET
		if (errno == ECONNRESET)
		{
			/* log before closing so errno still describes the recv() failure */
			show_error("%s:connection reset (%s)",func, strerror(errno));
			PGRclose_sock(&sock);
			return -1;
		}
#endif
		show_error("%s:recv() failed. (%s)",func,strerror(errno));
		return -1;
	}
}
20442+
20443+int
20444+PGRcreate_cluster_socket( int * sock, ClusterTbl * ptr )
20445+{
20446+ char * func = "PGRcreate_cluster_socket()";
20447+ int status = STATUS_ERROR;
20448+
20449+ /*
20450+ if (PGRis_connection_full(ptr) == 1)
20451+ {
20452+ return STATUS_ERROR;
20453+ }
20454+ */
20455+ if (ptr != (ClusterTbl *) NULL)
20456+ {
20457+ status = create_send_socket(sock, ptr->hostName, ptr->port) ;
20458+ }
20459+ else
20460+ {
20461+ show_error("%s:ClusterTbl is not initialize",func);
20462+ }
20463+ return status;
20464+}
20465+
20466+static int
20467+create_send_socket(int * fdP, char * hostName , unsigned short portNumber)
20468+{
20469+ char * func = "create_send_socket()";
20470+ int sock;
20471+ size_t len = 0;
20472+ struct sockaddr_in addr;
20473+ int fd;
20474+ int one = 1;
20475+
20476+#ifdef PRINT_DEBUG
20477+ show_debug("%s: host:%s port:%d",func, hostName,portNumber);
20478+#endif
20479+
20480+ memset((char *)&addr,0,sizeof(addr));
20481+
20482+ if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
20483+ {
20484+ * fdP = -1;
20485+
20486+ show_error("%s:socket() failed. (%s)",func, strerror(errno));
20487+ return STATUS_ERROR;
20488+ }
20489+ if ((setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one))) == -1)
20490+ {
20491+ PGRclose_sock(&fd);
20492+ * fdP = -1;
20493+ show_error("%s:setsockopt() failed. (%s)", func, strerror(errno));
20494+ return STATUS_ERROR;
20495+ return STATUS_ERROR;
20496+ }
20497+ if ((setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one))) == -1)
20498+ {
20499+ PGRclose_sock(&fd);
20500+ * fdP = -1;
20501+ show_error("%s:setsockopt() failed. (%s)", func, strerror(errno));
20502+ return STATUS_ERROR;
20503+ }
20504+ if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) < 0)
20505+ {
20506+ PGRclose_sock(&fd);
20507+ * fdP = -1;
20508+ show_error("%s:setsockopt() failed. (%s)",func, strerror(errno));
20509+ return STATUS_ERROR;
20510+ }
20511+
20512+ addr.sin_family = AF_INET;
20513+ if ((hostName == NULL) || (hostName[0] == '\0'))
20514+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
20515+ else
20516+ {
20517+ struct hostent *hp;
20518+
20519+ hp = gethostbyname(hostName);
20520+ if ((hp == NULL) || (hp->h_addrtype != AF_INET))
20521+ {
20522+ PGRclose_sock(&fd);
20523+ * fdP = -1;
20524+ return STATUS_ERROR;
20525+ }
20526+ memmove((char *) &(addr.sin_addr), (char *) hp->h_addr, hp->h_length);
20527+ }
20528+
20529+ addr.sin_port = htons(portNumber);
20530+ len = sizeof(struct sockaddr_in);
20531+
20532+ if ((sock = connect(fd,(struct sockaddr*)&addr,len)) < 0)
20533+ {
20534+ PGRclose_sock(&fd);
20535+ * fdP = -1;
20536+ return STATUS_ERROR;
20537+ }
20538+
20539+ * fdP = fd;
20540+ return STATUS_OK;
20541+}
20542+
20543diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/AUTHORS pgcluster-1.7.0rc7/src/pgcluster/pgrp/AUTHORS
20544--- postgresql-8.2.4/src/pgcluster/pgrp/AUTHORS 1970-01-01 01:00:00.000000000 +0100
20545+++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/AUTHORS 2007-02-18 22:52:17.000000000 +0100
20546@@ -0,0 +1,3 @@
20547+Authors of pgrp
20548+
20549+pgrp was written by Atsushi Mitani
20550diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/COPYING pgcluster-1.7.0rc7/src/pgcluster/pgrp/COPYING
20551--- postgresql-8.2.4/src/pgcluster/pgrp/COPYING 1970-01-01 01:00:00.000000000 +0100
20552+++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/COPYING 2007-02-18 22:52:17.000000000 +0100
20553@@ -0,0 +1,12 @@
20554+Copyright (c) 2003-2006 Atsushi Mitani
20555+
20556+Permission to use, copy, modify, and distribute this software and
20557+its documentation for any purpose and without fee is hereby
20558+granted, provided that the above copyright notice appear in all
20559+copies and that both that copyright notice and this permission
20560+notice appear in supporting documentation, and that the name of the
20561+author not be used in advertising or publicity pertaining to
20562+distribution of the software without specific, written prior
20563+permission. The author makes no representations about the
20564+suitability of this software for any purpose. It is provided "as
20565+is" without express or implied warranty.
20566diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/Makefile pgcluster-1.7.0rc7/src/pgcluster/pgrp/Makefile
20567--- postgresql-8.2.4/src/pgcluster/pgrp/Makefile 1970-01-01 01:00:00.000000000 +0100
20568+++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/Makefile 2007-02-18 22:52:17.000000000 +0100
20569@@ -0,0 +1,41 @@
#-------------------------------------------------------------------------
#
# Makefile for src/pgcluster/pgrp
#
# Builds and installs pgreplicate and its sample configuration file.
#
#-------------------------------------------------------------------------

subdir = src/pgcluster/pgrp
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global

# this setup is for V2 protocol
#OBJS= cascade.o conf.o main.o recovery.o replicate.o rlog.o
# this setup is for V3 protocol
OBJS= pqformat.o cascade.o conf.o main.o recovery.o replicate.o rlog.o lifecheck.o

# Objects built by other directories; they are linked in but not built here.
EXTRA_OBJS = $(top_builddir)/src/backend/libpq/replicate_com.o ../libpgc/SUBSYS.o

CFLAGS += -DPRINT_DEBUG
override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) -DBINDIR=\"$(bindir)\"

# None of these names a file; keep a stray file of the same name from
# silently disabling the target.
.PHONY: all install installdirs uninstall clean distclean maintainer-clean clean_obj

all: pgreplicate

# The output carries $(X) so that it matches the name install, uninstall
# and clean use.
pgreplicate: $(OBJS) $(libpq_builddir)/libpq.a
	$(CC) $(CFLAGS) $(OBJS) $(EXTRA_OBJS) $(libpq) $(libpq_builddir)/libpq.a $(LDFLAGS) $(LIBS) -o $@$(X)

install: all installdirs
	$(INSTALL_PROGRAM) pgreplicate$(X) $(DESTDIR)$(bindir)/pgreplicate$(X)
	$(INSTALL_DATA) pgreplicate.conf.sample $(DESTDIR)$(datadir)/pgreplicate.conf.sample

installdirs:
	$(mkinstalldirs) $(DESTDIR)$(bindir)
	$(mkinstalldirs) $(DESTDIR)$(datadir)

uninstall:
	rm -f $(addprefix $(DESTDIR)$(bindir)/, pgreplicate$(X))
	rm -f $(DESTDIR)$(datadir)/pgreplicate.conf.sample

clean distclean maintainer-clean:
	rm -f pgreplicate$(X) $(OBJS)

# Remove only the object files and keep the linked program.
clean_obj:
	rm -f $(OBJS)
20611diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/cascade.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/cascade.c
20612--- postgresql-8.2.4/src/pgcluster/pgrp/cascade.c 1970-01-01 01:00:00.000000000 +0100
20613+++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/cascade.c 2007-02-18 22:52:17.000000000 +0100
20614@@ -0,0 +1,928 @@
20615+/*--------------------------------------------------------------------
20616+ * FILE:
20617+ * cascade.c
20618+ *
20619+ * NOTE:
20620+ * This file is composed of the functions to call with the source
20621+ * at pgreplicate for backup and cascade .
20622+ *
20623+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
20624+ *--------------------------------------------------------------------
20625+ */
20626+#ifdef USE_REPLICATION
20627+
20628+#include "postgres.h"
20629+#include "postgres_fe.h"
20630+
20631+#include <stdio.h>
20632+#include <unistd.h>
20633+#ifdef HAVE_SYS_TYPES_H
20634+#include <sys/types.h>
20635+#endif
20636+#ifdef HAVE_FCNTL_H
20637+#include <fcntl.h>
20638+#endif
20639+#include <errno.h>
20640+#include <ctype.h>
20641+#include <time.h>
20642+#include <sys/ipc.h>
20643+#include <sys/shm.h>
20644+#include <sys/sem.h>
20645+#include <signal.h>
20646+#include <sys/socket.h>
20647+#ifdef HAVE_UNISTD_H
20648+#include <unistd.h>
20649+#endif
20650+#include <netdb.h>
20651+#ifdef HAVE_NETINET_TCP_H
20652+#include <netinet/tcp.h>
20653+#endif
20654+#include <dirent.h>
20655+#include <arpa/inet.h>
20656+
20657+#ifdef HAVE_CRYPT_H
20658+#include <crypt.h>
20659+#endif
20660+
20661+#ifdef MULTIBYTE
20662+#include "mb/pg_wchar.h"
20663+#endif
20664+
20665+#include "libpq-fe.h"
20666+#include "libpq-int.h"
20667+#include "fe-auth.h"
20668+
20669+#include "access/xact.h"
20670+#include "replicate_com.h"
20671+#include "pgreplicate.h"
20672+
20673+#if 0
20674+static int count_cascade(int flag);
20675+static void PGRinit_cascade_child(void);
20676+#endif
20677+
20678+static int fixup_socket_for_cascades(int *sock ,ReplicateServerInfo * target);
20679+static ReplicateServerInfo * get_cascade_data(int * cnt, int flag);
20680+static int add_cascade_data(ReplicateHeader * header, ReplicateServerInfo * add_data);
20681+static int update_cascade_data(ReplicateHeader * header, ReplicateServerInfo * update_data);
20682+static void write_cascade_status_file(ReplicateServerInfo * cascade);
20683+static int notice_cascade_data(int sock);
20684+static int notice_cascade_data_to_cluster_db(void);
20685+
/**
 * Socket variables, moved here from Cascade_Inf->(lower|upper)->sock.
 * Cascade_Inf is in shared memory, so a socket stored there may not have been
 * initialized in the process that uses it, and cascade I/O sometimes returned EBADF.
 * 05/10/05 tanida@sraoss.co.jp
 */
20691+
20692+static int lsock=-1; /* socket for lower-cascade. */
20693+static int usock=-1; /* socket for upper-cascade. */
20694+
20695+/*--------------------------------------
20696+ * PROTOTYPE DECLARATION
20697+ *--------------------------------------
20698+ */
20699+
20700+#if 0
20701+static int
20702+count_cascade(int flag)
20703+{
20704+ int cnt = 0;
20705+ int cascade_cnt = 0;
20706+ ReplicateServerInfo * cascade = NULL;
20707+
20708+ if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
20709+ {
20710+ return 0;
20711+ }
20712+
20713+ /* count cascadeing replication server */
20714+ switch (flag)
20715+ {
20716+ case UPPER_CASCADE:
20717+ case ALL_CASCADE:
20718+ cascade = Cascade_Tbl;
20719+ break;
20720+ case LOWER_CASCADE:
20721+ cascade = Cascade_Inf->myself;
20722+ break;
20723+ }
20724+
20725+ if (cascade == NULL)
20726+ {
20727+ return 0;
20728+ }
20729+ while (cascade->useFlag != DB_TBL_END)
20730+ {
20731+ if (cascade->useFlag == DB_TBL_USE)
20732+ {
20733+ cascade_cnt ++;
20734+ }
20735+ if ((flag == UPPER_CASCADE) &&
20736+ (cascade == Cascade_Inf->myself))
20737+ {
20738+ break;
20739+ }
20740+ cnt ++;
20741+ if (cnt >= MAX_DB_SERVER -1 )
20742+ {
20743+ break;
20744+ }
20745+ cascade ++;
20746+ }
20747+ return cascade_cnt;
20748+}
20749+
20750+static void
20751+PGRinit_cascade_child(void) {
20752+ fixup_socket_for_cascades(&usock,NULL);
20753+ fixup_socket_for_cascades(&lsock,NULL);
20754+}
20755+#endif /* if 0 */
20756+
20757+static ReplicateServerInfo *
20758+get_cascade_data(int * cnt, int flag)
20759+{
20760+ char * func = "get_cascade_data()";
20761+ int i = 0;
20762+ int loop_cnt = 0;
20763+ int size = 0;
20764+ ReplicateServerInfo * buf = NULL;
20765+ ReplicateServerInfo * cascade = NULL;
20766+
20767+ size = sizeof(ReplicateServerInfo) * MAX_DB_SERVER;
20768+ buf = (ReplicateServerInfo *)malloc(size);
20769+ if (buf == (ReplicateServerInfo *)NULL)
20770+ {
20771+ show_error("%s:malloc failed: (%s)",func,strerror(errno));
20772+ *cnt = 0;
20773+ return NULL;
20774+ }
20775+ memset(buf,0,size);
20776+
20777+ switch (flag)
20778+ {
20779+ case UPPER_CASCADE:
20780+ case ALL_CASCADE:
20781+ cascade = Cascade_Tbl;
20782+ break;
20783+ case LOWER_CASCADE:
20784+ cascade = Cascade_Inf->myself;
20785+ break;
20786+ default:
20787+ free(buf);
20788+ *cnt = 0;
20789+ return NULL;
20790+
20791+ }
20792+
20793+ if (cascade == NULL)
20794+ {
20795+ free(buf);
20796+ *cnt = 0;
20797+ return NULL;
20798+ }
20799+ PGRsem_lock(CascadeSemID,1);
20800+ i = 0;
20801+ loop_cnt = 0;
20802+ while (cascade->useFlag != DB_TBL_END)
20803+ {
20804+ if (cascade->useFlag == DB_TBL_USE)
20805+ {
20806+ (buf + i)->useFlag = htonl(cascade->useFlag);
20807+ strncpy((buf + i)->hostName,cascade->hostName,sizeof(cascade->hostName));
20808+ (buf + i)->portNumber = htons(cascade->portNumber);
20809+ (buf + i)->recoveryPortNumber = htons(cascade->recoveryPortNumber);
20810+ (buf + i)->lifecheckPortNumber = htons(cascade->lifecheckPortNumber);
20811+ i++;
20812+ }
20813+ if ((flag == UPPER_CASCADE) &&
20814+ (cascade == Cascade_Inf->myself))
20815+ {
20816+ break;
20817+ }
20818+ loop_cnt ++;
20819+ if (loop_cnt >= MAX_DB_SERVER -1 )
20820+ {
20821+ break;
20822+ }
20823+ if (Cascade_Inf->end == cascade)
20824+ {
20825+ break;
20826+ }
20827+ cascade ++;
20828+ }
20829+ *cnt = i;
20830+ PGRsem_unlock(CascadeSemID,1);
20831+
20832+ return buf;
20833+}
20834+
20835+static int
20836+update_cascade_data(ReplicateHeader * header, ReplicateServerInfo * update_data)
20837+{
20838+ char * func = "update_cascade_data()";
20839+ int size = 0;
20840+ int cnt = 0;
20841+ ReplicateServerInfo * ptr = NULL;
20842+ ReplicateServerInfo * cascade = NULL;
20843+ char hostName[HOSTNAME_MAX_LENGTH];
20844+
20845+
20846+ show_debug("executing %s",func);
20847+ if ((header == NULL ) || ( update_data == NULL))
20848+ {
20849+ show_error("%s:receive data is wrong",func);
20850+ return STATUS_ERROR;
20851+ }
20852+ if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
20853+ {
20854+ show_error("%s:config data read error",func);
20855+ return STATUS_ERROR;
20856+ }
20857+
20858+
20859+ size = ntohl(header->query_size);
20860+ cnt = size / sizeof(ReplicateServerInfo);
20861+ if (cnt >= MAX_DB_SERVER)
20862+ {
20863+ show_error("%s:update cascade data is too large. it's more than %d", func,MAX_DB_SERVER);
20864+ return STATUS_ERROR;
20865+ }
20866+
20867+ Cascade_Inf->useFlag = DB_TBL_INIT;
20868+ fixup_socket_for_cascades(&usock,NULL);
20869+ fixup_socket_for_cascades(&lsock,NULL);
20870+
20871+ Cascade_Inf->upper = NULL;
20872+ Cascade_Inf->lower = NULL;
20873+
20874+ gethostname(hostName,sizeof(hostName));
20875+ ptr = update_data;
20876+ cascade = Cascade_Tbl;
20877+ memset(cascade,0,(sizeof(ReplicateServerInfo)*MAX_DB_SERVER));
20878+ Cascade_Inf->top = cascade;
20879+ while (cnt > 0)
20880+ {
20881+
20882+ cascade->useFlag = ntohl(ptr->useFlag);
20883+ strncpy(cascade->hostName,ptr->hostName,sizeof(cascade->hostName));
20884+ cascade->portNumber = ntohs(ptr->portNumber);
20885+ cascade->recoveryPortNumber = ntohs(ptr->recoveryPortNumber);
20886+ cascade->lifecheckPortNumber = ntohs(ptr->lifecheckPortNumber);
20887+
20888+ if ((!strncmp(cascade->hostName,hostName,sizeof(cascade->hostName))) &&
20889+ (cascade->portNumber == Port_Number) &&
20890+ (cascade->recoveryPortNumber == Recovery_Port_Number))
20891+ {
20892+ Cascade_Inf->myself = cascade;
20893+ }
20894+
20895+ Cascade_Inf->end = cascade;
20896+ cascade ++;
20897+ ptr ++;
20898+ cnt --;
20899+ cascade->useFlag = DB_TBL_END;
20900+ }
20901+ Cascade_Inf->useFlag = DB_TBL_USE;
20902+
20903+ return STATUS_OK;
20904+}
20905+ /* Store the lower cascade servers listed in add_data right behind this server's own Cascade_Tbl entry; returns STATUS_OK or STATUS_ERROR. */
20906+ static int
20907+ add_cascade_data(ReplicateHeader * header, ReplicateServerInfo * add_data)
20908+ {
20909+ char *func = "add_cascade_data()";
20910+ int size = 0;
20911+ int cnt = 0;
20912+ ReplicateServerInfo * ptr = NULL;
20913+ ReplicateServerInfo * cascade = NULL;
20914+ char hostName[HOSTNAME_MAX_LENGTH];
20915+
20916+ if ((header == NULL ) || ( add_data == NULL))
20917+ {
20918+ show_error("%s:receive data is wrong",func);
20919+ return STATUS_ERROR;
20920+ }
20921+ if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL) || (Cascade_Inf->myself == NULL)) /* myself is dereferenced below */
20922+ {
20923+ show_error("%s:config data read error",func);
20924+ return STATUS_ERROR;
20925+ }
20926+ size = ntohl(header->query_size); /* payload size arrives in network byte order */
20927+ cnt = size / sizeof(ReplicateServerInfo);
20928+ if ((cnt < 0) || (cnt >= MAX_DB_SERVER - (int)(Cascade_Inf->myself - Cascade_Tbl) - 1)) /* need cnt slots after myself plus one for DB_TBL_END; assumes myself points into Cascade_Tbl */
20929+ {
20930+ show_error("%s:addtional cascade data is too large. it's more than %d", func,MAX_DB_SERVER);
20931+ return STATUS_ERROR;
20932+ }
20933+
20934+ Cascade_Inf->useFlag = DB_TBL_INIT;
20935+ fixup_socket_for_cascades(&lsock,NULL);
20936+ Cascade_Inf->lower = NULL;
20937+
20938+ gethostname(hostName,sizeof(hostName));
20939+ ptr = add_data;
20940+ cascade = Cascade_Inf->myself;
20941+ cascade ++;
20942+ while (cnt > 0)
20943+ {
20944+ cascade->useFlag = ntohl(ptr->useFlag);
20945+ strncpy(cascade->hostName,ptr->hostName,sizeof(cascade->hostName));
20946+ cascade->hostName[sizeof(cascade->hostName) - 1] = '\0'; /* strncpy does not terminate a full-length name */
20947+ cascade->portNumber = ntohs(ptr->portNumber);
20948+ cascade->recoveryPortNumber = ntohs(ptr->recoveryPortNumber);
20949+ cascade->lifecheckPortNumber = ntohs(ptr->lifecheckPortNumber);
20950+ cascade->replicate_id=-1;
20951+ cascade->response_mode=-1;
20952+
20953+ Cascade_Inf->end = cascade;
20954+
20955+ /* keep the entry unless it describes this server itself; a skipped slot is reused by the next entry */
20956+ if ((strncmp(cascade->hostName,hostName,sizeof(cascade->hostName)) != 0) ||
20957+ (cascade->portNumber != Port_Number) ||
20958+ (cascade->recoveryPortNumber != Recovery_Port_Number))
20959+ {
20960+ cascade ++;
20961+ }
20962+ /* always terminate the table at the current write position, even when the last entry was skipped */
20963+ cascade->useFlag = DB_TBL_END;
20964+ ptr ++;
20965+ cnt --;
20966+ }
20967+ Cascade_Inf->useFlag = DB_TBL_USE;
20968+ return STATUS_OK;
20969+ }
20970+ /* Send the data obtained from get_cascade_data(LOWER_CASCADE) to the upper cascade server and apply the table it answers with; returns STATUS_OK or STATUS_ERROR. */
20971+ int
20972+ PGRstartup_cascade(void)
20973+ {
20974+ char * func = "PGRstartup_cascade()";
20975+ int cnt = 0;
20976+ int status = STATUS_OK;
20977+ ReplicateHeader header;
20978+ ReplicateServerInfo * cascade = NULL;
20979+ ReplicateServerInfo * buf = NULL;
20980+
20981+ if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
20982+ {
20983+ show_error("%s:config data read error",func);
20984+ return STATUS_ERROR;
20985+ }
20986+
20987+ /* this server's own entry must be known before anything can be reported */
20988+ cascade = Cascade_Inf->myself;
20989+ if (cascade == NULL)
20990+ {
20991+ show_error("%s:cascade data initialize error",func);
20992+ return STATUS_ERROR;
20993+ }
20994+ buf = get_cascade_data(&cnt,LOWER_CASCADE);
20995+ if (cnt <= 0)
20996+ {
20997+ show_error("%s:cascade data get error",func);
20998+ return STATUS_ERROR;
20999+ }
21000+
21001+ memset(&header,0,sizeof(ReplicateHeader));
21002+ header.cmdSys = CMD_SYS_CASCADE;
21003+ header.cmdSts = CMD_STS_TO_UPPER;
21004+ header.cmdType = CMD_TYPE_ADD;
21005+ header.query_size = htonl(sizeof(ReplicateServerInfo) * cnt);
21006+
21007+ status = PGRsend_upper_cascade(&header, (char *)buf);
21008+ if (buf != NULL)
21009+ {
21010+ free(buf);
21011+ }
21012+ if (status == STATUS_OK)
21013+ {
21014+ memset(&header,0,sizeof(ReplicateHeader));
21015+ buf = PGRrecv_cascade_answer( Cascade_Inf->upper, &header);
21016+ if (buf == NULL)
21017+ {
21018+ status=STATUS_ERROR;
21019+ }
21020+ else
21021+ {
21022+ if ((header.cmdSys == CMD_SYS_CASCADE) &&
21023+ (header.cmdSts == CMD_STS_TO_LOWER) &&
21024+ (header.cmdType == CMD_TYPE_UPDATE_ALL))
21025+ status = update_cascade_data(&header,buf);
21026+ free(buf); /* release the answer even when its header is not an UPDATE_ALL packet */
21027+ }
21028+ }
21029+ show_debug("%s:startup packet result is %d",func,status);
21030+ return status;
21031+ }
21032+ /* Send header and query to a lower cascade server over lsock, reconnecting to another candidate when the send fails. */
21033+ int
21034+ PGRsend_lower_cascade(ReplicateHeader * header, char * query)
21035+ {
21036+ /* Returns STATUS_OK after the first successful PGRsend_cascade(), */
21037+ /* STATUS_ERROR once PGRget_lower_cascade() offers no candidate any more. */
21038+ char * func = "PGRsend_lower_cascade()";
21039+ ReplicateServerInfo *lower = PGRget_lower_cascade();
21040+
21041+
21042+ while(lower!=NULL)
21043+ {
21044+ /**
21045+ * check lower_cascade validity: try the already open socket first.
21046+ *
21047+ */
21048+ if(lsock!=-1 &&
21049+ PGRsend_cascade(lsock,header,query)==STATUS_OK)
21050+ {
21051+ return STATUS_OK;
21052+ }
21053+ else
21054+ {
21055+ /**
21056+ * current lower cascade is missing.
21057+ * fix socket , or go to next one.
21058+ *
21059+ */
21060+ while( lower!=NULL &&
21061+ fixup_socket_for_cascades(&lsock,lower)!=STATUS_OK)
21062+ {
21063+ show_error("%s:lower cascade maybe down,challenge new one.",func);
21064+ PGRset_cascade_server_status(lower,DB_TBL_ERROR);
21065+ lower =PGRget_lower_cascade();
21066+ }
21067+ }
21068+ Cascade_Inf->lower=lower;
21069+ }
21070+
21071+
21072+ return STATUS_ERROR;
21073+ }
21074+
21075+ /* Send header and query to an upper cascade server over usock, reconnecting to another candidate when the send fails. */
21076+ int
21077+ PGRsend_upper_cascade(ReplicateHeader * header, char * query)
21078+ {
21079+ char * func = "PGRsend_upper_cascade()";
21080+ ReplicateServerInfo *upper = PGRget_upper_cascade();
21081+
21082+ /* STATUS_OK after the first successful send; STATUS_ERROR when no upper candidate is left */
21083+ while(upper!=NULL)
21084+ {
21085+ /**
21086+ * check upper_cascade validity: try the already open socket first.
21087+ *
21088+ */
21089+ if(usock!=-1 &&
21090+ PGRsend_cascade(usock,header,query)==STATUS_OK)
21091+ {
21092+ return STATUS_OK;
21093+ }
21094+ else
21095+ {
21096+ /**
21097+ * current upper cascade is missing.
21098+ * fix socket , or go to next one.
21099+ *
21100+ */
21101+ while( upper!=NULL &&
21102+ fixup_socket_for_cascades(&usock,upper)!=STATUS_OK)
21103+ {
21104+ show_error("%s:upper cascade maybe down,challenge new one.",func);
21105+ PGRset_cascade_server_status(upper,DB_TBL_ERROR);
21106+ upper =PGRget_upper_cascade();
21107+ }
21108+ }
21109+ Cascade_Inf->upper=upper;
21110+ }
21111+
21112+ return STATUS_ERROR;
21113+ }
21114+ /* Return the first entry behind this server's own slot whose useFlag is DB_TBL_USE, or NULL when DB_TBL_END is reached first. */
21115+ ReplicateServerInfo *
21116+ PGRget_lower_cascade(void)
21117+ {
21118+ char * func = "PGRget_lower_cascade()";
21119+ ReplicateServerInfo * cascade = NULL;
21120+
21121+ if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
21122+ {
21123+ show_error("%s:config data read error",func);
21124+ return NULL;
21125+ }
21126+
21127+ /* search lower server, starting from this server's own entry */
21128+
21129+ cascade = Cascade_Inf->myself;
21130+ if (cascade == NULL)
21131+ {
21132+ show_error("%s:cascade data initialize error",func);
21133+ return NULL;
21134+ }
21135+ if (cascade->useFlag != DB_TBL_END) /* step past the own entry unless it is already the end marker */
21136+ {
21137+ cascade ++;
21138+ }
21139+ while (cascade->useFlag != DB_TBL_END) /* the scan relies on the DB_TBL_END sentinel; there is no separate bound */
21140+ {
21141+#ifdef PRINT_DEBUG
21142+ show_debug("%s:lower cascade search[%d]@[%s] use[%d]",
21143+ func,
21144+ cascade->portNumber,
21145+ cascade->hostName,
21146+ cascade->useFlag);
21147+#endif
21148+ if (cascade->useFlag == DB_TBL_USE)
21149+ {
21150+#ifdef PRINT_DEBUG
21151+ show_debug("%s:find lower cascade",func);
21152+#endif
21153+ return cascade;
21154+ }
21155+ cascade ++;
21156+ }
21157+ return NULL;
21158+ }
21159+ /* Walk backwards from this server's own slot and return the nearest entry whose useFlag is DB_TBL_USE, or NULL. */
21160+ ReplicateServerInfo *
21161+ PGRget_upper_cascade(void)
21162+ {
21163+ char * func = "PGRget_upper_cascade()";
21164+ ReplicateServerInfo * cascade = NULL;
21165+
21166+ if ((Cascade_Tbl == NULL) || (Cascade_Inf == NULL))
21167+ {
21168+ show_error("%s:config data read error",func);
21169+ return NULL;
21170+ }
21171+
21172+ /* a server that is itself the top entry has no upper server */
21173+ /* search upper server */
21174+ cascade = Cascade_Inf->myself;
21175+ if ((cascade == NULL) || (Cascade_Inf->top == cascade))
21176+ {
21177+ return NULL;
21178+ }
21179+ cascade --;
21180+ while (cascade != NULL) /* the loop is left through the return or the break below */
21181+ {
21182+ if (cascade->useFlag == DB_TBL_USE)
21183+ {
21184+ return cascade;
21185+ }
21186+ if (Cascade_Inf->top == cascade) /* reached the top entry without finding a usable one */
21187+ {
21188+ break;
21189+ }
21190+ cascade --;
21191+ }
21192+ return NULL;
21193+ }
21194+ /* Write one line describing cascade->useFlag to StatusFp via PGRwrite_log_file(); flags not listed in the switch write nothing. */
21195+ static void
21196+ write_cascade_status_file(ReplicateServerInfo * cascade)
21197+ {
21198+ switch( cascade->useFlag) /* cascade is dereferenced without a NULL check */
21199+ {
21200+ case DB_TBL_FREE:
21201+ PGRwrite_log_file(StatusFp,"cascade(%s) port(%d) free",
21202+ cascade->hostName,
21203+ cascade->portNumber);
21204+ break;
21205+ case DB_TBL_INIT:
21206+ PGRwrite_log_file(StatusFp,"cascade(%s) port(%d) initialize",
21207+ cascade->hostName,
21208+ cascade->portNumber);
21209+ break;
21210+ case DB_TBL_USE:
21211+ PGRwrite_log_file(StatusFp,"cascade(%s) port(%d) start use",
21212+ cascade->hostName,
21213+ cascade->portNumber);
21214+ break;
21215+ case DB_TBL_ERROR:
21216+ PGRwrite_log_file(StatusFp,"cascade(%s) port(%d) error",
21217+ cascade->hostName,
21218+ cascade->portNumber);
21219+ break;
21220+ case DB_TBL_TOP:
21221+ PGRwrite_log_file(StatusFp,"cascade(%s) port(%d) become top",
21222+ cascade->hostName,
21223+ cascade->portNumber);
21224+ break;
21225+ }
21226+ }
21227+ /* Set cascade->useFlag to status and log the change; does nothing when cascade is NULL or the flag is unchanged. */
21228+ void
21229+ PGRset_cascade_server_status(ReplicateServerInfo * cascade, int status)
21230+ {
21231+ if (cascade == NULL)
21232+ {
21233+ return;
21234+ }
21235+ if (cascade->useFlag != status) /* only real transitions are written to the status file */
21236+ {
21237+ cascade->useFlag = status;
21238+ write_cascade_status_file(cascade);
21239+ }
21240+ }
21241+ /* Read one packet from the socket associated with cascade and return its body (result of PGRread_packet()); NULL on bad arguments. */
21242+ ReplicateServerInfo *
21243+ PGRrecv_cascade_answer(ReplicateServerInfo * cascade,ReplicateHeader * header)
21244+ {
21245+ ReplicateServerInfo * answer = NULL;
21246+ int sock;
21247+
21248+ if ((cascade == NULL) || (header == NULL))
21249+ {
21250+ return NULL;
21251+ }
21252+ /* pick usock / lsock for the current upper / lower server, otherwise fall back to cascade->sock */
21253+ /* FIXME: ReplicateServerInfo->sock must be removed in cascading. */
21254+ if(cascade == Cascade_Inf->upper )
21255+ {
21256+ sock=usock;
21257+ }
21258+ else if (cascade == Cascade_Inf->lower )
21259+ {
21260+ sock=lsock;
21261+ }
21262+ else
21263+ {
21264+ show_debug("PGRrecv_cascade_answer:receiving packet from sock not belogs to cascade->upper / lower. maybe missing .");
21265+ sock=cascade->sock;
21266+ }
21267+ answer = (ReplicateServerInfo*)PGRread_packet(sock,header); /* callers in this file free() the returned buffer */
21268+ return answer;
21269+ }
21270+ /* Send header followed by query_size bytes of query on sock, waiting with select() until everything is written; returns STATUS_OK or STATUS_ERROR. */
21271+ int
21272+ PGRsend_cascade(int sock , ReplicateHeader * header, char * query)
21273+ {
21274+ char * func ="PGRsend_cascade()";
21275+ int s;
21276+ char * send_ptr;
21277+ char * buf;
21278+ int send_size = 0;
21279+ int buf_size;
21280+ int header_size;
21281+ int rtn;
21282+ fd_set wmask;
21283+ struct timeval timeout;
21284+ int query_size = 0;
21285+
21286+ /* check parameter */
21287+ if ((header == NULL) || (sock == -1))
21288+ {
21289+ return STATUS_ERROR;
21290+ }
21291+
21292+#ifdef PRINT_DEBUG
21293+ show_debug("%s:PGRsend_cascade sock[%d]",func,sock);
21294+#endif
21295+ query_size = ntohl(header->query_size); /* header carries the payload size in network byte order */
21296+ header_size = sizeof(ReplicateHeader);
21297+ buf_size = header_size + query_size; /* number of bytes that go on the wire */
21298+ buf = calloc(1, buf_size + 4); /* zero-filled, with the same 4 spare bytes as before */
21299+ if (buf == NULL)
21300+ return STATUS_ERROR; /* allocation failed (or query_size was bogus) */
21301+ memcpy(buf,header,header_size);
21302+ if ((query_size > 0) && (query != NULL))
21303+ {
21304+ memcpy((char *)(buf+header_size),query,query_size); /* exactly query_size bytes: payload may be binary and unterminated */
21305+ }
21306+ send_ptr = buf;
21307+
21308+ for (;;)
21309+ {
21310+ timeout.tv_sec = 10;
21311+ timeout.tv_usec = 0;
21312+
21313+ /*
21314+ * Wait for something to happen.
21315+ */
21316+ FD_ZERO(&wmask);
21317+ FD_SET(sock,&wmask);
21318+ rtn = select(sock+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
21319+
21320+ if (rtn < 0)
21321+ {
21322+ if (errno == EINTR || errno == EAGAIN)
21323+ continue;
21324+
21325+ show_error("%s:select failed ,errno is %s",func , strerror(errno));
21326+ free(buf);
21327+ return STATUS_ERROR;
21328+ }
21329+
21330+ if (rtn && FD_ISSET(sock, &wmask)) /* a select() timeout (rtn == 0) simply retries */
21331+ {
21332+ s = send(sock,send_ptr + send_size,buf_size - send_size ,0);
21333+ if (s < 0)
21334+ {
21335+ if (errno == EINTR || errno == EAGAIN)
21336+ continue;
21337+ else
21338+ {
21339+ show_error("%s:send failed: %d(%s)",func, errno, strerror(errno));
21340+ free(buf);
21341+ return STATUS_ERROR;
21342+ }
21343+ }
21344+ else if (s == 0)
21345+ {
21346+ show_error("%s:unexpected EOF", func);
21347+ free(buf);
21348+ return STATUS_ERROR;
21349+ }
21350+ send_size += s; /* partial writes are continued on the next round */
21351+ if (send_size == buf_size)
21352+ {
21353+#ifdef PRINT_DEBUG
21354+ show_debug("%s:send[%s] size[%d]",func,query,send_size);
21355+#endif
21356+ free(buf);
21357+ return STATUS_OK;
21358+ }
21359+ }
21360+ }
21361+ return STATUS_OK;
21362+ }
21363+ /* Read one packet from sock and discard it; STATUS_OK when PGRread_packet() returned a buffer, STATUS_ERROR otherwise. */
21364+ int
21365+ PGRwait_answer_cascade(int sock)
21366+ {
21367+ ReplicateHeader header;
21368+ char * answer = NULL;
21369+
21370+ answer = PGRread_packet(sock,&header);
21371+ if (answer != NULL)
21372+ {
21373+ free(answer); /* only the arrival of the answer matters, not its content */
21374+ return STATUS_OK;
21375+ }
21376+ return STATUS_ERROR;
21377+ }
21378+/**
21379+ * fixup_socket_for_cascades closes *sock (if open) and reconnects it to target.
21380+ * returns STATUS_OK if succeeded , or STATUS_ERROR if some error occurred.
21381+ * if target is null , only close socket.
21382+ *
21383+ * originally written by tanida@sraoss.co.jp
21384+ */
21385+ static int
21386+ fixup_socket_for_cascades(int *sock, ReplicateServerInfo *target)
21387+ {
21388+ if (*sock > 0) /* NOTE(review): descriptor 0 is treated as "not open" here - confirm this is intended */
21389+ {
21390+ close(*sock);
21391+ *sock=-1;
21392+ }
21393+ if(target!=NULL) {
21394+ return PGR_Create_Socket_Connect(sock,target->hostName,target->portNumber);
21395+ }
21396+ return STATUS_OK;
21397+ }
21398+
21399+ /* Send the data from get_cascade_data(ALL_CASCADE) on sock as a CASCADE / TO_LOWER / UPDATE_ALL packet. */
21400+ static int
21401+ notice_cascade_data(int sock)
21402+ {
21403+ char * func = "notice_cascade_data";
21404+ ReplicateServerInfo *cascade_data = NULL;
21405+ ReplicateHeader header;
21406+ int cnt = 0;
21407+ int size = 0;
21408+
21409+ if (sock <= 0)
21410+ {
21411+ return STATUS_ERROR;
21412+ }
21413+
21414+ cascade_data = get_cascade_data(&cnt, ALL_CASCADE );
21415+ if (cnt <= 0)
21416+ {
21417+ show_error("%s:cascade data is wrong",func);
21418+ return STATUS_ERROR;
21419+ }
21420+ size = sizeof (ReplicateServerInfo) * cnt ;
21421+
21422+ memset(&header,0,sizeof(ReplicateHeader));
21423+ header.cmdSys = CMD_SYS_CASCADE ;
21424+ header.cmdSts = CMD_STS_TO_LOWER ;
21425+ header.cmdType = CMD_TYPE_UPDATE_ALL;
21426+ header.query_size = htonl(size);
21427+ PGRsend_cascade(sock, &header, (char *)cascade_data ); /* NOTE(review): the send result is ignored; STATUS_OK is returned regardless */
21428+ if (cascade_data != NULL)
21429+ {
21430+ free(cascade_data);
21431+ }
21432+ return STATUS_OK;
21433+ }
21434+ /* Dispatch a received cascade packet by header->cmdSts; only TO_UPPER / CMD_TYPE_ADD is acted on. Always returns STATUS_OK. */
21435+ int
21436+ PGRcascade_main(int sock, ReplicateHeader * header, char * query)
21437+ {
21438+ switch (header->cmdSts) /* header is dereferenced without a NULL check */
21439+ {
21440+ case CMD_STS_TO_UPPER:
21441+ if (header->cmdType == CMD_TYPE_ADD)
21442+ {
21443+ /* add lower cascade data to myself */
21444+ add_cascade_data(header,(ReplicateServerInfo*)query);
21445+ /* send cascade data to upper */
21446+ /* and receive new cascade data from upper */
21447+ PGRstartup_cascade();
21448+ /* return to lower with new cascade data */
21449+ notice_cascade_data(sock);
21450+ /* notifies a cascade server's information to Cluster DBs */
21451+ notice_cascade_data_to_cluster_db();
21452+ }
21453+ break;
21454+ case CMD_STS_TO_LOWER:
21455+ /*
21456+ * use for cascading replication
21457+ */
21458+ break;
21459+ }
21460+ return STATUS_OK;
21461+ }
21462+ /* Pass the current lower cascade server's address and ports to PGRnotice_replication_server(); STATUS_ERROR when there is no lower server. */
21463+ static int
21464+ notice_cascade_data_to_cluster_db(void)
21465+ {
21466+ char userName[USERNAME_MAX_LENGTH];
21467+ ReplicateServerInfo *s=NULL;
21468+ char *logName = getenv("LOGNAME"); /* NULL when LOGNAME is not set in the environment */
21469+ if (Cascade_Inf->lower == NULL)
21470+ {
21471+ Cascade_Inf->lower = PGRget_lower_cascade();
21472+ }
21473+ if (Cascade_Inf->lower == NULL)
21474+ {
21475+ return STATUS_ERROR;
21476+ }
21477+ s=Cascade_Inf->lower;
21478+ memset(userName,0,sizeof(userName)); /* stays an empty string when LOGNAME is unset */
21479+ if (logName != NULL)
21480+ strncpy(userName ,logName,sizeof(userName)-1);
21481+ PGRnotice_replication_server(s->hostName,
21482+ s->portNumber,
21483+ s->recoveryPortNumber,
21484+ s->lifecheckPortNumber,
21485+ userName);
21486+
21487+ return STATUS_OK;
21488+ }
21489+ /* Read one packet from lsock and discard it; STATUS_ERROR only when lsock is not open (-1). */
21490+ int
21491+ PGRwait_notice_rlog_done(void)
21492+ {
21493+ ReplicateHeader header;
21494+ if (lsock != -1)
21495+ {
21496+ free(PGRread_packet(lsock,&header)); /* the packet body is heap memory (freed elsewhere in this file); free(NULL) is a no-op */
21497+ return STATUS_OK;
21498+ }
21499+ return STATUS_ERROR;
21500+
21501+ }
21502+
21503+ /* Send a "QUIT_SAFELY" packet (CMD_SYS_CALL / CMD_STS_RESPONSE / CMD_TYPE_FRONTEND_CLOSED) to the lower cascade and wait for one packet back. */
21504+ int
21505+ PGRsend_notice_quit(void )
21506+ {
21507+ ReplicateHeader header;
21508+ int size = 0;
21509+
21510+ size = strlen("QUIT_SAFELY");
21511+ memset(&header,0,sizeof(ReplicateHeader));
21512+ header.cmdSys = CMD_SYS_CALL ;
21513+ header.cmdSts = CMD_STS_RESPONSE ;
21514+ header.cmdType = CMD_TYPE_FRONTEND_CLOSED;
21515+ header.query_size = htonl(size);
21516+ PGRsend_lower_cascade(&header, "QUIT_SAFELY"); /* NOTE(review): the result is ignored; the wait below runs even if the send failed */
21517+ PGRwait_notice_rlog_done();
21518+ return STATUS_OK;
21519+ }
21520+ /* Send PGR_QUERY_DONE_NOTICE_CMD on sock as a CMD_SYS_CASCADE / CMD_STS_RESPONSE packet; STATUS_ERROR only when sock <= 0. */
21521+ int
21522+ PGRsend_notice_rlog_done(int sock)
21523+ {
21524+ ReplicateHeader header;
21525+ int size = 0;
21526+
21527+ if (sock <= 0)
21528+ {
21529+ return STATUS_ERROR;
21530+ }
21531+
21532+ size = strlen(PGR_QUERY_DONE_NOTICE_CMD);
21533+ memset(&header,0,sizeof(ReplicateHeader));
21534+ header.cmdSys = CMD_SYS_CASCADE ;
21535+ header.cmdSts = CMD_STS_RESPONSE ;
21536+ header.cmdType = 0;
21537+ header.query_size = htonl(size);
21538+ PGRsend_cascade(sock, &header, PGR_QUERY_DONE_NOTICE_CMD); /* NOTE(review): the send result is ignored */
21539+ return STATUS_OK;
21540+
21541+ }
21542+#endif /* USE_REPLICATION */
21543diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/conf.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/conf.c
21544--- postgresql-8.2.4/src/pgcluster/pgrp/conf.c 1970-01-01 01:00:00.000000000 +0100
21545+++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/conf.c 2007-02-18 22:52:17.000000000 +0100
21546@@ -0,0 +1,694 @@
21547+/*--------------------------------------------------------------------
21548+ * FILE:
21549+ * conf.c
21550+ * Replication server for PostgreSQL
21551+ *
21552+ * NOTE:
21553+ * Read and set configuration data in this module.
21554+ *
21555+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
21556+ *--------------------------------------------------------------------
21557+ */
21558+#include "postgres.h"
21559+
21560+#include <stdio.h>
21561+#include <string.h>
21562+#include <unistd.h>
21563+#include <sys/types.h>
21564+#include <fcntl.h>
21565+#include <ctype.h>
21566+#include <sys/stat.h>
21567+#include <sys/ipc.h>
21568+#include <sys/shm.h>
21569+#include <sys/sem.h>
21570+#include <netdb.h>
21571+#include <errno.h>
21572+#include <sys/file.h>
21573+
21574+
21575+
21576+#include "libpq-fe.h"
21577+#include "libpq-int.h"
21578+#include "fe-auth.h"
21579+
21580+#include "replicate_com.h"
21581+#include "pgreplicate.h"
21582+
21583+/*--------------------------------------------------------------------
21584+ * SYMBOL
21585+ * PGRget_Conf_Data()
21586+ * NOTES
21587+ * Initialize memory and tables
21588+ * ARGS
21589+ * char * path: path of the setup file (I)
21590+ * RETURN
21591+ * OK: STATUS_OK
21592+ * NG: STATUS_ERROR
21593+ *--------------------------------------------------------------------
21594+ */
21595+int
21596+PGRget_Conf_Data(char * path)
21597+{
21598+ char * func = "PGRget_Conf_Data()";
21599+ HostTbl host_tbl[MAX_DB_SERVER];
21600+ ConfDataType * conf = NULL;
21601+ int cnt = 0;
21602+ int lb_cnt = 0;
21603+ int cascade_cnt = 0;
21604+ int rec_no = 0;
21605+ int lb_rec_no = 0;
21606+ int cascade_rec_no = -1;
21607+ int i = 0;
21608+ int size = 0;
21609+ char fname[256];
21610+ union semun sem_arg;
21611+
21612+ /*
21613+ * open log file
21614+ */
21615+ if (path == NULL)
21616+ {
21617+ path = ".";
21618+ }
21619+ size = sizeof(LogFileInf);
21620+ LogFileData = (LogFileInf *) malloc(size);
21621+ if (LogFileData == NULL)
21622+ {
21623+ show_error("%s:malloc() failed. reason: %s", func,strerror(errno));
21624+ return STATUS_ERROR;
21625+ }
21626+ memset(LogFileData,0,size);
21627+
21628+ snprintf(fname,sizeof(fname),"%s/%s",path,PGREPLICATE_STATUS_FILE);
21629+ StatusFp = fopen(fname,"a");
21630+ if (StatusFp == NULL)
21631+ {
21632+ show_error("%s:fopen failed: (%s)",func,strerror(errno));
21633+ return STATUS_ERROR;
21634+ }
21635+
21636+ snprintf(fname,sizeof(fname),"%s/%s",path,PGREPLICATE_RID_FILE);
21637+ RidFp = fopen(fname,"r+");
21638+ if (RidFp == NULL)
21639+ {
21640+ RidFp = fopen(fname,"w+");
21641+ if (RidFp == NULL)
21642+ {
21643+ show_error("%s:fopen failed: (%s)",func,strerror(errno));
21644+ return STATUS_ERROR;
21645+ }
21646+ }
21647+
21648+ /*
21649+ * read configuration file
21650+ */
21651+ if (PGR_Get_Conf_Data(path,PGREPLICATE_CONF_FILE) != STATUS_OK)
21652+ {
21653+ show_error("%s:PGR_Get_Conf_Data failed",func);
21654+ return STATUS_ERROR;
21655+ }
21656+#ifdef PRINT_DEBUG
21657+ show_debug("PGR_Get_Conf_Data ok");
21658+#endif
21659+
21660+ /* allocate response information table */
21661+ PGR_Response_Inf = (ResponseInf *)malloc(sizeof(ResponseInf));
21662+ if (PGR_Response_Inf == NULL)
21663+ {
21664+ show_error("%s:malloc() failed. reason: %s", func,strerror(errno));
21665+ return STATUS_ERROR;
21666+ }
21667+ PGR_Response_Inf->response_mode = PGR_NORMAL_MODE;
21668+ PGR_Response_Inf->current_cluster = 0;
21669+
21670+ /*
21671+ * memory allocate load balance table buffer
21672+ */
21673+ LoadBalanceTbl = (RecoveryTbl *)malloc(sizeof(RecoveryTbl)*MAX_DB_SERVER);
21674+ if (LoadBalanceTbl == (RecoveryTbl *)NULL)
21675+ {
21676+ show_error("%s:malloc failed: (%s)",func,strerror(errno));
21677+ return STATUS_ERROR;
21678+ }
21679+#ifdef PRINT_DEBUG
21680+ show_debug("LoadBalanceTbl allocate ok");
21681+#endif
21682+
21683+ /*
21684+ * memory allocate cascade server table buffer
21685+ */
21686+ size = sizeof(ReplicateServerInfo) * MAX_DB_SERVER;
21687+ CascadeTblShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
21688+ if (CascadeTblShmid < 0)
21689+ {
21690+ show_error("%s:shmget() failed. reason: %s", func,strerror(errno));
21691+ return STATUS_ERROR;
21692+ }
21693+#ifdef PRINT_DEBUG
21694+ show_debug("%s:CascadeTbl shmget ok",func);
21695+#endif
21696+ Cascade_Tbl = (ReplicateServerInfo *)shmat(CascadeTblShmid,0,0);
21697+ if (Cascade_Tbl == (ReplicateServerInfo *)-1)
21698+ {
21699+ show_error("%s:shmat() failed. reason: %s", func,strerror(errno));
21700+ return STATUS_ERROR;
21701+ }
21702+#ifdef PRINT_DEBUG
21703+ show_debug("%s:CascadeTbl shmat ok",func);
21704+#endif
21705+ memset(Cascade_Tbl , 0 , size );
21706+
21707+ /*
21708+ * memory allocate cascade index
21709+ */
21710+ size = sizeof(CascadeInf);
21711+ CascadeInfShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
21712+ if (CascadeInfShmid < 0)
21713+ {
21714+ show_error("%s:shmget() failed. reason: %s", func,strerror(errno));
21715+ return STATUS_ERROR;
21716+ }
21717+#ifdef PRINT_DEBUG
21718+ show_debug("%s:CascadeInf shmget ok",func);
21719+#endif
21720+ Cascade_Inf = (CascadeInf *)shmat(CascadeInfShmid,0,0);
21721+ if (Cascade_Inf == (CascadeInf *)-1)
21722+ {
21723+ show_error("%s:shmat() failed. reason: %s",func, strerror(errno));
21724+ return STATUS_ERROR;
21725+ }
21726+#ifdef PRINT_DEBUG
21727+ show_debug("%s:CascadeInf shmat ok",func);
21728+#endif
21729+ memset(Cascade_Inf , 0 , size );
21730+
21731+ /*
21732+ * memory allocate replication commit log buffer
21733+ */
21734+ size = sizeof(CommitLogInf) * MAX_DB_SERVER * MAX_CONNECTIONS;
21735+ CommitLogShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
21736+ if (CommitLogShmid < 0)
21737+ {
21738+ show_error("%s:shmget() failed. reason: %s", func, strerror(errno));
21739+ return STATUS_ERROR;
21740+ }
21741+#ifdef PRINT_DEBUG
21742+ show_debug("%s:CommitLog shmget ok",func);
21743+#endif
21744+ Commit_Log_Tbl = (CommitLogInf *)shmat(CommitLogShmid,0,0);
21745+ if (Commit_Log_Tbl == (CommitLogInf *)-1)
21746+ {
21747+ show_error("%s:shmat() failed. reason: %s",func, strerror(errno));
21748+ return STATUS_ERROR;
21749+ }
21750+#ifdef PRINT_DEBUG
21751+ show_debug("%s:Commit_Log_Tbl shmat ok",func);
21752+#endif
21753+ memset(Commit_Log_Tbl , 0 , size );
21754+ (Commit_Log_Tbl + (MAX_DB_SERVER * MAX_CONNECTIONS) -1)->inf.useFlag = DB_TBL_END;
21755+
21756+ /* create semapho */
21757+ if ((SemID = semget(IPC_PRIVATE,2,IPC_CREAT | IPC_EXCL | 0600)) < 0)
21758+ {
21759+ show_error("%s:semget() failed. (%s)",func,strerror(errno));
21760+ return STATUS_ERROR;
21761+ }
21762+ for ( i = 0 ; i < 2 ; i ++)
21763+ {
21764+ semctl(SemID, i, GETVAL, sem_arg);
21765+ sem_arg.val = 1;
21766+ semctl(SemID, i, SETVAL, sem_arg);
21767+ }
21768+
21769+ /* create semapho */
21770+ if ((CascadeSemID = semget(IPC_PRIVATE,2,IPC_CREAT | IPC_EXCL | 0600)) < 0)
21771+ {
21772+ show_error("%s:semget() failed. (%s)",func,strerror(errno));
21773+ return STATUS_ERROR;
21774+ }
21775+ for ( i = 0 ; i < 2 ; i ++)
21776+ {
21777+ semctl(CascadeSemID, i, GETVAL, sem_arg);
21778+ sem_arg.val = 1;
21779+ semctl(CascadeSemID, i, SETVAL, sem_arg);
21780+ }
21781+
21782+
21783+ if ((VacuumSemID = semget(IPC_PRIVATE,2,IPC_CREAT | IPC_EXCL | 0600)) < 0)
21784+ {
21785+ show_error("%s:semget() failed. (%s)",func,strerror(errno));
21786+ return STATUS_ERROR;
21787+ }
21788+ for ( i = 0 ; i < 2 ; i ++)
21789+ {
21790+ semctl(VacuumSemID, i, GETVAL, sem_arg);
21791+ sem_arg.val = 1;
21792+ semctl(VacuumSemID, i, SETVAL, sem_arg);
21793+ }
21794+ size = sizeof(ReplicationLogInf);
21795+ Replicateion_Log = malloc(size);
21796+ if (Replicateion_Log == NULL)
21797+ {
21798+ show_error("%s:malloc failed: (%s)",func,strerror(errno));
21799+ return STATUS_ERROR;
21800+ }
21801+ memset(Replicateion_Log , 0 , size );
21802+ Replicateion_Log->RLog_Sock_Path = NULL;
21803+#ifdef PRINT_DEBUG
21804+ show_debug("%s:RLog Memory Allocation ok",func);
21805+#endif
21806+
21807+
21808+ /*
21809+ * set each datas into the tables
21810+ */
21811+ conf = ConfData_Top;
21812+ while (conf != (ConfDataType *)NULL)
21813+ {
21814+ show_debug("registering (key,value)=(%s,%s)",conf->key,conf->value);
21815+ /* get cluster db data */
21816+ if (!STRCMP(conf->table,CLUSTER_SERVER_TAG))
21817+ {
21818+ rec_no = conf->rec_no;
21819+ if (cnt < rec_no)
21820+ {
21821+ cnt = rec_no;
21822+ if (cnt >= MAX_DB_SERVER)
21823+ {
21824+ continue;
21825+ }
21826+ }
21827+ if (!STRCMP(conf->key,HOST_NAME_TAG))
21828+ {
21829+ int ip;
21830+ strncpy(host_tbl[rec_no].hostName,conf->value,sizeof(host_tbl[rec_no].hostName));
21831+ show_debug("registering hostname %s",host_tbl[rec_no].hostName);
21832+ ip=PGRget_ip_by_name(conf->value);
21833+
21834+ sprintf(host_tbl[rec_no].resolvedName,
21835+ "%d.%d.%d.%d",
21836+ (ip ) & 0xff ,
21837+ (ip >> 8) & 0xff ,
21838+ (ip >> 16) & 0xff ,
21839+ (ip >> 24) & 0xff );
21840+ show_debug("resolved name is %s",host_tbl[rec_no].resolvedName);
21841+
21842+ conf = (ConfDataType*)conf->next;
21843+ continue;
21844+ }
21845+ if (!STRCMP(conf->key,PORT_TAG))
21846+ {
21847+ host_tbl[rec_no].port = atoi(conf->value);
21848+ conf = (ConfDataType*)conf->next;
21849+ continue;
21850+ }
21851+ if (!STRCMP(conf->key,RECOVERY_PORT_TAG))
21852+ {
21853+ host_tbl[rec_no].recoveryPort = atoi(conf->value);
21854+ conf = (ConfDataType*)conf->next;
21855+ continue;
21856+ }
21857+ }
21858+ /* get cascade server data */
21859+ else if (!STRCMP(conf->table, REPLICATION_SERVER_INFO_TAG))
21860+ {
21861+ cascade_rec_no = conf->rec_no ;
21862+ if (cascade_cnt < cascade_rec_no)
21863+ {
21864+ cascade_cnt = cascade_rec_no;
21865+ if (cascade_cnt >= MAX_DB_SERVER)
21866+ {
21867+ continue;
21868+ }
21869+ }
21870+ if (!STRCMP(conf->key,HOST_NAME_TAG))
21871+ {
21872+ strncpy((Cascade_Tbl+cascade_rec_no)->hostName,conf->value,sizeof(Cascade_Tbl->hostName));
21873+ conf = (ConfDataType*)conf->next;
21874+ continue;
21875+ }
21876+ if (!STRCMP(conf->key,PORT_TAG))
21877+ {
21878+ if (atoi(conf->value) > 0)
21879+ {
21880+ (Cascade_Tbl+cascade_rec_no)->portNumber = atoi(conf->value);
21881+ }
21882+ else
21883+ {
21884+ (Cascade_Tbl+cascade_rec_no)->portNumber = DEFAULT_PGRP_PORT;
21885+ }
21886+ (Cascade_Tbl+cascade_rec_no)->sock = -1;
21887+
21888+ conf = (ConfDataType*)conf->next;
21889+ PGRset_cascade_server_status(Cascade_Tbl+cascade_rec_no,DB_TBL_USE);
21890+ if (cascade_rec_no == 0)
21891+ {
21892+ Cascade_Inf->top = Cascade_Tbl;
21893+ }
21894+ continue;
21895+ }
21896+ if (!STRCMP(conf->key,RECOVERY_PORT_TAG))
21897+ {
21898+ if (atoi(conf->value) > 0)
21899+ {
21900+ (Cascade_Tbl+cascade_rec_no)->recoveryPortNumber = atoi(conf->value);
21901+ }
21902+ else
21903+ {
21904+ (Cascade_Tbl+cascade_rec_no)->recoveryPortNumber = DEFAULT_PGRP_RECOVERY_PORT;
21905+ }
21906+ (Cascade_Tbl+cascade_rec_no)->rlog_sock=-1;
21907+ (Cascade_Tbl+cascade_rec_no +1)->useFlag = DB_TBL_END;
21908+ conf = (ConfDataType*)conf->next;
21909+ continue;
21910+ }
21911+ }
21912+ /* get loadbalancer table data */
21913+ else if (!STRCMP(conf->table,LOAD_BALANCE_SERVER_TAG))
21914+ {
21915+ lb_rec_no = conf->rec_no;
21916+ if (lb_cnt < lb_rec_no)
21917+ {
21918+ lb_cnt = lb_rec_no;
21919+ if (lb_cnt >= MAX_DB_SERVER)
21920+ {
21921+ continue;
21922+ }
21923+ }
21924+ if (!STRCMP(conf->key,HOST_NAME_TAG))
21925+ {
21926+ strncpy((LoadBalanceTbl + lb_rec_no)->hostName, conf->value,sizeof(LoadBalanceTbl->hostName));
21927+ conf = (ConfDataType*)conf->next;
21928+ continue;
21929+ }
21930+ if (!STRCMP(conf->key,RECOVERY_PORT_TAG))
21931+ {
21932+ (LoadBalanceTbl + lb_rec_no)->recoveryPort = atoi(conf->value);
21933+ (LoadBalanceTbl + lb_rec_no)->sock = -1;
21934+ (LoadBalanceTbl + lb_rec_no)->recovery_sock = -1;
21935+ conf = (ConfDataType*)conf->next;
21936+ continue;
21937+ }
21938+ }
21939+ /* get logging file data */
21940+ else if (!STRCMP(conf->table, LOG_INFO_TAG))
21941+ {
21942+ if (!STRCMP(conf->key, FILE_NAME_TAG))
21943+ {
21944+ strncpy(LogFileData->file_name, conf->value ,sizeof(LogFileData->file_name));
21945+ LogFileData->fp = NULL;
21946+ conf = (ConfDataType*)conf->next;
21947+ continue;
21948+ }
21949+ if (!STRCMP(conf->key, FILE_SIZE_TAG))
21950+ {
21951+ int i,len;
21952+ char * ptr;
21953+ int unit = 1;
21954+ len = strlen(conf->value);
21955+ ptr = conf->value;
21956+ for (i = 0; i < len ; i ++,ptr++)
21957+ {
21958+ if ((! isdigit(*ptr)) && (! isspace(*ptr)))
21959+ {
21960+ switch (*ptr)
21961+ {
21962+ case 'K':
21963+ case 'k':
21964+ unit = 1024;
21965+ break;
21966+ case 'M':
21967+ case 'm':
21968+ unit = 1024*1024;
21969+ break;
21970+ case 'G':
21971+ case 'g':
21972+ unit = 1024*1024*1024;
21973+ break;
21974+ }
21975+ *ptr = '\0';
21976+ break;
21977+ }
21978+ }
21979+ LogFileData->max_size = atoi(conf->value) * unit;
21980+ conf = (ConfDataType*)conf->next;
21981+ continue;
21982+ }
21983+ if (!STRCMP(conf->key, LOG_ROTATION_TAG))
21984+ {
21985+ LogFileData->rotation = atoi(conf->value);
21986+ conf = (ConfDataType*)conf->next;
21987+ continue;
21988+ }
21989+ }
21990+ else
21991+ {
21992+ if (!STRCMP(conf->key,HOST_NAME_TAG))
21993+ {
21994+ int ip;
21995+ ip=PGRget_ip_by_name(conf->value);
21996+ if (ResolvedName == NULL)
21997+ {
21998+ ResolvedName = malloc(ADDRESS_LENGTH);
21999+ }
22000+ if (ResolvedName == NULL)
22001+ {
22002+ continue;
22003+ }
22004+ else
22005+ {
22006+ memset(ResolvedName,0,ADDRESS_LENGTH);
22007+ }
22008+
22009+ sprintf(ResolvedName,
22010+ "%d.%d.%d.%d",
22011+ (ip ) & 0xff ,
22012+ (ip >> 8) & 0xff ,
22013+ (ip >> 16) & 0xff ,
22014+ (ip >> 24) & 0xff );
22015+ conf = (ConfDataType*)conf->next;
22016+ continue;
22017+ }
22018+ else if (!STRCMP(conf->key,REPLICATE_PORT_TAG))
22019+ {
22020+ Port_Number = atoi(conf->value);
22021+ conf = (ConfDataType*)conf->next;
22022+ continue;
22023+ }
22024+ /* get port number for recovery cluster db server */
22025+ else if (!STRCMP(conf->key,RECOVERY_PORT_TAG))
22026+ {
22027+ if (atoi(conf->value) > 0)
22028+ {
22029+ Recovery_Port_Number = atoi(conf->value);
22030+ }
22031+ else
22032+ {
22033+ Recovery_Port_Number =DEFAULT_PGRP_RECOVERY_PORT;
22034+ }
22035+ conf = (ConfDataType*)conf->next;
22036+ continue;
22037+ }
22038+ else if (!STRCMP(conf->key,LIFECHECK_PORT_TAG))
22039+ {
22040+ if (atoi(conf->value) > 0)
22041+ {
22042+ LifeCheck_Port_Number = atoi(conf->value);
22043+ }
22044+ else
22045+ {
22046+ LifeCheck_Port_Number = DEFAULT_PGRP_LIFECHECK_PORT;
22047+ }
22048+ conf = (ConfDataType*)conf->next;
22049+ continue;
22050+ }
22051+ else if (!STRCMP(conf->key,RLOG_PORT_TAG))
22052+ {
22053+ if (atoi(conf->value) > 0)
22054+ {
22055+ Replicateion_Log->RLog_Port_Number = atoi(conf->value);
22056+ }
22057+ else
22058+ {
22059+ Replicateion_Log->RLog_Port_Number = DEFAULT_PGRP_RLOG_PORT;
22060+ }
22061+ conf = (ConfDataType*)conf->next;
22062+ continue;
22063+ }
22064+ /* get response mode */
22065+ else if (!STRCMP(conf->key,RESPONSE_MODE_TAG))
22066+ {
22067+ if (!STRCMP(conf->value,RESPONSE_MODE_RELIABLE))
22068+ {
22069+ PGR_Response_Inf->response_mode = PGR_RELIABLE_MODE;
22070+ }
22071+ else if (!STRCMP(conf->value,RESPONSE_MODE_FAST))
22072+ {
22073+ PGR_Response_Inf->response_mode = PGR_FAST_MODE;
22074+ }
22075+ else
22076+ {
22077+ PGR_Response_Inf->response_mode = PGR_NORMAL_MODE;
22078+ }
22079+ conf = (ConfDataType*)conf->next;
22080+ continue;
22081+ }
22082+ /* get replication log use or not */
22083+ else if (!STRCMP(conf->key,USE_REPLICATION_LOG_TAG))
22084+ {
22085+ if (!STRCMP(conf->value,"yes"))
22086+ {
22087+ PGR_Use_Replication_Log = true;
22088+ }
22089+ conf = (ConfDataType*)conf->next;
22090+ continue;
22091+ }
22092+ /* get replication timeout */
22093+ else if (!STRCMP(conf->key,TIMEOUT_TAG))
22094+ {
22095+ /* get repliaction timeout */
22096+ PGR_Replication_Timeout = PGRget_time_value(conf->value);
22097+ if ((PGR_Replication_Timeout < 1) || (PGR_Replication_Timeout > 3600))
22098+ {
22099+ fprintf(stderr,"%s is out of range. It should be between 1sec-1hr.\n",TIMEOUT_TAG);
22100+ return STATUS_ERROR;
22101+ }
22102+ conf = (ConfDataType*)conf->next;
22103+ continue;
22104+ }
22105+ else if (!STRCMP(conf->key,LIFECHECK_TIMEOUT_TAG))
22106+ {
22107+ /* get lifecheck timeout */
22108+ PGR_Lifecheck_Timeout = PGRget_time_value(conf->value);
22109+ if ((PGR_Lifecheck_Timeout < 1) || (PGR_Lifecheck_Timeout > 3600))
22110+ {
22111+ show_error("%s is out of range. It should be between 1sec-1hr.\n",LIFECHECK_TIMEOUT_TAG);
22112+ return STATUS_ERROR;
22113+ }
22114+ conf = (ConfDataType*)conf->next;
22115+ continue;
22116+ }
22117+ else if (!STRCMP(conf->key,LIFECHECK_INTERVAL_TAG))
22118+ {
22119+ /* get lifecheck interval */
22120+ PGR_Lifecheck_Interval = PGRget_time_value(conf->value);
22121+ if ((PGR_Lifecheck_Interval < 1) || (PGR_Lifecheck_Interval > 3600))
22122+ {
22123+ show_error("%s is out of range. It should between 1sec-1hr.\n",LIFECHECK_INTERVAL_TAG);
22124+ return STATUS_ERROR;
22125+ }
22126+ conf = (ConfDataType*)conf->next;
22127+ continue;
22128+ }
22129+ }
22130+ conf = (ConfDataType*)conf->next;
22131+ }
22132+
22133+ /* create cluster db server table */
22134+ Host_Tbl_Begin = (HostTbl *)NULL;
22135+
22136+ size = sizeof(HostTbl) * MAX_DB_SERVER;
22137+ HostTblShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
22138+ if (HostTblShmid < 0)
22139+ {
22140+ show_error("%s:shmget() failed. reason: %s", func,strerror(errno));
22141+ return STATUS_ERROR;
22142+ }
22143+#ifdef PRINT_DEBUG
22144+ show_debug("%s:HostTbl shmget ok",func);
22145+#endif
22146+ Host_Tbl_Begin = (HostTbl *)shmat(HostTblShmid,0,0);
22147+ if (Host_Tbl_Begin == (HostTbl *)-1)
22148+ {
22149+ show_error("%s:shmat() failed. reason: %s", func, strerror(errno));
22150+ return STATUS_ERROR;
22151+ }
22152+#ifdef PRINT_DEBUG
22153+ show_debug("%s:HostTbl shmat ok",func);
22154+#endif
22155+ memset(Host_Tbl_Begin , 0 , size );
22156+ Host_Tbl_Begin -> useFlag = DB_TBL_END;
22157+
22158+ for ( i = 0 ; i <= cnt ; i ++)
22159+ {
22160+ PGRadd_HostTbl(&host_tbl[i],DB_TBL_INIT);
22161+ }
22162+ /* set load balance table */
22163+ for ( i = 0 ; i <= lb_cnt ; i ++)
22164+ {
22165+ (LoadBalanceTbl + i)->port = -1;
22166+ (LoadBalanceTbl + i)->sock = -1;
22167+ }
22168+ memset((LoadBalanceTbl + i),0,sizeof(RecoveryTbl));
22169+ PGR_Free_Conf_Data();
22170+
22171+ /* allocate result buffer of query */
22172+ PGR_Result = malloc(PGR_MESSAGE_BUFSIZE);
22173+ if (PGR_Result == NULL)
22174+ {
22175+ show_error("%s:malloc() failed. reason: %s", func, strerror(errno));
22176+ return STATUS_ERROR;
22177+ }
22178+ memset(PGR_Result,0,PGR_MESSAGE_BUFSIZE);
22179+
22180+ /* allocate log_data */
22181+ PGR_Log_Header = malloc(sizeof(ReplicateHeader));
22182+ if (PGR_Log_Header == NULL)
22183+ {
22184+ show_error("%s:malloc() failed. reason: %s", func, strerror(errno));
22185+ return STATUS_ERROR;
22186+ }
22187+ memset(PGR_Log_Header,0,sizeof(ReplicateHeader));
22188+
22189+ /* allocate send query id */
22190+ size = sizeof(unsigned int) * (MAX_DB_SERVER +1);
22191+ PGR_Send_Query_ID = malloc (size);
22192+ if (PGR_Send_Query_ID == NULL)
22193+ {
22194+ show_error("%s:malloc() failed. reason: %s", func, strerror(errno));
22195+ return STATUS_ERROR;
22196+ }
22197+ memset(PGR_Send_Query_ID, 0, size);
22198+ for ( i = 0 ; i < MAX_DB_SERVER ; i ++)
22199+ {
22200+ StartReplication[i] = true;
22201+ }
22202+
22203+ /* set self data into cascade table */
22204+
22205+ cascade_rec_no ++;
22206+ if (ResolvedName != NULL)
22207+ {
22208+ strncpy((Cascade_Tbl+cascade_rec_no)->hostName,ResolvedName,ADDRESS_LENGTH);
22209+ }
22210+ else
22211+ {
22212+
22213+ gethostname((Cascade_Tbl+cascade_rec_no)->hostName,sizeof(Cascade_Tbl->hostName));
22214+ }
22215+ (Cascade_Tbl+cascade_rec_no)->portNumber = Port_Number;
22216+ (Cascade_Tbl+cascade_rec_no)->recoveryPortNumber = Recovery_Port_Number;
22217+ (Cascade_Tbl+cascade_rec_no)->sock = -1;
22218+
22219+ PGRset_cascade_server_status(Cascade_Tbl+cascade_rec_no,DB_TBL_USE);
22220+ /* terminate */
22221+ (Cascade_Tbl+(cascade_rec_no+1))->useFlag = DB_TBL_END;
22222+
22223+ Cascade_Inf->top = Cascade_Tbl;
22224+ Cascade_Inf->end = Cascade_Tbl+cascade_rec_no;
22225+ Cascade_Inf->upper = NULL;
22226+ Cascade_Inf->lower = NULL;
22227+ if (cascade_rec_no >= 1)
22228+ {
22229+ Cascade_Inf->upper = (Cascade_Tbl+cascade_rec_no - 1);
22230+ }
22231+ (Cascade_Tbl+(cascade_rec_no+1))->useFlag = DB_TBL_END;
22232+
22233+ Cascade_Inf->myself = (Cascade_Tbl+cascade_rec_no);
22234+ Cascade_Inf->useFlag = DB_TBL_USE;
22235+
22236+ PGR_Response_Inf->response_mode = PGR_NORMAL_MODE;
22237+
22238+ return STATUS_OK;
22239+}
22240+
22241diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/lifecheck.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/lifecheck.c
22242--- postgresql-8.2.4/src/pgcluster/pgrp/lifecheck.c 1970-01-01 01:00:00.000000000 +0100
22243+++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/lifecheck.c 2007-03-01 16:27:15.000000000 +0100
22244@@ -0,0 +1,276 @@
22245+/*--------------------------------------------------------------------
22246+ * FILE:
22247+ * lifecheck.c
22248+ *
22249+ * NOTE:
22250+ * This file is composed of the functions to call with the source
22251+ * at pgreplicate for the lifecheck.
22252+ *
22253+ * Portions Copyright (c) 2003-2007, Atsushi Mitani
22254+ *--------------------------------------------------------------------
22255+ */
22256+#include "postgres.h"
22257+#include "postgres_fe.h"
22258+
22259+#include <pthread.h>
22260+#include <stdio.h>
22261+#include <stdarg.h>
22262+#include <sys/types.h>
22263+#include <fcntl.h>
22264+#include <errno.h>
22265+#include <ctype.h>
22266+#include <time.h>
22267+#include <sys/ipc.h>
22268+#include <sys/shm.h>
22269+#include <sys/sem.h>
22270+#include <sys/msg.h>
22271+#include <signal.h>
22272+
22273+/*
22274+#include "libpq/pqsignal.h"
22275+#include "utils/guc.h"
22276+#include "miscadmin.h"
22277+#include "nodes/nodes.h"
22278+#include "nodes/parsenodes.h"
22279+#include "access/xact.h"
22280+#include "access/xlog.h"
22281+#include "tcop/tcopprot.h"
22282+#include "postmaster/postmaster.h"
22283+*/
22284+
22285+#include "libpq-fe.h"
22286+#include "libpq-int.h"
22287+#include "fe-auth.h"
22288+
22289+#include <sys/socket.h>
22290+#include <unistd.h>
22291+#include <netdb.h>
22292+#include <arpa/inet.h>
22293+
22294+#ifdef HAVE_NETINET_TCP_H
22295+#include <netinet/tcp.h>
22296+#endif
22297+
22298+#ifdef HAVE_SYS_SELECT_H
22299+#include <sys/select.h>
22300+#endif
22301+
22302+
22303+#ifdef HAVE_CRYPT_H
22304+#include <crypt.h>
22305+#endif
22306+
22307+
22308+#ifdef MULTIBYTE
22309+#include "mb/pg_wchar.h"
22310+#endif
22311+
22312+#include "access/xact.h"
22313+#include "lib/dllist.h"
22314+#include "libpq/pqformat.h"
22315+#include "replicate_com.h"
22316+#include "pgreplicate.h"
22317+
22318+#define PING_DB "template1"
22319+#define PING_QUERY "SELECT 1"
22320+
22321+static HostTbl * PGR_Cluster_DB_4_Lifecheck = (HostTbl*)NULL;
22322+
22323+/*--------------------------------------
22324+ * PROTOTYPE DECLARATION
22325+ *--------------------------------------
22326+ */
22327+int PGRlifecheck_main(int fork_wait_time);
22328+
22329+static bool is_started_replication(void);
22330+static void set_timeout(SIGNAL_ARGS);
22331+static int lifecheck_loop(void);
22332+static int ping_cluster(PGconn * conn);
22333+static void set_host_status( HostTbl * host_ptr , int status );
22334+
22335+int
22336+PGRlifecheck_main(int fork_wait_time)
22337+{
22338+ bool started = false;
22339+ pid_t pgid = 0;
22340+ pid_t pid = 0;
22341+
22342+ pgid = getpgid(0);
22343+ pid = fork();
22344+ if (pid != 0)
22345+ {
22346+ return STATUS_OK;
22347+ }
22348+
22349+ /*
22350+ * in child process,
22351+ * call recovery module
22352+ */
22353+ setpgid(0,pgid);
22354+
22355+ PGRsignal(SIGHUP, PGRexit_subprocess);
22356+ PGRsignal(SIGTERM, PGRexit_subprocess);
22357+ PGRsignal(SIGINT, PGRexit_subprocess);
22358+ PGRsignal(SIGQUIT, PGRexit_subprocess);
22359+ PGRsignal(SIGALRM, set_timeout);
22360+
22361+ if (fork_wait_time > 0) {
22362+ sleep(fork_wait_time);
22363+ }
22364+
22365+ if (PGRuserName == NULL)
22366+ {
22367+ PGRuserName = getenv("LOGNAME");
22368+ if (PGRuserName == NULL)
22369+ {
22370+ PGRuserName = getenv("USER");
22371+ if (PGRuserName == NULL)
22372+ PGRuserName = "postgres";
22373+ }
22374+ }
22375+
22376+ for (;;)
22377+ {
22378+ started = is_started_replication();
22379+ if (!started)
22380+ {
22381+ /* wait next lifecheck as interval */
22382+ sleep(PGR_Lifecheck_Interval);
22383+ continue;
22384+ }
22385+
22386+ /* life check to all cluster dbs */
22387+ lifecheck_loop();
22388+
22389+ /* wait next lifecheck as interval */
22390+ sleep(PGR_Lifecheck_Interval);
22391+ }
22392+ return STATUS_OK;
22393+}
22394+
22395+static bool
22396+is_started_replication(void)
22397+{
22398+ HostTbl * host_ptr = (HostTbl*)NULL;
22399+
22400+ host_ptr = Host_Tbl_Begin;
22401+ while(host_ptr->useFlag != DB_TBL_END)
22402+ {
22403+ if (host_ptr->useFlag == DB_TBL_USE)
22404+ {
22405+ return true;
22406+ }
22407+ host_ptr ++;
22408+ }
22409+ return false;
22410+}
22411+
22412+static void
22413+set_timeout(SIGNAL_ARGS)
22414+{
22415+ if (PGR_Cluster_DB_4_Lifecheck != NULL)
22416+ {
22417+ PGR_Cluster_DB_4_Lifecheck->retry_count ++;
22418+ if (PGR_Cluster_DB_4_Lifecheck->retry_count > PGR_CONNECT_RETRY_TIME )
22419+ {
22420+ set_host_status(PGR_Cluster_DB_4_Lifecheck,DB_TBL_ERROR);
22421+ }
22422+ }
22423+ PGRsignal(SIGALRM, set_timeout);
22424+}
22425+
22426+static int
22427+lifecheck_loop(void)
22428+{
22429+ HostTbl * host_ptr = (HostTbl*)NULL;
22430+ char port[8];
22431+ char * host = NULL;
22432+ PGconn * conn = NULL;
22433+
22434+ host_ptr = Host_Tbl_Begin;
22435+ if (host_ptr == NULL)
22436+ {
22437+ return STATUS_ERROR;
22438+ }
22439+ alarm(0);
22440+ while(host_ptr->useFlag != DB_TBL_END)
22441+ {
22442+ /*
22443+ * check the status of the cluster DB
22444+ */
22445+ if (host_ptr->useFlag != DB_TBL_USE)
22446+ {
22447+ host_ptr ++;
22448+ continue;
22449+ }
22450+ snprintf(port,sizeof(port),"%d", host_ptr->port);
22451+ host = (char *)(host_ptr->resolvedName);
22452+ /* set host data */
22453+ PGR_Cluster_DB_4_Lifecheck = host_ptr;
22454+
22455+ /* set alarm as lifecheck timeout */
22456+ alarm(PGR_Lifecheck_Timeout);
22457+
22458+ /* connect DB */
22459+ conn = PGRcreateConn(host,port, PING_DB ,PGRuserName,"","","");
22460+ if ((conn != NULL) &&
22461+ (ping_cluster(conn) == STATUS_OK))
22462+ {
22463+ set_host_status(host_ptr, DB_TBL_USE);
22464+ }
22465+ else
22466+ {
22467+ set_host_status(host_ptr, DB_TBL_ERROR);
22468+ }
22469+ /* reset alarm */
22470+ alarm(0);
22471+
22472+ PQfinish(conn);
22473+ conn = NULL;
22474+ host_ptr ++;
22475+ }
22476+
22477+ return STATUS_OK;
22478+}
22479+
22480+static int
22481+ping_cluster(PGconn * conn)
22482+{
22483+ int status = 0;
22484+ PGresult * res = (PGresult *)NULL;
22485+
22486+ res = PQexec(conn, PING_QUERY );
22487+
22488+ status = PQresultStatus(res);
22489+ if (res != NULL)
22490+ {
22491+ PQclear(res);
22492+ }
22493+ if ((status == PGRES_NONFATAL_ERROR ) ||
22494+ (status == PGRES_FATAL_ERROR ))
22495+ {
22496+ return STATUS_ERROR;
22497+ }
22498+ return STATUS_OK;
22499+}
22500+
22501+static void
22502+set_host_status( HostTbl * host_ptr , int status )
22503+{
22504+ if (host_ptr == NULL)
22505+ return;
22506+ if (status == DB_TBL_ERROR)
22507+ {
22508+ host_ptr->retry_count ++;
22509+ if (host_ptr->retry_count > PGR_CONNECT_RETRY_TIME )
22510+ {
22511+ PGRset_host_status(host_ptr, status);
22512+ }
22513+ }
22514+ else
22515+ {
22516+ host_ptr->retry_count = 0;
22517+ PGRset_host_status(host_ptr, status);
22518+ }
22519+}
22520+
22521diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/main.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/main.c
22522--- postgresql-8.2.4/src/pgcluster/pgrp/main.c 1970-01-01 01:00:00.000000000 +0100
22523+++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/main.c 2007-02-18 22:52:17.000000000 +0100
22524@@ -0,0 +1,935 @@
22525+/*--------------------------------------------------------------------
22526+ * FILE:
22527+ * main.c
22528+ * Replication server for PostgreSQL
22529+ *
22530+ * NOTE:
22531+ * This is the main module of the replication server.
22532+ *
22533+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
22534+ *--------------------------------------------------------------------
22535+ */
22536+#include "postgres.h"
22537+
22538+#include <stdio.h>
22539+#include <string.h>
22540+#include <unistd.h>
22541+#include <sys/time.h>
22542+#include <signal.h>
22543+#include <sys/wait.h>
22544+#include <ctype.h>
22545+#include <sys/types.h>
22546+#include <sys/stat.h>
22547+#include <sys/socket.h>
22548+#include <sys/ipc.h>
22549+#include <sys/shm.h>
22550+#include <netdb.h>
22551+#include <netinet/in.h>
22552+#include <errno.h>
22553+#include <fcntl.h>
22554+#include <time.h>
22555+#include <sys/param.h>
22556+#include <arpa/inet.h>
22557+#include <sys/file.h>
22558+#include <pthread.h>
22559+
22560+#ifdef HAVE_NETINET_TCP_H
22561+#include <netinet/tcp.h>
22562+#endif
22563+#ifdef HAVE_SYS_SELECT_H
22564+#include <sys/select.h>
22565+#endif
22566+
22567+#ifdef HAVE_GETOPT_H
22568+#include <getopt.h>
22569+#endif
22570+
22571+#include "miscadmin.h"
22572+#include "nodes/nodes.h"
22573+
22574+#include "libpq-fe.h"
22575+#include "libpq/libpq-fs.h"
22576+#include "libpq-int.h"
22577+#include "fe-auth.h"
22578+
22579+
22580+#include "access/xact.h"
22581+#include "replicate_com.h"
22582+#include "pgreplicate.h"
22583+
22584+#ifdef WIN32
22585+#include "win32.h"
22586+#endif
22587+#include <arpa/inet.h>
22588+#ifdef HAVE_CRYPT_H
22589+#include <crypt.h>
22590+#endif
22591+
22592+#ifdef MULTIBYTE
22593+#include "mb/pg_wchar.h"
22594+#endif
22595+
22596+/*--------------------------------------
22597+ * GLOBAL VARIABLE DECLARATION
22598+ *--------------------------------------
22599+ */
22600+/* for replicate_com.h */
22601+
22602+ConfDataType * ConfData_Top = (ConfDataType *)NULL;
22603+ConfDataType * ConfData_End = (ConfDataType *)NULL;
22604+
22605+/* replication server data */
22606+char * ResolvedName = NULL;
22607+uint16_t Port_Number = 0;
22608+uint16_t LifeCheck_Port_Number = 0;
22609+uint16_t Recovery_Port_Number = 0;
22610+bool PGR_Parse_Session_Started = false;
22611+int PGR_Replication_Timeout = 60;
22612+int PGR_Lifecheck_Timeout = 3;
22613+int PGR_Lifecheck_Interval = 15;
22614+
22615+/* global table data */
22616+HostTbl *Host_Tbl_Begin = NULL;
22617+Dllist * Transaction_Tbl_Begin = NULL;
22618+TransactionTbl * Transaction_Tbl_End = NULL;
22619+RecoveryTbl * LoadBalanceTbl = NULL;
22620+RecoveryStatusInf * Recovery_Status_Inf = NULL;
22621+ReplicateHeader * PGR_Log_Header = NULL;
22622+ReplicateServerInfo * Cascade_Tbl = NULL;;
22623+CommitLogInf * Commit_Log_Tbl = NULL;
22624+QueryLogType * Query_Log_Top = NULL;
22625+QueryLogType * Query_Log_End = NULL;
22626+CascadeInf * Cascade_Inf = NULL;
22627+ReplicationLogInf * Replicateion_Log = NULL;
22628+/* IPC's id data */
22629+int RecoveryShmid = 0;
22630+int ReplicateSerializationShmid=0;
22631+int RecoveryMsgShmid = 0;
22632+int *RecoveryMsgid = NULL;
22633+int HostTblShmid = 0;
22634+int LockWaitTblShmid = 0;
22635+int LoadBalanceTblShmid = 0;
22636+int CascadeTblShmid = 0;
22637+int CascadeInfShmid = 0;
22638+int CommitLogShmid = 0;
22639+int QueryLogMsgid = 0;
22640+int QueryLogAnsMsgid = 0;
22641+int PGconnMsgid = 0;
22642+int MaxBackends = 0;
22643+char * PGR_Result = NULL;
22644+int SemID = 0;
22645+int RecoverySemID= 0;
22646+int RecovErysemid = 0;
22647+int VacuumSemID = 0;
22648+int CascadeSemID= 0;
22649+char * PGR_Data_Path = NULL;
22650+char * PGR_Write_Path = NULL;
22651+int IS_SESSION_AUTHORIZATION = 0;
22652+ResponseInf * PGR_Response_Inf = NULL;
22653+bool StartReplication[MAX_DB_SERVER];
22654+bool PGR_Cascade = false;
22655+bool PGR_Use_Replication_Log = false;
22656+bool PGR_AutoCommit = true;
22657+unsigned int * PGR_Send_Query_ID = NULL;
22658+unsigned int PGR_Query_ID = 0;
22659+volatile bool exit_processing = false;
22660+int pgreplicate_pid = 0;
22661+
22662+int ReplicateSock = -1;
22663+int exit_signo = SIGTERM;
22664+
22665+RecoveryQueueInf RecoveryQueue;
22666+char * Backend_Socket_Dir = NULL;
22667+
22668+unsigned int * PGR_ReplicateSerializationID = NULL;
22669+
22670+int Log_Print = 0;
22671+int Debug_Print = 0;
22672+FILE * LogFp = (FILE *)NULL;
22673+FILE * StatusFp = (FILE *)NULL;
22674+FILE * RidFp = (FILE *)NULL;
22675+FILE * QueueFp = (FILE *)NULL;
22676+
22677+extern char *optarg;
22678+char * PGRuserName = NULL;
22679+
22680+int fork_wait_time = 0;
22681+int Idle_Flag = IDLE_MODE;
22682+volatile bool Exit_Request = false;
22683+
22684+pthread_mutex_t transaction_table_mutex;
22685+
22686+/*--------------------------------------
22687+ * PROTOTYPE DECLARATION
22688+ *--------------------------------------
22689+ */
22690+static void startup_replication_server(void);
22691+static int replicate_loop(int fd);
22692+static void replicate_main(void);
22693+static void quick_exit(SIGNAL_ARGS);
22694+static void daemonize(void);
22695+static void write_pid_file(void);
22696+static void stop_pgreplicate(void);
22697+static bool is_exist_pid_file(void);
22698+static void usage(void);
22699+static void set_exit_processing(int signo);
22700+
22701+/*--------------------------------------------------------------------
22702+ * SYMBOL
22703+ * replicate_loop()
22704+ * NOTES
22705+ * replication module
22706+ * ARGS
22707+ * int fd :
22708+ * RETURN
22709+ * OK: STATUS_OK
22710+ * NG: STATUS_ERROR
22711+ *--------------------------------------------------------------------
22712+ */
22713+static int
22714+replicate_loop(int fd)
22715+{
22716+ char * func = "replicate_loop()";
22717+ pid_t pgid = 0;
22718+ pid_t pid = 0;
22719+ int sock = -1;
22720+ int rtn = 0;
22721+ int cnt = 0;
22722+ int result;
22723+ bool exist_sys_log=false;
22724+ bool exist_replicate=false;
22725+ bool clear_connection = false;
22726+
22727+
22728+ result = PGR_Create_Acception(fd,&sock,"",Port_Number);
22729+ if (result == STATUS_ERROR)
22730+ {
22731+ show_error("%s: accept failed (%s)", func, strerror(errno));
22732+ if (sock != -1)
22733+ close(sock);
22734+ return 1;
22735+ }
22736+
22737+ pgid = getpgid(0);
22738+ pid = fork();
22739+ if (pid <0)
22740+ {
22741+ show_error("%s:fork failed (%s)",func,strerror(errno));
22742+ PGRreplicate_exit(0);
22743+ }
22744+ if (pid == 0)
22745+ {
22746+ int status = LOOP_CONTINUE;
22747+ bool PGR_Cascade = false;
22748+ ReplicateHeader header;
22749+ ReplicateHeader header_save_for_recovering;
22750+ char * query = NULL;
22751+
22752+ if (fork_wait_time > 0) {
22753+ sleep(fork_wait_time);
22754+ }
22755+
22756+ close(fd);
22757+
22758+ PGRsignal(SIGHUP, quick_exit);
22759+ PGRsignal(SIGINT, quick_exit);
22760+ PGRsignal(SIGQUIT, quick_exit);
22761+ PGRsignal(SIGTERM, quick_exit);
22762+ PGRsignal(SIGALRM, quick_exit);
22763+ PGRsignal(SIGPIPE, SIG_IGN);
22764+ setpgid(0,pgid);
22765+
22766+ if (PGRinit_transaction_table() != STATUS_OK)
22767+ {
22768+ show_error("transaction table memory allocate failed");
22769+ PGR_Close_Sock(&sock);
22770+ exit(1);
22771+ }
22772+
22773+ pthread_mutex_init(&transaction_table_mutex, NULL);
22774+
22775+ /* child loop */
22776+ for (;;)
22777+ {
22778+ fd_set rmask;
22779+ struct timeval timeout;
22780+
22781+ timeout.tv_sec = PGR_Replication_Timeout;
22782+ timeout.tv_usec = 0;
22783+
22784+ if (query != NULL)
22785+ {
22786+ free(query);
22787+ query = NULL;
22788+ }
22789+ /*
22790+ * Wait for something to happen.
22791+ */
22792+ FD_ZERO(&rmask);
22793+ FD_SET(sock,&rmask);
22794+ rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
22795+ if (rtn < 0)
22796+ {
22797+ if (errno == EINTR)
22798+ continue;
22799+ }
22800+
22801+ if (rtn && FD_ISSET(sock, &rmask))
22802+ {
22803+ query = NULL;
22804+ query = PGRread_packet(sock,&header);
22805+ if ((query == NULL) || (header.cmdSts == 0))
22806+ {
22807+
22808+ if (exist_sys_log)
22809+ {
22810+ show_error("%s:upper cascade closed? , errno=%d(%s)",func,errno,strerror(errno));
22811+ memset(&header, 0, sizeof(ReplicateHeader));
22812+ header.cmdSys = CMD_SYS_CALL;
22813+ header.cmdSts = CMD_STS_QUERY_SUSPEND;
22814+ header.query_size = htonl(0);
22815+ PGRsend_rlog_to_local(&header, NULL);
22816+ exist_sys_log = false;
22817+ }
22818+ else
22819+ {
22820+ if (exist_replicate)
22821+ {
22822+ PGRclear_connections();
22823+ clear_connection = true;
22824+ header_save_for_recovering.cmdSts=CMD_TYPE_OTHER;
22825+ header_save_for_recovering.cmdType=CMD_TYPE_CONNECTION_CLOSE;
22826+ header_save_for_recovering.query_size = htonl(21);
22827+ PGRdo_replicate(sock,&header_save_for_recovering,"PGR_CLOSE_CONNECTION");
22828+ }
22829+ PGRsend_notice_quit();
22830+ }
22831+ break;
22832+ }
22833+ cnt = 0;
22834+ switch (header.cmdSys)
22835+ {
22836+ case CMD_SYS_LIFECHECK:
22837+ PGRreturn_result(sock,"1", PGR_NOWAIT_ANSWER);
22838+ break;
22839+ case CMD_SYS_PREREPLICATE:
22840+ if(Cascade_Inf!=NULL ||
22841+ Cascade_Inf->upper == NULL)
22842+ {
22843+ /* 1 means "I am primary replicate server." */
22844+ PGRreturn_result(sock,"1", PGR_NOWAIT_ANSWER);
22845+ }
22846+ else
22847+ {
22848+ /* 0 means "I am not primary replicate server." */
22849+ PGRreturn_result(sock,"0", PGR_NOWAIT_ANSWER);
22850+ }
22851+ break;
22852+ case CMD_SYS_REPLICATE:
22853+ if (exist_replicate == false)
22854+ {
22855+ exist_replicate=true;
22856+ memcpy(&header_save_for_recovering,
22857+ &header,
22858+ sizeof(ReplicateHeader));
22859+ }
22860+ status = PGRdo_replicate(sock,&header,query);
22861+ break;
22862+ case CMD_SYS_LOG:
22863+ exist_sys_log = true;
22864+ PGRsend_rlog_to_local(&header, query);
22865+ /* set own replicate id by rlog */
22866+ PGRset_replication_id(ntohl(header.replicate_id));
22867+ PGRsend_notice_rlog_done(sock);
22868+ break;
22869+ case CMD_SYS_CASCADE:
22870+ PGR_Cascade = true;
22871+ PGRcascade_main(sock,&header,query);
22872+ break;
22873+ case CMD_SYS_CALL:
22874+ if (header.cmdSts == CMD_STS_TRANSACTION_ABORT)
22875+ {
22876+ PGRreconfirm_commit(sock,&header);
22877+ }
22878+ else if (header.cmdSts == CMD_STS_NOTICE)
22879+ {
22880+
22881+ }
22882+ else if (header.cmdSts == CMD_STS_RESPONSE)
22883+ {
22884+ if (header.cmdType == CMD_TYPE_FRONTEND_CLOSED)
22885+ {
22886+ PGRsend_notice_rlog_done(sock);
22887+ status = LOOP_END;
22888+ }
22889+ }
22890+ break;
22891+ default:
22892+ show_error("WARNING: unknown Header->cmdSys %c",header.cmdSys);
22893+ }
22894+ }
22895+ if (status == LOOP_END)
22896+ {
22897+ break;
22898+ }
22899+ }
22900+
22901+ PGR_Close_Sock(&sock);
22902+ if (query != NULL)
22903+ {
22904+ free(query);
22905+ query = NULL;
22906+ }
22907+ if (!clear_connection)
22908+ PGRclear_connections();
22909+ PGRdestroy_transaction_table();
22910+ pthread_mutex_destroy(&transaction_table_mutex);
22911+ exit(0);
22912+ }
22913+ else
22914+ {
22915+ PGR_Close_Sock(&sock);
22916+ return 0;
22917+ }
22918+}
22919+
22920+static void
22921+startup_replication_server(void)
22922+{
22923+ ReplicateHeader header;
22924+ char hostName[HOSTNAME_MAX_LENGTH];
22925+ char userName[USERNAME_MAX_LENGTH];
22926+ char query[256];
22927+
22928+ if (PGRuserName == NULL)
22929+ {
22930+ PGRuserName = getenv("LOGNAME");
22931+ if (PGRuserName == NULL)
22932+ {
22933+ PGRuserName = getenv("USER");
22934+ if (PGRuserName == NULL)
22935+ PGRuserName = "postgres";
22936+ }
22937+ }
22938+ memset(&header,0,sizeof(ReplicateHeader));
22939+ memset(query,0,sizeof(query));
22940+ memset(hostName,0,sizeof(hostName));
22941+ memset(userName,0,sizeof(userName));
22942+ if (ResolvedName != NULL)
22943+ {
22944+ strncpy(hostName,ResolvedName,ADDRESS_LENGTH);
22945+ }
22946+ else
22947+ {
22948+ gethostname(hostName,sizeof(hostName)-1);
22949+ }
22950+ strncpy(userName ,PGRuserName,sizeof(userName)-1);
22951+ snprintf(query,sizeof(query)-1,"SELECT %s(%d,'%s',%d,%d)",
22952+ PGR_SYSTEM_COMMAND_FUNC,
22953+ PGR_STARTUP_REPLICATION_SERVER_FUNC_NO,
22954+ hostName,
22955+ Port_Number,
22956+ Recovery_Port_Number);
22957+ header.cmdSts = CMD_STS_NOTICE;
22958+ header.query_id = htonl(PGRget_next_query_id());
22959+ header.query_size = htonl(strlen(query));
22960+ memcpy(header.from_host,hostName,sizeof(header.from_host));
22961+ memcpy(header.userName,userName,sizeof(header.userName));
22962+ strcpy(header.dbName,"template1");
22963+ replicate_packet_send_internal( &header, query,-1,PGRget_recovery_status(),true);
22964+}
22965+
22966+/*--------------------------------------------------------------------
22967+ * SYMBOL
22968+ * replicate_main()
22969+ * NOTES
22970+ * Replication main module
22971+ * ARGS
22972+ * void
22973+ * RETURN
22974+ * none
22975+ *--------------------------------------------------------------------
22976+ */
22977+static void
22978+replicate_main(void)
22979+{
22980+#ifdef PRINT_DEBUG
22981+ char * func = "replicate_main()";
22982+#endif
22983+ int status;
22984+ int rtn;
22985+ show_debug ("%s:entering replicate_main",func);
22986+
22987+ /* cascade start up notice */
22988+ if (Cascade_Inf->upper != NULL)
22989+ {
22990+ show_debug("initialize cascade information");
22991+ PGRstartup_cascade();
22992+ }
22993+
22994+ status = PGR_Create_Socket_Bind(&ReplicateSock, ResolvedName, Port_Number);
22995+
22996+ if (status != STATUS_OK)
22997+ {
22998+ show_debug("%s %d port bind failed. quit.",func,Port_Number);
22999+ stop_pgreplicate();
23000+ PGRreplicate_exit(0);
23001+ }
23002+#ifdef PRINT_DEBUG
23003+ show_debug("%s %d port bind OK",func,Port_Number);
23004+#endif
23005+
23006+
23007+ /* replication start up notice */
23008+ startup_replication_server();
23009+
23010+ for (;;)
23011+ {
23012+ fd_set rmask;
23013+ struct timeval timeout;
23014+
23015+ if (exit_processing == true)
23016+ PGRreplicate_exit(0);
23017+
23018+ timeout.tv_sec = PGR_Replication_Timeout;
23019+ timeout.tv_usec = 0;
23020+
23021+
23022+ /*
23023+ * Wait for something to happen.
23024+ */
23025+ FD_ZERO(&rmask);
23026+ FD_SET(ReplicateSock,&rmask);
23027+ rtn = select(ReplicateSock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
23028+ if (rtn < 0)
23029+ continue;
23030+
23031+ if (rtn && FD_ISSET(ReplicateSock, &rmask))
23032+ {
23033+ /*
23034+ * get recovery status.
23035+ */
23036+ PGRcheck_recovered_host();
23037+
23038+ if (exit_processing == true)
23039+ break;
23040+
23041+ /*
23042+ * call replication module
23043+ */
23044+ replicate_loop(ReplicateSock);
23045+ }
23046+ }
23047+}
23048+
23049+/*--------------------------------------------------------------------
23050+ * SYMBOL
23051+ * quick_exit()
23052+ * NOTES
23053+ * Exit child process
23054+ * ARGS
23055+ * SIGNAL_ARGS: receive signal number(I)
23056+ * RETURN
23057+ * none
23058+ *--------------------------------------------------------------------
23059+ */
23060+static void
23061+quick_exit(SIGNAL_ARGS)
23062+{
23063+#ifdef PRINT_DEBUG
23064+ show_debug("quick_exit:signo = %d", postgres_signal_arg);
23065+#endif
23066+ exit(0);
23067+}
23068+
/*--------------------------------------------------------------------
 * SYMBOL
 *     daemonize()
 * NOTES
 *     Daemonize this process: fork and let the parent exit, start a
 *     new session (when HAVE_SETSID is defined) and redirect stdin,
 *     stdout and stderr to /dev/null.
 *     A failing fork() or setsid() terminates the process with exit
 *     status 1.  If /dev/null cannot be opened the error is reported
 *     and the standard descriptors are left unchanged.
 * ARGS
 *     void
 * RETURN
 *     none
 *--------------------------------------------------------------------
 */
static void
daemonize(void)
{
	char * func = "daemonize()";
	int null_fd;
	pid_t pid;

	pid = fork();
	if (pid == (pid_t) -1)
	{
		show_error("%s:fork() failed. reason: %s",func, strerror(errno));
		exit(1);
		return; /* not reached */
	}
	else if (pid > 0)
	{ /* parent */
		exit(0);
	}

#ifdef HAVE_SETSID
	if (setsid() < 0)
	{
		show_error("%s:setsid() failed. reason:%s", func,strerror(errno));
		exit(1);
	}
#endif

	null_fd = open("/dev/null", O_RDWR);
	if (null_fd < 0)
	{
		/* nothing to redirect to; do not call dup2()/close() with -1 */
		show_error("%s:open(/dev/null) failed. reason: %s", func, strerror(errno));
		return;
	}
	dup2(null_fd, 0);
	dup2(null_fd, 1);
	dup2(null_fd, 2);
	/*
	 * open() returns 0, 1 or 2 when that descriptor was closed before;
	 * closing it then would undo the redirection.
	 */
	if (null_fd > 2)
		close(null_fd);
}
23113+
23114+/*--------------------------------------------------------------------
23115+ * SYMBOL
23116+ * write_pid_file()
23117+ * NOTES
23118+ * The process ID is written in the file.
23119+ * This process ID is used when finish pglb.
23120+ * ARGS
23121+ * void
23122+ * RETURN
23123+ * none
23124+ *--------------------------------------------------------------------
23125+ */
23126+static void
23127+write_pid_file(void)
23128+{
23129+ char * func = "write_pid_file()";
23130+ FILE *fd;
23131+ char fname[256];
23132+ char pidbuf[128];
23133+
23134+ snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGREPLICATE_PID_FILE);
23135+ fd = fopen(fname, "w");
23136+ if (!fd)
23137+ {
23138+ show_error("%s:could not open pid file as %s. reason: %s",
23139+ func, fname, strerror(errno));
23140+ exit(1);
23141+ }
23142+ snprintf(pidbuf, sizeof(pidbuf), "%d", getpid());
23143+ fwrite(pidbuf, strlen(pidbuf), 1, fd);
23144+ if (fclose(fd))
23145+ {
23146+ show_error("%s:could not write pid file as %s. reason: %s",
23147+ func,fname, strerror(errno));
23148+ exit(1);
23149+ }
23150+}
23151+
23152+/*--------------------------------------------------------------------
23153+ * SYMBOL
23154+ * stop_pgreplicate()
23155+ * NOTES
23156+ * Stop the pgreplicate process
23157+ * ARGS
23158+ * void
23159+ * RETURN
23160+ * none
23161+ *--------------------------------------------------------------------
23162+ */
23163+static void
23164+stop_pgreplicate(void)
23165+{
23166+ char * func = "stop_pgreplicate()";
23167+ FILE *fd;
23168+ char fname[256];
23169+ char pidbuf[128];
23170+ pid_t pid;
23171+
23172+ snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGREPLICATE_PID_FILE);
23173+ fd = fopen(fname, "r");
23174+ if (!fd)
23175+ {
23176+ show_error("%s:could not open pid file as %s. reason: %s",
23177+ func,fname, strerror(errno));
23178+ exit(1);
23179+ }
23180+ memset(pidbuf,0,sizeof(pidbuf));
23181+ fread(pidbuf, sizeof(pidbuf), 1, fd);
23182+ fclose(fd);
23183+ pid = atoi(pidbuf);
23184+
23185+ if (kill (pid,SIGTERM) == -1)
23186+ {
23187+ show_error("%s:could not stop pid: %d, reason: %s",func,pid,strerror(errno));
23188+ exit(1);
23189+ }
23190+}
23191+
23192+/*--------------------------------------------------------------------
23193+ * SYMBOL
23194+ * is_exist_pid_file()
23195+ * NOTES
23196+ * Check existence of pid file.
23197+ * ARGS
23198+ * void
23199+ * RETURN
23200+ * 1: the pid file is exist
23201+ * 0: the pid file is not exist
23202+ *--------------------------------------------------------------------
23203+ */
23204+static bool
23205+is_exist_pid_file(void)
23206+{
23207+ char fname[256];
23208+ struct stat buf;
23209+
23210+ snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGREPLICATE_PID_FILE);
23211+ if (stat(fname,&buf) == 0)
23212+ {
23213+ /* pid file is exist */
23214+ return true;
23215+ }
23216+ else
23217+ {
23218+ /* pid file is not exist */
23219+ return false;
23220+ }
23221+}
23222+
/*--------------------------------------------------------------------
 * SYMBOL
 *     child_wait()
 * NOTES
 *     Signal handler that reaps every terminated child without
 *     blocking (waitpid() with WNOHANG, repeated until no further
 *     child is reported).
 *     errno is saved and restored so that code interrupted by the
 *     signal does not see the value left behind by waitpid().
 * ARGS
 *     int signal_args: signal number (expecting the SIGCHLD)
 * RETURN
 *     none
 *--------------------------------------------------------------------
 */
void
child_wait(SIGNAL_ARGS)
{
	int saved_errno = errno;
	int child_status;
	pid_t pid = 0;

	do {
		pid = waitpid(-1,&child_status,WNOHANG);
	} while(pid > 0);

	errno = saved_errno;
}
23244+
23245+/*--------------------------------------------------------------------
23246+ * SYMBOL
23247+ * usage()
23248+ * NOTES
23249+ * show usage of pglb
23250+ * ARGS
23251+ * void
23252+ * RETURN
23253+ * none
23254+ *--------------------------------------------------------------------
23255+ */
23256+static void
23257+usage(void)
23258+{
23259+ char * path = getenv("PGDATA"); /* directory shown as the config file default */
23260+
23261+ if (path == NULL)
23262+     path = "."; /* PGDATA unset: fall back to the current directory */
23263+ fprintf(stderr,"PGReplicate version [%s]\n",PGREPLICATE_VERSION);
23264+ fprintf(stderr,"A replication server for cluster DB servers (based on PostgreSQL)\n\n");
23265+ fprintf(stderr,"usage: pgreplicate [-D path_of_config_file] [-W path_of_work_files] [-U login user][-w wait_sec][-l][-n][-v][-h][stop]\n");
23266+ fprintf(stderr," config file default path: %s/%s\n",path, PGREPLICATE_CONF_FILE);
23267+ fprintf(stderr," -w: wait time in seconds after fork (negative values are treated as 0)\n");
23268+ fprintf(stderr," -l: print error logs in the log file.\n");
23269+ fprintf(stderr," -n: don't run in daemon mode.\n");
23270+ fprintf(stderr," -v: debug mode. need '-n' flag\n");
23271+ fprintf(stderr," -h: print this help\n");
23272+ fprintf(stderr," stop: stop pgreplicate\n");
23273+}
23274+
23275+/*--------------------------------------------------------------------
23276+ * SYMBOL
23277+ * main()
23278+ * NOTES
23279+ * main module of pgreplicate
23280+ * ARGS
23281+ * int argc: number of parameter
23282+ * char ** argv: value of parameter
23283+ * RETURN
23284+ * STATUS_OK (only reached if PGRreplicate_exit() returns)
23285+ *--------------------------------------------------------------------
23286+ */
23287+int
23288+main(int argc, char * argv[])
23289+{
23290+ char * func = "main()";
23291+ int opt = 0;
23292+ char * r_path = NULL;
23293+ char * w_path = NULL;
23294+ bool detach = true;
23295+ pid_t rlog_pid;
23296+ /* config directory: -D overrides $PGDATA, which overrides "." */
23297+ r_path = getenv("PGDATA");
23298+ if (r_path == NULL)
23299+     r_path = ".";
23300+ while ((opt = getopt(argc, argv, "U:D:W:w:lvnh")) != -1)
23301+ {
23302+     switch (opt)
23303+     {
23304+         case 'U':
23305+             if (!optarg)
23306+             {
23307+                 usage();
23308+                 exit(1);
23309+             }
23310+             PGRuserName = strdup(optarg);
23311+             break;
23312+         case 'D':
23313+             if (!optarg)
23314+             {
23315+                 usage();
23316+                 exit(1);
23317+             }
23318+             r_path = optarg;
23319+             break;
23320+         case 'W':
23321+             if (!optarg)
23322+             {
23323+                 usage();
23324+                 exit(1);
23325+             }
23326+             w_path = optarg;
23327+             break;
23328+         case 'w':
23329+             fork_wait_time = atoi(optarg);
23330+             if (fork_wait_time < 0)
23331+                 fork_wait_time = 0; /* negative wait times are clamped to "no wait" */
23332+             break;
23333+         case 'l':
23334+             Log_Print = 1;
23335+             break;
23336+         case 'v':
23337+             Debug_Print = 1;
23338+             break;
23339+         case 'n':
23340+             detach = false;
23341+             break;
23342+         case 'h':
23343+             usage();
23344+             exit(0);
23345+             break;
23346+         default:
23347+             usage();
23348+             exit(1);
23349+     }
23350+ }
23351+ PGR_Data_Path = r_path;
23352+ if (w_path == NULL)
23353+ {
23354+     PGR_Write_Path = PGR_Data_Path; /* no -W: work files share the config directory */
23355+ }
23356+ else
23357+ {
23358+     PGR_Write_Path = w_path;
23359+ }
23360+
23361+ if (optind == (argc-1) && !strcasecmp(argv[optind],"stop")) /* whole word only: "stopX" is not a stop request */
23362+ {
23363+     stop_pgreplicate();
23364+     exit(0);
23365+ }
23366+ else if (optind == argc)
23367+ {
23368+     if (is_exist_pid_file())
23369+     {
23370+         fprintf(stderr,"pid file %s/%s found. is another pgreplicate running?\n", PGR_Write_Path, PGREPLICATE_PID_FILE);
23371+         exit(1);
23372+     }
23373+ }
23374+ else if (optind < argc)
23375+ {
23376+     usage(); /* any other trailing argument is a usage error */
23377+     exit(1);
23378+ }
23379+
23380+ if (detach)
23381+ {
23382+     daemonize();
23383+ }
23384+
23385+ PGR_Under_Replication_Server = true;
23386+ write_pid_file(); /* written after daemonize(), i.e. by the process that keeps running */
23387+ pgreplicate_pid = getpid();
23388+
23389+ PGRsignal(SIGINT, set_exit_processing);
23390+ PGRsignal(SIGQUIT, set_exit_processing);
23391+ PGRsignal(SIGTERM, set_exit_processing);
23392+ PGRsignal(SIGCHLD, child_wait);
23393+ PGRsignal(SIGPIPE, SIG_IGN);
23394+
23395+ if (PGRget_Conf_Data(PGR_Data_Path) != STATUS_OK)
23396+ {
23397+     show_error("%s:PGRget_Conf_Data error",func);
23398+     PGRreplicate_exit(0); /* NOTE(review): status 0 although start-up failed - confirm intended */
23399+ }
23400+ if (PGRinit_recovery() != STATUS_OK)
23401+ {
23402+     show_error("%s:PGRinit_recovery error",func);
23403+     PGRreplicate_exit(0);
23404+ }
23405+ if (PGRload_replication_id() != STATUS_OK)
23406+ {
23407+     show_error("%s:PGRload_replication_id error",func);
23408+     PGRreplicate_exit(0);
23409+ }
23410+
23411+ if ( PGR_Use_Replication_Log == true )
23412+ {
23413+#ifdef PRINT_DEBUG
23414+     show_debug("Use Replication Log. Start PGR_RLog_Main()");
23415+#endif
23416+     rlog_pid = PGR_RLog_Main();
23417+     if (rlog_pid < 0)
23418+     {
23419+         show_error("%s:PGR_RLog_Main failed",func);
23420+         PGRreplicate_exit(0);
23421+     }
23422+ }
23423+
23424+ /*
23425+  * fork recovery process
23426+  */
23427+ PGRrecovery_main(fork_wait_time);
23428+
23429+ /*
23430+  * fork lifecheck process
23431+  */
23432+ PGRlifecheck_main(fork_wait_time);
23433+
23434+ /*
23435+  * call replicate module
23436+  */
23437+ Replicateion_Log->r_log_sock =-1;
23438+
23439+ if (fork_wait_time > 0) {
23440+#ifdef PRINT_DEBUG
23441+     show_debug("replicate process: wait fork(): pid = %d", getpid());
23442+#endif
23443+     sleep(fork_wait_time); /* -w: delay before the replication loop starts */
23444+ }
23445+
23446+ replicate_main();
23447+
23448+ PGRreplicate_exit(0);
23449+ return STATUS_OK;
23450+}
23451+
23452+static void
23453+set_exit_processing(int signo) /* handler installed by main() for SIGINT, SIGQUIT and SIGTERM */
23454+{
23455+ exit_signo = signo; /* remember which signal asked for the shutdown */
23456+ exit_processing = true; /* only set here; presumably polled by the processing loop - TODO confirm */
23457+ PGRsignal(signo, SIG_IGN); /* further deliveries of this same signal are ignored from now on */
23458+}
23459+
23460diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/pgreplicate.conf.sample pgcluster-1.7.0rc7/src/pgcluster/pgrp/pgreplicate.conf.sample
23461--- postgresql-8.2.4/src/pgcluster/pgrp/pgreplicate.conf.sample 1970-01-01 01:00:00.000000000 +0100
23462+++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/pgreplicate.conf.sample 2007-02-18 22:52:17.000000000 +0100
23463@@ -0,0 +1,113 @@
23464+#=============================================================
23465+# PGReplicate configuration file
23466+#-------------------------------------------------------------
23467+# file: pgreplicate.conf
23468+#-------------------------------------------------------------
23469+# This file controls:
23470+# o which hosts & ports are cluster servers
23471+# o which port is used for replication requests from cluster servers
23472+#=============================================================
23473+#
23474+#-------------------------------------------------------------
23475+# A setup of Cluster DB(s)
23476+#
23477+# o Host_Name : The host name of Cluster DB.
23478+# Please write a host name by FQDN.
23479+# DO NOT write IP address.
23480+# o Port : The connection port with postmaster.
23481+# o Recovery_Port : The connection port at the time of
23482+# a recovery sequence.
23483+#-------------------------------------------------------------
23484+#<Cluster_Server_Info>
23485+# <Host_Name> master.pgcluster.org </Host_Name>
23486+# <Port> 5432 </Port>
23487+# <Recovery_Port> 7001 </Recovery_Port>
23488+#</Cluster_Server_Info>
23489+#<Cluster_Server_Info>
23490+# <Host_Name> clusterdb2.pgcluster.org </Host_Name>
23491+# <Port> 5432 </Port>
23492+# <Recovery_Port> 7001 </Recovery_Port>
23493+#</Cluster_Server_Info>
23494+#<Cluster_Server_Info>
23495+# <Host_Name> cluster3.pgcluster.org </Host_Name>
23496+# <Port> 5432 </Port>
23497+# <Recovery_Port> 7001 </Recovery_Port>
23498+#</Cluster_Server_Info>
23499+#
23500+#-------------------------------------------------------------
23501+# A setup of Load Balance Server
23502+#
23503+# o Host_Name : The host name of a load balance server.
23504+# Please write a host name by FQDN or IP address.
23505+# o Recovery_Port : The connection port at the time of
23506+# a recovery sequence .
23507+#-------------------------------------------------------------
23508+#<LoadBalance_Server_Info>
23509+# <Host_Name> loadbalancer.pgcluster.org </Host_Name>
23510+# <Recovery_Port> 6001 </Recovery_Port>
23511+#</LoadBalance_Server_Info>
23512+#
23513+#------------------------------------------------------------
23514+# A setup of the cascade connection between replication servers.
23515+# When you do not use RLOG recovery, you can skip this setup
23516+#
23517+# o Host_Name : The host name of the upper replication server.
23518+# Please write a host name by FQDN or IP address.
23519+# o Port : The connection port with postmaster.
23520+# o Recovery_Port : The connection port at the time of
23521+# a recovery sequence .
23522+#------------------------------------------------------------
23523+#<Replicate_Server_Info>
23524+# <Host_Name> upper_replicate.pgcluster.org </Host_Name>
23525+# <Port> 8002 </Port>
23526+# <Recovery_Port> 8102 </Recovery_Port>
23527+#</Replicate_Server_Info>
23528+#
23529+#-------------------------------------------------------------
23530+# A setup of a replication server
23531+#
23532+# o Host_Name : The host name of this replication server.
23533+# Please write a host name by FQDN or IP address.
23534+# o Replication_Port : Connection port for replication
23535+# o Recovery_Port : Connection port for recovery
23536+# o RLOG_Port : Connection port for replication log
23537+# o Response_Mode : When the response is returned
23538+# normal -- return result of DB which received the query
23539+# reliable -- return result after waiting for response of
23540+# all Cluster DBs.
23541+# o Use_Replication_Log : Use replication log
23542+# [yes/no]. default : no
23543+# o Replication_Timeout : Timeout of each replication response
23544+# o LifeCheck_Timeout : Timeout of the lifecheck response
23545+# o LifeCheck_Interval : Interval time of the lifecheck
23546+# (range 1s - 1h)
23547+# 10s -- 10 seconds
23548+# 10min -- 10 minutes
23549+# 1h -- 1 hour
23550+#-------------------------------------------------------------
23551+<Host_Name> replicate.pgcluster.org </Host_Name>
23552+<Replication_Port> 8001 </Replication_Port>
23553+<Recovery_Port> 8101 </Recovery_Port>
23554+<RLOG_Port> 8301 </RLOG_Port>
23555+<Response_Mode> normal </Response_Mode>
23556+<Use_Replication_Log> no </Use_Replication_Log>
23557+<Replication_Timeout> 1min </Replication_Timeout>
23558+<LifeCheck_Timeout> 3s </LifeCheck_Timeout>
23559+<LifeCheck_Interval> 15s </LifeCheck_Interval>
23560+#-------------------------------------------------------------
23561+# A setup of log files
23562+#
23563+# o File_Name : Log file name with full path
23564+# o File_Size : Maximum size of each log file
23565+# Please specify in a number and unit(K or M)
23566+# 10 -- 10 Byte
23567+# 10K -- 10 KByte
23568+# 10M -- 10 MByte
23569+# o Rotate : Rotation times
23570+# If specified 0, old versions are removed.
23571+#-------------------------------------------------------------
23572+<Log_File_Info>
23573+ <File_Name> /tmp/pgreplicate.log </File_Name>
23574+ <File_Size> 1M </File_Size>
23575+ <Rotate> 3 </Rotate>
23576+</Log_File_Info>
23577diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/pgreplicate.h pgcluster-1.7.0rc7/src/pgcluster/pgrp/pgreplicate.h
23578--- postgresql-8.2.4/src/pgcluster/pgrp/pgreplicate.h 1970-01-01 01:00:00.000000000 +0100
23579+++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/pgreplicate.h 2007-03-01 16:27:56.000000000 +0100
23580@@ -0,0 +1,425 @@
23581+/*--------------------------------------------------------------------
23582+ * FILE:
23583+ * pgreplicate.h
23584+ *
23585+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
23586+ *--------------------------------------------------------------------
23587+ */
23588+#ifndef PGREPLICATE_H
23589+#define PGREPLICATE_H
23590+
23591+#define PGREPLICATE_VERSION "1.7.0rc7"
23592+
23593+#include "lib/dllist.h"
23594+#include "lib/stringinfo.h"
23595+#include "../libpgc/libpgc.h"
23596+
23597+/* cascade packet id */
23598+#define CMD_SYS_CASCADE 'C'
23599+#define CMD_STS_TO_UPPER 'U'
23600+#define CMD_STS_TO_LOWER 'L'
23601+#define CMD_TYPE_ADD 'A'
23602+#define CMD_TYPE_DELTE 'D'
23603+#define CMD_TYPE_UPDATE_ALL 'A'
23604+
23605+/* log packet id */
23606+#define CMD_SYS_LOG 'L'
23607+#define CMD_STS_DELETE_QUERY 'q'
23608+#define CMD_STS_DELETE_TRANSACTION 't'
23609+#define CMD_STS_UPDATE_QUERY 'r'
23610+#define CMD_STS_UPDATE_TRANSACTION 'u'
23611+
23612+#define INIT_TRANSACTION_TBL_NUM (12)
23613+#define FILENAME_MAX_LENGTH (256)
23614+#define MAX_DB_SERVER (32)
23615+#define MAX_CONNECTIONS (128)
23616+#define MAX_QUEUE_FILE_SIZE (0x40000000)
23617+#define PGR_MAX_TICKETS (0x7FFFFFFF)
23618+#define PGR_MAX_QUERY_ID (0x7FFFFFFF)
23619+#define PGR_CONNECT_RETRY_TIME (3)
23620+#define PGR_EXEC_RETRY_TIME (5)
23621+#define DB_TBL_FREE (0)
23622+#define DB_TBL_INIT (1)
23623+#define DB_TBL_USE (2)
23624+#define DB_TBL_ERROR (-1)
23625+#define DB_TBL_TOP (10)
23626+#define DB_TBL_END (11)
23627+#define RECOVERY_FILE_MTYPE (1)
23628+#define QUERY_LOG_MTYPE (2)
23629+#define PGREPLICATE_CONF_FILE "pgreplicate.conf"
23630+#define PGREPLICATE_LOG_FILE "pgreplicate.log"
23631+#define PGREPLICATE_STATUS_FILE "pgreplicate.sts"
23632+#define PGREPLICATE_PID_FILE "pgreplicate.pid"
23633+#define PGREPLICATE_RID_FILE "pgreplicate.rid"
23634+#define RECOVERY_QUEUE_FILE "pgr_recovery"
23635+/* setup data tag of the configuration file */
23636+#define CLUSTER_SERVER_TAG "Cluster_Server_Info"
23637+#define LOAD_BALANCE_SERVER_TAG "LoadBalance_Server_Info"
23638+#define REPLICATE_PORT_TAG "Replication_Port"
23639+#define RECOVERY_PORT_TAG "Recovery_Port"
23640+#define LIFECHECK_PORT_TAG "LifeCheck_Port"
23641+#define RLOG_PORT_TAG "RLOG_Port"
23642+#define RESPONSE_MODE_TAG "Response_Mode"
23643+#define RESPONSE_MODE_FAST "fast"
23644+#define RESPONSE_MODE_NORMAL "normal"
23645+#define RESPONSE_MODE_RELIABLE "reliable"
23646+#define USE_REPLICATION_LOG_TAG "Use_Replication_Log"
23647+#define RESERVED_CONNECTIONS_TAG "Reserved_Connections"
23648+/* semaphore number of recovery queue */
23649+#define SEM_NUM_OF_RECOVERY (1)
23650+#define SEM_NUM_OF_RECOVERY_QUEUE (2)
23651+/* semaphore number of lock tickets */
23652+#define SEM_NUM_OF_LOCK (1)
23653+#define STATUS_LOCK_CONFLICT (2)
23654+#define STATUS_DEADLOCK_DETECT (3)
23655+#define STATUS_ABORTED (4)
23656+#define STATUS_NOT_YET_REPLICATE (5)
23657+#define STATUS_ALREADY_REPLICATED (6)
23658+#define STATUS_SKIP_REPLICATE (7)
23659+#define PGR_NOWAIT_ANSWER (0)
23660+#define PGR_WAIT_ANSWER (1)
23661+#define LOOP_CONTINUE (0)
23662+#define LOOP_END (1)
23663+#define LOWER_CASCADE (1)
23664+#define UPPER_CASCADE (2)
23665+#define ALL_CASCADE (3)
23666+#define NOTICE_SYSTEM_CALL_TYPE (10)
23667+#define RECOVERY_QUERY_TYPE (20)
23668+
23669+#define PGR_TIME_OUT (60)
23670+#define PGR_SEND_RETRY_CNT (100)
23671+#define PGR_SEND_WAIT_MSEC (500)
23672+#define PGR_RECV_RETRY_CNT (100)
23673+#define PGR_RECV_WAIT_MSEC (500)
23674+#define PGR_SEM_UNLOCK_WAIT_MSEC (100)
23675+#define PGR_SEM_LOCK_WAIT_MSEC (500)
23676+#define PGR_RECOVERY_RETRY_CNT (6000)
23677+#define PGR_RECOVERY_WAIT_MSEC (500)
23678+#define PGR_CHECK_POINT (300)
23679+
23680+#define PGR_RECOVERY_1ST_STAGE (1)
23681+#define PGR_RECOVERY_2ND_STAGE (2)
23682+
23683+#define IDLE_MODE (0)
23684+#define BUSY_MODE (1)
23685+
23686+/*
23687+ * connection table for transaction query
23688+ */
23689+typedef struct {
23690+ int useFlag; /* presumably one of the DB_TBL_* values - TODO confirm */
23691+ int lock;
23692+ int transaction_count;
23693+ unsigned short port;
23694+ unsigned short pid; /* NOTE(review): 16 bits wide; a process id above 65535 would not fit - confirm */
23695+ unsigned int hostIP;
23696+ unsigned int srcHostIP;
23697+ char host[HOSTNAME_MAX_LENGTH];
23698+ char srcHost[HOSTNAME_MAX_LENGTH];
23699+ char dbName[DBNAME_MAX_LENGTH];
23700+ PGconn * conn; /* libpq connection handle belonging to this entry */
23701+ bool in_transaction;
23702+ bool exec_copy;
23703+}TransactionTbl;
23704+
23705+/*
23706+ * cluster server table
23707+ */
23708+typedef struct {
23709+ int useFlag;
23710+ char hostName[HOSTNAME_MAX_LENGTH];
23711+ char resolvedName[24];
23712+ int port;
23713+ int recoveryPort;
23714+ int hostNum;
23715+ int transaction_count;
23716+ int retry_count;
23717+}HostTbl;
23718+
23719+
23720+typedef struct {
23721+ FILE * queue_fp;
23722+ int current_queue_no;
23723+} RecoveryQueueInf;
23724+
23725+
23726+/*
23727+ * host table for recovery request
23728+ */
23729+typedef struct {
23730+ char hostName[HOSTNAME_MAX_LENGTH];
23731+ char resolvedName[24];
23732+ int port;
23733+ int recoveryPort;
23734+ int sock;
23735+ int recovery_sock;
23736+} RecoveryTbl;
23737+
23738+/*
23739+ * status table for recovery
23740+ */
23741+typedef struct {
23742+ int useFlag;
23743+ int transaction_count;
23744+ int recovery_status;
23745+ unsigned int replication_id;
23746+ HostTbl target_host;
23747+ int read_queue_no;
23748+ int write_queue_no;
23749+ int check_point;
23750+ unsigned int file_size;
23751+ char write_file[FILENAME_MAX_LENGTH];
23752+ char read_file[FILENAME_MAX_LENGTH];
23753+} RecoveryStatusInf;
23754+
23755+typedef struct {
23756+ long mtype;
23757+ char mdata[1];
23758+} RecoveryQueueFile;
23759+
23760+typedef struct {
23761+ long mtype;
23762+ unsigned int replicationId;
23763+ char mdata[1];
23764+} RecoveryQueueQuery;
23765+
23766+typedef struct {
23767+ unsigned int entry_ticket;
23768+ unsigned int lock_wait_queue_length;
23769+ int overflow;
23770+} LockWaitInf;
23771+
23772+typedef struct {
23773+ int response_mode;
23774+ int current_cluster;
23775+} ResponseInf;
23776+
23777+typedef struct {
23778+ ReplicateHeader * header;
23779+ char * query;
23780+ char * next;
23781+ char * last;
23782+} QueryLogType;
23783+
23784+typedef struct {
23785+ ReplicateServerInfo * top;
23786+ ReplicateServerInfo * end;
23787+ ReplicateServerInfo * lower;
23788+ ReplicateServerInfo * upper;
23789+ ReplicateServerInfo * myself;
23790+ int useFlag;
23791+} CascadeInf;
23792+
23793+typedef struct {
23794+ union /* both members occupy the same int; which one applies to an entry is not visible here - TODO confirm */
23795+ {
23796+ int useFlag;
23797+ int commit_log_num;
23798+ } inf;
23799+ ReplicateHeader header; /* stored by value, not by pointer */
23800+} CommitLogInf;
23801+
23802+typedef struct {
23803+ int useFlag;
23804+ char * RLog_Sock_Path;
23805+ uint16_t RLog_Port_Number;
23806+ int r_log_sock;
23807+ ReplicateHeader * header;
23808+ char * query;
23809+} ReplicationLogInf;
23810+
23811+typedef struct {
23812+ char hostName[HOSTNAME_MAX_LENGTH];
23813+ uint16_t port;
23814+ uint16_t pid; /* NOTE(review): 16 bits wide; a process id above 65535 would not fit - confirm */
23815+ uint32_t request_id;
23816+} QueryLogID;
23817+
23818+typedef struct {
23819+ QueryLogID query_log_id;
23820+ char * last;
23821+ char * next;
23822+} ConfirmQueryList;
23823+
23824+typedef struct {
23825+ ReplicateHeader * header;
23826+ char * query;
23827+ int dest;
23828+ int current_cluster;
23829+ int transaction_count;
23830+ HostTbl * host_ptr;
23831+ TransactionTbl *transaction_tbl;
23832+} ThreadArgInf;
23833+
23834+/* replication server data */
23835+extern char * ResolvedName;
23836+extern uint16_t Port_Number;
23837+extern uint16_t LifeCheck_Port_Number;
23838+extern uint16_t Recovery_Port_Number;
23839+extern int Reserved_Connections;
23840+extern bool PGR_Parse_Session_Started;
23841+extern int PGR_Replication_Timeout;
23842+
23843+/* global tables */
23844+extern HostTbl * Host_Tbl_Begin;
23845+extern Dllist * Transaction_Tbl_Begin;
23846+extern TransactionTbl * Transaction_Tbl_End;
23847+extern RecoveryTbl * LoadBalanceTbl;
23848+extern RecoveryStatusInf * Recovery_Status_Inf;
23849+extern LockWaitInf * Lock_Wait_Tbl;
23850+extern ReplicateHeader * PGR_Log_Header;
23851+extern ReplicateServerInfo * Cascade_Tbl;
23852+extern CascadeInf * Cascade_Inf;
23853+extern CommitLogInf * Commit_Log_Tbl;
23854+extern QueryLogType * Query_Log_Top;
23855+extern QueryLogType * Query_Log_End;
23856+extern ReplicationLogInf * Replicateion_Log;
23857+extern int RecoveryShmid;
23858+extern int ReplicateSerializationShmid;
23859+extern int RecoveryMsgShmid;
23860+extern int *RecoveryMsgid;
23861+extern int HostTblShmid;
23862+extern int LockWaitTblShmid;
23863+extern int CascadeTblShmid;
23864+extern int CascadeInfShmid;
23865+extern int CommitLogShmid;
23866+extern int MaxBackends;
23867+extern char * PGR_Result;
23868+extern int SemID;
23869+extern int RecoverySemID;
23870+extern int CascadeSemID;
23871+extern int LockSemID;
23872+extern int VacuumSemID;
23873+extern char * PGR_Data_Path;
23874+extern char * PGR_Write_Path;
23875+extern FILE * LogFp;
23876+extern FILE * StatusFp;
23877+extern FILE * RidFp;
23878+extern FILE * QueueFp;
23879+extern int Log_Print;
23880+extern int Debug_Print;
23881+extern char * Function;
23882+extern int IS_SESSION_AUTHORIZATION;
23883+extern ResponseInf * PGR_Response_Inf;
23884+extern bool StartReplication[MAX_DB_SERVER];
23885+extern bool PGR_Cascade;
23886+extern bool PGR_Use_Replication_Log;
23887+extern bool PGR_AutoCommit;
23888+extern unsigned int * PGR_ReplicateSerializationID;
23889+extern unsigned int * PGR_Send_Query_ID;
23890+extern unsigned int PGR_Query_ID;
23891+extern volatile bool exit_processing;
23892+extern RecoveryQueueInf RecoveryQueue;
23893+extern int pgreplicate_pid;
23894+extern char * PGRuserName;
23895+extern int exit_signo;
23896+
23897+extern int ReplicateSock;
23898+
23899+/* smart shutdown */
23900+extern int Idle_Flag;
23901+extern volatile bool Exit_Request;
23902+
23903+/*
23904+ * external prototype in main.c
23905+ */
23906+extern void child_wait(SIGNAL_ARGS);
23907+
23908+/*
23909+ * external prototype in conf.c
23910+ */
23911+extern int PGRget_Conf_Data(char * path);
23912+
23913+/*
23914+ * external prototype in replicate.c
23915+ */
23916+extern int PGRset_replication_id(uint32_t rid);
23917+extern bool PGRis_same_host(char * host1, unsigned short port1 , char * host2, unsigned short port2);
23918+extern HostTbl * PGRadd_HostTbl(HostTbl * conf_data, int useFlag);
23919+extern HostTbl * PGRget_master(void);
23920+extern void PGRset_recovery_status(int status);
23921+extern int PGRget_recovery_status(void);
23922+extern int PGRcheck_recovered_host(void);
23923+extern int PGRset_recovered_host(HostTbl * target,int useFlag);
23924+extern int PGRinit_recovery(void);
23925+extern void PGRexit_subprocess(int signo);
23926+extern void PGRreplicate_exit(int exit_status);
23927+extern int PGRsend_replicate_packet_to_server( HostTbl * host_ptr, ReplicateHeader * header, char *query , char * result,unsigned int replicationId, bool recovery);
23928+extern int PGRreplicate_packet_send_each_server( HostTbl * ptr, bool return_response, ReplicateHeader * header, char * query,int dest);
23929+extern HostTbl * PGRget_HostTbl(char * hostName,int port);
23930+extern int PGRset_queue(ReplicateHeader * header,char * query);
23931+extern int PGRset_host_status(HostTbl * host_ptr,int status);
23932+extern void PGRclear_connections(void);
23933+extern void PGRdestroy_transaction_table(void);
23934+extern void PGRsem_unlock( int semid, short sem_num );
23935+extern void PGRsem_lock( int semid, short sem_num );
23936+extern int PGRdo_replicate(int sock,ReplicateHeader *header, char * query);
23937+extern int PGRreturn_result(int dest, char * result, int wait);
23938+extern int PGRreplicate_packet_send( ReplicateHeader * header, char * query,int dest,int recovery_status);
23939+extern char * PGRread_packet(int sock, ReplicateHeader *header);
23940+extern void PGRnotice_replication_server(char * hostName, unsigned short portNumber,unsigned short recoveryPortNumber, unsigned short lifecheckPortNumber, char * userName);
23941+extern char * PGRread_query(int sock, ReplicateHeader *header);
23942+extern int PGRsync_oid(ReplicateHeader *header);
23943+extern unsigned int PGRget_next_query_id(void);
23944+extern int PGRinit_transaction_table(void);
23945+extern int replicate_packet_send_internal(ReplicateHeader * header, char * query,int dest,int recovery_status,bool isHeldLock);
23946+extern int PGRsync_oid(ReplicateHeader *header);
23947+extern int PGRload_replication_id(void);
23948+extern PGconn * PGRcreateConn( char * host, char * port,char * database, char * userName, char * password, char * md5Salt, char * cryptSalt );
23949+/*
23950+ * external prototype in recovery.c
23951+ */
23952+extern int PGRsend_load_balance_packet(RecoveryPacket * packet);
23953+extern void PGRrecovery_main(int fork_wait_time);
23954+extern FILE * PGRget_recovery_queue_file_for_write(void);
23955+extern FILE * PGRget_recovery_queue_file_for_read(int next);
23956+
23957+/*
23958+ * external prototype in rlog.c
23959+ */
23960+extern int PGRwrite_rlog(ReplicateHeader * header, char * query);
23961+extern ReplicateHeader * PGRget_requested_query(ReplicateHeader * header);
23962+extern void PGRreconfirm_commit(int sock, ReplicateHeader * header);
23963+extern void PGRset_rlog(ReplicateHeader * header, char * query);
23964+extern void PGRunset_rlog(ReplicateHeader * header, char * query);
23965+extern int PGRresend_rlog_to_db(void);
23966+extern void PGRreconfirm_query(int sock, ReplicateHeader * header);
23967+extern pid_t PGR_RLog_Main(void);
23968+extern int PGRcreate_send_rlog_socket(void);
23969+extern int PGRsend_rlog_packet(int sock,ReplicateHeader * header, const char * query_string);
23970+extern int PGRrecv_rlog_result(int sock,void * result, int size);
23971+extern int PGRsend_rlog_to_local(ReplicateHeader * header,char * query);
23972+extern int PGRget_rlog_header(ReplicateHeader * header);
23973+
23974+/*
23975+ * external prototype in cascade.c
23976+ */
23977+extern int PGRstartup_cascade(void);
23978+extern int PGRsend_lower_cascade(ReplicateHeader * header, char * query);
23979+extern int PGRsend_upper_cascade(ReplicateHeader * header, char * query);
23980+extern int PGRwait_answer_cascade(int sock);
23981+extern ReplicateServerInfo * PGRget_lower_cascade(void);
23982+extern ReplicateServerInfo * PGRget_upper_cascade(void);
23983+extern void PGRset_cascade_server_status(ReplicateServerInfo * cascade, int status);
23984+extern ReplicateServerInfo * PGRrecv_cascade_answer(ReplicateServerInfo * cascade,ReplicateHeader * header);
23985+extern int PGRsend_cascade(int sock , ReplicateHeader * header, char * query);
23986+extern int PGRcascade_main(int sock, ReplicateHeader * header, char * query);
23987+extern int PGRwait_notice_rlog_done(void);
23988+extern int PGRsend_notice_rlog_done(int sock);
23989+extern int PGRsend_notice_quit(void);
23990+
23991+/*
23992+ * external prototype in pqformat.c
23993+ */
23994+extern const char * pq_getmsgstring(StringInfo msg);
23995+extern unsigned int pq_getmsgint(StringInfo msg, int b);
23996+extern void pq_copymsgbytes(StringInfo msg, char *buf, int datalen);
23997+extern const char * pq_getmsgbytes(StringInfo msg, int datalen);
23998+extern int pq_getmsgbyte(StringInfo msg);
23999+
24000+/*
24001+ * external prototype in lifecheck.c
24002+ */
24003+extern int PGRlifecheck_main(int fork_wait_time);
24004+
24005+#endif /* PGREPLICATE_H */
24006diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/pqformat.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/pqformat.c
24007--- postgresql-8.2.4/src/pgcluster/pgrp/pqformat.c 1970-01-01 01:00:00.000000000 +0100
24008+++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/pqformat.c 2007-02-18 22:52:17.000000000 +0100
24009@@ -0,0 +1,173 @@
24010+/*-------------------------------------------------------------------------
24011+ * pqformat.c
24012+ * Routines for formatting and parsing frontend/backend messages
24013+ *
24014+ * These routines were copied from src/backend/libpq/pqformat.c.
24015+ * The originals depend on shared backend modules and macros,
24016+ * which makes them difficult to link into the replication server.
24017+ * Therefore, these routines were customized
24018+ * (the shared modules and macros were removed).
24019+ *
24020+ * Original source code is under the following copyright
24021+ *
24022+ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
24023+ * Portions Copyright (c) 1994, Regents of the University of California
24024+ *
24025+ *-------------------------------------------------------------------------
24026+ */
24027+/*
24028+ * INTERFACE ROUTINES
24029+ * Message parsing after input:
24030+ * pq_getmsgbyte - get a raw byte from a message buffer
24031+ * pq_getmsgint - get a binary integer from a message buffer
24032+ * pq_getmsgbytes - get raw data from a message buffer
24033+ * pq_copymsgbytes - copy raw data from a message buffer
24034+ * pq_getmsgstring - get a null-terminated text string (no conversion)
24035+ */
24036+
24037+/* --------------------------------
24038+ * pq_getmsgstring - get a null-terminated text string (no conversion)
24039+ *
24040+ * Returns a pointer directly into the message buffer, or NULL when msg
24041+ * is NULL or the string's terminator is not inside the message.
24042+ * --------------------------------
24043+ */
24044+
24045+#include "postgres.h"
24046+#include <errno.h>
24047+#include <sys/types.h>
24048+#include <sys/param.h>
24049+#include <netinet/in.h>
24050+#include <arpa/inet.h>
24051+#ifdef HAVE_ENDIAN_H
24052+#include <endian.h>
24053+#endif
24054+
24055+#include "mb/pg_wchar.h"
24056+
24057+#include "libpq-fe.h"
24058+#include "libpq-int.h"
24059+#include "fe-auth.h"
24060+#include "replicate_com.h"
24061+#include "pgreplicate.h"
24062+
24063+const char * pq_getmsgstring(StringInfo msg);
24064+unsigned int pq_getmsgint(StringInfo msg, int b);
24065+void pq_copymsgbytes(StringInfo msg, char *buf, int datalen);
24066+const char * pq_getmsgbytes(StringInfo msg, int datalen);
24067+int pq_getmsgbyte(StringInfo msg);
24068+
24069+const char *
24070+pq_getmsgstring(StringInfo msg)
24071+{
24072+ char *start;
24073+ int nbytes;
24074+
24075+ if (msg == NULL)
24076+     return NULL;
24077+
24078+ /*
24079+  * Measure the string that begins at the read cursor.  strlen() is used
24080+  * on the assumption that StringInfo data always ends in a null byte;
24081+  * what still has to be verified is that the terminator it found lies
24082+  * inside the message itself.
24083+  */
24084+ start = &msg->data[msg->cursor];
24085+ nbytes = strlen(start);
24086+ if (msg->cursor + nbytes >= msg->len)
24087+     return NULL;
24088+
24089+ /* advance past the string and its terminating null byte */
24090+ msg->cursor += nbytes + 1;
24091+
24092+ return start;
24093+}
24094+
24095+
24096+/* --------------------------------
24097+ * pq_getmsgint - get a binary integer from a message buffer
24098+ *
24099+ * Values are treated as unsigned.
24100+ * --------------------------------
24101+ */
24102+unsigned int
24103+pq_getmsgint(StringInfo msg, int b)
24104+{
24105+ unsigned int result;
24106+ unsigned char n8 = 0; /* zero-initialised: pq_copymsgbytes() copies nothing */
24107+ uint16 n16 = 0; /* when the message is too short, and the result must */
24108+ uint32 n32 = 0; /* then be 0 rather than an indeterminate value */
24109+
24110+ switch (b) /* b is the width of the integer in bytes: 1, 2 or 4 */
24111+ {
24112+     case 1:
24113+         pq_copymsgbytes(msg, (char *) &n8, 1);
24114+         result = n8;
24115+         break;
24116+     case 2:
24117+         pq_copymsgbytes(msg, (char *) &n16, 2);
24118+         result = ntohs(n16); /* message integers are in network byte order */
24119+         break;
24120+     case 4:
24121+         pq_copymsgbytes(msg, (char *) &n32, 4);
24122+         result = ntohl(n32);
24123+         break;
24124+     default:
24125+         result = 0; /* unsupported width; also keeps the compiler quiet */
24126+         break;
24127+ }
24128+ return result;
24129+}
24130+
24131+/* --------------------------------
24132+ * pq_copymsgbytes - copy raw data from a message buffer
24133+ *
24134+ * Same as pq_getmsgbytes, except data is copied to caller's buffer.
24135+ * --------------------------------
24136+ */
24137+void
24138+pq_copymsgbytes(StringInfo msg, char *buf, int datalen)
24139+{
24140+ /* copy only a non-negative length that fits in the unread part; else leave buf and cursor alone */
24141+ if (datalen >= 0 && datalen <= (msg->len - msg->cursor))
24142+ {
24143+     memcpy(buf, &msg->data[msg->cursor], datalen);
24144+     msg->cursor += datalen; /* consume what was copied */
24145+ }
24146+}
24147+
24148+
24149+/* --------------------------------
24150+ * pq_getmsgbytes - get raw data from a message buffer
24151+ *
24152+ * Returns a pointer directly into the message buffer; note this
24153+ * may not have any particular alignment.
24154+ * --------------------------------
24155+ */
24156+const char *
24157+pq_getmsgbytes(StringInfo msg, int datalen)
24158+{
24159+ const char *chunk = NULL; /* stays NULL when the request cannot be satisfied */
24160+
24161+ /* hand out bytes only for a non-negative length that fits in the unread part */
24162+ if (datalen >= 0 && datalen <= (msg->len - msg->cursor))
24163+ {
24164+     chunk = &msg->data[msg->cursor];
24165+     msg->cursor += datalen; /* mark the returned bytes as consumed */
24166+ }
24167+ return chunk;
24168+}
24169+
24170+/* --------------------------------
24171+ * pq_getmsgbyte - get a raw byte from a message buffer
24172+ * --------------------------------
24173+ */
24174+int
24175+pq_getmsgbyte(StringInfo msg)
24176+{
24177+ unsigned char octet = 0; /* value reported when nothing is left to read */
24178+
24179+ if (msg->cursor < msg->len)
24180+     octet = (unsigned char) msg->data[msg->cursor++];
24181+ return octet;
24182+}
24183diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/recovery.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/recovery.c
24184--- postgresql-8.2.4/src/pgcluster/pgrp/recovery.c 1970-01-01 01:00:00.000000000 +0100
24185+++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/recovery.c 2007-02-18 22:52:17.000000000 +0100
24186@@ -0,0 +1,1143 @@
24187+/*--------------------------------------------------------------------
24188+ * FILE:
24189+ * recovery.c
24190+ *
24191+ * NOTE:
24192+ * This file contains the functions that the pgreplicate process
24193+ * calls to carry out recovery.
24194+ *
24195+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
24196+ *--------------------------------------------------------------------
24197+ */
24198+#include "postgres.h"
24199+
24200+#include <stdio.h>
24201+#include <unistd.h>
24202+#include <signal.h>
24203+#include <sys/wait.h>
24204+#include <sys/types.h>
24205+#include <sys/stat.h>
24206+#include <sys/socket.h>
24207+#include <sys/ipc.h>
24208+#include <sys/shm.h>
24209+#include <sys/msg.h>
24210+#include <netdb.h>
24211+#include <netinet/in.h>
24212+#include <errno.h>
24213+#include <fcntl.h>
24214+#include <time.h>
24215+#include <arpa/inet.h>
24216+#include <sys/param.h>
24217+#include <sys/file.h>
24218+
24219+#ifdef HAVE_SYS_SELECT_H
24220+#include <sys/select.h>
24221+#endif
24222+
24223+#ifdef HAVE_CRYPT_H
24224+#include <crypt.h>
24225+#endif
24226+
24227+#include "miscadmin.h"
24228+#include "nodes/nodes.h"
24229+
24230+#include "libpq-fe.h"
24231+#include "libpq/libpq-fs.h"
24232+#include "libpq-int.h"
24233+#include "fe-auth.h"
24234+
24235+#include "access/xact.h"
24236+#include "replicate_com.h"
24237+#include "pgreplicate.h"
24238+
24239+
24240+#ifdef WIN32
24241+#include "win32.h"
24242+#else
24243+#ifdef HAVE_NETINET_TCP_H
24244+#include <netinet/tcp.h>
24245+#endif
24246+#include <arpa/inet.h>
24247+#endif
24248+
24249+#ifdef HAVE_CRYPT_H
24250+#include <crypt.h>
24251+#endif
24252+
24253+#ifdef MULTIBYTE
24254+#include "mb/pg_wchar.h"
24255+#endif
24256+#include "pgreplicate.h"
24257+
24258+
24259+/*--------------------------------------
24260+ * GLOBAL VARIABLE DECLARATION
24261+ *--------------------------------------
24262+ */
24263+RecoveryPacket MasterPacketData;
24264+RecoveryTbl Master;
24265+RecoveryTbl Target;
24266+
24267+
24268+/*--------------------------------------
24269+ * PROTOTYPE DECLARATION
24270+ *--------------------------------------
24271+ */
24272+static int read_packet(int sock,RecoveryPacket * packet);
24273+static int read_packet_from_master( RecoveryTbl * host, RecoveryPacket * packet );
24274+static int send_recovery_packet(int sock, RecoveryPacket * packet);
24275+static int send_packet(RecoveryTbl * host, RecoveryPacket * packet );
24276+static void start_recovery_prepare(void);
24277+static void reset_recovery_prepare(void);
24278+static void finish_recovery(void);
24279+static bool first_setup_recovery(int * sock, RecoveryPacket * packet);
24280+static int wait_transaction_count_clear(void);
24281+static bool second_setup_recovery (RecoveryPacket * packet);
24282+static void pgrecovery_loop(int fd);
24283+static int PGRsend_queue(RecoveryTbl * master, RecoveryTbl * target);
24284+static int send_vacuum(HostTbl *host, char * userName, int stage);
24285+static char * read_queue_file(FILE * fp, ReplicateHeader * header, char * query);
24286+
24287+#ifdef PRINT_DEBUG
24288+static void show_recovery_packet(RecoveryPacket * packet);
24289+#endif
24290+
24291+int PGRsend_load_balance_packet(RecoveryPacket * packet);
24292+void PGRrecovery_main(int fork_wait_time);
24293+
24294+/*-----------------------------------------------------------
24295+ * SYMBOL
24296+ * read_packet()
24297+ * NOTES
24298+ * Read recovery packet data
24299+ * ARGS
24300+ * int sock : socket
24301+ * RecoveryPacket * packet : read packet buffer
24302+ * RETURN
24303+ * -1 : error
24304+ * >0 : read size
24305+ *-----------------------------------------------------------
24306+ */
24307+static int
24308+read_packet(int sock,RecoveryPacket * packet)
24309+{
24310+#ifdef PRINT_DEBUG
24311+ char * func = "read_packet()";
24312+#endif
24313+ int r = 0;
24314+ char * read_ptr = NULL;
24315+ int read_size = 0;
24316+ int packet_size = 0;
24317+
24318+ if (packet == NULL)
24319+ {
24320+ return -1;
24321+ }
24322+ read_ptr = (char*)packet;
24323+ packet_size = sizeof(RecoveryPacket);
24324+ for (;;)
24325+ {
24326+ r = recv(sock,read_ptr + read_size ,packet_size - read_size, MSG_WAITALL);
24327+ if (r < 0)
24328+ {
24329+ if (errno == EINTR || errno == EAGAIN)
24330+ continue;
24331+ else
24332+ {
24333+ show_error("%s:recv failed: %d(%s)",func, errno, strerror(errno));
24334+ return -1;
24335+ }
24336+ }
24337+ else if (r > 0)
24338+ {
24339+ read_size += r;
24340+ if (read_size == packet_size)
24341+ {
24342+#ifdef PRINT_DEBUG
24343+ show_debug("%s:receive packet",func);
24344+ show_recovery_packet(packet);
24345+#endif
24346+ return read_size;
24347+ }
24348+ }
24349+ else /* r == 0 */
24350+ {
24351+ show_error("%s:unexpected EOF", func);
24352+ return -1;
24353+ }
24354+ }
24355+ return -1;
24356+}
24357+
24358+static int
24359+read_packet_from_master( RecoveryTbl * host, RecoveryPacket * packet )
24360+{
24361+ int read_size = 0;
24362+ int rtn;
24363+ fd_set rmask;
24364+ struct timeval timeout;
24365+
24366+ for(;;)
24367+ {
24368+ timeout.tv_sec = RECOVERY_TIMEOUT;
24369+ timeout.tv_usec = 0;
24370+
24371+ /*
24372+ * Wait for something to happen.
24373+ */
24374+ FD_ZERO(&rmask);
24375+ FD_SET(host->recovery_sock,&rmask);
24376+ rtn = select(host->recovery_sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
24377+
24378+ if (rtn == 0) /* timeout */
24379+ {
24380+ return -1;
24381+ }
24382+
24383+ if (rtn && FD_ISSET(host->recovery_sock, &rmask))
24384+ {
24385+ read_size = read_packet(host->recovery_sock, packet);
24386+ return read_size;
24387+ }
24388+ }
24389+}
24390+
24391+static int
24392+send_recovery_packet(int sock, RecoveryPacket * packet)
24393+{
24394+ char *func = "send_recovery_packet";
24395+ char * send_ptr;
24396+ int send_size= 0;
24397+ int buf_size = 0;
24398+ int s;
24399+
24400+ send_ptr = (char *)packet;
24401+ buf_size = sizeof(RecoveryPacket);
24402+
24403+ for (;;)
24404+ {
24405+ s = send(sock, send_ptr + send_size,buf_size - send_size ,0);
24406+ if (s < 0)
24407+ {
24408+ if (errno == EINTR || errno == EAGAIN)
24409+ continue;
24410+
24411+ show_error("%s:send error: %d(%s)", func, errno, strerror(errno));
24412+ return STATUS_ERROR;
24413+ }
24414+ else if (s == 0)
24415+ {
24416+ show_error("%s:unexpected EOF", func);
24417+ return STATUS_ERROR;
24418+ }
24419+
24420+ send_size += s;
24421+ if (send_size == buf_size)
24422+ return STATUS_OK;
24423+ }
24424+}
24425+
24426+static int
24427+send_packet(RecoveryTbl * host, RecoveryPacket * packet )
24428+{
24429+ char * func = "send_packet()";
24430+ int count = 0;
24431+
24432+ if (host->recovery_sock == -1)
24433+ {
24434+ while(PGR_Create_Socket_Connect(&(host->recovery_sock), host->hostName , host->recoveryPort) != STATUS_OK )
24435+ {
24436+ if (count > MAX_RETRY_TIMES )
24437+ {
24438+ show_error("%s:host[%s] port[%d]PGR_Create_Socket_Connect failed",func,host->hostName, host->recoveryPort);
24439+ return STATUS_ERROR;
24440+ }
24441+ count ++;
24442+ }
24443+ }
24444+ count = 0;
24445+ while (send_recovery_packet(host->recovery_sock,packet) != STATUS_OK)
24446+ {
24447+ close(host->recovery_sock);
24448+ host->recovery_sock = -1;
24449+ PGR_Create_Socket_Connect(&(host->recovery_sock), host->hostName , host->recoveryPort);
24450+#ifdef PRINT_DEBUG
24451+ show_debug("%s:PGR_Create_Socket_Connectsock[%d] host[%s] port[%d]",
24452+ func,host->recovery_sock,host->hostName,host->recoveryPort);
24453+#endif
24454+ if (count > PGR_CONNECT_RETRY_TIME )
24455+ {
24456+
24457+ show_error("%s:send failed and PGR_Create_Socket_Connect failed",func);
24458+ return STATUS_ERROR;
24459+ }
24460+ count ++;
24461+ }
24462+ return STATUS_OK;
24463+}
24464+
24465+static void
24466+start_recovery_prepare(void)
24467+{
24468+ PGRset_recovery_status (RECOVERY_PREPARE_START);
24469+}
24470+
24471+static void
24472+reset_recovery_prepare(void)
24473+{
24474+ PGRset_recovery_status (RECOVERY_INIT);
24475+}
24476+
24477+static void
24478+finish_recovery(void)
24479+{
24480+ PGRset_recovery_status (RECOVERY_INIT);
24481+}
24482+
24483+int
24484+PGRsend_load_balance_packet(RecoveryPacket * packet)
24485+{
24486+ char * func = "PGRsend_load_balance_packet()";
24487+ RecoveryTbl * lbp;
24488+ int status;
24489+
24490+ lbp = LoadBalanceTbl;
24491+ if (lbp == (RecoveryTbl *)NULL)
24492+ {
24493+ show_error("%s:recovery table is NULL",func);
24494+ return STATUS_ERROR;
24495+ }
24496+ while (lbp->hostName[0] != 0)
24497+ {
24498+ if (lbp->recovery_sock != -1)
24499+ {
24500+ close(lbp->recovery_sock);
24501+ lbp->recovery_sock = -1;
24502+ }
24503+#ifdef PRINT_DEBUG
24504+ show_debug("%s:host[%s] port[%d]",func,lbp->hostName,lbp->recoveryPort);
24505+#endif
24506+ status = send_packet(lbp,packet);
24507+ if (lbp->recovery_sock != -1)
24508+ {
24509+ close(lbp->recovery_sock);
24510+ lbp->recovery_sock = -1;
24511+ }
24512+ lbp ++;
24513+ }
24514+ return STATUS_OK;
24515+}
24516+
24517+static int
24518+send_vacuum(HostTbl *host, char * userName, int stage)
24519+{
24520+ int rtn = STATUS_OK;
24521+ ReplicateHeader header;
24522+ char * query = NULL;
24523+
24524+ if (stage == PGR_RECOVERY_1ST_STAGE)
24525+ {
24526+ query = strdup("VACUUM");
24527+ }
24528+ else
24529+ {
24530+ query = strdup("VACUUM FULL");
24531+ }
24532+ memset(&header,0,sizeof(header));
24533+ header.query_size = strlen(query) + 1;
24534+ strncpy(header.dbName,"template1",sizeof(header.dbName));
24535+ strncpy(header.userName,userName,sizeof(header.userName));
24536+ header.cmdSys = CMD_SYS_REPLICATE;
24537+ header.cmdSts = CMD_STS_QUERY;
24538+ header.cmdType = CMD_TYPE_VACUUM;
24539+ header.pid = getpid();
24540+ header.query_id = getpid();
24541+ header.isAutoCommit=1;
24542+ rtn = PGRsend_replicate_packet_to_server(host,&header,query,PGR_Result,0, true);
24543+ if (query !=NULL)
24544+ free(query);
24545+ return rtn;
24546+}
24547+
24548+static bool
24549+first_setup_recovery(int * sock, RecoveryPacket * packet)
24550+{
24551+ char * func = "first_setup_recovery()";
24552+ int status;
24553+ HostTbl * master = (HostTbl *)NULL;
24554+ bool loop_end = false;
24555+ HostTbl host_tbl;
24556+ char * userName = NULL;
24557+ int ip;
24558+
24559+ memset(Target.hostName,0,sizeof(Target.hostName));
24560+ strncpy(Target.hostName,packet->hostName,sizeof(Target.hostName));
24561+ ip = PGRget_ip_by_name(Target.hostName);
24562+ sprintf(Target.resolvedName,
24563+ "%d.%d.%d.%d",
24564+ (ip ) & 0xff ,
24565+ (ip >> 8) & 0xff ,
24566+ (ip >> 16) & 0xff ,
24567+ (ip >> 24) & 0xff );
24568+ Target.port = ntohs(packet->port);
24569+ Target.recoveryPort = ntohs(packet->recoveryPort);
24570+ Target.sock = *sock;
24571+ Target.recovery_sock = *sock;
24572+#ifdef PRINT_DEBUG
24573+ show_debug("%s:1st setup target %s",func,Target.hostName);
24574+ show_debug("%s:1st setup port %d",func,Target.port);
24575+#endif
24576+ /*
24577+ * check another recovery process
24578+ */
24579+ if (PGRget_recovery_status() != RECOVERY_INIT)
24580+ {
24581+ /*
24582+ * recovery process is already running
24583+ */
24584+#ifdef PRINT_DEBUG
24585+ show_debug("%s:already recovery job runing",func);
24586+#endif
24587+ memset(packet,0,sizeof(packet));
24588+ PGRset_recovery_packet_no(packet, RECOVERY_ERROR_OCCUPIED) ;
24589+ status = send_packet(&Target,packet);
24590+ loop_end = true;
24591+ return loop_end;
24592+ }
24593+ /*
24594+ * add recovery target to host table
24595+ */
24596+#ifdef PRINT_DEBUG
24597+ show_debug("%s:add recovery target to host table",func);
24598+#endif
24599+ memcpy(host_tbl.hostName,Target.hostName,sizeof(host_tbl.hostName));
24600+ memcpy(host_tbl.resolvedName,Target.resolvedName,sizeof(host_tbl.resolvedName));
24601+ host_tbl.port = Target.port;
24602+ host_tbl.recoveryPort = Target.recoveryPort;
24603+ PGRset_recovered_host(&host_tbl,DB_TBL_INIT);
24604+ PGRadd_HostTbl(&host_tbl,DB_TBL_INIT);
24605+ /*
24606+ * send prepare recovery to load balancer
24607+ */
24608+ PGRsend_load_balance_packet(packet);
24609+ userName = strdup(packet->userName);
24610+
24611+ /*
24612+ * set RECOVERY_PGDATA_REQ packet data
24613+ */
24614+#ifdef PRINT_DEBUG
24615+ show_debug("%s:set RECOVERY_PGDATA_REQ packet data",func);
24616+#endif
24617+ memset(packet,0,sizeof(RecoveryPacket));
24618+ PGRset_recovery_packet_no(packet, RECOVERY_PGDATA_REQ );
24619+
24620+retry_connect_master:
24621+ master = PGRget_master();
24622+ if (master == (HostTbl *)NULL)
24623+ {
24624+ /*
24625+ * connection error , master may be down
24626+ */
24627+ show_error("%s:get master info error , master may be down",func);
24628+ PGRset_recovery_packet_no(packet, RECOVERY_ERROR_TARGET_ONLY);
24629+ status = send_packet(&Target, packet);
24630+ reset_recovery_prepare();
24631+ loop_end = true;
24632+ if (userName != NULL)
24633+ free(userName);
24634+ return loop_end;
24635+ }
24636+ /* send vauum command to master server */
24637+ status = send_vacuum(master, userName, PGR_RECOVERY_1ST_STAGE );
24638+ if (status != STATUS_OK)
24639+ {
24640+ PGRset_host_status(master, DB_TBL_ERROR);
24641+ goto retry_connect_master;
24642+ }
24643+
24644+ memcpy(Master.hostName,master->hostName,sizeof(Master.hostName));
24645+ memcpy(Master.resolvedName,master->resolvedName,sizeof(Master.resolvedName));
24646+ Master.sock = -1;
24647+ Master.recovery_sock = -1;
24648+ Master.port = master->port;
24649+ Master.recoveryPort = master->recoveryPort;
24650+
24651+#ifdef PRINT_DEBUG
24652+ show_debug("%s:send packet to master %s recoveryPort %d",func, Master.hostName, Master.recoveryPort);
24653+#endif
24654+ status = send_packet(&Master, packet);
24655+ if (status != STATUS_OK)
24656+ {
24657+ /*
24658+ * connection error , master may be down
24659+ */
24660+ show_error("%s:connection error , master may be down",func);
24661+ PGRset_host_status(master,DB_TBL_ERROR);
24662+ goto retry_connect_master ;
24663+ }
24664+
24665+ /*
24666+ * start prepare of recovery
24667+ * set recovery status to "prepare start"
24668+ * start transaction count up
24669+ */
24670+ start_recovery_prepare();
24671+ /*
24672+ * wait answer from master server
24673+ */
24674+#ifdef PRINT_DEBUG
24675+ show_debug("%s:wait answer from master server",func);
24676+#endif
24677+ memset(packet,0,sizeof(RecoveryPacket));
24678+ read_packet_from_master(&Master, packet);
24679+#ifdef PRINT_DEBUG
24680+ show_debug("%s:get answer from master:no[%d]",func,ntohs(packet->packet_no));
24681+#endif
24682+ if (ntohs(packet->packet_no) == RECOVERY_PGDATA_ANS)
24683+ {
24684+ /*
24685+ * send a packet to load balancer that is stopped master's
24686+ * load balancing until all recovery process is finished
24687+ */
24688+ PGRsend_load_balance_packet(packet);
24689+ memcpy((char *)&MasterPacketData,packet,sizeof(RecoveryPacket));
24690+
24691+ /*
24692+ * prepare answer from master DB
24693+ */
24694+ PGRset_recovery_packet_no(packet, RECOVERY_PREPARE_ANS );
24695+ memcpy(packet->hostName,Master.hostName,sizeof(packet->hostName));
24696+ status = send_packet(&Target, packet);
24697+ if (status != STATUS_OK)
24698+ {
24699+ show_error("%s:no[%d] send_packet to target error",func,ntohs(packet->packet_no));
24700+ PGRset_recovery_packet_no(packet, RECOVERY_ERROR_TARGET_ONLY);
24701+ status = send_packet(&Master,packet);
24702+ reset_recovery_prepare();
24703+ loop_end = true;
24704+ }
24705+ }
24706+ if (userName != NULL)
24707+ free(userName);
24708+
24709+
24710+ return loop_end;
24711+}
24712+
24713+static int
24714+wait_transaction_count_clear(void)
24715+{
24716+ char * func ="wait_transaction_count_clear()";
24717+ HostTbl * master = (HostTbl *)NULL;
24718+ int cnt = 0;
24719+ int recovery_status = PGRget_recovery_status();
24720+
24721+ while (recovery_status != RECOVERY_CLEARED)
24722+ {
24723+ master = PGRget_master();
24724+ if (master == (HostTbl *)NULL)
24725+ {
24726+ show_error("%s:get master info error , master may be down",func);
24727+ continue;
24728+ }
24729+ if ((recovery_status == RECOVERY_PREPARE_START) &&
24730+ (master->transaction_count==0))
24731+ {
24732+ PGRset_recovery_status(RECOVERY_CLEARED);
24733+ break;
24734+ }
24735+
24736+ sleep(1);
24737+#ifdef PRINT_DEBUG
24738+ show_debug("now, waiting clear every transaction for recovery");
24739+#endif
24740+ cnt ++;
24741+ if (cnt > RECOVERY_TIMEOUT * 60 )
24742+ {
24743+ show_error("sorry, it is timeout for waiting clear transaction");
24744+ return STATUS_ERROR;
24745+ }
24746+ recovery_status = PGRget_recovery_status();
24747+ }
24748+ return STATUS_OK;
24749+}
24750+
24751+static bool
24752+second_setup_recovery (RecoveryPacket * packet)
24753+{
24754+ char * func = "second_setup_recovery()";
24755+ HostTbl * master = (HostTbl *)NULL;
24756+ int status;
24757+ bool loop_end = false;
24758+ char * userName = NULL;
24759+ int recovery_status = 0;
24760+
24761+ /* send vauum command to master server */
24762+ while ((master = PGRget_master()) != NULL)
24763+ {
24764+ /*
24765+ * wait until all started transactions are going to finish
24766+ */
24767+ status = wait_transaction_count_clear();
24768+ if (status != STATUS_OK)
24769+ {
24770+ show_error("%s:transaction is too busy, please try again after",func);
24771+ PGRset_recovery_packet_no(packet, RECOVERY_ERROR_TARGET_ONLY);
24772+ status = send_packet(&Target,packet);
24773+ status = send_packet(&Master,packet);
24774+ reset_recovery_prepare();
24775+ return true;
24776+ }
24777+ userName = strdup(packet->userName);
24778+ status = send_vacuum(master, userName, PGR_RECOVERY_2ND_STAGE );
24779+ if (status != STATUS_OK)
24780+ {
24781+ PGRset_host_status(master, DB_TBL_ERROR);
24782+ if (userName != NULL)
24783+ {
24784+ free(userName);
24785+ userName = NULL;
24786+ }
24787+ continue;
24788+ }
24789+ break;
24790+ }
24791+
24792+ if (master == NULL)
24793+ {
24794+ show_error("%s:vacuum error , master may be down",func);
24795+ PGRset_recovery_packet_no(packet, RECOVERY_ERROR_TARGET_ONLY);
24796+ status = send_packet(&Target,packet);
24797+ status = send_packet(&Master,packet);
24798+ reset_recovery_prepare();
24799+
24800+ return true;
24801+ }
24802+
24803+ recovery_status = PGRget_recovery_status();
24804+ if ((recovery_status != RECOVERY_PREPARE_START) &&
24805+ (recovery_status != RECOVERY_WAIT_CLEAN) &&
24806+ (recovery_status != RECOVERY_CLEARED))
24807+ {
24808+ show_error("%s:queue set failed. stop to recovery",func);
24809+ PGRset_recovery_packet_no(packet, RECOVERY_ERROR_CONNECTION);
24810+ status = send_packet(&Target,packet);
24811+ status = send_packet(&Master,packet);
24812+ reset_recovery_prepare();
24813+ if (userName != NULL)
24814+ free(userName);
24815+ return true;
24816+ }
24817+
24818+ /*
24819+ * then, send fsync request to master DB
24820+ */
24821+ PGRset_recovery_packet_no(packet, RECOVERY_FSYNC_REQ );
24822+ status = send_packet(&Master,packet);
24823+ if (status != STATUS_OK)
24824+ {
24825+ /*
24826+ * connection error , master may be down
24827+ */
24828+ show_error("%s:connection error , master may be down",func);
24829+ PGRset_recovery_packet_no(packet, RECOVERY_ERROR_CONNECTION);
24830+ status = send_packet(&Target,packet);
24831+ status = send_packet(&Master,packet);
24832+ reset_recovery_prepare();
24833+ if (userName != NULL)
24834+ free(userName);
24835+ return true;
24836+ }
24837+
24838+ recovery_status = PGRget_recovery_status();
24839+ if ((recovery_status != RECOVERY_PREPARE_START) &&
24840+ (recovery_status != RECOVERY_WAIT_CLEAN) &&
24841+ (recovery_status != RECOVERY_CLEARED))
24842+ {
24843+ show_error("%s:queue set failed. stop to recovery",func);
24844+ PGRset_recovery_packet_no(packet, RECOVERY_ERROR_CONNECTION);
24845+ status = send_packet(&Target,packet);
24846+ status = send_packet(&Master,packet);
24847+ reset_recovery_prepare();
24848+ if (userName != NULL)
24849+ free(userName);
24850+ return true;
24851+ }
24852+
24853+ /*
24854+ * wait answer from master server
24855+ */
24856+ memset(packet,0,sizeof(RecoveryPacket));
24857+ read_packet_from_master(&Master,packet);
24858+ if (ntohs(packet->packet_no) == RECOVERY_FSYNC_ANS )
24859+ {
24860+ /*
24861+ * master DB finished fsync
24862+ */
24863+ PGRset_recovery_packet_no(packet, RECOVERY_START_ANS );
24864+ memcpy(packet->hostName,Master.hostName,sizeof(packet->hostName));
24865+ status = send_packet(&Target,packet);
24866+ if (status != STATUS_OK)
24867+ {
24868+ finish_recovery();
24869+ loop_end = true;
24870+ }
24871+ }
24872+ else
24873+ {
24874+ show_error("%s:failure answer returned",func);
24875+ PGRset_recovery_packet_no(packet, RECOVERY_ERROR_CONNECTION);
24876+ status = send_packet(&Target,packet);
24877+ status = send_packet(&Master,packet);
24878+ reset_recovery_prepare();
24879+ loop_end = true;
24880+ }
24881+ if (userName != NULL)
24882+ free(userName);
24883+ return loop_end;
24884+}
24885+
24886+static char *
24887+read_queue_file(FILE * fp, ReplicateHeader * header, char *query)
24888+{
24889+ char * func = "read_queue_file()";
24890+ int size = 0;
24891+
24892+ if (fp == NULL)
24893+ {
24894+ return NULL;
24895+ }
24896+ if (fread((char*)header,sizeof(ReplicateHeader),1,fp) < 1)
24897+ {
24898+ return NULL;
24899+ }
24900+ size = ntohl(header->query_size);
24901+ if (size >= 0)
24902+ {
24903+ query = malloc(size+4);
24904+ if (query == NULL)
24905+ {
24906+ show_error("%s:malloc failed:(%s)",func,strerror(errno));
24907+ }
24908+ memset(query,0,size+4);
24909+ if (size > 0)
24910+ {
24911+ if (fread(query,size,1,fp) < 1)
24912+ {
24913+ return NULL;
24914+ }
24915+ }
24916+ return query;
24917+ }
24918+ return NULL;
24919+}
24920+
24921+/**
24922+ * send queries from queue.
24923+ *
24924+ * return
24925+ * STATUS_OK - success both
24926+ * STATUS_ERROR - fail both
24927+ */
24928+static int
24929+PGRsend_queue(RecoveryTbl * master, RecoveryTbl * target)
24930+{
24931+ char * func = "PGRsend_queue()";
24932+ HostTbl * master_ptr = NULL;
24933+ HostTbl * target_ptr = NULL;
24934+ RecoveryQueueFile * msg = NULL;
24935+ FILE * rfp = NULL;
24936+ ReplicateHeader header;
24937+ char * query = NULL;
24938+ int size = 0;
24939+ int status = 0;
24940+ int query_size = 0;
24941+ int rtn=0;
24942+
24943+ if (master == (RecoveryTbl *)NULL)
24944+ {
24945+ show_error("%s:there is no master ",func);
24946+ return STATUS_ERROR;
24947+ }
24948+#ifdef PRINT_DEBUG
24949+ show_debug("%s:master %s - %d",func,master->hostName,master->port);
24950+#endif
24951+ master_ptr = PGRget_HostTbl(master->resolvedName,master->port);
24952+ if (master_ptr == (HostTbl *)NULL)
24953+ {
24954+ show_error("%s:master table is null",func);
24955+ return STATUS_ERROR;
24956+ }
24957+ if (target != (RecoveryTbl *)NULL)
24958+ {
24959+#ifdef PRINT_DEBUG
24960+ show_debug("%s:target %s - %d",func,target->hostName,target->port);
24961+#endif
24962+ target_ptr = PGRget_HostTbl(target->resolvedName,target->port);
24963+ if (target_ptr == (HostTbl *)NULL)
24964+ {
24965+ show_error("%s:target table is null",func);
24966+ return STATUS_ERROR;
24967+ }
24968+ }
24969+
24970+ size = sizeof(RecoveryQueueFile) + FILENAME_MAX_LENGTH;
24971+ msg = (RecoveryQueueFile *)malloc(size+4);
24972+ if (msg == NULL)
24973+ {
24974+#ifdef PRINT_DEBUG
24975+ show_debug("%s:malloc() failed. reason: %s",func, strerror(errno));
24976+#endif
24977+ return STATUS_ERROR;
24978+ }
24979+ memset(msg,0,size+4);
24980+ status = STATUS_OK;
24981+ while (msgrcv(*RecoveryMsgid , msg, FILENAME_MAX_LENGTH, 0, IPC_NOWAIT) > 0 )
24982+ {
24983+ strncpy(Recovery_Status_Inf->read_file,(char *)(msg->mdata),FILENAME_MAX_LENGTH);
24984+ PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
24985+ if (!strncmp(Recovery_Status_Inf->write_file,Recovery_Status_Inf->read_file,sizeof(Recovery_Status_Inf->write_file)))
24986+ {
24987+ memset(Recovery_Status_Inf->write_file,0,sizeof(Recovery_Status_Inf->write_file));
24988+ }
24989+ PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
24990+ rfp = fopen(Recovery_Status_Inf->read_file,"r");
24991+ if (rfp == NULL)
24992+ {
24993+ show_error("%s:queue file [%s] can not be opened:(%s)",func,Recovery_Status_Inf->read_file,strerror(errno));
24994+ return STATUS_ERROR;
24995+ }
24996+ while ((query = read_queue_file(rfp, &header,query)) != NULL)
24997+ {
24998+ query_size = ntohl(header.query_size);
24999+ if (query_size < 0)
25000+ {
25001+ if (query != NULL)
25002+ {
25003+ free(query);
25004+ query = NULL;
25005+ }
25006+ break;
25007+ }
25008+ PGR_Response_Inf->current_cluster = 0;
25009+ rtn=PGRsend_replicate_packet_to_server(master_ptr,&header,query,PGR_Result,ntohl(header.replicate_id), true);
25010+ if (target_ptr != NULL)
25011+ {
25012+ PGR_Response_Inf->current_cluster = 1;
25013+ rtn=PGRsend_replicate_packet_to_server(target_ptr,&header,query,PGR_Result,ntohl(header.replicate_id), true);
25014+ }
25015+ }
25016+ if (query != NULL)
25017+ {
25018+ free(query);
25019+ query = NULL;
25020+ }
25021+ if (rfp != NULL)
25022+ {
25023+ fclose(rfp);
25024+ rfp = NULL;
25025+ unlink(Recovery_Status_Inf->read_file);
25026+ memset(Recovery_Status_Inf->read_file,0,sizeof(Recovery_Status_Inf->read_file));
25027+ }
25028+ }
25029+#ifdef PRINT_DEBUG
25030+ show_debug("%s:send_queue return status %d",func,status);
25031+#endif
25032+ return status;
25033+}
25034+
25035+static void
25036+pgrecovery_loop(int fd)
25037+{
25038+ char * func = "pgrecovery_loop()";
25039+ int count;
25040+ int sock;
25041+ int status;
25042+ bool loop_end = false;
25043+ RecoveryPacket packet;
25044+ HostTbl new_host;
25045+ RecoveryTbl * lbp;
25046+
25047+ lbp = LoadBalanceTbl;
25048+ if (lbp == (RecoveryTbl *)NULL)
25049+ {
25050+ show_error("%s:recovery table is NULL",func);
25051+ return ;
25052+ }
25053+#ifdef PRINT_DEBUG
25054+ show_debug("%s:recovery accept port %d",func, Recovery_Port_Number);
25055+#endif
25056+ count = 0;
25057+ while ((status = PGR_Create_Acception(fd,&sock,"",Recovery_Port_Number)) != STATUS_OK)
25058+ {
25059+ show_error("%s:PGR_Create_Acception failed",func);
25060+ PGR_Close_Sock(&sock);
25061+ sock = -1;
25062+ if ( count > PGR_CONNECT_RETRY_TIME)
25063+ {
25064+ return;
25065+ }
25066+ count ++;
25067+ }
25068+ if(sock==-1) {
25069+ show_error("can't create recovery socket.exit.");
25070+ PGRreplicate_exit(1);
25071+ }
25072+ for(;;)
25073+ {
25074+ int read_size = 0;
25075+ int rtn;
25076+ fd_set rmask;
25077+ struct timeval timeout;
25078+
25079+ timeout.tv_sec = RECOVERY_TIMEOUT;
25080+ timeout.tv_usec = 0;
25081+
25082+ /*
25083+ * Wait for something to happen.
25084+ */
25085+ FD_ZERO(&rmask);
25086+ FD_SET(sock,&rmask);
25087+ /*
25088+ * read packet from target cluster server
25089+ */
25090+ rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
25091+
25092+ if (rtn == 0) /* timeout */
25093+ {
25094+ return;
25095+ }
25096+
25097+ if (rtn && FD_ISSET(sock, &rmask))
25098+ {
25099+ read_size = read_packet(sock, &packet);
25100+ }
25101+ else
25102+ {
25103+ continue;
25104+ }
25105+
25106+#ifdef PRINT_DEBUG
25107+ show_debug("%s:receive packet no:%d",func,ntohs(packet.packet_no));
25108+#endif
25109+
25110+ switch (ntohs(packet.packet_no))
25111+ {
25112+ case RECOVERY_PREPARE_REQ :
25113+ /*
25114+ * start prepare of recovery
25115+ */
25116+
25117+#ifdef PRINT_DEBUG
25118+ show_debug("%s:1st master %s - %d",
25119+ func,Master.hostName,Master.port);
25120+ show_debug("%s:1st target %s - %d",
25121+ func,Target.hostName,Target.port);
25122+#endif
25123+
25124+ loop_end = first_setup_recovery(&sock, &packet);
25125+#ifdef PRINT_DEBUG
25126+ show_debug("%s:first_setup_recovery end:%d ",func,loop_end);
25127+#endif
25128+ break;
25129+ case RECOVERY_START_REQ :
25130+ /*
25131+ * now, recovery process will start
25132+ * stop the transaction count up
25133+ * start queueing and stop send all queries for master DB
25134+ */
25135+#ifdef PRINT_DEBUG
25136+ show_debug("%s:2nd master %s - %d",
25137+ func, Master.hostName,Master.port);
25138+ show_debug("%s:2nd target %s - %d",
25139+ func, Target.hostName,Target.port);
25140+#endif
25141+ loop_end = second_setup_recovery (&packet);
25142+#ifdef PRINT_DEBUG
25143+ show_debug("%s:second_setup_recovery end :%d ",
25144+ func,loop_end);
25145+#endif
25146+ break;
25147+ case RECOVERY_QUEUE_DATA_REQ :
25148+ /*
25149+ * send all queries in queue
25150+ */
25151+
25152+#ifdef PRINT_DEBUG
25153+ show_debug("%s:last master %s - %d",
25154+ func, Master.hostName,Master.port);
25155+ show_debug("%s:last target %s - %d",
25156+ func, Target.hostName,Target.port);
25157+#endif
25158+ status = PGRsend_queue(&Master,&Target);
25159+ if (status == STATUS_OK)
25160+ {
25161+ memcpy(new_host.hostName,Target.hostName,sizeof(new_host.hostName));
25162+ memcpy(new_host.resolvedName,Target.resolvedName,sizeof(new_host.resolvedName));
25163+ new_host.port = Target.port;
25164+ new_host.recoveryPort = Target.recoveryPort;
25165+ PGRset_recovered_host(&new_host,DB_TBL_USE);
25166+ PGRadd_HostTbl(&new_host,DB_TBL_USE);
25167+ PGRset_recovery_packet_no(&packet, RECOVERY_QUEUE_DATA_ANS );
25168+ status = send_packet(&Target, &packet);
25169+ if (status != STATUS_OK)
25170+ {
25171+ finish_recovery();
25172+ }
25173+ }
25174+ else
25175+ {
25176+ /* connection error , master or target may be down */
25177+ show_error("%s:PGRsend_queue failed",func);
25178+ PGRset_recovery_packet_no(&packet, RECOVERY_ERROR_CONNECTION);
25179+ status = send_packet(&Target,&packet);
25180+ finish_recovery();
25181+ }
25182+ loop_end = true;
25183+ break;
25184+ case RECOVERY_FINISH :
25185+ /*
25186+ * finished rsync DB datas from master to target
25187+ */
25188+ /*
25189+ * stop queueing, and re-initialize recovery status
25190+ */
25191+ finish_recovery();
25192+ loop_end = true;
25193+ /*
25194+ * send finish recovery to load balancer
25195+ */
25196+ if (Master.recovery_sock != -1)
25197+ {
25198+ close(Master.recovery_sock);
25199+ Master.recovery_sock = -1;
25200+ }
25201+ if (Target.recovery_sock != -1)
25202+ {
25203+ close(Target.recovery_sock);
25204+ Target.recovery_sock = -1;
25205+ }
25206+ send_packet(&Master, &packet);
25207+ MasterPacketData.packet_no = packet.packet_no;
25208+ PGRsend_load_balance_packet(&MasterPacketData);
25209+ PGRsend_load_balance_packet(&packet);
25210+ memset((char *)&MasterPacketData,0,sizeof(RecoveryPacket));
25211+ break;
25212+ case RECOVERY_ERROR_ANS :
25213+#ifdef PRINT_DEBUG
25214+ show_debug("%s:recovery error accept. top queueing and initiarse recovery status",func);
25215+#endif
25216+ status = PGRsend_queue(&Master,NULL);
25217+ memset(&packet,0,sizeof(RecoveryPacket));
25218+ PGRset_recovery_packet_no(&packet, RECOVERY_ERROR_ANS);
25219+ send_packet(&Master, &packet);
25220+ finish_recovery();
25221+ loop_end = true;
25222+ PGRset_recovery_packet_no(&MasterPacketData, RECOVERY_FINISH );
25223+ PGRsend_load_balance_packet(&MasterPacketData);
25224+ memset((char *)&MasterPacketData,0,sizeof(RecoveryPacket));
25225+ break;
25226+ default:
25227+ show_error("%s:unknown packet. abort to parse");
25228+ loop_end=true;
25229+ break;
25230+ }
25231+ if (loop_end)
25232+ {
25233+ if (Master.sock != -1)
25234+ {
25235+ close (Master.sock);
25236+ }
25237+ if (Master.recovery_sock != -1)
25238+ {
25239+ close (Master.recovery_sock);
25240+ }
25241+ PGR_Close_Sock(&sock);
25242+ return;
25243+ }
25244+ }
25245+}
25246+
25247+void
25248+PGRrecovery_main(int fork_wait_time)
25249+{
25250+ char * func = "PGRrecovery_main()";
25251+ int status;
25252+ int fd = -1;
25253+ int rtn;
25254+ pid_t pgid = 0;
25255+ pid_t pid = 0;
25256+
25257+ pgid = getpgid(0);
25258+ pid = fork();
25259+ if (pid != 0)
25260+ {
25261+ return;
25262+ }
25263+
25264+ PGRsignal(SIGCHLD, SIG_DFL);
25265+ PGRsignal(SIGHUP, PGRexit_subprocess);
25266+ PGRsignal(SIGINT, PGRexit_subprocess);
25267+ PGRsignal(SIGQUIT, PGRexit_subprocess);
25268+ PGRsignal(SIGTERM, PGRexit_subprocess);
25269+ PGRsignal(SIGPIPE, SIG_IGN);
25270+ /*
25271+ * in child process,
25272+ * call recovery module
25273+ */
25274+ setpgid(0,pgid);
25275+
25276+ if (fork_wait_time > 0) {
25277+#ifdef PRINT_DEBUG
25278+ show_debug("recovery process: wait fork(): pid = %d", getpid());
25279+#endif
25280+ sleep(fork_wait_time);
25281+ }
25282+
25283+#ifdef PRINT_DEBUG
25284+ show_debug("%s:PGRrecovery_main bind port %d",func,Recovery_Port_Number);
25285+#endif
25286+ status = PGR_Create_Socket_Bind(&fd, "", Recovery_Port_Number);
25287+ if (status != STATUS_OK)
25288+ {
25289+ show_error("%s:PGR_Create_Socket_Bind failed",func);
25290+ exit(1);
25291+ }
25292+ memset(&MasterPacketData,0,sizeof(RecoveryPacket));
25293+ memset(&Master,0,sizeof(RecoveryTbl));
25294+ memset(&Target,0,sizeof(RecoveryTbl));
25295+ for (;;)
25296+ {
25297+ fd_set rmask;
25298+ struct timeval timeout;
25299+
25300+ timeout.tv_sec = RECOVERY_TIMEOUT;
25301+ timeout.tv_usec = 0;
25302+
25303+ /*
25304+ * Wait for something to happen.
25305+ */
25306+ FD_ZERO(&rmask);
25307+ FD_SET(fd,&rmask);
25308+ rtn = select(fd+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
25309+ if (rtn && FD_ISSET(fd, &rmask))
25310+ {
25311+ pgrecovery_loop(fd);
25312+ }
25313+ }
25314+}
25315+
#ifdef PRINT_DEBUG
/*
 * show_recovery_packet()
 *
 * Debug helper: print the fields of a recovery packet through
 * show_debug().  The numeric fields are run through ntohs() before
 * printing.
 *
 * NOTE(review): if hostName / pg_data are embedded arrays rather than
 * pointers, the NULL tests below are always true -- confirm against the
 * RecoveryPacket declaration.
 */
static void
show_recovery_packet(RecoveryPacket * packet)
{
	show_debug("no = %d",ntohs(packet->packet_no));
	show_debug("max_connect = %d",ntohs(packet->max_connect));
	show_debug("port = %d",ntohs(packet->port));
	show_debug("recoveryPort = %d",ntohs(packet->recoveryPort));

	if (packet->hostName != NULL)
	{
		show_debug("hostName = %s",packet->hostName);
	}
	if (packet->pg_data != NULL)
	{
		show_debug("pg_data = %s",packet->pg_data);
	}
}
#endif
25330diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/replicate.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/replicate.c
25331--- postgresql-8.2.4/src/pgcluster/pgrp/replicate.c 1970-01-01 01:00:00.000000000 +0100
25332+++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/replicate.c 2007-03-01 16:27:15.000000000 +0100
25333@@ -0,0 +1,4088 @@
25334+/*--------------------------------------------------------------------
25335+ * FILE:
25336+ * replicate.c
25337+ *
25338+ * NOTE:
25339+ * This file is composed of the functions to call with the source
25340+ * at pgreplicate for the replication.
25341+ *
25342+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
25343+ *--------------------------------------------------------------------
25344+ */
25345+#include "postgres.h"
25346+#include "postgres_fe.h"
25347+
25348+#include <pthread.h>
25349+#include <stdio.h>
25350+#include <stdarg.h>
25351+#include <sys/types.h>
25352+#include <fcntl.h>
25353+#include <errno.h>
25354+#include <ctype.h>
25355+#include <time.h>
25356+#include <sys/ipc.h>
25357+#include <sys/shm.h>
25358+#include <sys/sem.h>
25359+#include <sys/msg.h>
25360+#include <signal.h>
25361+
25362+
25363+#include "libpq-fe.h"
25364+#include "libpq-int.h"
25365+#include "fe-auth.h"
25366+
25367+#include <sys/socket.h>
25368+#include <unistd.h>
25369+#include <netdb.h>
25370+#include <arpa/inet.h>
25371+
25372+#ifdef HAVE_NETINET_TCP_H
25373+#include <netinet/tcp.h>
25374+#endif
25375+
25376+#ifdef HAVE_SYS_SELECT_H
25377+#include <sys/select.h>
25378+#endif
25379+
25380+
25381+#ifdef HAVE_CRYPT_H
25382+#include <crypt.h>
25383+#endif
25384+
25385+
25386+#ifdef MULTIBYTE
25387+#include "mb/pg_wchar.h"
25388+#endif
25389+
25390+#include "access/xact.h"
25391+#include "lib/dllist.h"
25392+#include "libpq/pqformat.h"
25393+#include "replicate_com.h"
25394+#include "pgreplicate.h"
25395+
25396+
25397+#define IPC_NMAXSEM (32)
25398+
25399+/*--------------------------------------
25400+ * PROTOTYPE DECLARATION
25401+ *--------------------------------------
25402+ */
25403+static TransactionTbl * setTransactionTbl(HostTbl * host_ptr, ReplicateHeader * header);
25404+static TransactionTbl * insertTransactionTbl( HostTbl * host_ptr, TransactionTbl * datap);
25405+static TransactionTbl * getTransactionTbl( HostTbl * host_ptr, ReplicateHeader * header);
25406+static void deleteTransactionTbl(HostTbl * host_ptr,ReplicateHeader * header);
25407+
25408+static HostTbl * deleteHostTbl(HostTbl * ptr);
25409+static bool is_master_in_recovery(char * host, int port,int recovery_status);
25410+static void sem_quit(int semid);
25411+static int send_cluster_status_to_load_balance(HostTbl * host_ptr,int status);
25412+static void set_transaction_status(int status);
25413+static void check_transaction_status(ReplicateHeader * header,TransactionTbl *transaction);
25414+static HostTbl * check_host_transaction_status(ReplicateHeader * header,HostTbl *host );
25415+static void clearHostTbl(void);
25416+static bool is_need_sync_time(ReplicateHeader * header);
25417+static bool is_need_wait_answer(ReplicateHeader * header);
25418+static void write_host_status_file(HostTbl * host_ptr);
25419+
25420+static void delete_template(HostTbl * ptr, ReplicateHeader * header);
25421+static char * check_copy_command(char * query);
25422+static int read_answer(int dest);
25423+static bool is_autocommit_off(char * query);
25424+static bool is_autocommit_on(char * query);
25425+static unsigned int get_host_ip_from_tbl(char * host);
25426+static unsigned int get_srcHost_ip_from_tbl(char * srcHost);
25427+
25428+static int next_replication_id(void);
25429+static void check_replication_id(void);
25430+static bool is_need_use_rlog(ReplicateHeader * header);
25431+static bool is_need_queue_jump( ReplicateHeader * header,char * query);
25432+static int check_delete_transaction (HostTbl * host_ptr, ReplicateHeader * header);
25433+
25434+static bool is_executed_query_in_origin( ReplicateHeader *header );
25435+static bool is_executed_query( PGconn *conn,ReplicateHeader *header );
25436+
25437+static void * thread_send_source(void * arg);
25438+static void * thread_send_cluster(void * arg);
25439+
25440+static int send_replicate_packet_to_server( TransactionTbl * transaction_tbl, int current_cluster, HostTbl * host_ptr, ReplicateHeader * header, char *query , char * result,unsigned int replicationId, bool recovery);
25441+static int check_result( PGresult * res );
25442+static bool compare_results(int *results, int size, int source_id);
25443+
25444+static int send_func(HostTbl * host_ptr,ReplicateHeader * header, char * func,char * result);
25445+static uint32_t get_oid(HostTbl * host_ptr,ReplicateHeader * header);
25446+static int set_oid(HostTbl * host_ptr,ReplicateHeader * header, uint32_t oid);
25447+static int replicate_lo( PGconn * conn, ReplicateHeader * header, LOArgs * query);
25448+static int notice_abort(HostTbl * host_ptr,ReplicateHeader * header);
25449+static FILE * create_queue_file(void);
25450+static int add_queue_file(char * data, int size);
25451+
25452+static int send_p_parse (PGconn * conn, StringInfo input_message);
25453+static int send_p_bind (PGconn * conn, StringInfo input_message);
25454+static int send_p_describe (PGconn * conn, StringInfo input_message);
25455+static int send_p_execute (PGconn * conn, StringInfo input_message);
25456+static int send_p_sync (PGconn * conn, StringInfo input_message);
25457+static int send_p_close (PGconn * conn, StringInfo input_message);
25458+static void set_string_info(StringInfo input_message, ReplicateHeader * header, char * query);
25459+
25460+int replicate_packet_send_internal(ReplicateHeader * header, char * query,int dest,int recovery_status,bool isHeldLock);
25461+bool PGRis_same_host(char * host1, unsigned short port1 , char * host2, unsigned short port2);
25462+HostTbl * PGRadd_HostTbl(HostTbl * conf_data, int useFlag);
25463+HostTbl * PGRget_master(void);
25464+void PGRset_recovery_status(int status);
25465+int PGRget_recovery_status(void);
25466+int PGRcheck_recovered_host(void);
25467+int PGRset_recovered_host(HostTbl * target,int useFlag);
25468+int PGRinit_recovery(void);
25469+void PGRexit_subprocess(int signo);
25470+void PGRreplicate_exit(int exit_status);
25471+int PGRsend_replicate_packet_to_server( HostTbl * host_ptr, ReplicateHeader * header, char *query , char * result,unsigned int replicationId, bool recovery);
25472+HostTbl * PGRget_HostTbl(char * resolvedName,int port);
25473+int PGRset_queue(ReplicateHeader * header,char * query);
25474+int PGRset_host_status(HostTbl * host_ptr,int status);
25475+void PGRclear_transactions(void);
25476+void PGRclear_connections();
25477+int PGRset_replication_id(uint32_t id);
25478+int PGRdo_replicate(int sock,ReplicateHeader *header, char * query);
25479+int PGRreturn_result(int dest, char * result,int wait);
25480+int PGRreplicate_packet_send( ReplicateHeader * header, char * query,int dest,int recovery_status);
25481+char * PGRread_packet(int sock, ReplicateHeader *header);
25482+char * PGRread_query(int sock, ReplicateHeader *header);
25483+PGconn * PGRcreateConn( char * host, char * port,char * database, char * userName, char * password, char * md5Salt, char * cryptSalt );
25484+
25485+unsigned int PGRget_next_query_id(void);
25486+int PGRinit_transaction_table(void);
25487+int PGRsync_oid(ReplicateHeader *header);
25488+int PGRload_replication_id(void);
25489+extern pthread_mutex_t transaction_table_mutex;
25490+
/*
 * PGRis_same_host()
 *
 * Return true when both (host, port) pairs name the same endpoint:
 * neither host string is empty, the ports are equal, and both host
 * names map to the same value through PGRget_ip_by_name().
 * Return false otherwise.
 */
bool
PGRis_same_host(char * host1, unsigned short port1 , char * host2, unsigned short port2)
{
#ifdef PRINT_DEBUG
	char * func = "PGRis_same_host()";
#endif
	unsigned int addr1;
	unsigned int addr2;

	if ((host1[0] != '\0') && (host2[0] != '\0') && (port1 == port2))
	{
		/* ports already match here, so only the addresses decide */
		addr1 = PGRget_ip_by_name( host1);
		addr2 = PGRget_ip_by_name( host2);
		if (addr1 == addr2)
		{
			return true;
		}
		return false;
	}

#ifdef PRINT_DEBUG
	show_debug("%s:target host",func);
#endif
	return false;
}
25516+
25517+PGconn *
25518+PGRcreateConn( char * host, char * port,char * database, char * userName, char * password, char * md5Salt, char * cryptSalt )
25519+{
25520+ char * func = "PGRcreateConn()";
25521+ int cnt = 0;
25522+ PGconn * conn = NULL;
25523+ char pwd[256];
25524+
25525+ memset(pwd,0,sizeof(pwd));
25526+ if (*password != '\0')
25527+ {
25528+ if ((strncmp(password,"md5",3) == 0) && (md5Salt != NULL))
25529+ {
25530+ sprintf(pwd,"%s(%d)(%d)(%d)(%d)",password,
25531+ *md5Salt,*(md5Salt+1),*(md5Salt+2),*(md5Salt+3));
25532+ }
25533+ else
25534+ {
25535+ strncpy(pwd,password,sizeof(pwd));
25536+ }
25537+ }
25538+ conn = PQsetdbLogin(host, port, NULL, NULL, database, userName, pwd);
25539+ /* check to see that the backend Connection was successfully made */
25540+ cnt = 0;
25541+ while (PQstatus(conn) == CONNECTION_BAD)
25542+ {
25543+ if (conn != NULL)
25544+ {
25545+ PQfinish(conn);
25546+ conn = NULL;
25547+ }
25548+ conn = PQsetdbLogin(host, port, NULL, NULL, database, userName, pwd);
25549+ if (cnt > PGR_CONNECT_RETRY_TIME )
25550+ {
25551+ if (conn != NULL)
25552+ {
25553+ PQfinish(conn);
25554+ conn = NULL;
25555+ }
25556+ return (PGconn *)NULL;
25557+ }
25558+
25559+ if(PQstatus(conn) == CONNECTION_BAD && h_errno==2)
25560+ {
25561+ show_error("gethostbyname() failed. sleep and retrying...");
25562+ usleep(PGR_SEND_WAIT_MSEC);
25563+ cnt ++;
25564+ }
25565+ else if(!strncasecmp(PQerrorMessage(conn),"FATAL: Sorry, too many clients already",30) ||
25566+ !strncasecmp(PQerrorMessage(conn),"FATAL: Non-superuser connection limit",30) )
25567+ {
25568+ usleep(PGR_SEND_WAIT_MSEC);
25569+ show_error("Connection overflow. sleep and retrying...");
25570+ cnt ++;
25571+ }
25572+ else if(!strncasecmp(PQerrorMessage(conn),"FATAL: The database system is starting up",40) )
25573+ {
25574+#ifdef PRINT_DEBUG
25575+ show_debug("waiting for starting up...");
25576+#endif
25577+ usleep(PGR_SEND_WAIT_MSEC);
25578+ }
25579+ else
25580+ {
25581+#ifdef PRINT_DEBUG
25582+ show_error("%s:Retry. h_errno is %d,reason is '%s'",func,h_errno,PQerrorMessage(conn));
25583+#endif
25584+
25585+ usleep(PGR_SEND_WAIT_MSEC);
25586+ cnt ++;
25587+ }
25588+ }
25589+ return conn;
25590+}
25591+
25592+static TransactionTbl *
25593+setTransactionTbl(HostTbl * host_ptr, ReplicateHeader * header)
25594+{
25595+ char * func = "setTransactionTbl()";
25596+ TransactionTbl * ptr = NULL;
25597+ TransactionTbl work ;
25598+ char port[8];
25599+ char * hostName = NULL;
25600+ char * dbName = NULL;
25601+ char * userName = NULL;
25602+ char * password = NULL;
25603+ char * md5Salt = NULL;
25604+ char * cryptSalt = NULL;
25605+
25606+ if ((host_ptr == NULL) || (header == NULL))
25607+ {
25608+ return (TransactionTbl *)NULL;
25609+ }
25610+ dbName = (char *)header->dbName;
25611+ snprintf(port,sizeof(port),"%d", host_ptr->port);
25612+ userName = (char *)(header->userName);
25613+ password = (char *)(header->password);
25614+ md5Salt = (char *)(header->md5Salt);
25615+ cryptSalt = (char *)(header->cryptSalt);
25616+ hostName = (char *)(host_ptr->resolvedName);
25617+
25618+ ptr = getTransactionTbl(host_ptr,header);
25619+ if (ptr != NULL)
25620+ {
25621+ ptr->transaction_count = 0;
25622+ ptr->conn = PGRcreateConn(hostName,port,dbName,userName,password,md5Salt,cryptSalt);
25623+ if (ptr->conn == NULL)
25624+ {
25625+ show_error("%s:Transaction is pooling but PGRcreateConn failed",func);
25626+ deleteTransactionTbl(host_ptr, header);
25627+ PGRset_host_status(host_ptr,DB_TBL_ERROR);
25628+ ptr = NULL;
25629+ }
25630+ return ptr;
25631+ }
25632+
25633+ memset(&work,0,sizeof(work));
25634+ strncpy(work.host, hostName, sizeof(work.host));
25635+ strncpy(work.srcHost, header->from_host, sizeof(work.srcHost));
25636+ work.hostIP = PGRget_ip_by_name(hostName);
25637+ work.port = host_ptr->port;
25638+ work.srcHostIP = PGRget_ip_by_name(header->from_host);
25639+ work.pid = ntohs(header->pid);
25640+ strncpy(work.dbName,header->dbName,sizeof(work.dbName));
25641+ work.conn = PGRcreateConn(hostName,port,dbName,userName,password,md5Salt,cryptSalt);
25642+ if (work.conn == NULL)
25643+ {
25644+#ifdef PRINT_DEBUG
25645+ show_debug("%s: %s@%s is not ready",func,port,hostName);
25646+#endif
25647+ return (TransactionTbl *)NULL;
25648+ }
25649+ work.useFlag = DB_TBL_USE ;
25650+ work.in_transaction = false;
25651+ work.transaction_count = 0;
25652+ ptr = insertTransactionTbl(host_ptr,&work);
25653+ if (ptr == (TransactionTbl *)NULL)
25654+ {
25655+ show_error("%s:insertTransactionTbl failed",func);
25656+ return (TransactionTbl *)NULL;
25657+ }
25658+ return ptr;
25659+}
25660+
25661+static TransactionTbl *
25662+insertTransactionTbl( HostTbl * host_ptr, TransactionTbl * datap)
25663+{
25664+ char * func = "insertTransactionTbl()";
25665+ TransactionTbl * workp = NULL;
25666+
25667+ pthread_mutex_lock(&transaction_table_mutex);
25668+ if ((host_ptr == (HostTbl *)NULL) || (datap == (TransactionTbl*)NULL))
25669+ {
25670+ show_error("%s:host table or transaction table is NULL",func);
25671+ pthread_mutex_unlock(&transaction_table_mutex);
25672+
25673+ return (TransactionTbl *)NULL;
25674+ }
25675+ if (Transaction_Tbl_Begin == NULL)
25676+ {
25677+ if (PGRinit_transaction_table() != STATUS_OK)
25678+ {
25679+ pthread_mutex_unlock(&transaction_table_mutex);
25680+
25681+ return (TransactionTbl *)NULL;
25682+ }
25683+ }
25684+
25685+ workp = (TransactionTbl *)malloc(sizeof(TransactionTbl));
25686+ memset(workp,0,sizeof(TransactionTbl));
25687+ Transaction_Tbl_End = workp;
25688+ workp->hostIP = datap->hostIP;
25689+ workp->port = datap->port;
25690+ workp->pid = datap->pid;
25691+ workp->srcHostIP = datap->srcHostIP;
25692+ strncpy(workp->host,datap->host,sizeof(workp->host));
25693+ strncpy(workp->srcHost,datap->srcHost,sizeof(workp->srcHost));
25694+ strncpy(workp->dbName,datap->dbName,sizeof(workp->dbName));
25695+ workp->conn = datap->conn;
25696+ workp->useFlag = DB_TBL_USE;
25697+ workp->lock = STATUS_OK;
25698+ workp->in_transaction =datap->in_transaction;
25699+ workp->transaction_count =datap->transaction_count;
25700+ DLAddTail(Transaction_Tbl_Begin, DLNewElem(workp));
25701+
25702+ pthread_mutex_unlock(&transaction_table_mutex);
25703+
25704+ return workp;
25705+}
25706+
25707+static TransactionTbl *
25708+getTransactionTbl( HostTbl * host_ptr, ReplicateHeader * header)
25709+{
25710+ Dlelem * ptr = NULL;
25711+ unsigned int host_ip,srcHost_ip;
25712+ unsigned short pid = 0;
25713+
25714+ if (Transaction_Tbl_Begin == (Dllist *) NULL)
25715+ {
25716+ return (TransactionTbl * )NULL;
25717+ }
25718+ if ((host_ptr == (HostTbl *)NULL) ||
25719+ (header == (ReplicateHeader *)NULL))
25720+ {
25721+ return (TransactionTbl * )NULL;
25722+ }
25723+ host_ip = get_host_ip_from_tbl(host_ptr->resolvedName);
25724+ if (host_ip == 0)
25725+ {
25726+ host_ip = PGRget_ip_by_name(host_ptr->resolvedName);
25727+ }
25728+ srcHost_ip = get_srcHost_ip_from_tbl(header->from_host);
25729+ if (srcHost_ip == 0)
25730+ {
25731+ srcHost_ip = PGRget_ip_by_name(header->from_host);
25732+ }
25733+ pid = ntohs(header->pid);
25734+
25735+ pthread_mutex_lock(&transaction_table_mutex);
25736+
25737+ ptr = DLGetHead(Transaction_Tbl_Begin);
25738+ while (ptr)
25739+ {
25740+ TransactionTbl *transaction = DLE_VAL(ptr);
25741+ if ((transaction->useFlag == DB_TBL_USE) &&
25742+ (transaction->hostIP == host_ip) &&
25743+ (transaction->port == host_ptr->port) &&
25744+ (transaction->srcHostIP == srcHost_ip) &&
25745+ (!strncasecmp(transaction->dbName,header->dbName,sizeof(transaction->dbName))) &&
25746+ (transaction->pid == pid))
25747+ {
25748+ pthread_mutex_unlock(&transaction_table_mutex);
25749+ return transaction;
25750+ }
25751+ ptr = DLGetSucc(ptr);
25752+ }
25753+ pthread_mutex_unlock(&transaction_table_mutex);
25754+
25755+ return (TransactionTbl * )NULL;
25756+}
25757+
25758+static void
25759+deleteTransactionTbl(HostTbl * host_ptr,ReplicateHeader * header)
25760+{
25761+ TransactionTbl *ptr = NULL;
25762+ Dlelem *elem;
25763+
25764+ ptr = getTransactionTbl(host_ptr,header);
25765+
25766+ pthread_mutex_lock(&transaction_table_mutex);
25767+
25768+ if (ptr != NULL)
25769+ {
25770+ /*
25771+ if (ptr->in_transaction)
25772+ {
25773+ if (host_ptr->transaction_count > 0)
25774+ host_ptr->transaction_count--;
25775+ }
25776+ */
25777+
25778+ if (ptr->conn != NULL)
25779+ {
25780+ PQfinish(ptr->conn);
25781+ }
25782+ elem = DLGetHead(Transaction_Tbl_Begin);
25783+ while (elem)
25784+ {
25785+ TransactionTbl *transaction = DLE_VAL(elem);
25786+ if (transaction == ptr) {
25787+ free(ptr);
25788+ DLRemove(elem);
25789+ DLFreeElem(elem);
25790+ pthread_mutex_unlock(&transaction_table_mutex);
25791+ return;
25792+ }
25793+ elem = DLGetSucc(elem);
25794+ }
25795+ }
25796+ pthread_mutex_unlock(&transaction_table_mutex);
25797+}
25798+
25799+static HostTbl *
25800+deleteHostTbl(HostTbl * ptr)
25801+{
25802+ if (ptr != (HostTbl*)NULL)
25803+ {
25804+ memset(ptr,0,sizeof(HostTbl));
25805+ }
25806+ return ++ptr;
25807+}
25808+
25809+HostTbl *
25810+PGRadd_HostTbl(HostTbl *conf_data, int useFlag)
25811+{
25812+ HostTbl * ptr = NULL;
25813+ int cnt = 0;
25814+
25815+ ptr = PGRget_HostTbl(conf_data->resolvedName, conf_data->port);
25816+ if (ptr != (HostTbl*)NULL)
25817+ {
25818+ PGRset_host_status(ptr,useFlag);
25819+ return ptr;
25820+ }
25821+
25822+ ptr = Host_Tbl_Begin;
25823+ cnt = 1;
25824+ while (ptr->useFlag != DB_TBL_END)
25825+ {
25826+ if (ptr->useFlag == DB_TBL_FREE)
25827+ {
25828+ break;
25829+ }
25830+ ptr ++;
25831+ cnt ++;
25832+ }
25833+ if (cnt >= MAX_DB_SERVER)
25834+ {
25835+ return (HostTbl*)NULL;
25836+ }
25837+ if (ptr->useFlag == DB_TBL_END)
25838+ {
25839+ (ptr + 1) -> useFlag = DB_TBL_END;
25840+ }
25841+ memset(ptr,0,sizeof(HostTbl));
25842+ ptr->hostNum = cnt;
25843+ memcpy(ptr->hostName,conf_data->hostName,sizeof(ptr->hostName));
25844+ memcpy(ptr->resolvedName,conf_data->resolvedName,sizeof(ptr->resolvedName));
25845+ ptr->port = conf_data->port;
25846+ ptr->recoveryPort = conf_data->recoveryPort;
25847+ ptr->transaction_count = 0;
25848+ PGRset_host_status(ptr,useFlag);
25849+
25850+ return ptr;
25851+}
25852+
25853+HostTbl *
25854+PGRget_master(void)
25855+{
25856+ HostTbl * host_tbl = NULL;
25857+
25858+ host_tbl = Host_Tbl_Begin;
25859+ while(host_tbl->useFlag != DB_TBL_END)
25860+ {
25861+ if (host_tbl->useFlag == DB_TBL_USE)
25862+ {
25863+ return host_tbl;
25864+ }
25865+ host_tbl ++;
25866+ }
25867+ return (HostTbl *)NULL;
25868+}
25869+
25870+void
25871+PGRset_recovery_status(int status)
25872+{
25873+ if (RecoverySemID <= 0)
25874+ return;
25875+ PGRsem_lock(RecoverySemID,SEM_NUM_OF_RECOVERY);
25876+ if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
25877+ {
25878+ Recovery_Status_Inf->recovery_status = status;
25879+
25880+ }
25881+ PGRsem_unlock(RecoverySemID,SEM_NUM_OF_RECOVERY);
25882+}
25883+
25884+int
25885+PGRget_recovery_status(void)
25886+{
25887+ int status = -1;
25888+
25889+ if (RecoverySemID <= 0)
25890+ return -1;
25891+ PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25892+ if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
25893+ {
25894+ status = Recovery_Status_Inf->recovery_status;
25895+ }
25896+ PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25897+ return status;
25898+
25899+}
25900+
25901+static void
25902+set_transaction_status(int status)
25903+{
25904+ if (RecoverySemID <= 0)
25905+ return ;
25906+ PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25907+ if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
25908+ {
25909+ Recovery_Status_Inf->recovery_status = status;
25910+ }
25911+ PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25912+}
25913+
#if 0
/*
 * get_transaction_status()  -- compiled out
 *
 * Read Recovery_Status_Inf->recovery_status under the
 * SEM_NUM_OF_RECOVERY semaphore; returns 0 when the semaphore set or
 * the shared status area is not available.
 */
static int
get_transaction_status(void)
{
	int current = 0;

	if (RecoverySemID <= 0)
		return 0;

	PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY);
	if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
		current = Recovery_Status_Inf->recovery_status;
	PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY);

	return current;
}
#endif
25933+
25934+int
25935+PGRcheck_recovered_host(void)
25936+{
25937+ char * func = "PGRcheck_recovered_host()";
25938+ HostTbl * ptr = NULL;
25939+ int rtn = STATUS_OK;
25940+
25941+ if (RecoverySemID <= 0)
25942+ return STATUS_ERROR;
25943+ PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25944+ if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
25945+ {
25946+ if (Recovery_Status_Inf->useFlag != DB_TBL_FREE)
25947+ {
25948+ ptr = PGRadd_HostTbl((HostTbl *)&(Recovery_Status_Inf->target_host),Recovery_Status_Inf->useFlag);
25949+ if (ptr == (HostTbl *) NULL)
25950+ {
25951+ show_error("%s:PGRadd_HostTbl failed",func);
25952+ rtn = STATUS_ERROR;
25953+ }
25954+ Recovery_Status_Inf->useFlag = DB_TBL_FREE;
25955+ memset((HostTbl *)&(Recovery_Status_Inf->target_host),0,sizeof(HostTbl));
25956+
25957+ }
25958+ }
25959+ PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25960+ return rtn;
25961+}
25962+
25963+int
25964+PGRset_recovered_host(HostTbl * target, int useFlag)
25965+{
25966+ if (RecoverySemID <= 0)
25967+ return -1;
25968+ PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25969+ if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
25970+ {
25971+ Recovery_Status_Inf->useFlag = useFlag;
25972+ if (target != (HostTbl*)NULL)
25973+ {
25974+ memcpy((HostTbl *)&(Recovery_Status_Inf->target_host),target,sizeof(HostTbl));
25975+ PGRset_host_status(target,useFlag);
25976+ }
25977+
25978+ }
25979+ PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY);
25980+ return 0;
25981+}
25982+
25983+static bool
25984+is_master_in_recovery(char * host , int port,int recovery_status)
25985+{
25986+ HostTbl * master = NULL;
25987+
25988+ int status = PGRget_recovery_status();
25989+ if (status == RECOVERY_CLEARED)
25990+ {
25991+ master = PGRget_master();
25992+ if (master == (HostTbl *)NULL)
25993+ {
25994+ return false;
25995+ }
25996+ return (PGRis_same_host(host, port , master->hostName, master->port));
25997+ }
25998+ return false;
25999+}
26000+
26001+int
26002+PGRinit_recovery(void)
26003+{
26004+ char * func = "PGRinit_recovery()";
26005+ int size = 0;
26006+ union semun sem_arg;
26007+ int i = 0;
26008+
26009+ if ((RecoverySemID = semget(IPC_PRIVATE,4,IPC_CREAT | IPC_EXCL | 0600)) < 0)
26010+ {
26011+ show_error("%s:semget() failed. (%s)",func,strerror(errno));
26012+ return STATUS_ERROR;
26013+ }
26014+ for ( i = 0 ; i < 4 ; i ++)
26015+ {
26016+ semctl(RecoverySemID, i, GETVAL, sem_arg);
26017+ sem_arg.val = 1;
26018+ semctl(RecoverySemID, i, SETVAL, sem_arg);
26019+ }
26020+
26021+ size = sizeof(RecoveryStatusInf);
26022+ RecoveryShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
26023+ if (RecoveryShmid < 0)
26024+ {
26025+ show_error("%s:shmget() failed. (%s)",func,strerror(errno));
26026+ return STATUS_ERROR;
26027+ }
26028+ Recovery_Status_Inf = (RecoveryStatusInf *)shmat(RecoveryShmid,0,0);
26029+ if (Recovery_Status_Inf == (RecoveryStatusInf *)-1)
26030+ {
26031+ show_error("%s:shmat() failed. (%s)",func,strerror(errno));
26032+ return STATUS_ERROR;
26033+ }
26034+ memset(Recovery_Status_Inf,0,size);
26035+ Recovery_Status_Inf->check_point = PGR_CHECK_POINT ;
26036+
26037+ size = sizeof(unsigned int);
26038+ ReplicateSerializationShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
26039+ if (ReplicateSerializationShmid < 0)
26040+ {
26041+ show_error("%s:shmget() failed. (%s)",func,strerror(errno));
26042+ return STATUS_ERROR;
26043+ }
26044+
26045+ PGR_ReplicateSerializationID = (unsigned int *)shmat(ReplicateSerializationShmid,0,0);
26046+ if( PGR_ReplicateSerializationID == (unsigned int *)-1) {
26047+ show_error("%s:shmat() failed. (%s)",func,strerror(errno));
26048+ return STATUS_ERROR;
26049+ }
26050+ memset(PGR_ReplicateSerializationID,0,size);
26051+ PGRset_recovery_status(RECOVERY_INIT);
26052+ PGRset_recovered_host((HostTbl *)NULL, DB_TBL_FREE);
26053+ set_transaction_status(0);
26054+
26055+ /*
26056+ * create message queue
26057+ */
26058+ RecoveryMsgShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
26059+ if (RecoveryMsgShmid < 0)
26060+ {
26061+ show_error("%s:shmget() failed. (%s)",func,strerror(errno));
26062+ return STATUS_ERROR;
26063+ }
26064+
26065+ RecoveryMsgid = (int *)shmat(RecoveryMsgShmid,0,0);
26066+ if( RecoveryMsgid < 0) {
26067+ show_error("%s:shmat() failed. (%s)",func,strerror(errno));
26068+ return STATUS_ERROR;
26069+ }
26070+ *RecoveryMsgid = msgget (IPC_PRIVATE, 00666 | IPC_CREAT );
26071+ if (*RecoveryMsgid < 0)
26072+ {
26073+ show_error("%s:msgget() failed. (%s)",func,strerror(errno));
26074+ return STATUS_ERROR;
26075+ }
26076+
26077+
26078+ return STATUS_OK;
26079+}
26080+
26081+static void
26082+clearHostTbl(void)
26083+{
26084+
26085+ HostTbl * ptr = NULL;
26086+
26087+ if (Host_Tbl_Begin == NULL)
26088+ return;
26089+ /* normal socket close */
26090+ ptr = Host_Tbl_Begin;
26091+ while(ptr && ptr->useFlag != DB_TBL_END)
26092+ {
26093+ ptr = deleteHostTbl(ptr);
26094+ }
26095+}
26096+
26097+void
26098+PGRexit_subprocess(int signo)
26099+{
26100+ exit_signo = signo;
26101+ PGRreplicate_exit(1);
26102+}
26103+
26104+void
26105+PGRreplicate_exit(int exit_status)
26106+{
26107+ char fname[256];
26108+ int rtn = 0;
26109+ sigset_t mask;
26110+
26111+ sigemptyset(&mask);
26112+ sigaddset(&mask, SIGTERM);
26113+ sigaddset(&mask, SIGINT);
26114+ sigaddset(&mask, SIGQUIT);
26115+ sigaddset(&mask, SIGCHLD);
26116+ sigprocmask(SIG_BLOCK, &mask, NULL);
26117+
26118+ kill (0, exit_signo);
26119+
26120+ child_wait(0);
26121+
26122+ if (RidFp != NULL)
26123+ {
26124+ rewind(RidFp);
26125+ if (Recovery_Status_Inf != NULL)
26126+ {
26127+ PGRwrite_log_file(RidFp,"%u",Recovery_Status_Inf->replication_id);
26128+ }
26129+ fflush(RidFp);
26130+ fclose(RidFp);
26131+ RidFp = NULL;
26132+ }
26133+
26134+ if (ReplicateSock > 0)
26135+ close(ReplicateSock);
26136+
26137+ /* recovery status clear */
26138+ if (RecoverySemID > 0)
26139+ Recovery_Status_Inf->recovery_status = RECOVERY_INIT;
26140+
26141+ /* normal socket close */
26142+ clearHostTbl();
26143+
26144+ if (Host_Tbl_Begin != (HostTbl *)NULL)
26145+ {
26146+ rtn = shmdt((char *)Host_Tbl_Begin);
26147+ shmctl(HostTblShmid,IPC_RMID,(struct shmid_ds *)NULL);
26148+ }
26149+
26150+ if (Cascade_Tbl != (ReplicateServerInfo *)NULL)
26151+ {
26152+ rtn = shmdt((char *)Cascade_Tbl);
26153+ shmctl(CascadeTblShmid,IPC_RMID,(struct shmid_ds *)NULL);
26154+ }
26155+
26156+ if (Cascade_Inf != (CascadeInf *)NULL)
26157+ {
26158+ rtn = shmdt((char *)Cascade_Inf);
26159+ shmctl(CascadeInfShmid,IPC_RMID,(struct shmid_ds *)NULL);
26160+ }
26161+
26162+ if (Commit_Log_Tbl != (CommitLogInf *)NULL)
26163+ {
26164+ rtn = shmdt((char *)Commit_Log_Tbl);
26165+ shmctl(CommitLogShmid,IPC_RMID,(struct shmid_ds *)NULL);
26166+ }
26167+
26168+ if (Recovery_Status_Inf != (RecoveryStatusInf *)NULL)
26169+ {
26170+ rtn = shmdt((char *)Recovery_Status_Inf);
26171+ shmctl(RecoveryShmid,IPC_RMID,(struct shmid_ds *)NULL);
26172+ }
26173+ if (PGR_ReplicateSerializationID!=NULL)
26174+ {
26175+ shmdt(PGR_ReplicateSerializationID);
26176+ shmctl(ReplicateSerializationShmid,IPC_RMID,(struct shmid_ds *)NULL);
26177+ }
26178+
26179+ if (RecoveryMsgid)
26180+ {
26181+ if (*RecoveryMsgid >= 0)
26182+ msgctl(*RecoveryMsgid,IPC_RMID,(struct msqid_ds *)NULL);
26183+
26184+ shmdt(RecoveryMsgid);
26185+ shmctl(RecoveryMsgShmid, IPC_RMID, NULL);
26186+ }
26187+
26188+ if (StatusFp != NULL)
26189+ {
26190+ fflush(StatusFp);
26191+ fclose(StatusFp);
26192+ StatusFp = NULL;
26193+ }
26194+ if (LogFp != NULL)
26195+ {
26196+ fflush(LogFp);
26197+ fclose(LogFp);
26198+ LogFp = NULL;
26199+ }
26200+
26201+ if (PGR_Result != NULL)
26202+ {
26203+ free(PGR_Result);
26204+ PGR_Result = NULL;
26205+ }
26206+ if (PGR_Response_Inf != NULL)
26207+ {
26208+ free(PGR_Response_Inf);
26209+ PGR_Response_Inf = NULL;
26210+ }
26211+
26212+ if (LoadBalanceTbl != NULL)
26213+ {
26214+ free(LoadBalanceTbl);
26215+ LoadBalanceTbl = NULL;
26216+ }
26217+
26218+ if (PGR_Log_Header != NULL)
26219+ {
26220+ free(PGR_Log_Header);
26221+ PGR_Log_Header = NULL;
26222+ }
26223+
26224+ if (PGR_Send_Query_ID != NULL)
26225+ {
26226+ free(PGR_Send_Query_ID);
26227+ PGR_Send_Query_ID = NULL;
26228+ }
26229+
26230+ if (CascadeSemID > 0)
26231+ {
26232+ sem_quit(CascadeSemID);
26233+ CascadeSemID = 0;
26234+ }
26235+ if (SemID > 0)
26236+ {
26237+ sem_quit(SemID);
26238+ SemID = 0;
26239+ }
26240+ if (RecoverySemID > 0)
26241+ {
26242+ sem_quit(RecoverySemID);
26243+ RecoverySemID = 0;
26244+ }
26245+ if (VacuumSemID > 0)
26246+ {
26247+ sem_quit(VacuumSemID);
26248+ }
26249+
26250+ snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGREPLICATE_PID_FILE);
26251+ unlink(fname);
26252+
26253+ /* close socket between rlog process */
26254+
26255+ if (Replicateion_Log->r_log_sock >= 0)
26256+ {
26257+ close(Replicateion_Log->r_log_sock);
26258+ Replicateion_Log->r_log_sock = -1;
26259+ }
26260+ if (Replicateion_Log->RLog_Sock_Path != NULL)
26261+ {
26262+ unlink(Replicateion_Log->RLog_Sock_Path);
26263+ free(Replicateion_Log->RLog_Sock_Path);
26264+ Replicateion_Log->RLog_Sock_Path = NULL;
26265+ }
26266+
26267+ if (ResolvedName != NULL)
26268+ {
26269+ free(ResolvedName);
26270+ ResolvedName = NULL;
26271+ }
26272+ exit(exit_status);
26273+}
26274+
26275+static int
26276+send_cluster_status_to_load_balance(HostTbl * host_ptr,int status)
26277+{
26278+ RecoveryPacket packet;
26279+ int rtn = 0;
26280+
26281+ memset(&packet,0,sizeof(RecoveryPacket));
26282+ packet.packet_no = htons(status);
26283+ strncpy(packet.hostName,host_ptr->hostName,sizeof(packet.hostName));
26284+ packet.port = htons(host_ptr->port);
26285+ rtn = PGRsend_load_balance_packet(&packet);
26286+ return rtn;
26287+}
26288+
26289+int
26290+PGRset_host_status(HostTbl * host_ptr,int status)
26291+{
26292+ if (host_ptr == NULL)
26293+ {
26294+ return STATUS_ERROR;
26295+ }
26296+ if (host_ptr->useFlag != status)
26297+ {
26298+ host_ptr->useFlag = status;
26299+ if (status == DB_TBL_ERROR )
26300+ {
26301+ host_ptr->transaction_count = 0;
26302+ send_cluster_status_to_load_balance(host_ptr,RECOVERY_ERROR_CONNECTION);
26303+ }
26304+ write_host_status_file(host_ptr);
26305+ }
26306+ return STATUS_OK;
26307+}
26308+
26309+static void
26310+write_host_status_file(HostTbl * host_ptr)
26311+{
26312+ switch( host_ptr->useFlag)
26313+ {
26314+ case DB_TBL_FREE:
26315+ PGRwrite_log_file(StatusFp,"port(%d) host:%s free",
26316+ host_ptr->port,
26317+ host_ptr->hostName);
26318+ break;
26319+ case DB_TBL_INIT:
26320+ PGRwrite_log_file(StatusFp,"port(%d) host:%s initialize",
26321+ host_ptr->port,
26322+ host_ptr->hostName);
26323+ break;
26324+ case DB_TBL_USE:
26325+ PGRwrite_log_file(StatusFp,"port(%d) host:%s start use",
26326+ host_ptr->port,
26327+ host_ptr->hostName);
26328+ break;
26329+ case DB_TBL_ERROR:
26330+ PGRwrite_log_file(StatusFp,"port(%d) host:%s error",
26331+ host_ptr->port,
26332+ host_ptr->hostName);
26333+ break;
26334+ case DB_TBL_END:
26335+ PGRwrite_log_file(StatusFp,"port(%d) host:%s end",
26336+ host_ptr->port,
26337+ host_ptr->hostName);
26338+ break;
26339+ }
26340+}
26341+
26342+static int
26343+check_result( PGresult * res )
26344+{
26345+ int status = 0;
26346+
26347+ status = PQresultStatus(res);
26348+ if ((status == PGRES_NONFATAL_ERROR ) ||
26349+ (status == PGRES_FATAL_ERROR ))
26350+ {
26351+ return STATUS_ERROR;
26352+ }
26353+ return STATUS_OK;
26354+}
26355+
/*
 * Check whether all entries of results[0..size-1], ignoring the entry at
 * index source_id, hold the same value.
 *
 * Returns true when they all agree (including when there are zero or one
 * entries to compare), false as soon as two entries differ.
 */
static bool
compare_results(int *results, int size, int source_id)
{
	int		idx;
	int		reference = 0;
	bool	have_reference = false;

	for (idx = 0; idx < size; idx++)
	{
		if (idx == source_id)
			continue;

		if (!have_reference)
		{
			/* first entry that is not the source becomes the reference */
			reference = results[idx];
			have_reference = true;
		}
		else if (results[idx] != reference)
		{
			return false;
		}
	}
	return true;
}
26380+
26381+/*--------------------------------------------------
26382+ * SYMBOL
26383+ * PGRsend_replicate_packet_to_server()
26384+ * NOTES
26385+ * Send query data to the cluster DB and recieve result data.
26386+ * ARGS
26387+ * HostTbl * host_ptr: the record of cluster DB table (target)
26388+ * ReplicateHeader * header: header data
26389+ * char *query: query data
26390+ * char * result: returned result data
26391+ * RETURN
26392+ * STATUS_OK: OK
26393+ * STATUS_ERROR: NG
26394+ * STATUS_LOCK_CONFLICT: Lock conflicted
26395+ *---------------------------------------------------
26396+ */
26397+int
26398+PGRsend_replicate_packet_to_server( HostTbl * host_ptr, ReplicateHeader * header, char *query , char * result,unsigned int replicationId, bool recovery)
26399+{
26400+ char * func = "PGRsend_replicate_packet_to_server()";
26401+ TransactionTbl * transaction_tbl = NULL;
26402+ char *database = NULL;
26403+ char port[8];
26404+ char *userName = NULL;
26405+ char * password = NULL;
26406+ char * host = NULL;
26407+ char * md5Salt = NULL;
26408+ char * cryptSalt = NULL;
26409+ int rtn = 0;
26410+ int current_cluster = 0;
26411+ int query_size = 0;
26412+
26413+ if ((query == NULL) || (header == NULL))
26414+ {
26415+ show_error("%s: query is broken",func);
26416+ return STATUS_ERROR;
26417+ }
26418+ query_size = ntohl(header->query_size);
26419+ if (query_size < 0)
26420+ {
26421+ show_error("%s: query size is broken",func);
26422+ return STATUS_ERROR;
26423+ }
26424+ if (host_ptr == NULL)
26425+ {
26426+ return STATUS_ERROR;
26427+ }
26428+
26429+ if (PGR_Response_Inf != NULL)
26430+ {
26431+ current_cluster = PGR_Response_Inf->current_cluster;
26432+ }
26433+
26434+ /*
26435+ * set up the connection
26436+ */
26437+ database = (char *)header->dbName;
26438+ snprintf(port,sizeof(port),"%d", host_ptr->port);
26439+ userName = (char *)(header->userName);
26440+ password = (char *)(header->password);
26441+ md5Salt = (char *)(header->md5Salt);
26442+ cryptSalt = (char *)(header->cryptSalt);
26443+ host = (char *)(host_ptr->resolvedName);
26444+ /*
26445+ * get the transaction table data
26446+ * it has the connection data with each cluster DB
26447+ */
26448+ transaction_tbl = getTransactionTbl(host_ptr,header);
26449+ /*
26450+ * if the transaction process is new one,
26451+ * create connection data and add the transaction table
26452+ */
26453+ if (transaction_tbl == (TransactionTbl *)NULL)
26454+ {
26455+ if (recovery == true)
26456+ {
26457+ int cnt = 0;
26458+ while(transaction_tbl == (TransactionTbl *)NULL)
26459+ {
26460+ transaction_tbl = setTransactionTbl(host_ptr, header);
26461+ if (cnt > RECOVERY_TIMEOUT)
26462+ {
26463+ break;
26464+ }
26465+ cnt ++;
26466+ sleep(1);
26467+ }
26468+ }
26469+ else
26470+ {
26471+ transaction_tbl = setTransactionTbl(host_ptr, header);
26472+ }
26473+ if (transaction_tbl == (TransactionTbl *)NULL)
26474+ {
26475+ show_error("%s:setTransactionTbl failed",func);
26476+ if ( header->cmdSts != CMD_STS_NOTICE )
26477+ {
26478+ PGRset_host_status(host_ptr,DB_TBL_ERROR);
26479+ }
26480+ return STATUS_ERROR;
26481+ }
26482+ StartReplication[current_cluster] = true;
26483+ }
26484+ else
26485+ {
26486+ /*
26487+ * re-use the connection data
26488+ */
26489+ if ((transaction_tbl->conn != (PGconn *)NULL) &&
26490+ (transaction_tbl->conn->sock > 0))
26491+ {
26492+ StartReplication[current_cluster] = false;
26493+ }
26494+ else
26495+ {
26496+ if (transaction_tbl->conn != (PGconn *)NULL)
26497+ {
26498+ PQfinish(transaction_tbl->conn);
26499+ transaction_tbl->conn = NULL;
26500+ }
26501+ transaction_tbl->conn = PGRcreateConn(host,port,database,userName,password,md5Salt,cryptSalt);
26502+ StartReplication[current_cluster] = true;
26503+ }
26504+ }
26505+ if(header->cmdSts==CMD_STS_OTHER &&
26506+ header->cmdType==CMD_TYPE_CONNECTION_CLOSE)
26507+ {
26508+ check_delete_transaction(host_ptr, header);
26509+ return STATUS_OK;
26510+ }
26511+#ifdef PRINT_DEBUG
26512+ show_debug("%s:connect db:%s port:%s user:%s host:%s query:%s",
26513+ func, database,port,userName,host,query);
26514+#endif
26515+ rtn = send_replicate_packet_to_server( transaction_tbl, current_cluster, host_ptr, header, query ,result ,replicationId, recovery);
26516+ return rtn;
26517+}
26518+
/*
 * Send one replicated command to a cluster DB over the connection stored
 * in transaction_tbl and, for plain queries, store the outcome in result.
 *
 * Depending on the header the command is handled as COPY data, a large
 * object operation, an extended-protocol (prepare) message, or a plain
 * query executed with PQexec().  Before that, a time-synchronisation
 * command may be executed on the connection.
 *
 * transaction_tbl: entry holding the connection to the target host
 * current_cluster: index into StartReplication[]
 * host_ptr:        target host
 * header:          replication header (sizes and ids in network byte order)
 * query:           command payload
 * result:          output buffer; written with up to PGR_MESSAGE_BUFSIZE bytes
 * replicationId:   embedded in the query-id command sent before a plain query
 * recovery:        not referenced in this function
 *
 * Returns STATUS_OK, STATUS_ERROR, STATUS_LOCK_CONFLICT or
 * STATUS_DEADLOCK_DETECT.
 *
 * NOTE(review): PGR_Response_Inf, PGR_Log_Header and
 * PGR_ReplicateSerializationID are dereferenced without a NULL check here —
 * confirm they are always initialised before this is reached.
 */
static int
send_replicate_packet_to_server( TransactionTbl * transaction_tbl, int current_cluster, HostTbl * host_ptr, ReplicateHeader * header, char *query , char * result,unsigned int replicationId, bool recovery)
{
	char * func = "send_replicate_packet_to_server()";
	PGconn * conn = (PGconn *)NULL;
	PGresult * res = (PGresult *)NULL;
	char sync_command[256];
	bool sync_command_flg = false;
	char * str = NULL;
	int rtn = 0;
	int query_size = 0;
	int hostNum = 0;
	StringInfoData input_message;

	if (( transaction_tbl == (TransactionTbl *)NULL) ||
		( host_ptr == (HostTbl *) NULL) ||
		(header == (ReplicateHeader *) NULL) ||
		(query == NULL) ||
		( result == NULL))
	{
		show_error("%s:unexpected NULL variable",func);
		return STATUS_ERROR;
	}

	query_size = ntohl(header->query_size);
	if (query_size < 0)
	{
		show_error("%s: query size is broken",func);
		return STATUS_ERROR;
	}

/*
	if(header->cmdSts == CMD_STS_OTHER &&
	   header->cmdType == CMD_TYPE_CONNECTION_CLOSE)
	{
		check_delete_transaction(host_ptr,header);
		return STATUS_OK;
	}
*/
	conn = transaction_tbl->conn;
	if (conn == NULL)
	{
		show_error("%s:[%d@%s] may be down",func,host_ptr->port,host_ptr->hostName);
		/* a failed notice command does not mark the host as broken */
		if ( header->cmdSts != CMD_STS_NOTICE )
		{
			PGRset_host_status(host_ptr,DB_TBL_ERROR);
		}
		return STATUS_ERROR;
	}
	hostNum = host_ptr->hostNum;

	/*
	 * When the query is transaction query...
	 */
	if (is_need_sync_time(header) == true)
	{
		/* only sync when not nested more than one transaction deep */
		if (transaction_tbl->transaction_count >1 )
		{
			sync_command_flg = false;
		}
		else
		{
			sync_command_flg = true;
		}
	}
	if ((header->cmdSts == CMD_STS_TRANSACTION ) ||
		(header->cmdSts == CMD_STS_SET_SESSION_AUTHORIZATION ))
	{
		/* within transaction commands, only an outermost BEGIN syncs */
		if ((header->cmdSts == CMD_STS_TRANSACTION ) &&
			((header->cmdType != CMD_TYPE_BEGIN) ||
			 (transaction_tbl->transaction_count >1 )))
		{
			sync_command_flg = false;
		}
	}

	/*
	 * execute query
	 */

	if (header->rlog > 0 )
	{

		/* skip a command that is_executed_query() reports as already run */
		if (is_executed_query( conn, header) == true)
		{
			return STATUS_OK;
		}
		else
		{
#ifdef PRINT_DEBUG
			show_debug("%s:check replication log issue , id=%d,rlog=%d,query=%s status=not_replicated",func,ntohl(header->replicate_id),header->rlog,query);
#endif
		}
	}
	/*
	 * Send the time-sync command first when one is required or when
	 * StartReplication[] is set for this cluster; its result is discarded.
	 */
	if (( header->cmdSts != CMD_STS_NOTICE ) &&
		( header->cmdSts != CMD_STS_PREPARE ) &&
		((sync_command_flg == true) ||
		 (StartReplication[current_cluster] == true)))
	{
		snprintf(sync_command,sizeof(sync_command),
			"SELECT %s(%d,%u,%u,%u,%d,%u) ",
			PGR_SYSTEM_COMMAND_FUNC,
			PGR_SET_CURRENT_TIME_FUNC_NO,
			(unsigned int)ntohl(header->tv.tv_sec),
			(unsigned int)ntohl(header->tv.tv_usec),
			(unsigned int)ntohl(PGR_Log_Header->replicate_id),
			PGR_Response_Inf->response_mode,
			*PGR_ReplicateSerializationID);
#ifdef PRINT_DEBUG
		show_debug("%s:sync_command(%s)",func,sync_command);
#endif
		res = PQexec(conn, sync_command);
		if (res != NULL)
			PQclear(res);
		StartReplication[current_cluster] = false;
	}

	res = NULL;
	if ((header->cmdType == CMD_TYPE_COPY_DATA) ||
		(header->cmdType == CMD_TYPE_COPY_DATA_END))
	{
		/* copy data replication */
		rtn =PQputnbytes(conn, query,query_size);
		if (header->cmdType == CMD_TYPE_COPY_DATA_END)
		{
			rtn = PQendcopy(conn);
			if (rtn == 1) /* failed */
			{
				/* drop the connection; the next command starts replication afresh */
				if (transaction_tbl->conn != NULL)
				{
					PQfinish(transaction_tbl->conn);
					transaction_tbl->conn = (PGconn *)NULL;
					StartReplication[current_cluster] = true;
				}
			}
		}
		/* note: STATUS_OK is returned even when PQendcopy() failed */
		*(PGR_Send_Query_ID + hostNum ) = ntohl(header->query_id);
		return STATUS_OK;
	}
	else if (header->cmdSts == CMD_STS_LARGE_OBJECT)
	{
		/* large object command: query carries a LOArgs structure */
		replicate_lo(conn, header,(LOArgs *)query);
		return STATUS_OK;
	}

	else if (header->cmdSts == CMD_STS_PREPARE)
	{

		if ( !PGR_Parse_Session_Started)
		{
			/* same time-sync command as above, sent before the first message */
			snprintf(sync_command,sizeof(sync_command),
				"SELECT %s(%d,%u,%u,%u,%d,%u) ",
				PGR_SYSTEM_COMMAND_FUNC,
				PGR_SET_CURRENT_TIME_FUNC_NO,
				(unsigned int)ntohl(header->tv.tv_sec),
				(unsigned int)ntohl(header->tv.tv_usec),
				(unsigned int)ntohl(PGR_Log_Header->replicate_id),
				PGR_Response_Inf->response_mode,
				*PGR_ReplicateSerializationID);
			res = PQexec(conn, sync_command);
			if (res != NULL)
			{
				PQclear(res);
				res = NULL;
			}
			/* drain any results still pending on the connection */
			while ((res = PQgetResult(conn)) != NULL)
			{
				if (res->resultStatus == PGRES_COPY_IN)
				{
					PQclear(res);
					return STATUS_ERROR;
				}
				else if (res->resultStatus == PGRES_COPY_OUT)
				{
					conn->asyncStatus = PGASYNC_BUSY;
				}
				else if (conn->status == CONNECTION_BAD)
				{
					PQclear(res);
					return STATUS_ERROR;
				}
				PQclear(res);
			}
		}
		set_string_info(&input_message,header,query);
		/*
		 * Forward the message with the sender matching its type.  Any send
		 * failure clears PGR_Parse_Session_Started and reports an error.
		 */
		switch (header->cmdType)
		{
			case CMD_TYPE_P_PARSE :
				if (send_p_parse(conn, &input_message) != STATUS_OK)
				{
					pqHandleSendFailure(conn);
					PGR_Parse_Session_Started = false;
					return STATUS_ERROR;
				}
				break;
			case CMD_TYPE_P_BIND :
				if (send_p_bind(conn, &input_message) != STATUS_OK)
				{
					pqHandleSendFailure(conn);
					PGR_Parse_Session_Started = false;
					return STATUS_ERROR;
				}
				break;
			case CMD_TYPE_P_DESCRIBE :
				if (send_p_describe(conn, &input_message) != STATUS_OK)
				{
					pqHandleSendFailure(conn);
					PGR_Parse_Session_Started = false;
					return STATUS_ERROR;
				}
				break;
			case CMD_TYPE_P_EXECUTE :
				if (send_p_execute(conn,&input_message) != STATUS_OK)
				{
					pqHandleSendFailure(conn);
					PGR_Parse_Session_Started = false;
					return STATUS_ERROR;
				}
				break;
			case CMD_TYPE_P_SYNC :
				if (send_p_sync(conn, &input_message) != STATUS_OK)
				{
					pqHandleSendFailure(conn);
					PGR_Parse_Session_Started = false;
					return STATUS_ERROR;
				}
				break;
			case CMD_TYPE_P_CLOSE :
				if (send_p_close(conn, &input_message) != STATUS_OK)
				{
					pqHandleSendFailure(conn);
					PGR_Parse_Session_Started = false;
					return STATUS_ERROR;
				}
				break;
			default :
				break;
		}
		return STATUS_OK;
	}
	else
	{
		/* plain query: reset a lock state left over from an earlier attempt */
		if (transaction_tbl->lock != STATUS_OK)
		{
#ifdef PRINT_DEBUG
			show_debug("%s:[%d]transaction_tbl->lock is [%d]",func,current_cluster,transaction_tbl->lock );
#endif
			transaction_tbl->lock = STATUS_OK;
		}
		/* send the replication query id to the backend before the query itself */
		snprintf(sync_command,sizeof(sync_command),
			"SELECT %s(%d,%u,%u,%d) ",
			PGR_SYSTEM_COMMAND_FUNC,
			PGR_SET_CURRENT_REPLICATION_QUERY_ID_NO,
			replicationId,
			0,
			PGR_Response_Inf->response_mode);
		res = PQexec(conn, sync_command);
		if (res != NULL)
		{
			PQclear(res);
			res = NULL;
		}
		res = PQexec(conn, query);
		/* rtn is not consulted afterwards; the outcome is taken from PQcmdStatus() */
		rtn = check_result(res);
#ifdef PRINT_DEBUG
		show_debug("%s:PQexec send :%s",func,query);
#endif

	}

	if (res == NULL)
	{
		StartReplication[current_cluster] = true;
		return STATUS_ERROR;
	}

	str = PQcmdStatus(res);
#ifdef PRINT_DEBUG
	show_debug("%s:PQexec returns :%s",func,str);
#endif
	if ((str == NULL) || (*str == '\0'))
	{
		/* no command status: report "E" followed by the error message, if any */
		if ((result != NULL) && (res != NULL) && (res->errMsg != NULL))
		{
			snprintf(result,PGR_MESSAGE_BUFSIZE,"E%s",res->errMsg);
		}
		else
		{
			strcpy(result,"E");
		}
		StartReplication[current_cluster] = true;
	}
	else
	{
		/* the command status may carry a lock-conflict or deadlock notice */
		if (!strncasecmp(str,PGR_LOCK_CONFLICT_NOTICE_CMD,strlen(PGR_LOCK_CONFLICT_NOTICE_CMD)))
		{
#ifdef PRINT_DEBUG
			show_debug("%s:LOCK CONFLICT from PQexec",func);
#endif
			if (res != NULL)
				PQclear(res);

			transaction_tbl->lock = STATUS_LOCK_CONFLICT;
			return STATUS_LOCK_CONFLICT;
		}
		else if (!strncasecmp(str,PGR_DEADLOCK_DETECT_NOTICE_CMD,strlen(PGR_DEADLOCK_DETECT_NOTICE_CMD)))
		{
#ifdef PRINT_DEBUG
			show_debug("%s:DEADLOCK DETECTED from PQexec",func);
#endif
			if (res != NULL)
				PQclear(res);
			transaction_tbl->lock = STATUS_DEADLOCK_DETECT;
			return STATUS_DEADLOCK_DETECT;
		}
		/* success: report "C" followed by the command status */
		snprintf(result,PGR_MESSAGE_BUFSIZE,"C%s",str);
	}
	if (res != NULL)
		PQclear(res);

	/* set send query id */
	*(PGR_Send_Query_ID + hostNum ) = ntohl(header->query_id);

	/*
	 * if the query is end transaction process...
	 */
	check_delete_transaction(host_ptr,header);

	return STATUS_OK;
}
26849+
26850+static int
26851+check_delete_transaction (HostTbl * host_ptr, ReplicateHeader * header)
26852+{
26853+ char *database = NULL;
26854+
26855+ if ((host_ptr == NULL) || (header == NULL))
26856+ {
26857+ return STATUS_ERROR;
26858+ }
26859+ database = (char *)header->dbName;
26860+ if(header->cmdSts == CMD_STS_OTHER &&
26861+ header->cmdType == CMD_TYPE_CONNECTION_CLOSE)
26862+ {
26863+ notice_abort(host_ptr, header);
26864+ deleteTransactionTbl(host_ptr,header);
26865+ }
26866+
26867+ delete_template(host_ptr, header);
26868+ return STATUS_OK;
26869+}
26870+
26871+static void
26872+check_transaction_status(ReplicateHeader * header,
26873+ TransactionTbl *transaction)
26874+{
26875+ if (header == (ReplicateHeader *)NULL)
26876+ {
26877+ return;
26878+ }
26879+ if (header->cmdSts == CMD_STS_TRANSACTION )
26880+ {
26881+ if (header->cmdType == CMD_TYPE_BEGIN )
26882+ {
26883+ if (transaction != NULL)
26884+ {
26885+ transaction->in_transaction = true;
26886+ transaction->transaction_count ++;
26887+ }
26888+ }
26889+ else if ((header->cmdType == CMD_TYPE_COMMIT) ||
26890+ (header->cmdType == CMD_TYPE_ROLLBACK))
26891+ {
26892+ if (transaction != NULL)
26893+ {
26894+ if (transaction->transaction_count > 0)
26895+ {
26896+ transaction->transaction_count --;
26897+ }
26898+ if (transaction->transaction_count == 0)
26899+ {
26900+ transaction->in_transaction = false;
26901+ }
26902+ }
26903+ }
26904+ }
26905+ else
26906+ {
26907+ if ( header->cmdType == CMD_TYPE_COPY )
26908+ {
26909+ if (transaction != NULL)
26910+ {
26911+ transaction->exec_copy = true;
26912+ }
26913+ }
26914+ else if (header->cmdType == CMD_TYPE_COPY_DATA_END)
26915+ {
26916+ if (transaction != NULL)
26917+ {
26918+ transaction->exec_copy = false;
26919+ }
26920+ }
26921+ }
26922+}
26923+
26924+static HostTbl *
26925+check_host_transaction_status(ReplicateHeader * header,
26926+ HostTbl *host)
26927+{
26928+ int recovery_status = 0;
26929+
26930+ if ((header == (ReplicateHeader *)NULL) || (host == (HostTbl *)NULL))
26931+ {
26932+ return NULL;
26933+ }
26934+ if (header->cmdType == CMD_TYPE_BEGIN )
26935+ {
26936+ host->transaction_count++;
26937+ }
26938+ else if ((header->cmdType == CMD_TYPE_COMMIT) ||
26939+ (header->cmdType == CMD_TYPE_ROLLBACK))
26940+ {
26941+ if (host->transaction_count > 0)
26942+ host->transaction_count--;
26943+ }
26944+
26945+ recovery_status = PGRget_recovery_status();
26946+ if ((recovery_status == RECOVERY_PREPARE_START) &&
26947+ (host->transaction_count > 0))
26948+ {
26949+ PGRset_recovery_status(RECOVERY_WAIT_CLEAN);
26950+ }
26951+ else if ((recovery_status == RECOVERY_PREPARE_START) &&
26952+ (host->transaction_count==0))
26953+ {
26954+ PGRset_recovery_status(RECOVERY_CLEARED);
26955+ }
26956+ else if ((recovery_status == RECOVERY_WAIT_CLEAN) &&
26957+ (host->transaction_count==0))
26958+ {
26959+ PGRset_recovery_status(RECOVERY_CLEARED);
26960+ }
26961+ return host;
26962+}
26963+
26964+static FILE *
26965+create_queue_file(void)
26966+{
26967+ char * func = "create_queue_file()";
26968+ FILE * fp = NULL;
26969+ struct timeval tv;
26970+ char fname[FILENAME_MAX_LENGTH];
26971+ int size = 0;
26972+ int rtn = 0;
26973+ RecoveryQueueFile * msg = NULL;
26974+
26975+ if (*RecoveryMsgid < 0)
26976+ {
26977+ return (FILE *)NULL;
26978+ }
26979+ /* create uniq file name */
26980+ gettimeofday(&tv,NULL);
26981+ memset(fname,0,sizeof(fname));
26982+ snprintf(fname,sizeof(fname),"%s/%s_%u.%u",
26983+ PGR_Data_Path,
26984+ RECOVERY_QUEUE_FILE,
26985+ (uint32_t)tv.tv_sec,
26986+ (uint32_t)tv.tv_usec);
26987+
26988+ size = sizeof(fname) + sizeof(RecoveryQueueFile);
26989+ msg = (RecoveryQueueFile *)malloc(size);
26990+ if (msg == NULL)
26991+ {
26992+ show_error("%s:malloc() failed. reason: %s", func, strerror(errno));
26993+ return (FILE *)NULL;
26994+ }
26995+ memset(msg,0,size);
26996+ msg->mtype = RECOVERY_FILE_MTYPE;
26997+ strncpy(msg->mdata,fname,sizeof(fname));
26998+
26999+ fp = fopen(fname,"a");
27000+ if (fp == NULL)
27001+ {
27002+ show_error("%s:fopen failed: (%s)",func,strerror(errno));
27003+ return (FILE *)NULL;
27004+ }
27005+
27006+ rtn = msgsnd(*RecoveryMsgid, msg, sizeof(fname), IPC_NOWAIT);
27007+ if (rtn < 0)
27008+ {
27009+ show_error("%s:msgsnd failed. reason: %s", func, strerror(errno));
27010+ free(msg);
27011+ msgctl(*RecoveryMsgid, IPC_RMID, NULL);
27012+ *RecoveryMsgid = msgget (IPC_PRIVATE, 00666 | IPC_CREAT );
27013+ return (FILE *)NULL;
27014+ }
27015+
27016+ strncpy(Recovery_Status_Inf->write_file,fname,sizeof(Recovery_Status_Inf->write_file));
27017+ return fp;
27018+}
27019+
27020+static int
27021+add_queue_file(char * data,int size)
27022+{
27023+ int cnt = 0;
27024+
27025+ if ((QueueFp == NULL) || (data == NULL) || (size < 0))
27026+ {
27027+ return STATUS_ERROR;
27028+ }
27029+ /*fseek(QueueFp,0,SEEK_END);*/
27030+ while (fwrite(data, size,1,QueueFp) <= 0)
27031+ {
27032+ fclose(QueueFp);
27033+ QueueFp = NULL;
27034+ if (cnt > MAX_RETRY_TIMES)
27035+ {
27036+ return STATUS_ERROR;
27037+ }
27038+ QueueFp = create_queue_file();
27039+ cnt ++;
27040+ }
27041+ Recovery_Status_Inf->file_size += size;
27042+ return STATUS_OK;
27043+}
27044+
27045+/*
27046+ * set query in queue
27047+ */
27048+int
27049+PGRset_queue(ReplicateHeader * header,char * query)
27050+{
27051+ char * func = "PGRset_queue()";
27052+ int header_size = 0;
27053+ int query_size = 0;
27054+
27055+ if ((Recovery_Status_Inf == NULL) || (header == NULL))
27056+ {
27057+ show_error("%s:header is null",func);
27058+ return STATUS_ERROR;
27059+ }
27060+
27061+ query_size = ntohl(header->query_size);
27062+ if (query_size < 0)
27063+ {
27064+ show_error("%s:query size less than 0",func);
27065+ return STATUS_ERROR;
27066+ }
27067+ header_size = sizeof(ReplicateHeader);
27068+
27069+ if (RecoverySemID <= 0)
27070+ {
27071+ show_error("%s:RecoverySemID is not initialized",func);
27072+ return STATUS_ERROR;
27073+ }
27074+ PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
27075+ /* check existance of queue file */
27076+ if (Recovery_Status_Inf->write_file[0] == '\0')
27077+ {
27078+ /* create new queue file */
27079+ Recovery_Status_Inf->file_size = 0;
27080+ QueueFp = create_queue_file();
27081+ }
27082+ else
27083+ {
27084+ /* check size of queue file */
27085+ if (Recovery_Status_Inf->file_size + header_size + query_size > MAX_QUEUE_FILE_SIZE)
27086+ {
27087+ /* if the file size is over the limit, create new queue file */
27088+ memset(Recovery_Status_Inf->write_file,0,sizeof(Recovery_Status_Inf->write_file));
27089+ fclose(QueueFp);
27090+ Recovery_Status_Inf->file_size = 0;
27091+ QueueFp = create_queue_file();
27092+ }
27093+ else
27094+ {
27095+ QueueFp= fopen(Recovery_Status_Inf->write_file,"a");
27096+ }
27097+ }
27098+ if (QueueFp == (FILE *)NULL)
27099+ {
27100+ PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
27101+ show_error("%s:QueueFp open failed. error is %s",func,strerror(errno));
27102+ return STATUS_ERROR;
27103+ }
27104+ header->replicate_id = htonl(*PGR_ReplicateSerializationID);
27105+ if (add_queue_file((char *)header,header_size) != STATUS_OK)
27106+ {
27107+ PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
27108+ show_error("%s:header add failed into queue file",func);
27109+ return STATUS_ERROR;
27110+ }
27111+ if (query_size > 0)
27112+ {
27113+ if (add_queue_file((char *)query,query_size) != STATUS_OK)
27114+ {
27115+ PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
27116+ show_error("%s:queue add failed into queue file",func);
27117+ return STATUS_ERROR;
27118+ }
27119+ }
27120+ fflush(QueueFp);
27121+ fclose(QueueFp);
27122+ PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
27123+
27124+ return STATUS_OK;
27125+}
27126+
27127+HostTbl *
27128+PGRget_HostTbl(char * resolvedName, int port)
27129+{
27130+ HostTbl * ptr = NULL;
27131+ int len = 0;
27132+
27133+ if (Host_Tbl_Begin == NULL)
27134+ {
27135+ return NULL;
27136+ }
27137+ len = strlen(resolvedName);
27138+ ptr = Host_Tbl_Begin;
27139+ if (len > sizeof(ptr->resolvedName))
27140+ {
27141+ len = sizeof(ptr->resolvedName);
27142+ }
27143+ while(ptr->useFlag != DB_TBL_END)
27144+ {
27145+ if ((! memcmp(ptr->resolvedName,resolvedName,len)) &&
27146+ (ptr->port == port))
27147+ {
27148+ return ptr;
27149+ }
27150+ ptr ++;
27151+ }
27152+ return (HostTbl*)NULL;
27153+}
27154+
/*
 * Remove the semaphore set identified by semid (IPC_RMID).
 * The result of semctl() is not checked.
 */
static void
sem_quit(int semid)
{
	(void) semctl(semid, 0, IPC_RMID);
}
27160+
27161+void
27162+PGRclear_connections(void)
27163+{
27164+ Dlelem *ptr = NULL;
27165+
27166+ pthread_mutex_lock(&transaction_table_mutex);
27167+ ptr = DLGetHead(Transaction_Tbl_Begin);
27168+ while (ptr)
27169+ {
27170+ TransactionTbl *transaction = DLE_VAL(ptr);
27171+ if (transaction->conn != NULL)
27172+ {
27173+ PQfinish(transaction->conn);
27174+ transaction->conn = NULL;
27175+ }
27176+ ptr = DLGetSucc(ptr);
27177+ }
27178+ pthread_mutex_unlock(&transaction_table_mutex);
27179+}
27180+
27181+void
27182+PGRdestroy_transaction_table(void)
27183+{
27184+ Dlelem *ptr = NULL, *next;
27185+ pthread_mutex_lock(&transaction_table_mutex);
27186+ ptr = DLGetHead(Transaction_Tbl_Begin);
27187+ while (ptr)
27188+ {
27189+ next = DLGetSucc(ptr);
27190+ DLRemove(ptr);
27191+ DLFreeElem(ptr);
27192+ ptr = next;
27193+ }
27194+ DLFreeList(Transaction_Tbl_Begin);
27195+ Transaction_Tbl_Begin = NULL;
27196+ pthread_mutex_unlock(&transaction_table_mutex);
27197+}
27198+
27199+static bool
27200+is_need_sync_time(ReplicateHeader * header)
27201+{
27202+ bool rtn = false;
27203+
27204+ if (header->cmdSts == CMD_STS_PREPARE)
27205+ {
27206+ rtn = false;
27207+ }
27208+ else if ((header->cmdType == CMD_TYPE_COPY) ||
27209+ (header->cmdType == CMD_TYPE_COPY_DATA) ||
27210+ (header->cmdType == CMD_TYPE_COPY_DATA_END))
27211+ {
27212+ rtn = false;
27213+ }
27214+ if ((header->cmdSts == CMD_STS_QUERY ) &&
27215+ ((header->cmdType == CMD_TYPE_INSERT) ||
27216+ (header->cmdType == CMD_TYPE_UPDATE) ||
27217+ (header->cmdType == CMD_TYPE_DELETE) ||
27218+ (header->cmdType == CMD_TYPE_SET) ||
27219+ (header->cmdType == CMD_TYPE_EXECUTE)))
27220+ {
27221+ rtn = true;
27222+ }
27223+ else
27224+ {
27225+ if ((header->cmdType == CMD_TYPE_COPY) ||
27226+ (header->cmdType == CMD_TYPE_SELECT) ||
27227+ (header->cmdType == CMD_TYPE_VACUUM) ||
27228+ (header->cmdType == CMD_TYPE_ANALYZE) ||
27229+ (header->cmdType == CMD_TYPE_BEGIN))
27230+ {
27231+ rtn = true;
27232+ }
27233+ if ((header->cmdSts == CMD_STS_TRANSACTION ) &&
27234+ (header->cmdType != CMD_TYPE_BEGIN))
27235+ {
27236+ rtn = false;
27237+ }
27238+ }
27239+ return rtn;
27240+}
27241+
27242+static bool
27243+is_need_wait_answer(ReplicateHeader * header)
27244+{
27245+ bool rtn = false;
27246+
27247+ if (header->cmdSts == CMD_STS_PREPARE)
27248+ {
27249+ rtn = false;
27250+ }
27251+ else if ((header->cmdType == CMD_TYPE_COPY) ||
27252+ (header->cmdType == CMD_TYPE_COPY_DATA) ||
27253+ (header->cmdType == CMD_TYPE_COPY_DATA_END))
27254+ {
27255+ rtn = false;
27256+ }
27257+ else if ((header->cmdSts == CMD_STS_QUERY ) &&
27258+ ((header->cmdType == CMD_TYPE_INSERT) ||
27259+ (header->cmdType == CMD_TYPE_UPDATE) ||
27260+ (header->cmdType == CMD_TYPE_DELETE) ||
27261+ (header->cmdType == CMD_TYPE_VACUUM) ||
27262+ (header->cmdType == CMD_TYPE_ANALYZE) ||
27263+ (header->cmdType == CMD_TYPE_EXECUTE)))
27264+ {
27265+ rtn = true;
27266+ }
27267+ else if ((header->cmdSts == CMD_STS_TRANSACTION ) ||
27268+ (header->cmdSts == CMD_STS_SET_SESSION_AUTHORIZATION ) ||
27269+ (header->cmdSts == CMD_STS_TEMP_TABLE ) ||
27270+ (header->cmdType == CMD_TYPE_SELECT))
27271+ {
27272+ rtn = true;
27273+ }
27274+
27275+ return rtn;
27276+}
27277+
27278+static void
27279+delete_template(HostTbl * ptr, ReplicateHeader * header)
27280+{
27281+ if ((ptr == (HostTbl *)NULL ) ||
27282+ (header == (ReplicateHeader *)NULL) )
27283+ {
27284+ return;
27285+ }
27286+
27287+ if ((! strncmp(header->dbName,"template1",9)) ||
27288+ (! strncmp(header->dbName,"template0",9)))
27289+ {
27290+ if ((header->cmdSts != CMD_STS_TRANSACTION ) &&
27291+ ( header->cmdSts != CMD_STS_SET_SESSION_AUTHORIZATION ) &&
27292+ ( header->cmdSts != CMD_STS_TEMP_TABLE ))
27293+ {
27294+ deleteTransactionTbl(ptr,header);
27295+ }
27296+ }
27297+}
27298+
/*--------------------------------------------------------------------
 * SYMBOL
 *     check_copy_command()
 * NOTES
 *     check the query which it is copy command or not
 *     when the query is 'copy from', set 'stdin' after 'from'
 *
 *     The word following FROM (the data source) is dropped and replaced
 *     by " stdin "; the rest of the query is kept.  Only the spellings
 *     "FROM" and "from" are recognised, and the first occurrence
 *     anywhere in the query is used.
 *
 *     Side effect: the query buffer is truncated in place right after
 *     the FROM keyword (unless FROM is the very end of the query).
 * ARGS
 *     char * query: query strings(I)
 * RETURN
 *     copy command : changed copy command (malloc'ed, caller frees)
 *     other command : NULL
 *     NULL is also returned when query is NULL or malloc() fails
 *--------------------------------------------------------------------
 */
static char *
check_copy_command(char * query)
{
	char * from;
	char * rest;
	char * buf;
	size_t size;

	if (query == NULL)
		return NULL;

	/*
	 * Measure before the query is truncated below.  The extra byte is for
	 * the terminator: when nothing follows FROM the output is exactly
	 * strlen(query) + strlen(" stdin ") characters long.
	 */
	size = strlen(query) + strlen(" stdin ") + 1;

	from = strstr(query,"FROM");
	if (from == NULL)
		from = strstr(query,"from");
	if (from == NULL)
		return NULL;

	rest = from + strlen("FROM");
	/*
	 * Only step past the keyword when something follows it; otherwise
	 * rest already points at the terminator and advancing would read
	 * beyond the end of the string.
	 */
	if (*rest != '\0')
	{
		*rest = '\0';			/* cut the query after FROM */
		rest ++;
		/* skip blanks, then the source name itself */
		while (isspace((unsigned char) *rest))
			rest ++;
		while ((*rest != '\0') && (!isspace((unsigned char) *rest)))
			rest ++;
	}

	buf = malloc(size);
	if (buf == NULL)
	{
		return NULL;
	}
	snprintf(buf,size,"%s stdin %s",query,rest);
	return buf;
}
27346+
27347+static int
27348+next_replication_id(void)
27349+{
27350+ char * func = "next_replication_id()";
27351+
27352+ if (Recovery_Status_Inf == (RecoveryStatusInf *)NULL)
27353+ {
27354+ show_error("%s: Recovery_Status_Inf is NULL",func);
27355+ return -1;
27356+ }
27357+ Recovery_Status_Inf->replication_id ++;
27358+ Recovery_Status_Inf->check_point --;
27359+ return (Recovery_Status_Inf->replication_id);
27360+}
27361+
27362+static void
27363+check_replication_id(void)
27364+{
27365+ char * func = "check_replication_id()";
27366+
27367+ if (Recovery_Status_Inf == (RecoveryStatusInf *)NULL)
27368+ {
27369+ show_error("%s: Recovery_Status_Inf is NULL",func);
27370+ return ;
27371+ }
27372+ if (Recovery_Status_Inf->check_point < 0)
27373+ {
27374+ Recovery_Status_Inf->check_point = PGR_CHECK_POINT ;
27375+ rewind(RidFp);
27376+ PGRwrite_log_file(RidFp,"%u",Recovery_Status_Inf->replication_id + PGR_CHECK_POINT );
27377+ }
27378+}
27379+
27380+int
27381+PGRset_replication_id(uint32_t id)
27382+{
27383+ Recovery_Status_Inf->replication_id = id;
27384+ return (Recovery_Status_Inf->replication_id);
27385+}
27386+
27387+int
27388+PGRdo_replicate(int sock,ReplicateHeader *header, char * query)
27389+{
27390+
27391+ char * func = "PGRdo_replicate()";
27392+
27393+ struct timeval tv;
27394+ int status = STATUS_OK;
27395+ int recovery_status = 0;
27396+ char * query_string = NULL;
27397+
27398+ if (header->cmdType == CMD_TYPE_COPY)
27399+ {
27400+ query_string = check_copy_command(query);
27401+ if (query_string == NULL)
27402+ {
27403+ return LOOP_CONTINUE;
27404+ }
27405+ }
27406+ else
27407+ {
27408+ query_string = query;
27409+ if (header->cmdType == CMD_TYPE_SET)
27410+ {
27411+ if (is_autocommit_off(query_string) == true)
27412+ {
27413+ PGR_AutoCommit = false;
27414+ }
27415+ else if (is_autocommit_on(query_string) == true)
27416+ {
27417+ PGR_AutoCommit = true;
27418+ }
27419+ }
27420+ }
27421+ header->isAutoCommit=PGR_AutoCommit ? 1 : 0;
27422+ gettimeofday(&tv,NULL);
27423+ header->tv.tv_sec = htonl(tv.tv_sec);
27424+ header->tv.tv_usec = htonl(tv.tv_usec);
27425+#ifdef PRINT_DEBUG
27426+ show_debug("%s:query :: %s",func,query_string);
27427+#endif
27428+
27429+ /* set query id */
27430+ header->query_id = htonl(PGRget_next_query_id());
27431+
27432+ /* save header for logging */
27433+ if (is_need_sync_time(header) == true)
27434+ {
27435+ if (PGR_Log_Header != NULL)
27436+ {
27437+ memcpy(PGR_Log_Header,header,sizeof(ReplicateHeader));
27438+ if (header->rlog == 0)
27439+ {
27440+ PGR_Log_Header->replicate_id = htonl(next_replication_id());
27441+ }
27442+ }
27443+ }
27444+ /* check rlog */
27445+ if (header->rlog == CONNECTION_SUSPENDED_TYPE )
27446+ {
27447+ if (PGRget_rlog_header(header) == STATUS_OK)
27448+ {
27449+ header->rlog = CONNECTION_SUSPENDED_TYPE;
27450+
27451+ }
27452+ }
27453+
27454+ /* check recovery mode */
27455+
27456+ recovery_status = PGRget_recovery_status();
27457+ PGRcheck_recovered_host();
27458+
27459+ /* send replication packet */
27460+ status = PGRreplicate_packet_send( header,query_string,sock,recovery_status);
27461+
27462+ if ((header->cmdType == CMD_TYPE_COPY) &&
27463+ (query_string != NULL))
27464+ {
27465+ free(query_string);
27466+ query_string = NULL;
27467+ }
27468+
27469+ if (status == STATUS_ABORTED )
27470+ {
27471+#ifdef PRINT_DEBUG
27472+ show_debug("%s:status is STATUS_ABORTED",func);
27473+#endif
27474+ return LOOP_END;
27475+ }
27476+ if (status == STATUS_DEADLOCK_DETECT)
27477+ {
27478+#ifdef PRINT_DEBUG
27479+ show_debug("%s:status is STATUS_DEADLOCK_DETECT",func);
27480+#endif
27481+ return LOOP_END;
27482+ }
27483+ return LOOP_CONTINUE;
27484+}
27485+
27486+/*--------------------------------------------------------------------
27487+ * SYMBOL
27488+ * PGRreturn_result()
27489+ * NOTES
27490+ * Return result of execution
27491+ * ARGS
27492+ * int dest: socket of destination server (I)
27493+ * char *result: result data(I)
27494+ * int wait: wait flag (I)
27495+ * RETURN
27496+ * OK: STATUS_OK
27497+ * NG: STATUS_ERROR
27498+ * NG: STATUS_LOCK_CONFLICT
27499+ * NG: STATUS_DEADLOCK_DETECT
27500+ *--------------------------------------------------------------------
27501+ */
27502+int
27503+PGRreturn_result(int dest, char * result, int wait)
27504+{
27505+ char * func = "PGRreturn_result()";
27506+ fd_set wmask;
27507+ struct timeval timeout;
27508+ int rtn = 0;
27509+ char * send_ptr = NULL;
27510+ int send_size= 0;
27511+ int buf_size = 0;
27512+ int s = 0;
27513+ int status = 0;
27514+ int flag = 0;
27515+
27516+ if (result == NULL)
27517+ {
27518+ show_error("%s:result is not initialize",func);
27519+ return STATUS_ERROR;
27520+ }
27521+ if (dest < 0)
27522+ {
27523+ return STATUS_ERROR;
27524+ }
27525+ send_ptr = result;
27526+ buf_size = PGR_MESSAGE_BUFSIZE;
27527+ if (buf_size < 1)
27528+ buf_size = 1;
27529+
27530+ /*
27531+ * Wait for something to happen.
27532+ */
27533+#ifdef MSG_DONTWAIT
27534+ flag |= MSG_DONTWAIT;
27535+#endif
27536+#ifdef MSG_NOSIGNAL
27537+ flag |= MSG_NOSIGNAL;
27538+#endif
27539+
27540+ for (;;)
27541+ {
27542+ timeout.tv_sec = PGR_Replication_Timeout;
27543+ timeout.tv_usec = 0;
27544+
27545+ FD_ZERO(&wmask);
27546+ FD_SET(dest,&wmask);
27547+
27548+ rtn = select(dest+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
27549+ if (rtn < 0)
27550+ {
27551+ if (errno == EINTR || errno == EAGAIN)
27552+ continue;
27553+
27554+ show_error("%s:select failed ,errno is %s",func , strerror(errno));
27555+ return STATUS_ERROR;
27556+ }
27557+ else if (rtn && FD_ISSET(dest, &wmask))
27558+ {
27559+ s = send(dest,send_ptr + send_size,buf_size - send_size ,flag);
27560+ if (s < 0)
27561+ {
27562+ if (errno == EINTR || errno == EAGAIN)
27563+ continue;
27564+ else
27565+ {
27566+ show_error("%s:send error: %d(%s)", func, errno, strerror(errno));
27567+ memset(send_ptr, 0, PGR_MESSAGE_BUFSIZE);
27568+ return STATUS_ERROR;
27569+ }
27570+ }
27571+ else if (s > 0)
27572+ {
27573+ send_size += s;
27574+ if (send_size == buf_size)
27575+ {
27576+
27577+ status = STATUS_OK;
27578+ if (wait == PGR_WAIT_ANSWER)
27579+ {
27580+ status = read_answer(dest);
27581+ }
27582+ return status;
27583+ }
27584+ }
27585+ else /* s == 0 */
27586+ {
27587+ show_error("%s:unexpected EOF", func);
27588+ memset(send_ptr, 0, PGR_MESSAGE_BUFSIZE);
27589+ return STATUS_ERROR;
27590+ }
27591+ }
27592+ }
27593+ memset(send_ptr, 0, PGR_MESSAGE_BUFSIZE);
27594+ return STATUS_ERROR;
27595+}
27596+
27597+/*--------------------------------------------------------------------
27598+ * SYMBOL
27599+ * read_answer()
27600+ * NOTES
27601+ * Receive answer packet
27602+ * ARGS
27603+ * int dest: socket of destination server (I)
27604+ * RETURN
27605+ * OK: STATUS_OK
27606+ * NG: STATUS_ERROR
27607+ * NG: STATUS_LOCK_CONFLICT
27608+ * NG: STATUS_DEADLOCK_DETECT
27609+ *--------------------------------------------------------------------
27610+ */
27611+static int
27612+read_answer(int dest)
27613+{
27614+ char * func = "read_answer()";
27615+ fd_set rmask;
27616+ struct timeval timeout;
27617+ int rtn;
27618+ ReplicateHeader header;
27619+ char * answer = NULL;
27620+ int status = STATUS_ERROR;
27621+
27622+ for(;;)
27623+ {
27624+ if (answer != NULL)
27625+ {
27626+ free(answer);
27627+ answer = NULL;
27628+ }
27629+ timeout.tv_sec = PGR_Replication_Timeout;
27630+ timeout.tv_usec = 0;
27631+ FD_ZERO(&rmask);
27632+ FD_SET(dest,&rmask);
27633+ rtn = select(dest+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
27634+ if (rtn < 0)
27635+ {
27636+ if (errno == EINTR || errno == EAGAIN)
27637+ continue;
27638+
27639+ show_error("%s:select failed ,errno is %s",func , strerror(errno));
27640+ return STATUS_ERROR;
27641+ }
27642+ else if (rtn && FD_ISSET(dest, &rmask))
27643+ {
27644+ memset(&header,0,sizeof(ReplicateHeader));
27645+ answer = PGRread_packet(dest,&header);
27646+ if (answer == NULL)
27647+ {
27648+ status = STATUS_ERROR;
27649+ break;
27650+ }
27651+ if ((header.cmdSts != CMD_STS_RESPONSE) &&
27652+ (header.cmdSts != CMD_STS_NOTICE))
27653+ {
27654+ show_error("%s:none response packet received",func);
27655+ free(answer);
27656+ answer = NULL;
27657+ status = STATUS_ERROR;
27658+ break;
27659+ }
27660+#ifdef PRINT_DEBUG
27661+ show_debug("%s:answer[%s]",func,answer);
27662+#endif
27663+ if (answer != NULL)
27664+ {
27665+ if (!strncasecmp(answer,PGR_QUERY_DONE_NOTICE_CMD,strlen(PGR_QUERY_DONE_NOTICE_CMD)))
27666+ {
27667+#ifdef PRINT_DEBUG
27668+ show_debug("%s:QUERY DONE",func);
27669+#endif
27670+ status = STATUS_OK;
27671+ }
27672+ else if (!strncasecmp(answer,PGR_QUERY_ABORTED_NOTICE_CMD,strlen(PGR_QUERY_ABORTED_NOTICE_CMD)))
27673+ {
27674+#ifdef PRINT_DEBUG
27675+ show_debug("%s:QUERY ABORTED",func);
27676+#endif
27677+ status = STATUS_ABORTED;
27678+ }
27679+ else if (!strncasecmp(answer,PGR_LOCK_CONFLICT_NOTICE_CMD,strlen(PGR_LOCK_CONFLICT_NOTICE_CMD)))
27680+ {
27681+#ifdef PRINT_DEBUG
27682+ show_debug("%s:LOCK CONFLICT !!",func);
27683+#endif
27684+ status = STATUS_LOCK_CONFLICT;
27685+ }
27686+ else if (!strncasecmp(answer,PGR_DEADLOCK_DETECT_NOTICE_CMD,strlen(PGR_DEADLOCK_DETECT_NOTICE_CMD)))
27687+ {
27688+#ifdef PRINT_DEBUG
27689+ show_debug("%s:DEADLOCK DETECT !!",func);
27690+#endif
27691+ status = STATUS_DEADLOCK_DETECT;
27692+ }
27693+ free(answer);
27694+ answer = NULL;
27695+ }
27696+ return status;
27697+ }
27698+ }
27699+ return status;
27700+}
27701+
27702+/*--------------------------------------------------
27703+ * SYMBOL
27704+ * PGRreplicate_packet_send()
27705+ * NOTES
27706+ * Send query to each cluster DB servers and return result.
27707+ * ARGS
27708+ * ReplicateHeader * header : packet header (I)
27709+ * char * query : query for replication (I)
27710+ * int dest : destination socket for return result (I)
27711+ * RETURN
27712+ * OK : STATUS_OK
27713+ * NG : STATUS_ERROR
27714+ * DEADLOCK : STATUS_DEADLOCK_DETECT
27715+ *---------------------------------------------------
27716+ */
27717+int
27718+PGRreplicate_packet_send( ReplicateHeader * header, char * query,int dest,int recovery_status) {
27719+ return replicate_packet_send_internal(header,query,dest,recovery_status,false);
27720+}
27721+
27722+
27723+int
27724+replicate_packet_send_internal(ReplicateHeader * header, char * query,int dest,int recovery_status,bool isHeldLock)
27725+{
27726+ char * func = "replicate_packet_send_internal()";
27727+ HostTbl * host_ptr = (HostTbl*)NULL;
27728+ HostTbl * source_host_ptr = (HostTbl*)NULL;
27729+ int status = STATUS_OK;
27730+ int sem_cnt = 0;
27731+ int sem_id = 0;
27732+ char *database = NULL;
27733+ char port[8];
27734+ char *userName = NULL;
27735+ char *password = NULL;
27736+ char * md5Salt = NULL;
27737+ char * cryptSalt = NULL;
27738+ char * host = NULL;
27739+ char result[PGR_MESSAGE_BUFSIZE];
27740+
27741+ pthread_attr_t attr;
27742+ int rc = 0;
27743+ int t = 0;
27744+ int t_cnt = 0;
27745+ int source_t_cnt = -1;
27746+ int transaction_count = 0;
27747+ int *results_from_thread;
27748+ bool reliable_mode = true;
27749+
27750+ pthread_t thread[MAX_DB_SERVER];
27751+ ThreadArgInf thread_arg[MAX_DB_SERVER];
27752+
27753+
27754+#ifdef PRINT_DEBUG
27755+ show_debug("cmdSts=%c",header->cmdSts);
27756+ if(header->cmdType!='\0')
27757+ show_debug("cmdType=%c",header->cmdType);
27758+ show_debug("rlog=%d",header->rlog);
27759+ show_debug("port=%d",ntohs(header->port));
27760+ show_debug("pid=%d",ntohs(header->pid));
27761+ show_debug("from_host=%s",header->from_host);
27762+ show_debug("dbName=%s",header->dbName);
27763+ show_debug("userName=%s",header->userName);
27764+ show_debug("recieve sec=%u",ntohl(header->tv.tv_sec));
27765+ show_debug("recieve usec=%u",ntohl(header->tv.tv_usec));
27766+ show_debug("query_size=%d",ntohl(header->query_size));
27767+ show_debug("request_id=%d",ntohl(header->request_id));
27768+ show_debug("replicate_id=%d",ntohl(header->replicate_id));
27769+ show_debug("recovery_status=%d",recovery_status);
27770+ if (header->cmdSts != CMD_STS_PREPARE)
27771+ show_debug("query=%s",query);
27772+
27773+#endif
27774+
27775+ /* check rlog type */
27776+ if (header->rlog == FROM_R_LOG_TYPE)
27777+ {
27778+ if (is_executed_query_in_origin(header) == false)
27779+ {
27780+#ifdef PRINT_DEBUG
27781+ show_debug("this query is not yet done in source cluster db. so it wait for receive re-replicate request");
27782+#endif
27783+ /* wait re-replicate request */
27784+ return STATUS_SKIP_REPLICATE;
27785+ }
27786+ }
27787+ /*
27788+ * loop while registrated cluster DB exist
27789+ */
27790+ if (Host_Tbl_Begin == NULL)
27791+ {
27792+ return STATUS_ERROR;
27793+ }
27794+ host_ptr = Host_Tbl_Begin;
27795+ PGR_Response_Inf->current_cluster = 0;
27796+ memset(result,0,sizeof(result));
27797+ sem_cnt = 1;
27798+
27799+ if (is_need_queue_jump(header,query) == false)
27800+ {
27801+ sem_id = SemID;
27802+ }
27803+ else
27804+ {
27805+ sem_id = VacuumSemID;
27806+ }
27807+ if(!isHeldLock) {
27808+#ifdef PRINT_DEBUG
27809+ show_debug("sem_lock [%d] req",sem_cnt);
27810+#endif
27811+
27812+ PGRsem_lock(sem_id,sem_cnt);
27813+#ifdef PRINT_DEBUG
27814+ show_debug("sem_lock [%d] got it",sem_cnt);
27815+#endif
27816+ }
27817+ ++*PGR_ReplicateSerializationID;
27818+
27819+ /* set replication log */
27820+ if (is_need_use_rlog(header) == true)
27821+ {
27822+ PGRset_rlog(header,query);
27823+ }
27824+
27825+ pthread_attr_init(&attr);
27826+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
27827+ PGR_Response_Inf->current_cluster = 0;
27828+ t_cnt = 0;
27829+ while(host_ptr->useFlag != DB_TBL_END)
27830+ {
27831+ /*
27832+ * check the status of the cluster DB
27833+ */
27834+ if ((host_ptr->useFlag != DB_TBL_USE) &&
27835+ (host_ptr->useFlag != DB_TBL_INIT))
27836+ {
27837+ host_ptr ++;
27838+ continue;
27839+ }
27840+ /*
27841+ * skip loop during recover and the host name is master DB
27842+ */
27843+ if (is_master_in_recovery(host_ptr->hostName, host_ptr->port,recovery_status) == true)
27844+ {
27845+ if (PGRset_queue(header,query) != STATUS_OK)
27846+ {
27847+ show_error("%s:failed to put query to queue.abort to recovery",func);
27848+ PGRset_recovery_status(RECOVERY_INIT);
27849+ }
27850+#ifdef PRINT_DEBUG
27851+ show_debug("%s master is using for recovery",func);
27852+#endif
27853+ host_ptr ++;
27854+ continue;
27855+ }
27856+ host_ptr = check_host_transaction_status(header, host_ptr);
27857+ /*
27858+ * compare with the host name and the exceptional host name
27859+ */
27860+ thread_arg[t_cnt].header = header;
27861+ thread_arg[t_cnt].query = query;
27862+ thread_arg[t_cnt].dest = dest;
27863+ thread_arg[t_cnt].host_ptr = host_ptr;
27864+ thread_arg[t_cnt].current_cluster = t_cnt;
27865+ thread_arg[t_cnt].transaction_tbl = (TransactionTbl *)NULL;
27866+
27867+ if (PGRis_same_host(header->from_host,ntohs(header->port),host_ptr->resolvedName, host_ptr->port) == true)
27868+ {
27869+#ifdef PRINT_DEBUG
27870+ show_debug("source host");
27871+#endif
27872+ /* replication to source cluster db */
27873+ source_host_ptr = host_ptr;
27874+ source_t_cnt = t_cnt;
27875+
27876+ if (header->rlog == FROM_R_LOG_TYPE )
27877+ {
27878+#ifdef PRINT_DEBUG
27879+ show_debug("%s: This simple query was suspended. Therefore this query is not re-replicated to source cluster db.",func);
27880+#endif
27881+ }
27882+ check_transaction_status(header, thread_arg[t_cnt].transaction_tbl);
27883+ t_cnt++;
27884+ }
27885+ /* replication to other cluster db */
27886+ else
27887+ {
27888+ if ((header->rlog == CONNECTION_SUSPENDED_TYPE ) &&
27889+ (header->cmdSts == CMD_STS_TRANSACTION) )
27890+ {
27891+#ifdef PRINT_DEBUG
27892+ show_debug("%s: This transaction query was suspended. Therefore this query is not replicated to other cluster dbs.",func);
27893+#endif
27894+ }
27895+ else
27896+ {
27897+ /*
27898+ * get the transaction table data
27899+ * it has the connection data with each cluster DB
27900+ */
27901+ thread_arg[t_cnt].transaction_tbl = getTransactionTbl(host_ptr,header);
27902+ /*
27903+ * if the transaction process is new one,
27904+ * create connection data and add the transaction table
27905+ */
27906+ if (thread_arg[t_cnt].transaction_tbl == (TransactionTbl *)NULL)
27907+ {
27908+ thread_arg[t_cnt].transaction_tbl = setTransactionTbl(host_ptr, header);
27909+ if (thread_arg[t_cnt].transaction_tbl == (TransactionTbl *)NULL)
27910+ {
27911+ show_error("%s:setTransactionTbl failed",func);
27912+ if ( header->cmdSts != CMD_STS_NOTICE )
27913+ {
27914+ PGRset_host_status(host_ptr,DB_TBL_ERROR);
27915+ }
27916+ host_ptr ++;
27917+ continue;
27918+ }
27919+ StartReplication[t_cnt] = true;
27920+ }
27921+ else
27922+ {
27923+ /*
27924+ * re-use the connection data
27925+ */
27926+ if ((thread_arg[t_cnt].transaction_tbl->conn != (PGconn *)NULL) &&
27927+ (thread_arg[t_cnt].transaction_tbl->conn->sock > 0))
27928+ {
27929+ /*
27930+ memset(thread_arg[t_cnt].transaction_tbl->conn->inBuffer,0,thread_arg[t_cnt].transaction_tbl->conn->inBufSize);
27931+ memset(thread_arg[t_cnt].transaction_tbl->conn->outBuffer,0,thread_arg[t_cnt].transaction_tbl->conn->outBufSize);
27932+ */
27933+ StartReplication[t_cnt] = false;
27934+ }
27935+ else
27936+ {
27937+ if (thread_arg[t_cnt].transaction_tbl->conn != (PGconn *)NULL)
27938+ {
27939+ PQfinish(thread_arg[t_cnt].transaction_tbl->conn);
27940+ thread_arg[t_cnt].transaction_tbl->conn = NULL;
27941+ }
27942+
27943+ database = (char *)(header->dbName);
27944+ snprintf(port,sizeof(port),"%d", host_ptr->port);
27945+ userName = (char *)(header->userName);
27946+ password = (char *)(header->password);
27947+ md5Salt = (char *)(header->md5Salt);
27948+ cryptSalt = (char *)(header->cryptSalt);
27949+ host = (char *)(host_ptr->hostName);
27950+
27951+ thread_arg[t_cnt].transaction_tbl->conn = PGRcreateConn(host,port,database,userName,password,md5Salt,cryptSalt);
27952+ StartReplication[t_cnt] = true;
27953+#ifdef PRINT_DEBUG
27954+ show_debug("%s:connect db:%s port:%s user:%s host:%s query:%s",
27955+ func, database,port,userName,host,query);
27956+#endif
27957+ }
27958+ }
27959+ check_transaction_status(header, thread_arg[t_cnt].transaction_tbl);
27960+ transaction_count = thread_arg[t_cnt].transaction_tbl->transaction_count;
27961+ rc = pthread_create(&thread[t_cnt], &attr, thread_send_cluster, (void*)&thread_arg[t_cnt]);
27962+
27963+ if (rc)
27964+ {
27965+ show_error("pthread_create error");
27966+ }
27967+ t_cnt++;
27968+ }
27969+ }
27970+ /*
27971+ * send replication query to each cluster server
27972+ */
27973+ if (host_ptr->useFlag != DB_TBL_USE)
27974+ {
27975+ PGRset_host_status(host_ptr,DB_TBL_USE);
27976+ }
27977+
27978+ host_ptr++;
27979+ PGR_Response_Inf->current_cluster ++;
27980+ status = STATUS_OK;
27981+ }
27982+
27983+ /* When the query is SELECT, source cluster would not need to wait other cluster's result */
27984+ if ((header->cmdType == CMD_TYPE_SELECT) && (header->cmdSts != CMD_STS_PREPARE))
27985+ {
27986+ thread_send_source( (void*)&thread_arg[source_t_cnt]);
27987+ reliable_mode = false;
27988+ }
27989+
27990+ pthread_attr_destroy(&attr);
27991+
27992+ results_from_thread = malloc(t_cnt * sizeof(int));
27993+ for ( t = 0 ; t < t_cnt; )
27994+ {
27995+ int result;
27996+ if (t == source_t_cnt)
27997+ {
27998+ t++;
27999+ continue;
28000+ }
28001+ rc = pthread_join(thread[t], (void **)&result);
28002+ if ((rc != 0) && (errno == EINTR))
28003+ {
28004+ usleep(100);
28005+ continue;
28006+ }
28007+ results_from_thread[t] = (int)result;
28008+ pthread_detach(thread[t]);
28009+ t++;
28010+ }
28011+
28012+ if (compare_results(results_from_thread, t_cnt, source_t_cnt) == false)
28013+ show_error("query results discrepancy between cluster servers: %s", query);
28014+ free(results_from_thread);
28015+
28016+ thread_arg[source_t_cnt].transaction_count = transaction_count;
28017+ /*
28018+ * send replication query to source cluster server.
28019+ */
28020+ if ((source_t_cnt >= 0) && ( reliable_mode == true ))
28021+ {
28022+ thread_send_source( (void*)&thread_arg[source_t_cnt]);
28023+ }
28024+ /* unset replication log */
28025+ if (is_need_use_rlog(header) == true)
28026+ {
28027+ PGRunset_rlog(header,query);
28028+ }
28029+
28030+ check_replication_id();
28031+ if (header->cmdSts == CMD_STS_PREPARE)
28032+ {
28033+ if (header->cmdType != CMD_TYPE_P_SYNC)
28034+ {
28035+ if (PGR_Parse_Session_Started == false)
28036+ {
28037+ PGR_Parse_Session_Started = true;
28038+ }
28039+ }
28040+ }
28041+ else
28042+ {
28043+ PGR_Parse_Session_Started = false;
28044+ }
28045+
28046+ if(!isHeldLock) {
28047+#ifdef PRINT_DEBUG
28048+ show_debug("sem_unlock[%d]",sem_cnt);
28049+#endif
28050+ PGRsem_unlock(sem_id,sem_cnt);
28051+ }
28052+
28053+ return status;
28054+}
28055+
28056+static void *
28057+thread_send_source(void * arg)
28058+{
28059+ char * func = "thread_send_source()";
28060+ ThreadArgInf * thread_arg = NULL;
28061+ ReplicateHeader * header = (ReplicateHeader*)NULL;
28062+ char * query = NULL;
28063+ int dest = 0;
28064+ HostTbl * host_ptr = (HostTbl*)NULL;
28065+ int status = STATUS_OK;
28066+ int transaction_count = 0;
28067+ char result[PGR_MESSAGE_BUFSIZE];
28068+ bool sync_command_flg = false;
28069+
28070+ if (arg == NULL)
28071+ {
28072+ show_error("%s:arg is NULL",func);
28073+ status = STATUS_ERROR;
28074+ pthread_exit((void *) status);
28075+ }
28076+ thread_arg = (ThreadArgInf *)arg;
28077+ header = thread_arg->header;
28078+ query = thread_arg->query;
28079+ dest = thread_arg->dest;
28080+ host_ptr = thread_arg->host_ptr;
28081+ transaction_count = thread_arg->transaction_count;
28082+
28083+ if(header->cmdSts==CMD_STS_OTHER &&
28084+ header->cmdType==CMD_TYPE_CONNECTION_CLOSE)
28085+ {
28086+ return (void *)0;
28087+ }
28088+
28089+ if (header->rlog == FROM_R_LOG_TYPE )
28090+ {
28091+ /* It is not necessary to return rlog to source DB. */
28092+#ifdef PRINT_DEBUG
28093+ show_debug("%s: It is not necessary to return rlog to source DB",func);
28094+#endif
28095+ status = STATUS_OK;
28096+ return (void *)status;
28097+ }
28098+
28099+ /**
28100+ * NOTE:
28101+ * We can use PGR_ReplicateSerializationID here , because
28102+ * all queries from cluster server isn't recovery query.
28103+ *
28104+ */
28105+ if (is_need_sync_time(header) == true)
28106+ {
28107+ if (transaction_count >1 )
28108+ {
28109+ sync_command_flg = false;
28110+ }
28111+ else
28112+ {
28113+ sync_command_flg = true;
28114+ }
28115+ }
28116+ if (sync_command_flg == true)
28117+ {
28118+ snprintf(result,PGR_MESSAGE_BUFSIZE,
28119+ "%d,%u,%u,%u,%d,%u",
28120+ PGR_SET_CURRENT_TIME_FUNC_NO,
28121+ (unsigned int)ntohl(header->tv.tv_sec),
28122+ (unsigned int)ntohl(header->tv.tv_usec),
28123+ (unsigned int)ntohl(PGR_Log_Header->replicate_id),
28124+ PGR_Response_Inf->response_mode,
28125+ *PGR_ReplicateSerializationID);
28126+ }
28127+ else
28128+ {
28129+ snprintf(result,PGR_MESSAGE_BUFSIZE,
28130+ "%d,%u,%u,%d",
28131+ PGR_SET_CURRENT_REPLICATION_QUERY_ID_NO,
28132+ *PGR_ReplicateSerializationID,
28133+ 0,
28134+ PGR_Response_Inf->response_mode);
28135+ }
28136+ /* execute query in the exceptional host */
28137+ /* it is not use replication */
28138+ if (is_need_wait_answer(header) == true)
28139+ {
28140+ status = PGRreturn_result(dest,result, PGR_WAIT_ANSWER);
28141+ }
28142+ else
28143+ {
28144+ status = PGRreturn_result(dest, result, PGR_NOWAIT_ANSWER);
28145+ }
28146+
28147+ /*
28148+ if (status == STATUS_ERROR )
28149+ {
28150+ show_error("%s: %s[%d] should be down ",func,host_ptr->hostName,host_ptr->port);
28151+ PGRset_host_status(host_ptr,DB_TBL_ERROR);
28152+ }
28153+ */
28154+
28155+ /* delete server table when query use template db */
28156+ if (PGR_Response_Inf->response_mode != PGR_RELIABLE_MODE)
28157+ {
28158+ delete_template(host_ptr,header);
28159+ }
28160+#ifdef PRINT_DEBUG
28161+ show_debug("end thread_send_source()");
28162+#endif
28163+ return (void *)0;
28164+}
28165+
28166+static void *
28167+thread_send_cluster(void * arg)
28168+{
28169+ char * func = "thread_send_cluster()";
28170+ ThreadArgInf * thread_arg = NULL;
28171+ ReplicateHeader * header = (ReplicateHeader*)NULL;
28172+ char * query = NULL;
28173+ int dest = 0;
28174+ HostTbl * host_ptr = (HostTbl*)NULL;
28175+ int rtn = 0;
28176+ int status = STATUS_OK;
28177+ TransactionTbl * transaction_tbl = (TransactionTbl *)NULL;
28178+ int current_cluster = 0;
28179+ char result[PGR_MESSAGE_BUFSIZE];
28180+
28181+#ifdef PRINT_DEBUG
28182+ show_debug("start thread_send_cluster()");
28183+#endif
28184+ if (arg == NULL)
28185+ {
28186+ show_error("%s:arg is NULL",func);
28187+ status = STATUS_ERROR;
28188+ pthread_exit((void *) status);
28189+ }
28190+
28191+ thread_arg = (ThreadArgInf *)arg;
28192+ header = thread_arg->header;
28193+ query = thread_arg->query;
28194+ dest = thread_arg->dest;
28195+ host_ptr = thread_arg->host_ptr;
28196+ transaction_tbl = thread_arg->transaction_tbl;
28197+ current_cluster = thread_arg->current_cluster;
28198+
28199+
28200+ if(header->cmdSts==CMD_STS_OTHER &&
28201+ header->cmdType==CMD_TYPE_CONNECTION_CLOSE)
28202+ {
28203+ check_delete_transaction(host_ptr, header);
28204+ return (void *)0;
28205+ }
28206+
28207+ rtn = send_replicate_packet_to_server( transaction_tbl, current_cluster, host_ptr, header, query , result,*PGR_ReplicateSerializationID, false);
28208+
28209+#ifdef PRINT_DEBUG
28210+ show_debug("%s:return value from send_replicate_packet_to_server() is %d",func,rtn);
28211+#endif
28212+ if (rtn == STATUS_ABORTED)
28213+ {
28214+ snprintf(result,PGR_MESSAGE_BUFSIZE,"%d", PGR_NOTICE_ABORT_FUNC_NO);
28215+ status = PGRreturn_result(dest, result, PGR_NOWAIT_ANSWER);
28216+ status = STATUS_ABORTED;
28217+ pthread_exit((void *) status);
28218+ }
28219+ /* delete server table when query use template db */
28220+ delete_template(host_ptr,header);
28221+#ifdef PRINT_DEBUG
28222+ show_debug("%s:pthread_exit[%d]",func,current_cluster );
28223+#endif
28224+
28225+ pthread_exit((void *) rtn);
28226+}
28227+
28228+/*--------------------------------------------------
28229+ * SYMBOL
28230+ * PGRreplicate_packet_send_each_server()
28231+ * NOTES
28232+ * Send query to a cluster DB server and return result.
28233+ * ARGS
28234+ * HostTbl * ptr : cluster server info table (I)
28235+ * bool return_response : flag for return result(I)
28236+ * ReplicateHeader * header: header data (I)
28237+ * char * query : query data (I)
28238+ * int dest : socket of destination server(I)
28239+ * RETURN
28240+ * OK : STATUS_OK
28241+ * NG : STATUS_ERROR
28242+ *---------------------------------------------------
28243+ */
28244+int
28245+PGRreplicate_packet_send_each_server( HostTbl * ptr, bool return_response, ReplicateHeader * header, char * query,int dest)
28246+{
28247+ char * func = "PGRreplicate_packet_send_each_server()";
28248+ char * host;
28249+ int rtn;
28250+
28251+ host = ptr->hostName;
28252+ /*
28253+ * send query to cluster DB
28254+ */
28255+ if (PGR_Result == NULL)
28256+ {
28257+ show_error("%s:PGR_Result is not initialize",func);
28258+ return STATUS_ERROR;
28259+ }
28260+
28261+ rtn = PGRsend_replicate_packet_to_server( ptr, header,query,PGR_Result, dest, false);
28262+
28263+ return rtn;
28264+}
28265+
28266+/*--------------------------------------------------
28267+ * SYMBOL
28268+ * PGRread_packet()
28269+ * NOTES
28270+ * Read packet data and send the query to each cluster DB.
28271+ * The packet data has header data and query data.
28272+ * ARGS
28273+ * int sock : socket (I)
28274+ * ReplicateHeader *header : header data (O)
28275+ * RETURN
28276+ * OK: pointer of read query
28277+ * NG: NULL
28278+ *---------------------------------------------------
28279+ */
28280+char *
28281+PGRread_packet(int sock, ReplicateHeader *header)
28282+{
28283+ char * func = "PGRread_packet()";
28284+ int r =0;
28285+ int cnt = 0;
28286+ char * read_ptr = NULL;
28287+ int read_size = 0;
28288+ int header_size = 0;
28289+ char * query = NULL;
28290+ fd_set rmask;
28291+ struct timeval timeout;
28292+ int rtn;
28293+
28294+ if (header == NULL)
28295+ {
28296+ return NULL;
28297+ }
28298+ memset(header,0,sizeof(ReplicateHeader));
28299+ read_ptr = (char*)header;
28300+ header_size = sizeof(ReplicateHeader);
28301+ cnt = 0;
28302+
28303+ for (;;){
28304+ /*
28305+ * read header data
28306+ */
28307+
28308+ timeout.tv_sec = 1;
28309+ timeout.tv_usec = 0;
28310+
28311+ /*
28312+ * Wait for something to happen.
28313+ */
28314+ FD_ZERO(&rmask);
28315+ FD_SET(sock,&rmask);
28316+ rtn = select(sock+1, &rmask, (fd_set *)NULL,(fd_set *)NULL, &timeout);
28317+
28318+ if (rtn < 0)
28319+ {
28320+ if (errno == EINTR || errno == EAGAIN)
28321+ continue;
28322+
28323+ show_error("%s:select failed ,errno is %s",func , strerror(errno));
28324+ return NULL;
28325+ }
28326+
28327+ if (rtn && FD_ISSET(sock, &rmask))
28328+ {
28329+ r = recv(sock,read_ptr + read_size ,header_size - read_size, MSG_WAITALL);
28330+ /*
28331+ r = recv(sock,read_ptr + read_size ,header_size - read_size, 0);
28332+ */
28333+ if (r < 0)
28334+ {
28335+ show_error("%s:recv failed: (%s)",func,strerror(errno));
28336+ if (errno == EINTR || errno == EAGAIN)
28337+ continue;
28338+ else
28339+ {
28340+ show_error("%s:recv failed: (%s)",func,strerror(errno));
28341+ return NULL;
28342+ }
28343+ }
28344+ else if (r > 0)
28345+ {
28346+ read_size += r;
28347+ if ( read_size == header_size)
28348+ {
28349+ query = PGRread_query(sock,header);
28350+ return query;
28351+ }
28352+ }
28353+ else if (r == 0)
28354+ {
28355+ return NULL;
28356+ }
28357+ }
28358+ }
28359+ return NULL;
28360+}
28361+
28362+char *
28363+PGRread_query(int sock, ReplicateHeader *header)
28364+{
28365+ char * func = "PGRread_query()";
28366+ int r =0;
28367+ int cnt = 0;
28368+ char * read_ptr;
28369+ int read_size = 0;
28370+ int query_size = 0;
28371+ char * query = NULL;
28372+
28373+ query_size = ntohl(header->query_size);
28374+ if (query_size < 0)
28375+ {
28376+ show_error("%s:receive size less than 0",func);
28377+ return NULL;
28378+ }
28379+ query = malloc(query_size+4);
28380+ if (query == NULL)
28381+ {
28382+ /*
28383+ * buffer allocation failed
28384+ */
28385+ show_error("%s:malloc failed: (%s)",func,strerror(errno));
28386+ return NULL;
28387+ }
28388+ memset(query,0,query_size+4);
28389+ if (query_size == 0)
28390+ {
28391+ return query;
28392+ }
28393+ read_size = 0;
28394+ cnt = 0;
28395+ read_ptr = (char *)query;
28396+ for (;;)
28397+ {
28398+ /*
28399+ * read query data
28400+ */
28401+
28402+ /*r = recv(sock,read_ptr + read_size ,query_size - read_size, MSG_WAITALL); */
28403+ r = recv(sock,read_ptr + read_size ,query_size - read_size, 0);
28404+ if (r < 0)
28405+ {
28406+ if (errno == EINTR || errno == EAGAIN)
28407+ continue;
28408+ else
28409+ {
28410+ show_error("%s:recv failed: (%s)",func,strerror(errno));
28411+ free(query);
28412+ query = NULL;
28413+ return NULL;
28414+ }
28415+ }
28416+ else if (r > 0)
28417+ {
28418+ read_size += r;
28419+ if ( read_size == query_size)
28420+ {
28421+ return query;
28422+ }
28423+ }
28424+ else /* r == 0 */
28425+ {
28426+ show_error("%s:unexpected EOF", func);
28427+ free(query);
28428+ query = NULL;
28429+ return NULL;
28430+ }
28431+ }
28432+ free(query);
28433+ query = NULL;
28434+ return NULL;
28435+}
28436+
/*
 * is_autocommit_off()
 *
 * Return true when 'query' looks like a command switching autocommit off:
 * the word AUTOCOMMIT occurs (any letter case) and is followed somewhere
 * by OFF.  Only the first 254 characters of the query are examined.
 * A NULL query yields false.
 */
static bool
is_autocommit_off(char * query)
{
	static const char keyword[] = "AUTOCOMMIT";
	char buf[256];
	size_t i = 0;
	char * p = NULL;

	if (query == NULL)
		return false;

	/*
	 * Build an upper-cased, NUL-terminated copy.  toupper() requires an
	 * unsigned char value; a plain char may be negative for non-ASCII
	 * bytes.
	 */
	while ((query[i] != '\0') && (i < (sizeof(buf) - 2)))
	{
		buf[i] = (char) toupper((unsigned char) query[i]);
		i++;
	}
	buf[i] = '\0';

	p = strstr(buf, keyword);
	if (p == NULL)
	{
		return false;
	}
	/* the value must come after the keyword, not anywhere in the text */
	p += sizeof(keyword) - 1;
	if (strstr(p, "OFF") == NULL)
	{
		return false;
	}
	return true;
}
28468+
/*
 * is_autocommit_on()
 *
 * Return true when 'query' looks like a command switching autocommit on:
 * the word AUTOCOMMIT occurs (any letter case) and is followed somewhere
 * by ON.  Only the first 254 characters of the query are examined.
 * A NULL query yields false.
 */
static bool
is_autocommit_on(char * query)
{
	static const char keyword[] = "AUTOCOMMIT";
	char buf[256];
	size_t i = 0;
	char * p = NULL;

	if (query == NULL)
		return false;

	/*
	 * Build an upper-cased, NUL-terminated copy.  toupper() requires an
	 * unsigned char value; a plain char may be negative for non-ASCII
	 * bytes.
	 */
	while ((query[i] != '\0') && (i < (sizeof(buf) - 2)))
	{
		buf[i] = (char) toupper((unsigned char) query[i]);
		i++;
	}
	buf[i] = '\0';

	p = strstr(buf, keyword);
	if (p == NULL)
	{
		return false;
	}
	/*
	 * Look for the value only after the keyword: "ON" also occurs inside
	 * words such as SESSION or TRANSACTION that may precede it.
	 */
	p += sizeof(keyword) - 1;
	if (strstr(p, "ON") == NULL)
	{
		return false;
	}
	return true;
}
28500+
28501+static unsigned int
28502+get_host_ip_from_tbl(char * host)
28503+{
28504+ Dlelem * ptr = NULL;
28505+
28506+ pthread_mutex_lock(&transaction_table_mutex);
28507+ if (Transaction_Tbl_Begin == NULL)
28508+ {
28509+ pthread_mutex_unlock(&transaction_table_mutex);
28510+ return 0;
28511+ }
28512+ ptr = DLGetHead(Transaction_Tbl_Begin);
28513+ while (ptr)
28514+ {
28515+ TransactionTbl *transaction = DLE_VAL(ptr);
28516+ if (!strncasecmp(transaction->host,host,sizeof(transaction->host)))
28517+ {
28518+ pthread_mutex_unlock(&transaction_table_mutex);
28519+ return transaction->hostIP;
28520+ }
28521+ ptr = DLGetSucc(ptr);
28522+ }
28523+ pthread_mutex_unlock(&transaction_table_mutex);
28524+
28525+ return 0;
28526+}
28527+
28528+static unsigned int
28529+get_srcHost_ip_from_tbl(char * srcHost)
28530+{
28531+ Dlelem * ptr = NULL;
28532+
28533+ pthread_mutex_lock(&transaction_table_mutex);
28534+
28535+ if (Transaction_Tbl_Begin == NULL)
28536+ {
28537+ pthread_mutex_unlock(&transaction_table_mutex);
28538+
28539+ return 0;
28540+ }
28541+ ptr = DLGetHead(Transaction_Tbl_Begin);
28542+ while (ptr)
28543+ {
28544+ TransactionTbl *transaction = DLE_VAL(ptr);
28545+ if (!strncasecmp(transaction->srcHost,srcHost,sizeof(transaction->srcHost)))
28546+ {
28547+ pthread_mutex_unlock(&transaction_table_mutex);
28548+
28549+ return transaction->srcHostIP;
28550+ }
28551+ ptr = DLGetSucc(ptr);
28552+ }
28553+ pthread_mutex_unlock(&transaction_table_mutex);
28554+
28555+ return 0;
28556+}
28557+
28558+unsigned int
28559+PGRget_next_query_id(void)
28560+{
28561+ if (PGR_Query_ID >= PGR_MAX_QUERY_ID)
28562+ {
28563+ PGR_Query_ID = 0;
28564+ }
28565+ PGR_Query_ID ++;
28566+ return PGR_Query_ID;
28567+}
28568+
28569+
28570+void
28571+PGRnotice_replication_server(char * hostName, unsigned short portNumber,unsigned short recoveryPortNumber, unsigned short lifecheckPortNumber, char * userName)
28572+{
28573+ char * func ="PGRnotice_replication_server()";
28574+ ReplicateHeader header;
28575+ char query[PGR_MESSAGE_BUFSIZE];
28576+
28577+ if (((hostName == NULL) || (*hostName == 0)) ||
28578+ ((userName == NULL) || (*userName == 0)) ||
28579+ ((portNumber == 0) || (recoveryPortNumber == 0)))
28580+ {
28581+#ifdef PRINT_DEBUG
28582+ show_debug("%s: can not connect server[%s][%s][%d][%d]",func,hostName,userName,portNumber,recoveryPortNumber);
28583+#endif
28584+ return;
28585+ }
28586+ memset(&header,0,sizeof(ReplicateHeader));
28587+ memset(query,0,sizeof(query));
28588+ snprintf(query,sizeof(query)-1,"SELECT %s(%d,'%s',%d,%d,%d)",
28589+ PGR_SYSTEM_COMMAND_FUNC,
28590+ PGR_STARTUP_REPLICATION_SERVER_FUNC_NO,
28591+ hostName,
28592+ portNumber,
28593+ recoveryPortNumber,
28594+ lifecheckPortNumber);
28595+ header.cmdSys = CMD_SYS_CALL;
28596+ header.cmdSts = CMD_STS_NOTICE;
28597+ header.query_size = htonl(strlen(query));
28598+ header.query_id = htonl(PGRget_next_query_id());
28599+ strncpy(header.from_host,hostName,sizeof(header.from_host));
28600+ strncpy(header.userName,userName,sizeof(header.userName));
28601+ strcpy(header.dbName,"template1");
28602+ PGRreplicate_packet_send( &header, query, NOTICE_SYSTEM_CALL_TYPE ,RECOVERY_INIT);
28603+}
28604+
28605+static bool
28606+is_need_use_rlog(ReplicateHeader * header)
28607+{
28608+ bool rtn = false;
28609+ if ((Cascade_Inf->useFlag != DB_TBL_USE) ||
28610+ (PGR_Use_Replication_Log != true) ||
28611+ (header->rlog > 0))
28612+ {
28613+ rtn=false;
28614+ }
28615+ else if ((header->cmdSts == CMD_STS_QUERY ) &&
28616+ ((header->cmdType == CMD_TYPE_INSERT) ||
28617+ (header->cmdType == CMD_TYPE_UPDATE) ||
28618+ (header->cmdType == CMD_TYPE_DELETE) ||
28619+ (header->cmdType == CMD_TYPE_EXECUTE)))
28620+ {
28621+ rtn = true;
28622+ }
28623+ else
28624+ {
28625+ if ((header->cmdSts == CMD_STS_TRANSACTION ) &&
28626+ (header->cmdType == CMD_TYPE_COMMIT))
28627+ {
28628+ rtn = true;
28629+ }
28630+ }
28631+ return rtn;
28632+}
28633+
28634+int
28635+PGRinit_transaction_table(void)
28636+{
28637+ if (Transaction_Tbl_Begin != NULL)
28638+ {
28639+ DLFreeList(Transaction_Tbl_Begin);
28640+ }
28641+
28642+ Transaction_Tbl_Begin = DLNewList();
28643+
28644+ return STATUS_OK;
28645+}
28646+
28647+static bool
28648+is_need_queue_jump( ReplicateHeader * header,char *query)
28649+{
28650+ if (header == NULL)
28651+ {
28652+ return true;
28653+ }
28654+
28655+ if (header->cmdSts == CMD_STS_QUERY)
28656+ {
28657+ if ((header->cmdType == CMD_TYPE_VACUUM ) ||
28658+ (header->cmdType == CMD_TYPE_ANALYZE ))
28659+ {
28660+ if ((strstr(query,"full") == NULL) &&
28661+ (strstr(query,"FULL") == NULL))
28662+ {
28663+ return true;
28664+ }
28665+ }
28666+ }
28667+ return false;
28668+}
28669+
28670+
28671+static bool
28672+is_executed_query_in_origin( ReplicateHeader *header )
28673+{
28674+ char *database = NULL;
28675+ char port[8];
28676+ char *userName = NULL;
28677+ char *password = NULL;
28678+ char * md5Salt = NULL;
28679+ char * cryptSalt = NULL;
28680+ char * host = NULL;
28681+ HostTbl * host_ptr = (HostTbl*)NULL;
28682+ TransactionTbl * transaction_tbl = (TransactionTbl*)NULL;
28683+ PGconn * conn = (PGconn *)NULL;
28684+ bool result = false;
28685+
28686+ if (Host_Tbl_Begin == NULL)
28687+ {
28688+ return STATUS_ERROR;
28689+ }
28690+ host_ptr = Host_Tbl_Begin;
28691+ while(host_ptr->useFlag != DB_TBL_END)
28692+ {
28693+ /*
28694+ * check the status of the cluster DB
28695+ */
28696+ if (host_ptr->useFlag != DB_TBL_USE)
28697+ {
28698+ host_ptr ++;
28699+ continue;
28700+ }
28701+ if (PGRis_same_host(header->from_host,ntohs(header->port),host_ptr->hostName, host_ptr->port) == true)
28702+ {
28703+ break;
28704+ }
28705+ host_ptr ++;
28706+ }
28707+ if (host_ptr->useFlag == DB_TBL_END)
28708+ {
28709+ return false;
28710+ }
28711+ /*
28712+ * set up the connection
28713+ */
28714+ transaction_tbl = getTransactionTbl(host_ptr,header);
28715+ if (transaction_tbl == (TransactionTbl *)NULL)
28716+ {
28717+ transaction_tbl = setTransactionTbl(host_ptr, header);
28718+ if (transaction_tbl == (TransactionTbl *)NULL)
28719+ {
28720+ return false;
28721+ }
28722+ }
28723+ else
28724+ {
28725+ if ((transaction_tbl->conn == (PGconn *)NULL) ||
28726+ (transaction_tbl->conn->sock <= 0))
28727+ {
28728+ database = (char *)(header->dbName);
28729+ snprintf(port,sizeof(port),"%d", host_ptr->port);
28730+ userName = (char *)(header->userName);
28731+ password = (char *)(header->password);
28732+ md5Salt = (char *)(header->md5Salt);
28733+ cryptSalt = (char *)(header->cryptSalt);
28734+ host = (char *)(host_ptr->hostName);
28735+ transaction_tbl->conn = PGRcreateConn(host,port,database,userName,password,md5Salt,cryptSalt);
28736+ }
28737+ }
28738+ conn = transaction_tbl->conn;
28739+ if (conn == NULL)
28740+ {
28741+ return false;
28742+ }
28743+
28744+ result = is_executed_query( conn, header);
28745+ deleteTransactionTbl(host_ptr,header);
28746+ return result;
28747+}
28748+
28749+static bool
28750+is_executed_query( PGconn *conn, ReplicateHeader * header)
28751+{
28752+ static PGresult * res = (PGresult *)NULL;
28753+ char sync_command[PGR_MESSAGE_BUFSIZE];
28754+ char * str = NULL;
28755+
28756+ snprintf(sync_command,sizeof(sync_command),
28757+ "SELECT %s(%d,%u,%u,%u,%d) ",
28758+ PGR_SYSTEM_COMMAND_FUNC,
28759+ PGR_QUERY_CONFIRM_ANSWER_FUNC_NO,
28760+ (unsigned int)ntohl(header->tv.tv_sec),
28761+ (unsigned int)ntohl(header->tv.tv_usec),
28762+ (unsigned int)ntohl(header->replicate_id),
28763+ PGR_Response_Inf->response_mode);
28764+
28765+ res = PQexec(conn, sync_command);
28766+ if (res != NULL)
28767+ {
28768+ str = PQcmdStatus(res);
28769+ if ((str != NULL) &&
28770+ (!strncasecmp(str,PGR_ALREADY_REPLICATED_NOTICE_CMD,strlen(PGR_ALREADY_REPLICATED_NOTICE_CMD))))
28771+ {
28772+ PQclear(res);
28773+ return true;
28774+ }
28775+ PQclear(res);
28776+
28777+ }
28778+ return false;
28779+}
28780+
28781+static int
28782+replicate_lo( PGconn * conn, ReplicateHeader * header, LOArgs * query)
28783+{
28784+ int status = STATUS_OK;
28785+ int mode = 0;
28786+ Oid lobjId = 0;
28787+ int fd = 0;
28788+ char * buf = NULL;
28789+ char * filename = NULL;
28790+ size_t len = 0;
28791+ int offset = 0;
28792+ int whence = 0;
28793+
28794+ if ((conn == (PGconn *)NULL) || (query == (LOArgs *)NULL) || (header == (ReplicateHeader *)NULL))
28795+ {
28796+ return STATUS_ERROR;
28797+ }
28798+ switch (header->cmdType)
28799+ {
28800+ case CMD_TYPE_LO_IMPORT :
28801+ filename = query->buf;
28802+ if (lo_import(conn, filename) > 0 )
28803+ {
28804+ status = STATUS_OK;
28805+ }
28806+ else
28807+ {
28808+ status = STATUS_ERROR;
28809+ }
28810+ break;
28811+ case CMD_TYPE_LO_CREATE :
28812+ mode = (int)ntohl(query->arg1);
28813+ if (lo_creat(conn, mode) > 0)
28814+ {
28815+ status = STATUS_OK;
28816+ }
28817+ else
28818+ {
28819+ status = STATUS_ERROR;
28820+ }
28821+ break;
28822+ case CMD_TYPE_LO_OPEN :
28823+ lobjId = (Oid)ntohl(query->arg1);
28824+ mode = (int)ntohl(query->arg2);
28825+ if (lo_open(conn, lobjId, mode) > 0)
28826+ {
28827+ status = STATUS_OK;
28828+ }
28829+ else
28830+ {
28831+ status = STATUS_ERROR;
28832+ }
28833+ break;
28834+ case CMD_TYPE_LO_WRITE :
28835+ fd = (int)ntohl(query->arg1);
28836+ len = (int)ntohl(query->arg2);
28837+ buf = query->buf;
28838+ if (lo_write(conn, fd, buf, len) == len )
28839+ {
28840+ status = STATUS_OK;
28841+ }
28842+ else
28843+ {
28844+ status = STATUS_ERROR;
28845+ }
28846+ break;
28847+ case CMD_TYPE_LO_LSEEK :
28848+ fd = (int)ntohl(query->arg1);
28849+ offset = (int)ntohl(query->arg2);
28850+ whence = (int)ntohl(query->arg3);
28851+ if (lo_lseek(conn, fd, offset, whence) >= 0)
28852+ {
28853+ status = STATUS_OK;
28854+ }
28855+ else
28856+ {
28857+ status = STATUS_ERROR;
28858+ }
28859+ break;
28860+ case CMD_TYPE_LO_CLOSE :
28861+ fd = (int)ntohl(query->arg1);
28862+ if (lo_close(conn, fd) == 0)
28863+ {
28864+ status = STATUS_OK;
28865+ }
28866+ else
28867+ {
28868+ status = STATUS_ERROR;
28869+ }
28870+ break;
28871+ case CMD_TYPE_LO_UNLINK :
28872+ lobjId = (Oid)ntohl(query->arg1);
28873+ if (lo_unlink(conn,lobjId) >= 0)
28874+ {
28875+ status = STATUS_OK;
28876+ }
28877+ else
28878+ {
28879+ status = STATUS_ERROR;
28880+ }
28881+ break;
28882+ default :
28883+ break;
28884+ }
28885+ return status;
28886+}
28887+
28888+static int
28889+send_func(HostTbl * host_ptr,ReplicateHeader * header, char * func,char * result)
28890+{
28891+ char * f ="send_func()";
28892+ char *database = NULL;
28893+ char port[8];
28894+ char *userName = NULL;
28895+ char *password = NULL;
28896+ char * md5Salt = NULL;
28897+ char * cryptSalt = NULL;
28898+ char * host = NULL;
28899+ char * str = NULL;
28900+ TransactionTbl * transaction_tbl = (TransactionTbl *)NULL;
28901+ PGresult * res = (PGresult *)NULL;
28902+ PGconn * conn = (PGconn *)NULL;
28903+ int rtn = 0;
28904+ int current_cluster = 0;
28905+
28906+ if ((host_ptr == (HostTbl *)NULL) ||
28907+ (header == (ReplicateHeader *)NULL) ||
28908+ (func == NULL) ||
28909+ (result == NULL))
28910+ {
28911+ return STATUS_ERROR;
28912+ }
28913+ /*
28914+ * set up the connection
28915+ */
28916+ database = (char *)header->dbName;
28917+ snprintf(port,sizeof(port),"%d", host_ptr->port);
28918+ userName = (char *)(header->userName);
28919+ password = (char *)(header->password);
28920+ md5Salt = (char *)(header->md5Salt);
28921+ cryptSalt = (char *)(header->cryptSalt);
28922+ host = (char *)(host_ptr->hostName);
28923+ if (PGR_Response_Inf != NULL)
28924+ {
28925+ current_cluster = PGR_Response_Inf->current_cluster;
28926+ }
28927+
28928+ /*
28929+ * get the transaction table data
28930+ * it has the connection data with each cluster DB
28931+ */
28932+ transaction_tbl = getTransactionTbl(host_ptr,header);
28933+ /*
28934+ * if the transaction process is new one,
28935+ * create connection data and add the transaction table
28936+ */
28937+ if (transaction_tbl == (TransactionTbl *)NULL)
28938+ {
28939+ transaction_tbl = setTransactionTbl(host_ptr, header);
28940+ if (transaction_tbl == (TransactionTbl *)NULL)
28941+ {
28942+ StartReplication[current_cluster] = true;
28943+ show_error("%s:setTransactionTbl failed",f);
28944+ if ( header->cmdSts != CMD_STS_NOTICE )
28945+ {
28946+ PGRset_host_status(host_ptr,DB_TBL_ERROR);
28947+ }
28948+ return STATUS_ERROR;
28949+ }
28950+ }
28951+ else
28952+ {
28953+ /*
28954+ * re-use the connection data
28955+ */
28956+ if ((transaction_tbl->conn != (PGconn *)NULL) &&
28957+ (transaction_tbl->conn->sock > 0))
28958+ {
28959+ StartReplication[current_cluster] = false;
28960+ }
28961+ else
28962+ {
28963+ if (transaction_tbl->conn != (PGconn *)NULL)
28964+ {
28965+ PQfinish(transaction_tbl->conn);
28966+ }
28967+ transaction_tbl->conn = PGRcreateConn(host,port,database,userName,password,md5Salt,cryptSalt);
28968+ StartReplication[current_cluster] = true;
28969+ }
28970+ }
28971+ conn = transaction_tbl->conn;
28972+
28973+ if (conn == NULL)
28974+ {
28975+ show_error("%s:[%d@%s] may be down",f,host_ptr->port,host_ptr->hostName);
28976+ if ( header->cmdSts != CMD_STS_NOTICE )
28977+ {
28978+ StartReplication[current_cluster] = true;
28979+ PGRset_host_status(host_ptr,DB_TBL_ERROR);
28980+ }
28981+ return STATUS_ERROR;
28982+ }
28983+ res = PQexec(conn, func);
28984+ if (res == NULL)
28985+ {
28986+ StartReplication[current_cluster] = true;
28987+ return STATUS_ERROR;
28988+ }
28989+ str = PQcmdStatus(res);
28990+ if ((str == NULL) || (*str == '\0'))
28991+ {
28992+ rtn = STATUS_ERROR;
28993+ }
28994+ else
28995+ {
28996+ snprintf(result, PGR_MESSAGE_BUFSIZE, "%s",str);
28997+ rtn = STATUS_OK;
28998+ }
28999+ if (res != NULL)
29000+ PQclear(res);
29001+ return rtn;
29002+}
29003+
29004+static uint32_t
29005+get_oid(HostTbl * host_ptr,ReplicateHeader * header)
29006+{
29007+ char sync_command[PGR_MESSAGE_BUFSIZE];
29008+ char result[PGR_MESSAGE_BUFSIZE];
29009+
29010+ memset(result,0,sizeof(result));
29011+ snprintf(sync_command,sizeof(sync_command),
29012+ "SELECT %s(%d)",
29013+ PGR_SYSTEM_COMMAND_FUNC, PGR_GET_OID_FUNC_NO);
29014+ if (send_func(host_ptr, header, sync_command, result) == STATUS_OK)
29015+ {
29016+ return (strtoul(result, NULL, 10));
29017+ }
29018+ return 0;
29019+}
29020+
29021+static int
29022+set_oid(HostTbl * host_ptr,ReplicateHeader * header, uint32_t oid)
29023+{
29024+ char sync_command[PGR_MESSAGE_BUFSIZE];
29025+ char result[PGR_MESSAGE_BUFSIZE];
29026+
29027+ memset(result,0,sizeof(result));
29028+ snprintf(sync_command,sizeof(sync_command),
29029+ "SELECT %s(%d,%u)",
29030+ PGR_SYSTEM_COMMAND_FUNC,
29031+ PGR_SET_OID_FUNC_NO,
29032+ oid);
29033+ return ( send_func(host_ptr, header, sync_command, result) );
29034+}
29035+
29036+/*
29037+ * sync oid during cluster DB's
29038+ */
29039+int
29040+PGRsync_oid(ReplicateHeader *header)
29041+{
29042+ HostTbl * host_ptr = (HostTbl*)NULL;
29043+ uint32_t max_oid = 0;
29044+ uint32_t oid = 0;
29045+ int recovery_status = 0;
29046+
29047+ /* get current oid of all cluster db's */
29048+ host_ptr = Host_Tbl_Begin;
29049+ if (host_ptr == (HostTbl *)NULL)
29050+ {
29051+ return STATUS_ERROR;
29052+ }
29053+ recovery_status = PGRget_recovery_status();
29054+ while(host_ptr->useFlag != DB_TBL_END)
29055+ {
29056+ /*
29057+ * check the status of the cluster DB
29058+ */
29059+ if (host_ptr->useFlag != DB_TBL_USE)
29060+ {
29061+ host_ptr ++;
29062+ continue;
29063+ }
29064+ /*
29065+ * skip loop during recover and the host name is master DB
29066+ */
29067+ if (is_master_in_recovery(host_ptr->hostName, host_ptr->port,recovery_status) == true)
29068+ {
29069+ host_ptr ++;
29070+ continue;
29071+ }
29072+ oid = get_oid(host_ptr,header);
29073+ if (max_oid < oid )
29074+ {
29075+ max_oid = oid;
29076+ }
29077+ host_ptr ++;
29078+ }
29079+ if (max_oid <= 0)
29080+ return STATUS_ERROR;
29081+
29082+ /* set oid in cluster db */
29083+ host_ptr = Host_Tbl_Begin;
29084+ while(host_ptr->useFlag != DB_TBL_END)
29085+ {
29086+ /*
29087+ * check the status of the cluster DB
29088+ */
29089+ if (host_ptr->useFlag != DB_TBL_USE)
29090+ {
29091+ host_ptr ++;
29092+ continue;
29093+ }
29094+ /*
29095+ * skip loop during recover and the host name is master DB
29096+ */
29097+ if (is_master_in_recovery(host_ptr->hostName, host_ptr->port,recovery_status) == true)
29098+ {
29099+ host_ptr ++;
29100+ continue;
29101+ }
29102+ set_oid(host_ptr,header,max_oid);
29103+ host_ptr ++;
29104+ }
29105+
29106+ return STATUS_OK;
29107+}
29108+
29109+int
29110+PGRload_replication_id(void)
29111+{
29112+ char * func = "PGRload_replication_id()";
29113+ char buf[256];
29114+ char *p;
29115+
29116+ if (Recovery_Status_Inf == (RecoveryStatusInf *)NULL)
29117+ {
29118+ show_error("%s: Recovery_Status_Inf is NULL",func);
29119+ return STATUS_ERROR;
29120+ }
29121+ if (RidFp == (FILE *)NULL)
29122+ {
29123+ show_error("%s: replication id file is not open",func);
29124+ return STATUS_ERROR;
29125+ }
29126+ rewind(RidFp);
29127+ if (fgets(buf,sizeof(buf),RidFp) == NULL)
29128+ {
29129+ Recovery_Status_Inf->replication_id = 0;
29130+ }
29131+ else
29132+ {
29133+ p = strrchr(buf,' ');
29134+ if (p != NULL)
29135+ {
29136+ p++;
29137+ Recovery_Status_Inf->replication_id = (uint32_t) atol(p);
29138+ }
29139+ else
29140+ {
29141+ Recovery_Status_Inf->replication_id = 0;
29142+ }
29143+ }
29144+ return STATUS_OK;
29145+}
29146+
29147+static int
29148+notice_abort(HostTbl * host_ptr,ReplicateHeader * header)
29149+{
29150+ char sync_command[PGR_MESSAGE_BUFSIZE];
29151+ char result[PGR_MESSAGE_BUFSIZE];
29152+
29153+ memset(result,0,sizeof(result));
29154+ snprintf(sync_command,sizeof(sync_command),
29155+ "SELECT %s(%d)",
29156+ PGR_SYSTEM_COMMAND_FUNC,
29157+ PGR_NOTICE_ABORT_FUNC_NO);
29158+ return ( send_func(host_ptr, header, sync_command, result) );
29159+}
29160+
29161+static int
29162+send_p_parse (PGconn * conn, StringInfo input_message)
29163+{
29164+ const char *stmt_name;
29165+ const char *query_string;
29166+ int numParams;
29167+ Oid paramTypes;
29168+
29169+ /* get name,query */
29170+ stmt_name = pq_getmsgstring(input_message);
29171+ query_string = pq_getmsgstring(input_message);
29172+ /* send name,query */
29173+ if (pqPutMsgStart('P', false, conn) < 0 ||
29174+ pqPuts(stmt_name, conn) < 0 ||
29175+ pqPuts(query_string, conn) < 0)
29176+ {
29177+ return STATUS_ERROR;
29178+ }
29179+ /* get number of parameter */
29180+ numParams = pq_getmsgint(input_message, 2);
29181+ /* send number of parameter */
29182+ if (pqPutInt(numParams, 2, conn) < 0)
29183+ {
29184+ return STATUS_ERROR;
29185+ }
29186+ if (numParams > 0)
29187+ {
29188+ int i;
29189+ for (i = 0; i < numParams; i++)
29190+ {
29191+ paramTypes = pq_getmsgint(input_message, 4);
29192+ if (pqPutInt(paramTypes, 4, conn) < 0)
29193+ {
29194+ return STATUS_ERROR;
29195+ }
29196+ }
29197+ }
29198+ if (pqPutMsgEnd(conn) < 0)
29199+ {
29200+ return STATUS_ERROR;
29201+ }
29202+ return STATUS_OK;
29203+}
29204+
29205+static int
29206+send_p_bind (PGconn * conn, StringInfo input_message)
29207+{
29208+ const char *portal_name;
29209+ const char *stmt_name;
29210+ int numPFormats;
29211+ int16 pformats;
29212+ int numParams;
29213+ int numRFormats;
29214+ int16 rformats;
29215+ int i;
29216+
29217+ /* Get&Send the fixed part of the message */
29218+ portal_name = pq_getmsgstring(input_message);
29219+ stmt_name = pq_getmsgstring(input_message);
29220+ if (pqPutMsgStart('B', false, conn) < 0 ||
29221+ pqPuts(portal_name, conn) < 0 ||
29222+ pqPuts(stmt_name, conn) < 0)
29223+ {
29224+ return STATUS_ERROR;
29225+ }
29226+
29227+ /* Get&Send the parameter format codes */
29228+ numPFormats = pq_getmsgint(input_message, 2);
29229+ if (pqPutInt(numPFormats, 2, conn) < 0)
29230+ {
29231+ return STATUS_ERROR;
29232+ }
29233+ if (numPFormats > 0)
29234+ {
29235+ for (i = 0; i < numPFormats; i++)
29236+ {
29237+ pformats = pq_getmsgint(input_message, 2);
29238+ if (pqPutInt(pformats, 2, conn) < 0)
29239+ {
29240+ return STATUS_ERROR;
29241+ }
29242+ }
29243+ }
29244+
29245+ /* Get&Send the parameter value count */
29246+ numParams = pq_getmsgint(input_message, 2);
29247+ if (pqPutInt(numParams, 2, conn) < 0)
29248+ {
29249+ return STATUS_ERROR;
29250+ }
29251+ if (numParams > 0)
29252+ {
29253+ int32 plength;
29254+ for (i = 0 ; i < numParams ; i ++)
29255+ {
29256+ plength = pq_getmsgint(input_message, 4);
29257+ if (plength != -1)
29258+ {
29259+ const char *pvalue = pq_getmsgbytes(input_message, plength);
29260+ if (pqPutInt(plength, 4, conn) < 0 ||
29261+ pqPutnchar(pvalue, plength, conn) < 0)
29262+ {
29263+ return STATUS_ERROR;
29264+ }
29265+ }
29266+ else
29267+ {
29268+ if (pqPutInt(plength, 4, conn) < 0)
29269+ {
29270+ return STATUS_ERROR;
29271+ }
29272+ }
29273+ }
29274+ }
29275+
29276+ /* Get&Send the result format codes */
29277+ numRFormats = pq_getmsgint(input_message, 2);
29278+ if (pqPutInt(numRFormats, 2, conn) < 0 )
29279+ {
29280+ return STATUS_ERROR;
29281+ }
29282+ if (numRFormats > 0)
29283+ {
29284+ for (i = 0; i < numRFormats; i++)
29285+ {
29286+ rformats = pq_getmsgint(input_message, 2);
29287+ if (pqPutInt(rformats, 2, conn) < 0)
29288+ {
29289+ return STATUS_ERROR;
29290+ }
29291+ }
29292+ }
29293+ if (pqPutMsgEnd(conn) < 0)
29294+ {
29295+ return STATUS_ERROR;
29296+ }
29297+ return STATUS_OK;
29298+}
29299+
29300+static int
29301+send_p_describe (PGconn * conn, StringInfo input_message)
29302+{
29303+
29304+ int describe_type;
29305+ const char *describe_target;
29306+
29307+ describe_type = pq_getmsgbyte(input_message);
29308+ describe_target = pq_getmsgstring(input_message);
29309+
29310+ /* construct the Describe Portal message */
29311+ if (pqPutMsgStart('D', false, conn) < 0 ||
29312+ pqPutc(describe_type, conn) < 0 ||
29313+ pqPuts(describe_target, conn) < 0 ||
29314+ pqPutMsgEnd(conn) < 0)
29315+ {
29316+ return STATUS_ERROR;
29317+ }
29318+ return STATUS_OK;
29319+}
29320+
29321+static int
29322+send_p_execute (PGconn * conn, StringInfo input_message)
29323+{
29324+ const char *portal_name;
29325+ int max_rows;
29326+
29327+ portal_name = pq_getmsgstring(input_message);
29328+ max_rows = pq_getmsgint(input_message, 4);
29329+ /* construct the Execute message */
29330+ if (pqPutMsgStart('E', false, conn) < 0 ||
29331+ pqPuts(portal_name, conn) < 0 ||
29332+ pqPutInt(max_rows, 4, conn) < 0 ||
29333+ pqPutMsgEnd(conn) < 0)
29334+ {
29335+ return STATUS_ERROR;
29336+ }
29337+ return STATUS_OK;
29338+}
29339+
29340+static int
29341+send_p_sync (PGconn * conn, StringInfo input_message)
29342+{
29343+ PGresult *result;
29344+ PGresult *lastResult;
29345+
29346+ /* construct the Sync message */
29347+ if (pqPutMsgStart('S', false, conn) < 0 ||
29348+ pqPutMsgEnd(conn) < 0)
29349+ {
29350+ return STATUS_ERROR;
29351+ }
29352+ /* remember we are using extended query protocol */
29353+ conn->queryclass = PGQUERY_EXTENDED;
29354+
29355+ /*
29356+ * Give the data a push. In nonblock mode, don't complain if we're unable
29357+ * to send it all; PQgetResult() will do any additional flushing needed.
29358+ */
29359+ if (pqFlush(conn) < 0)
29360+ {
29361+ return STATUS_ERROR;
29362+ }
29363+
29364+ /* OK, it's launched! */
29365+ conn->asyncStatus = PGASYNC_BUSY;
29366+
29367+ lastResult = NULL;
29368+ while ((result = PQgetResult(conn)) != NULL)
29369+ {
29370+ if (lastResult)
29371+ {
29372+ if (lastResult->resultStatus == PGRES_FATAL_ERROR &&
29373+ result->resultStatus == PGRES_FATAL_ERROR)
29374+ {
29375+ PQclear(result);
29376+ result = lastResult;
29377+ }
29378+ else
29379+ PQclear(lastResult);
29380+ }
29381+ lastResult = result;
29382+ if (result->resultStatus == PGRES_COPY_IN ||
29383+ result->resultStatus == PGRES_COPY_OUT ||
29384+ conn->status == CONNECTION_BAD)
29385+ break;
29386+ }
29387+ if (lastResult != NULL)
29388+ {
29389+ PQclear(lastResult);
29390+ }
29391+ return STATUS_OK;
29392+}
29393+
29394+static int
29395+send_p_close (PGconn * conn, StringInfo input_message)
29396+{
29397+
29398+ int close_type;
29399+ const char *close_target;
29400+
29401+ close_type = pq_getmsgbyte(input_message);
29402+ close_target = pq_getmsgstring(input_message);
29403+ if (pqPutMsgStart('C', false, conn) < 0 ||
29404+ pqPutc(close_type, conn) < 0 ||
29405+ pqPuts(close_target, conn) < 0 ||
29406+ pqPutMsgEnd(conn) < 0)
29407+ {
29408+ return STATUS_ERROR;
29409+ }
29410+ return STATUS_OK;
29411+}
29412+static void
29413+set_string_info(StringInfo input_message, ReplicateHeader * header, char * query)
29414+{
29415+ int len;
29416+ len = ntohl(header->query_size);
29417+ input_message->data = query;
29418+ input_message->maxlen = len;
29419+ input_message->len = len -1;
29420+ input_message->cursor = 0;
29421+}
29422diff -aruN postgresql-8.2.4/src/pgcluster/pgrp/rlog.c pgcluster-1.7.0rc7/src/pgcluster/pgrp/rlog.c
29423--- postgresql-8.2.4/src/pgcluster/pgrp/rlog.c 1970-01-01 01:00:00.000000000 +0100
29424+++ pgcluster-1.7.0rc7/src/pgcluster/pgrp/rlog.c 2007-02-18 22:52:17.000000000 +0100
29425@@ -0,0 +1,1260 @@
29426+/*--------------------------------------------------------------------
29427+ * FILE:
29428+ * rlog.c
29429+ *
29430+ * NOTE:
29431+ * This file is composed of the functions to call with the source
29432+ * at pgreplicate for replicate ahead log.
29433+ *
29434+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
29435+ *--------------------------------------------------------------------
29436+ */
29437+#ifdef USE_REPLICATION
29438+
29439+#include "postgres.h"
29440+#include "postgres_fe.h"
29441+
29442+#include <stdio.h>
29443+#include <sys/types.h>
29444+#include <sys/stat.h>
29445+#include <sys/un.h>
29446+#include <unistd.h>
29447+#ifdef HAVE_FCNTL_H
29448+#include <fcntl.h>
29449+#endif
29450+#include <errno.h>
29451+#include <ctype.h>
29452+#include <time.h>
29453+#include <sys/ipc.h>
29454+#include <sys/shm.h>
29455+#include <sys/sem.h>
29456+#include <sys/msg.h>
29457+#include <signal.h>
29458+#include <sys/socket.h>
29459+#include <netdb.h>
29460+#ifdef HAVE_NETINET_TCP_H
29461+#include <netinet/tcp.h>
29462+#endif
29463+#include <dirent.h>
29464+#include <arpa/inet.h>
29465+
29466+#ifdef HAVE_CRYPT_H
29467+#include <crypt.h>
29468+#endif
29469+
29470+#ifdef MULTIBYTE
29471+#include "mb/pg_wchar.h"
29472+#endif
29473+
29474+#include "libpq-fe.h"
29475+#include "libpq-int.h"
29476+#include "fe-auth.h"
29477+#include "access/xact.h"
29478+#include "replicate_com.h"
29479+#include "pgreplicate.h"
29480+
29481+static int RLog_Recv_Sock = -1;
29482+/*--------------------------------------
29483+ * PROTOTYPE DECLARATION
29484+ *--------------------------------------
29485+ */
29486+static int set_query_log(ReplicateHeader * header, char * query);
29487+static QueryLogType * get_query_log_by_header(ReplicateHeader * header);
29488+static QueryLogType * get_query_log(ReplicateHeader * header);
29489+static void delete_query_log(ReplicateHeader * header);
29490+static int set_commit_log(ReplicateHeader * header);
29491+static CommitLogInf * get_commit_log(ReplicateHeader * header);
29492+static void delete_commit_log(ReplicateHeader * header);
29493+static bool was_committed_transaction(ReplicateHeader * header);
29494+static int create_recv_rlog_socket(void);
29495+static int do_rlog(int fd);
29496+static int recv_message(int sock,char * buf, int len);
29497+static int send_message(int sock, char * msg, int len);
29498+static void exit_rlog(int sig);
29499+static int reconfirm_commit(ReplicateHeader * header);
29500+#if 0
29501+static int PGRget_sync_data(ReplicateHeader *header);
29502+static int PGRdelete_sync_data(ReplicateHeader *header);
29503+#endif /* #if 0 */
29504+
29505+
29506+int
29507+PGRwrite_rlog(ReplicateHeader * header, char * query)
29508+{
29509+ char * func = "PGRwrite_rlog()";
29510+
29511+ if (header == NULL)
29512+ {
29513+ show_error("%s:header is null",func);
29514+ return STATUS_ERROR;
29515+ }
29516+ switch (header->cmdSts)
29517+ {
29518+ case CMD_STS_QUERY:
29519+#ifdef PRINT_DEBUG
29520+ show_debug("%s:set_query_log",func);
29521+#endif
29522+ set_query_log(header,query);
29523+ break;
29524+ case CMD_STS_DELETE_QUERY:
29525+#ifdef PRINT_DEBUG
29526+ show_debug("%s:delete_query_log",func);
29527+#endif
29528+ delete_query_log(header);
29529+ break;
29530+ case CMD_STS_TRANSACTION:
29531+ if (header->cmdType == CMD_TYPE_COMMIT)
29532+ {
29533+#ifdef PRINT_DEBUG
29534+ show_debug("%s:set_commit_log call",func);
29535+#endif
29536+ set_commit_log(header);
29537+ }
29538+ break;
29539+ case CMD_STS_DELETE_TRANSACTION:
29540+ if (header->cmdType == CMD_TYPE_COMMIT)
29541+ {
29542+#ifdef PRINT_DEBUG
29543+ show_debug("%s:delete_commit_log call",func);
29544+#endif
29545+ delete_commit_log(header);
29546+ }
29547+ break;
29548+ default:
29549+ show_error("%s:unknown status %c",func,header->cmdSts);
29550+ break;
29551+ }
29552+ return STATUS_OK;
29553+}
29554+
29555+ReplicateHeader *
29556+PGRget_requested_query(ReplicateHeader * header)
29557+{
29558+ QueryLogType * query_log = NULL;
29559+
29560+ if (Query_Log_Top == NULL)
29561+ {
29562+ return NULL;
29563+ }
29564+ query_log = Query_Log_Top;
29565+ while(query_log != (QueryLogType *)NULL)
29566+ {
29567+ if ((query_log->header->request_id == header->request_id) &&
29568+ (query_log->header->pid == header->pid) &&
29569+ (query_log->header->port == header->port) &&
29570+ (!strncmp(query_log->header->from_host,header->from_host,sizeof(header->from_host))))
29571+ {
29572+ return query_log->header;
29573+ }
29574+ query_log = (QueryLogType *)(query_log->next);
29575+ }
29576+ return (ReplicateHeader *)NULL;
29577+}
29578+
29579+static int
29580+set_query_log(ReplicateHeader * header, char * query)
29581+{
29582+ char * func = "set_query_log()";
29583+ int size = 0;
29584+ QueryLogType * query_log = NULL;
29585+
29586+ if (Query_Log_Top == NULL)
29587+ {
29588+ Query_Log_Top = (QueryLogType *)malloc(sizeof(QueryLogType));
29589+ if (Query_Log_Top == (QueryLogType *)NULL)
29590+ {
29591+ show_error("%s:malloc failed: (%s)",func,strerror(errno));
29592+ return STATUS_ERROR;
29593+ }
29594+ Query_Log_Top->next = NULL;
29595+ Query_Log_Top->last = NULL;
29596+ Query_Log_End = Query_Log_Top;
29597+ Query_Log_End->next = NULL;
29598+ Query_Log_End->last = NULL;
29599+ query_log = Query_Log_Top;
29600+ }
29601+ else
29602+ {
29603+ query_log = (QueryLogType *)malloc(sizeof(QueryLogType));
29604+ if (query_log == (QueryLogType *)NULL)
29605+ {
29606+ show_error("%s:malloc failed: (%s)",func,strerror(errno));
29607+ return STATUS_ERROR;
29608+ }
29609+ Query_Log_End->next = (char *)query_log;
29610+ query_log->last = (char *)Query_Log_End;
29611+ query_log->next = NULL;
29612+ Query_Log_End = query_log;
29613+ }
29614+ query_log->header = (ReplicateHeader *)malloc(sizeof(ReplicateHeader));
29615+ if (query_log->header == (ReplicateHeader *)NULL)
29616+ {
29617+ show_error("%s:malloc failed: (%s)",func,strerror(errno));
29618+ return STATUS_ERROR;
29619+ }
29620+ size = ntohl(header->query_size);
29621+
29622+ query_log->query = (char *)malloc(size+4);
29623+ if (query_log->query == (char *)NULL)
29624+ {
29625+ show_error("%s:malloc failed: (%s)",func,strerror(errno));
29626+ return STATUS_ERROR;
29627+ }
29628+ memset(query_log->query,0,size+4);
29629+ memcpy(query_log->header,header,sizeof(ReplicateHeader));
29630+ query_log->header->rlog = FROM_R_LOG_TYPE ;
29631+ memcpy(query_log->query,query,size);
29632+
29633+ return STATUS_OK;
29634+}
29635+
29636+static QueryLogType *
29637+get_query_log_by_header(ReplicateHeader * header)
29638+{
29639+ QueryLogType * query_log = NULL;
29640+
29641+ if (Query_Log_Top == NULL)
29642+ {
29643+ return (QueryLogType *)NULL;
29644+ }
29645+ query_log = Query_Log_Top;
29646+ show_debug("get_query_log_by_header:header is %d,%d,%d,%s",
29647+ header->request_id,
29648+ header->pid,
29649+ header->port,
29650+ header->from_host);
29651+
29652+ while(query_log != (QueryLogType *)NULL)
29653+ {
29654+ show_debug("get_query_log_by_header:comparing to %d,%d,%d,%s",
29655+ query_log->header->request_id,
29656+ query_log->header->pid,
29657+ query_log->header->port,
29658+ query_log->header->from_host);
29659+
29660+ if ((query_log->header->request_id == header->request_id) &&
29661+ (query_log->header->pid == header->pid) &&
29662+ (query_log->header->port == header->port) &&
29663+ (!strncmp(query_log->header->from_host,header->from_host,sizeof(header->from_host))))
29664+ {
29665+ return query_log;
29666+ }
29667+ query_log = (QueryLogType *)(query_log->next);
29668+ }
29669+ return (QueryLogType *)NULL;
29670+}
29671+
29672+static QueryLogType *
29673+get_query_log(ReplicateHeader * header)
29674+{
29675+ QueryLogType * query_log = NULL;
29676+
29677+ if (Query_Log_Top == NULL)
29678+ {
29679+ return NULL;
29680+ }
29681+ query_log = Query_Log_Top;
29682+ while(query_log != (QueryLogType *)NULL)
29683+ {
29684+ show_debug("get_qurey_log: comparing in log is %d,header is %d",query_log->header->replicate_id,header->replicate_id);
29685+ if (query_log->header->replicate_id == header->replicate_id)
29686+ {
29687+ return query_log;
29688+ }
29689+ query_log = (QueryLogType *)(query_log->next);
29690+ }
29691+ return (QueryLogType*)NULL;
29692+}
29693+
29694+static void
29695+delete_query_log(ReplicateHeader * header)
29696+{
29697+ QueryLogType * query_log = NULL;
29698+ QueryLogType * last = NULL;
29699+ QueryLogType * next = NULL;
29700+
29701+ query_log = get_query_log(header);
29702+
29703+ if (query_log == NULL)
29704+ {
29705+ return ;
29706+ }
29707+ last = (QueryLogType *)query_log->last;
29708+ next = (QueryLogType *)query_log->next;
29709+
29710+ /* change link */
29711+ if (last != (QueryLogType *)NULL)
29712+ {
29713+ last->next = (char *)next;
29714+ }
29715+ else
29716+ {
29717+ Query_Log_Top = next;
29718+ }
29719+ if (next != (QueryLogType *)NULL)
29720+ {
29721+ next->last = (char *)last;
29722+ }
29723+ else
29724+ {
29725+ Query_Log_End = last;
29726+ }
29727+
29728+ /* delete contents */
29729+ if (query_log->header != NULL)
29730+ {
29731+ free(query_log->header);
29732+ }
29733+ if (query_log->query != NULL)
29734+ {
29735+ free(query_log->query);
29736+ }
29737+ free(query_log);
29738+}
29739+
29740+static int
29741+set_commit_log(ReplicateHeader * header)
29742+{
29743+
29744+ CommitLogInf * commit_log = NULL;
29745+ ReplicateHeader * c_header;
29746+
29747+ if (Commit_Log_Tbl == NULL)
29748+ {
29749+ return STATUS_ERROR;
29750+ }
29751+ commit_log = Commit_Log_Tbl + 1;
29752+ while ( commit_log->inf.useFlag != DB_TBL_END )
29753+ {
29754+ if (commit_log->inf.useFlag != DB_TBL_USE)
29755+ {
29756+ commit_log->inf.useFlag = DB_TBL_USE;
29757+ c_header = &(commit_log->header);
29758+ memcpy(c_header,header,sizeof(ReplicateHeader));
29759+ Commit_Log_Tbl->inf.commit_log_num ++;
29760+ break;
29761+ }
29762+ commit_log ++;
29763+ }
29764+ return STATUS_OK;
29765+}
29766+/* Return the in-use Commit_Log_Tbl slot whose replicate_id equals header->replicate_id, or NULL when the table is NULL or nothing matches. */
29767+static CommitLogInf *
29768+get_commit_log(ReplicateHeader * header)
29769+{
29770+ CommitLogInf * commit_log = NULL;
29771+ ReplicateHeader * c_header;
29772+ int cnt = 0; /* number of DB_TBL_USE slots visited so far */
29773+
29774+ if (Commit_Log_Tbl == NULL)
29775+ {
29776+ return (CommitLogInf *)NULL;
29777+ }
29778+ commit_log = Commit_Log_Tbl + 1; /* scan starts at element 1; element 0's inf carries commit_log_num */
29779+ while ( commit_log->inf.useFlag != DB_TBL_END )
29780+ {
29781+ if (commit_log->inf.useFlag == DB_TBL_USE)
29782+ {
29783+ cnt ++;
29784+ c_header = &(commit_log->header);
29785+ if (c_header == NULL) /* NOTE(review): address of a member, so this branch looks unreachable */
29786+ {
29787+ commit_log ++;
29788+ continue;
29789+ }
29790+ if (c_header->replicate_id == header->replicate_id)
29791+ {
29792+ return commit_log;
29793+ }
29794+ }
29795+ else
29796+ {
29797+ }
29798+ if (cnt >= Commit_Log_Tbl->inf.commit_log_num) /* all recorded entries examined: stop early */
29799+ {
29800+ break;
29801+ }
29802+ commit_log ++;
29803+ }
29804+ return (CommitLogInf *)NULL;
29805+}
29806+/* Release the commit-log slot found for `header`, if any: zero its header, mark it DB_TBL_INIT and decrement commit_log_num. */
29807+static void
29808+delete_commit_log(ReplicateHeader * header)
29809+{
29810+ CommitLogInf * commit_log = NULL;
29811+
29812+ commit_log = get_commit_log(header);
29813+ if (commit_log != NULL)
29814+ {
29815+ memset(&(commit_log->header),0,sizeof(commit_log->header));
29816+ commit_log->inf.useFlag = DB_TBL_INIT; /* slot is no longer DB_TBL_USE, so set_commit_log() can reuse it */
29817+ Commit_Log_Tbl->inf.commit_log_num --;
29818+ }
29819+}
29820+/* Return true when get_commit_log() finds a commit-log slot for `header`, false otherwise. */
29821+static bool
29822+was_committed_transaction(ReplicateHeader * header)
29823+{
29824+ CommitLogInf * commit_log = NULL;
29825+
29826+ commit_log = get_commit_log(header);
29827+ if (commit_log != NULL)
29828+ {
29829+ return true;
29830+ }
29831+ return false;
29832+}
29833+/* Send `header` (with query_size forced to 0) over a fresh rlog socket, read back an int result, and report it on `sock` via PGRreturn_result(). */
29834+void
29835+PGRreconfirm_commit(int sock, ReplicateHeader * header)
29836+{
29837+ int result = PGR_NOT_YET_COMMIT; /* value reported unless the receive below stores something else */
29838+
29839+ if (Replicateion_Log == NULL)
29840+ {
29841+ return ;
29842+ }
29843+
29844+ if (Replicateion_Log->r_log_sock > 0) /* drop any previously opened connection before reconnecting */
29845+ {
29846+ close(Replicateion_Log->r_log_sock );
29847+ Replicateion_Log->r_log_sock = -1;
29848+ }
29849+ Replicateion_Log->r_log_sock = PGRcreate_send_rlog_socket();
29850+ if (Replicateion_Log->r_log_sock == -1)
29851+ return; /* no answer is sent on `sock` in this case */
29852+
29853+ header->query_size = 0; /* note: modifies the caller's header */
29854+ PGRsend_rlog_packet(Replicateion_Log->r_log_sock,header,""); /* return value is not checked */
29855+ PGRrecv_rlog_result(Replicateion_Log->r_log_sock,&result, sizeof(result)); /* return value is not checked */
29856+
29857+
29858+ close(Replicateion_Log->r_log_sock );
29859+ Replicateion_Log->r_log_sock = -1;
29860+
29861+ snprintf(PGR_Result,PGR_MESSAGE_BUFSIZE,"%d,%d", PGR_TRANSACTION_CONFIRM_ANSWER_FUNC_NO,result); /* answer text is "<func no>,<result>" */
29862+
29863+ PGRreturn_result(sock, PGR_Result,PGR_NOWAIT_ANSWER);
29864+}
29865+/* Return PGR_ALREADY_COMMITTED when `header` has a commit-log entry, otherwise PGR_NOT_YET_COMMIT. */
29866+static int
29867+reconfirm_commit(ReplicateHeader * header)
29868+{
29869+ char * func = "reconfirm_commit()"; /* only referenced by the PRINT_DEBUG traces below */
29870+ int result = PGR_NOT_YET_COMMIT;
29871+
29872+ /* check the transaction was committed */
29873+ if (was_committed_transaction(header) == true)
29874+ {
29875+ result = PGR_ALREADY_COMMITTED;
29876+#ifdef PRINT_DEBUG
29877+ show_debug("%s:PGR_ALREADY_COMMITTED",func);
29878+#endif
29879+ }
29880+ else
29881+ {
29882+#ifdef PRINT_DEBUG
29883+ show_debug("%s:PGR_NOT_YET_COMMIT",func);
29884+#endif
29885+ }
29886+ return result;
29887+}
29888+/* Log `query` using the global PGR_Log_Header: via PGRsend_lower_cascade() when Cascade_Inf is in use, via PGRwrite_rlog() otherwise or when the cascade path fails. */
29889+void
29890+PGRset_rlog(ReplicateHeader * header, char * query)
29891+{
29892+ char * func = "PGRset_rlog()";
29893+ int status = STATUS_OK;
29894+ bool send_flag = false; /* set only for the command kinds that are logged */
29895+
29896+ if (PGR_Log_Header == NULL)
29897+ {
29898+ return;
29899+ }
29900+ switch (header->cmdSts) /* only plain queries and COMMIT transaction commands are logged */
29901+ {
29902+ case CMD_STS_QUERY:
29903+ send_flag = true;
29904+ break;
29905+ case CMD_STS_TRANSACTION:
29906+ if (header->cmdType == CMD_TYPE_COMMIT)
29907+ {
29908+ send_flag = true;
29909+ PGR_Log_Header->cmdType = header->cmdType;
29910+ PGR_Log_Header->query_size = htonl(strlen(query)); /* stored in network byte order; NOTE(review): query is not checked for NULL */
29911+ }
29912+ break;
29913+ }
29914+ if (send_flag != true)
29915+ {
29916+ show_error("%s:send_flag is false",func);
29917+ return;
29918+ }
29919+ PGR_Log_Header->cmdSys = CMD_SYS_LOG;
29920+ if (Cascade_Inf->useFlag == DB_TBL_USE) /* NOTE(review): Cascade_Inf itself is not checked for NULL here */
29921+ {
29922+ /* save log data in remote server */
29923+ show_debug("%s:set rlog %s",func,query);
29924+ status = PGRsend_lower_cascade(PGR_Log_Header, query);
29925+ if (status == STATUS_OK) {
29926+ status=PGRwait_notice_rlog_done();
29927+ }
29928+ if (status != STATUS_OK) /* send or wait failed: fall back to PGRwrite_rlog() */
29929+ {
29930+#ifdef PRINT_DEBUG
29931+ show_debug("%s:PGRsend_lower_cascade failed",func);
29932+#endif
29933+ PGRwrite_rlog(PGR_Log_Header, query);
29934+ }
29935+ }
29936+ else
29937+ {
29938+ /* save log data in local server */
29939+ PGRwrite_rlog(PGR_Log_Header, query);
29940+ }
29941+}
29942+/* Issue the delete counterpart of PGRset_rlog(): mark PGR_Log_Header as CMD_STS_DELETE_QUERY / CMD_STS_DELETE_TRANSACTION and pass it down the same cascade-or-PGRwrite_rlog() path. */
29943+void
29944+PGRunset_rlog(ReplicateHeader * header, char * query)
29945+{
29946+ int status = STATUS_OK;
29947+ bool send_flag = false; /* set only for the command kinds that are handled */
29948+
29949+ if (PGR_Log_Header == NULL)
29950+ {
29951+ return;
29952+ }
29953+ switch (header->cmdSts)
29954+ {
29955+ case CMD_STS_QUERY:
29956+ send_flag = true;
29957+ PGR_Log_Header->cmdSts = CMD_STS_DELETE_QUERY;
29958+ break;
29959+ case CMD_STS_TRANSACTION:
29960+ if (PGR_Log_Header->cmdType == CMD_TYPE_COMMIT) /* tests the global log header's cmdType, not header->cmdType */
29961+ {
29962+ PGR_Log_Header->cmdSts = CMD_STS_DELETE_TRANSACTION;
29963+ PGR_Log_Header->query_size = htonl(strlen(query)); /* stored in network byte order; NOTE(review): query is not checked for NULL */
29964+ send_flag = true;
29965+ }
29966+ break;
29967+ }
29968+ if (send_flag != true)
29969+ {
29970+ return; /* nothing to delete for other command kinds (no error is logged here) */
29971+ }
29972+ PGR_Log_Header->cmdSys = CMD_SYS_LOG;
29973+ if (Cascade_Inf->useFlag == DB_TBL_USE) /* NOTE(review): Cascade_Inf itself is not checked for NULL here */
29974+ {
29975+ /* save log data in remote server */
29976+ show_debug("unset rlog %s",query);
29977+
29978+ status = PGRsend_lower_cascade(PGR_Log_Header, query);
29979+ if (status == STATUS_OK)
29980+ {
29981+ status=PGRwait_notice_rlog_done();
29982+ }
29983+ if (status != STATUS_OK) /* send or wait failed: fall back to PGRwrite_rlog() */
29984+ {
29985+#ifdef PRINT_DEBUG
29986+ show_debug("PGRsend_lower_cascade recv failed");
29987+#endif
29988+ PGRwrite_rlog(PGR_Log_Header, query);
29989+ }
29990+ }
29991+ else
29992+ {
29993+ /* save log data in local server */
29994+ PGRwrite_rlog(PGR_Log_Header, query);
29995+ }
29996+}
29997+/* Walk the query-log list from Query_Log_Top and re-send every FROM_R_LOG_TYPE entry with replicate_packet_send_internal(); always returns STATUS_OK. */
29998+int
29999+PGRresend_rlog_to_db(void)
30000+{
30001+ char *func="PGRresend_rlog_to_db";
30002+ QueryLogType * query_log = NULL;
30003+ QueryLogType * next = NULL;
30004+ int status = STATUS_OK;
30005+ int dest = 0;
30006+
30007+ show_debug("%s:enter.",func);
30008+
30009+ query_log = Query_Log_Top;
30010+
30011+ while (query_log != NULL)
30012+ {
30013+
30014+
30015+ show_debug("%s:processing qlog,query=%s",func,query_log->query);
30016+ if (query_log->header->rlog != FROM_R_LOG_TYPE ) /* NOTE(review): header is dereferenced here but NULL-checked only further below */
30017+ {
30018+ query_log = (QueryLogType *)query_log->next; /* entries of other types are left in place */
30019+ continue;
30020+ }
30021+ status = replicate_packet_send_internal(query_log->header,query_log->query, dest,RECOVERY_INIT,false);
30022+ show_debug("%s:status=%d",func,status);
30023+
30024+ if (status == STATUS_SKIP_REPLICATE )
30025+ {
30026+ Query_Log_Top = query_log; /* skipped entry is kept and becomes the list head */
30027+ query_log = (QueryLogType *)query_log->next;
30028+ }
30029+ else
30030+ {
30031+ if (query_log->header != NULL)
30032+ {
30033+ free(query_log->header );
30034+ }
30035+ if (query_log->query != NULL)
30036+ {
30037+ free(query_log->query );
30038+ }
30039+ next = (QueryLogType *)query_log->next; /* save the successor before freeing the entry */
30040+ free(query_log);
30041+ query_log = next;
30042+ Query_Log_Top = query_log;
30043+ }
30044+ if (query_log != NULL)
30045+ {
30046+ Query_Log_End = (QueryLogType *)query_log->next; /* NOTE(review): sets the tail global to the current entry's successor - confirm this is intended */
30047+ }
30048+ else
30049+ {
30050+ Query_Log_End = (QueryLogType *)NULL;
30051+ }
30052+ }
30053+
30054+ show_debug("%s:exit.",func);
30055+
30056+ return STATUS_OK;
30057+}
30058+/* Fork the rlog process: the caller gets fork()'s return value; the child installs signal handlers, opens the rlog listening socket and serves connections with do_rlog(). */
30059+pid_t
30060+PGR_RLog_Main(void)
30061+{
30062+ char * func = "PGR_RLog_Main()";
30063+ int afd = -1;
30064+ int rtn;
30065+ struct sockaddr addr;
30066+ socklen_t addrlen;
30067+ pid_t pid = 0;
30068+ pid_t pgid = 0;
30069+
30070+ extern int fork_wait_time;
30071+
30072+ if (Replicateion_Log == NULL)
30073+ {
30074+ show_error("%s:Replicateion_Log is NULL",func);
30075+ return -1;
30076+ }
30077+ pgid = getpgid(0); /* remember the caller's process group for the setpgid() below */
30078+ if ((pid = fork()) != 0 ) /* parent, or fork failure (pid == -1): hand the value back */
30079+ {
30080+ return pid;
30081+ }
30082+ PGRsignal(SIGTERM,exit_rlog); /* from here on the code runs in the child only */
30083+ PGRsignal(SIGINT,exit_rlog);
30084+ PGRsignal(SIGQUIT,exit_rlog);
30085+ PGRsignal(SIGPIPE,SIG_IGN);
30086+
30087+ if (PGRinit_transaction_table() != STATUS_OK)
30088+ {
30089+ show_error("RLog process transaction table memory allocate failed");
30090+ return -1; /* NOTE(review): this returns into the caller's code inside the child process - confirm that is intended */
30091+ }
30092+
30093+ setpgid(0,pgid);
30094+ RLog_Recv_Sock = create_recv_rlog_socket();
30095+ if(RLog_Recv_Sock == -1)
30096+ {
30097+ show_error("rlog socket creation failure.quit all process.");
30098+ kill(pgreplicate_pid, SIGINT); /* signal the process recorded in pgreplicate_pid before exiting */
30099+ exit_rlog(0);
30100+ }
30101+
30102+ if (fork_wait_time > 0) {
30103+#ifdef PRINT_DEBUG
30104+ show_debug("rlog process: wait fork(): pid = %d", getpid());
30105+#endif
30106+ sleep(fork_wait_time);
30107+ }
30108+
30109+ for (;;)
30110+ {
30111+ fd_set rmask;
30112+ struct timeval timeout;
30113+
30114+ timeout.tv_sec = PGR_Replication_Timeout; /* re-armed on every pass */
30115+ timeout.tv_usec = 0;
30116+
30117+ Idle_Flag = IDLE_MODE ;
30118+ if (Exit_Request) /* set by exit_rlog() when SIGTERM arrived during BUSY_MODE */
30119+ {
30120+ exit_rlog(0);
30121+ }
30122+ /*
30123+ * Wait for something to happen.
30124+ */
30125+ FD_ZERO(&rmask);
30126+ FD_SET(RLog_Recv_Sock,&rmask);
30127+ rtn = select(RLog_Recv_Sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
30128+ if (rtn < 0)
30129+ {
30130+ if (errno == EINTR || errno == EAGAIN) /* NOTE(review): other errno values also fall through and simply loop again */
30131+ continue;
30132+ }
30133+ else if (rtn && FD_ISSET(RLog_Recv_Sock, &rmask))
30134+ {
30135+ Idle_Flag = BUSY_MODE ; /* exit_rlog() defers SIGTERM while this is set */
30136+ addrlen = sizeof(addr);
30137+ afd = accept(RLog_Recv_Sock, &addr, &addrlen);
30138+ if (afd < 0)
30139+ {
30140+ continue;
30141+ }
30142+ else
30143+ {
30144+ do_rlog(afd); /* one request per accepted connection; its return value is ignored */
30145+ close(afd);
30146+ }
30147+ }
30148+ }
30149+ exit(0); /* not reached: the loop above has no break */
30150+}
30151+/* Create, bind, chmod(0770) and listen on the UNIX-domain socket "<PGR_Write_Path>/.s.PGRLOG.<RLog_Port_Number>"; returns the fd, or -1 on failure (the fd is closed first). */
30152+static int
30153+create_recv_rlog_socket(void)
30154+{
30155+ char * func = "create_recv_rlog_socket()";
30156+ struct sockaddr_un addr;
30157+ int fd;
30158+ int status;
30159+ int len;
30160+
30161+ /* create the socket, then build its filesystem path */
30162+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
30163+ if (fd == -1)
30164+ {
30165+ show_error("%s:Failed to create UNIX domain socket. reason: %s",func, strerror(errno));
30166+ return -1;
30167+ }
30168+ memset((char *) &addr, 0, sizeof(addr));
30169+ ((struct sockaddr *)&addr)->sa_family = AF_UNIX;
30170+ snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/.s.PGRLOG.%d",
30171+ PGR_Write_Path, Replicateion_Log->RLog_Port_Number);
30172+fprintf(stderr,"addr.sun_path[%s]\n",addr.sun_path);
30173+ if (Replicateion_Log->RLog_Sock_Path == NULL) /* remember the path so exit_rlog() can unlink it */
30174+ {
30175+ Replicateion_Log->RLog_Sock_Path = strdup(addr.sun_path);
30176+fprintf(stderr,"Replicateion_Log->RLog_Sock_Path[%s]\n",Replicateion_Log->RLog_Sock_Path);
30177+ }
30178+ len = sizeof(struct sockaddr_un);
30179+ status = bind(fd, (struct sockaddr *)&addr, len);
30180+ if (status == -1)
30181+ {
30182+ show_error("%s: bind() failed. reason: %s", func, strerror(errno));
30183+ close(fd); /* do not leak the descriptor on failure */
30184+ return -1;
30185+ }
30186+ if (chmod(addr.sun_path, 0770) == -1)
30187+ {
30188+ show_error("%s: chmod() failed. reason: %s", func, strerror(errno));
30189+ close(fd);
30190+ return -1;
30191+ }
30192+ status = listen(fd, 1000000);
30193+ if (status < 0)
30194+ {
30195+ show_error("%s: listen() failed. reason: %s", func, strerror(errno));
30196+ close(fd);
30197+ return -1;
30198+ }
30199+ return fd;
30200+}
30201+/* Connect to the UNIX-domain socket "<PGR_Write_Path>/.s.PGRLOG.<RLog_Port_Number>"; returns the connected fd, or -1 when socket() or connect() fails. */
30202+int
30203+PGRcreate_send_rlog_socket(void)
30204+{
30205+ char * func = "PGRcreate_send_rlog_socket()"; /* was a copy-pasted "create_recv_socket()" label */
30206+ struct sockaddr_un addr;
30207+ int fd;
30208+ int len;
30209+
30210+ /* create the socket, then build the path of the socket to connect to */
30211+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
30212+ if (fd == -1)
30213+ {
30214+ show_error("%s:Failed to create UNIX domain socket. reason: %s",func, strerror(errno));
30215+ return -1;
30216+ }
30217+ memset((char *) &addr, 0, sizeof(addr));
30218+ ((struct sockaddr *)&addr)->sa_family = AF_UNIX;
30219+ snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/.s.PGRLOG.%d",
30220+ PGR_Write_Path,
30221+ Replicateion_Log->RLog_Port_Number);
30222+ if (Replicateion_Log->RLog_Sock_Path == NULL) /* cache the path the first time it is computed */
30223+ {
30224+ Replicateion_Log->RLog_Sock_Path = strdup(addr.sun_path);
30225+ }
30226+ len = sizeof(struct sockaddr_un);
30227+ if (connect(fd, (struct sockaddr *)&addr, len) < 0) /* connect failures are reported by the -1 return only; nothing is logged */
30228+ {
30229+ close(fd);
30230+ return -1;
30231+ }
30232+ return fd;
30233+}
30234+/* Serve one rlog request on `fd`: read a packet with PGRread_packet(), dispatch on header.cmdSys and send the reply; STATUS_ERROR only when cmdSys is still 0 after the read. */
30235+static int
30236+do_rlog(int fd)
30237+{
30238+ char * func = "do_rlog()";
30239+ QueryLogType * query_log = NULL;
30240+ ReplicateHeader header;
30241+ char * query = NULL;
30242+ int status = STATUS_OK;
30243+
30244+ memset(&header,0,sizeof(header)); /* cmdSys stays 0 unless the read fills the header in */
30245+ query = PGRread_packet(fd, &header); /* NOTE(review): query is never freed in this function - confirm who owns it */
30246+ show_debug("%s:got result:cmdSys='%c'",func,header.cmdSys);
30247+ if (header.cmdSys == 0)
30248+ {
30249+ return STATUS_ERROR;
30250+ }
30251+ switch (header.cmdSys)
30252+ {
30253+ case CMD_SYS_REPLICATE:
30254+ if (header.cmdSts != CMD_STS_DELETE_QUERY)
30255+ {
30256+ query_log = get_query_log_by_header(&header);
30257+ if (query_log != (QueryLogType*)NULL)
30258+ {
30259+ memcpy(&header,query_log->header,sizeof(ReplicateHeader)); /* reply with the logged header when one exists */
30260+ }
30261+ send_message(fd,(char *)&header,sizeof(ReplicateHeader)); /* otherwise the received header is echoed back */
30262+ header.cmdSts = CMD_STS_DELETE_QUERY;
30263+ PGRwrite_rlog(&header, NULL); /* then issue a delete for that header */
30264+ }
30265+ else
30266+ {
30267+ status = PGRwrite_rlog((ReplicateHeader*)&header,(char *)NULL);
30268+ send_message(fd,(char *)&status,sizeof(status)); /* reply is the raw int status */
30269+ }
30270+ break;
30271+ case CMD_SYS_LOG:
30272+ status = PGRwrite_rlog((ReplicateHeader*)&header, query);
30273+ send_message(fd,(char *)&status,sizeof(status));
30274+ break;
30275+ case CMD_SYS_CALL:
30276+ if (header.cmdSts == CMD_STS_TRANSACTION_ABORT)
30277+ {
30278+#ifdef PRINT_DEBUG
30279+ show_debug("%s: CMD_STS_TRANSACTION_ABORT",func);
30280+#endif
30281+ status = reconfirm_commit(&header); /* reply carries PGR_ALREADY_COMMITTED or PGR_NOT_YET_COMMIT */
30282+ }
30283+ else if (header.cmdSts == CMD_STS_QUERY_SUSPEND)
30284+ {
30285+#ifdef PRINT_DEBUG
30286+ show_debug("%s: CMD_STS_QUERY_SUSPEND",func);
30287+#endif
30288+ // status = PGRresend_rlog_to_db();
30289+ }
30290+#ifdef PRINT_DEBUG
30291+ show_debug("%s: SYS_CALL process done",func);
30292+#endif
30293+ send_message(fd,(char *)&status,sizeof(status));
30294+ break;
30295+ }
30296+ show_debug("%s:process result done:cmdSys='%c'",func,header.cmdSys);
30297+ return STATUS_OK; /* send/write failures above are not reflected in the return value */
30298+}
30299+/* Compiled out by the #if 0 below. PGRget_sync_data: send `header` over a fresh rlog socket and overwrite it with the reply when the reply's cmdSts is non-zero. */
30300+#if 0
30301+static int
30302+PGRget_sync_data(ReplicateHeader *header)
30303+{
30304+ ReplicateHeader rlog_header;
30305+
30306+ if (header == NULL)
30307+ {
30308+ return STATUS_ERROR;
30309+ }
30310+
30311+
30312+ if (Replicateion_Log->r_log_sock > 0) /* drop any previously opened connection before reconnecting */
30313+ {
30314+ close(Replicateion_Log->r_log_sock );
30315+ Replicateion_Log->r_log_sock = -1;
30316+ }
30317+ Replicateion_Log->r_log_sock = PGRcreate_send_rlog_socket();
30318+ if (Replicateion_Log->r_log_sock == -1)
30319+ return STATUS_ERROR;
30320+
30321+ memset(&rlog_header,0,sizeof(ReplicateHeader)); /* cmdSts stays 0 unless a reply is received */
30322+ send_message( Replicateion_Log->r_log_sock, (char *)header,sizeof(ReplicateHeader));
30323+ recv_message( Replicateion_Log->r_log_sock, (char *)&rlog_header,sizeof(ReplicateHeader));
30324+ if (rlog_header.cmdSts != 0)
30325+ {
30326+ close(Replicateion_Log->r_log_sock );
30327+ Replicateion_Log->r_log_sock = -1;
30328+ memcpy(header,&rlog_header, sizeof(ReplicateHeader)); /* hand the reply back through the caller's header */
30329+ return STATUS_OK;
30330+ }
30331+
30332+ close(Replicateion_Log->r_log_sock );
30333+ Replicateion_Log->r_log_sock = -1;
30334+
30335+ return STATUS_ERROR;
30336+
30337+}
30338+/* (Compiled out by the enclosing #if 0.) Send `header` with cmdSts temporarily set to CMD_STS_DELETE_QUERY over a fresh rlog socket and return the int status read back. */
30339+static int
30340+PGRdelete_sync_data(ReplicateHeader *header)
30341+{
30342+ int status = STATUS_ERROR; /* returned unchanged when no reply is stored */
30343+ char cmdSts;
30344+
30345+ if (header == NULL) /* checked before the first dereference */
30346+ {
30347+ return STATUS_ERROR;
30348+ }
30349+
30350+ if (Replicateion_Log->r_log_sock > 0)
30351+ {
30352+ close(Replicateion_Log->r_log_sock );
30353+ Replicateion_Log->r_log_sock = -1;
30354+ }
30355+ Replicateion_Log->r_log_sock = PGRcreate_send_rlog_socket();
30356+ if (Replicateion_Log->r_log_sock == -1)
30357+ return STATUS_ERROR;
30358+
30359+ /* override cmdSts only around the exchange so every return path leaves the caller's header intact */
30360+ cmdSts = header->cmdSts;
30361+ header->cmdSts = CMD_STS_DELETE_QUERY;
30362+ send_message( Replicateion_Log->r_log_sock, (char *)header,sizeof(ReplicateHeader));
30363+ recv_message( Replicateion_Log->r_log_sock, (char *)&status,sizeof(status));
30364+ header->cmdSts = cmdSts;
30365+
30366+ close(Replicateion_Log->r_log_sock );
30367+ Replicateion_Log->r_log_sock = -1;
30368+
30369+ return status;
30370+}
30371+#endif /* #if 0 */
30372+/* Send `header` followed by ntohl(header->query_size) bytes of `query_string` on `sock` as one buffer; returns send_message()'s result, or STATUS_ERROR on bad arguments or allocation failure. */
30373+int
30374+PGRsend_rlog_packet(int sock,ReplicateHeader * header, const char * query_string)
30375+{
30376+ char * buf = NULL;
30377+ int buf_size = 0;
30378+ int header_size = 0;
30379+ int query_size = 0;
30380+ int rtn = 0;
30381+
30382+ /* check parameter */
30383+ if ((sock <= 0) || (header == NULL)) /* note: descriptor 0 is rejected as well */
30384+ {
30385+ return STATUS_ERROR;
30386+ }
30387+ if (query_string != NULL)
30388+ {
30389+ query_size = ntohl(header->query_size); /* length comes from the header (network byte order), not from strlen() */
30390+ }
30391+ header_size = sizeof(ReplicateHeader);
30392+ buf_size = header_size + query_size + 4; /* 4 spare bytes beyond header + query */
30393+ buf = (char *)malloc(buf_size);
30394+ if (buf == (char *)NULL)
30395+ {
30396+ return STATUS_ERROR;
30397+ }
30398+ memset(buf,0,buf_size);
30399+ buf_size -= 4; /* the spare bytes are allocated but not transmitted */
30400+ memcpy(buf,header,header_size);
30401+ if (query_size > 0)
30402+ {
30403+ memcpy((char *)(buf+header_size),query_string,query_size+1); /* reads query_size+1 bytes, so query_string must be at least that long */
30404+ }
30405+ rtn = send_message(sock,buf,buf_size);
30406+ free(buf);
30407+ return rtn;
30408+}
30409+
30410+int
30411+PGRrecv_rlog_result(int sock,void * result, int size)
30412+{
30413+ char *func = "PGRrecv_rlog_result";
30414+ fd_set rmask;
30415+ struct timeval timeout;
30416+ int rtn;
30417+
30418+ if ((result == (void *)NULL) || (size <= 0))
30419+ {
30420+ return -1;
30421+ }
30422+
30423+ /*
30424+ * Wait for something to happen.
30425+ */
30426+ rtn = 1;
30427+ for (;;)
30428+ {
30429+ timeout.tv_sec = PGR_Replication_Timeout;
30430+ timeout.tv_usec = 0;
30431+
30432+ FD_ZERO(&rmask);
30433+ FD_SET(sock,&rmask);
30434+ rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
30435+ if (rtn < 0)
30436+ {
30437+ if (errno != EINTR || errno != EAGAIN)
30438+ {
30439+ show_error("%s: select() failed (%s)",func,strerror(errno));
30440+ return -1;
30441+ }
30442+ }
30443+ else if (rtn && FD_ISSET(sock, &rmask))
30444+ {
30445+ return (recv_message(sock, (char*)result, size));
30446+ }
30447+ }
30448+ return -1;
30449+}
30450+
30451+/* Read exactly `len` bytes from `sock` into `buf`, retrying on EINTR/EAGAIN; returns `len` on success, -1 on a recv() error or when the peer closes early. */
30452+static int
30453+recv_message(int sock,char * buf, int len)
30454+{
30455+ char *func = "recv_message";
30456+ int cnt = 0; /* assigned but never read in this function */
30457+ int r = 0;
30458+ char * read_ptr;
30459+ int read_size = 0; /* bytes received so far */
30460+ cnt = 0;
30461+ read_ptr = buf;
30462+
30463+ for (;;)
30464+ {
30465+ r = recv(sock,read_ptr + read_size ,len - read_size, 0); /* ask only for the bytes still missing */
30466+ if (r < 0)
30467+ {
30468+ if (errno == EINTR || errno == EAGAIN)
30469+ continue;
30470+ else
30471+ {
30472+ show_error("%s:recv failed: %d(%s)",func, errno, strerror(errno));
30473+ return -1;
30474+ }
30475+ }
30476+ else if (r > 0)
30477+ {
30478+ read_size += r;
30479+ if (read_size == len)
30480+ return read_size;
30481+ }
30482+ else /* r == 0 */
30483+ {
30484+ show_error("%s:unexpected EOF", func);
30485+ return -1;
30486+ }
30487+ }
30488+ return -1; /* not reached: the loop above has no break */
30489+}
30490+/* Send `header` and `query` over a fresh rlog socket and return the int status the peer writes back (or the send status when sending failed). */
30491+int
30492+PGRsend_rlog_to_local(ReplicateHeader * header,char * query)
30493+{
30494+ int status = STATUS_OK;
30495+
30496+ if (Replicateion_Log == NULL)
30497+ {
30498+ return STATUS_ERROR;
30499+ }
30500+
30501+ if (Replicateion_Log->r_log_sock > 0) /* drop any previously opened connection before reconnecting */
30502+ {
30503+ close(Replicateion_Log->r_log_sock );
30504+ Replicateion_Log->r_log_sock = -1;
30505+ }
30506+
30507+ Replicateion_Log->r_log_sock = PGRcreate_send_rlog_socket();
30508+ if (Replicateion_Log->r_log_sock == -1)
30509+ return STATUS_ERROR;
30510+
30511+ show_debug("send_to_local %s",query);
30512+ status = PGRsend_rlog_packet(Replicateion_Log->r_log_sock,header,query);
30513+ show_debug("send_to_local result is %d,errno=%d(%s)",status,errno ,strerror(errno));
30514+
30515+ if (status != STATUS_ERROR)
30516+ {
30517+ PGRrecv_rlog_result(Replicateion_Log->r_log_sock,&status, sizeof(status)); /* overwrites status with the peer's reply; the call's own return value is ignored */
30518+ }
30519+
30520+ close(Replicateion_Log->r_log_sock );
30521+ Replicateion_Log->r_log_sock = -1;
30522+
30523+ return status;
30524+}
30525+/* Send a copy of `header` marked CMD_SYS_REPLICATE (query_size 0) over a fresh rlog socket and replace `header` with the ReplicateHeader that comes back. */
30526+int
30527+PGRget_rlog_header(ReplicateHeader * header)
30528+{
30529+ int status = STATUS_OK;
30530+ ReplicateHeader rlog_header; /* working copy: request on the way out, reply on the way back */
30531+
30532+ if ((Replicateion_Log == NULL) ||
30533+ (header == NULL))
30534+ {
30535+ return STATUS_ERROR;
30536+ }
30537+
30538+ if (Replicateion_Log->r_log_sock > 0) /* drop any previously opened connection before reconnecting */
30539+ {
30540+ close(Replicateion_Log->r_log_sock );
30541+ Replicateion_Log->r_log_sock = -1;
30542+ }
30543+ Replicateion_Log->r_log_sock = PGRcreate_send_rlog_socket();
30544+ if (Replicateion_Log->r_log_sock == -1)
30545+ return STATUS_ERROR;
30546+
30547+ memcpy(&rlog_header,header,sizeof(ReplicateHeader));
30548+ rlog_header.cmdSys = CMD_SYS_REPLICATE;
30549+ rlog_header.query_size = 0;
30550+ status =PGRsend_rlog_packet(Replicateion_Log->r_log_sock,&rlog_header,"");
30551+ if (status != STATUS_ERROR)
30552+ {
30553+ status = PGRrecv_rlog_result(Replicateion_Log->r_log_sock,&rlog_header, sizeof(ReplicateHeader));
30554+ if (status > 0) /* a positive return is the number of bytes received */
30555+ {
30556+ memcpy(header,&rlog_header,sizeof(ReplicateHeader)); /* caller's header is only touched on success */
30557+ status = STATUS_OK;
30558+ }
30559+ else
30560+ {
30561+ status = STATUS_ERROR;
30562+ }
30563+ }
30564+
30565+ close(Replicateion_Log->r_log_sock );
30566+ Replicateion_Log->r_log_sock = -1;
30567+
30568+ return status;
30569+}
30570+/* Write all `len` bytes of `msg` to `sock`, waiting for writability with select(); returns STATUS_OK, or STATUS_ERROR on bad arguments or a select()/send() failure. */
30571+static int
30572+send_message(int sock, char * msg, int len)
30573+{
30574+ char * func = "send_message()";
30575+ fd_set wmask;
30576+ struct timeval timeout;
30577+ int rtn = 0;
30578+ char * send_ptr = NULL;
30579+ int send_size= 0; /* bytes sent so far */
30580+ int buf_size = 0;
30581+ int s = 0;
30582+ int flag = 0;
30583+
30584+ if ((msg == NULL) || (len <= 0) || (sock <= 0)) /* note: descriptor 0 is rejected as well */
30585+ {
30586+ return STATUS_ERROR;
30587+ }
30588+ send_ptr = msg;
30589+ buf_size = len;
30590+
30591+ /*
30592+ * Pass MSG_DONTWAIT / MSG_NOSIGNAL to send() where the platform defines them.
30593+ */
30594+#ifdef MSG_DONTWAIT
30595+ flag |= MSG_DONTWAIT;
30596+#endif
30597+#ifdef MSG_NOSIGNAL
30598+ flag |= MSG_NOSIGNAL;
30599+#endif
30600+
30601+ for (;;)
30602+ {
30603+ timeout.tv_sec = PGR_Replication_Timeout; /* re-armed on every pass; a timeout (rtn == 0) just loops again */
30604+ timeout.tv_usec = 0;
30605+
30606+ FD_ZERO(&wmask);
30607+ FD_SET(sock,&wmask);
30608+ rtn = select(sock+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
30609+
30610+ if (rtn < 0 )
30611+ {
30612+ if (errno == EAGAIN || errno == EINTR)
30613+ continue;
30614+
30615+ show_error("%s:send-select error: %d(%s)",func,errno,strerror(errno));
30616+ return STATUS_ERROR;
30617+ }
30618+ else if (rtn && FD_ISSET(sock, &wmask)) /* logical AND: FD_ISSET only promises non-zero, so the old bitwise "&" could evaluate to 0 */
30619+ {
30620+ s = send(sock,send_ptr + send_size,buf_size - send_size ,flag);
30621+ if (s < 0)
30622+ {
30623+ if (errno == EINTR || errno == EAGAIN)
30624+ continue;
30625+ else
30626+ {
30627+ show_error("%s:send error: %d(%s)",func,errno,strerror(errno));
30628+ memset(send_ptr, 0, len); /* the caller's buffer is zeroed on failure */
30629+ return STATUS_ERROR;
30630+ }
30631+ }
30632+ else if (s == 0)
30633+ {
30634+ show_error("%s:unexpected EOF", func); /* the %s previously had no matching argument */
30635+ memset(send_ptr, 0, len);
30636+ return STATUS_ERROR;
30637+ }
30638+ else /* s > 0 */
30639+ {
30640+ send_size += s;
30641+ if (send_size == buf_size)
30642+ {
30643+ return STATUS_OK;
30644+ }
30645+ }
30646+ }
30647+ }
30648+ show_error("%s:send-select unknown error: %d(%s)",
30649+ func,errno,strerror(errno));
30650+ return STATUS_ERROR;
30651+}
30652+/* Signal handler / shutdown routine of the rlog process: closes the listening socket, unlinks the socket file and exits; a SIGTERM received in BUSY_MODE only sets Exit_Request. */
30653+static void
30654+exit_rlog(int sig)
30655+{
30656+ sigset_t mask;
30657+
30658+ Exit_Request = true;
30659+ if (sig == SIGTERM)
30660+ {
30661+ if (Idle_Flag == BUSY_MODE) /* defer: PGR_RLog_Main() checks Exit_Request at the top of its loop */
30662+ {
30663+ return;
30664+ }
30665+ }
30666+
30667+ sigemptyset(&mask); /* block further termination signals during cleanup */
30668+ sigaddset(&mask, SIGINT);
30669+ sigaddset(&mask, SIGTERM);
30670+ sigaddset(&mask, SIGQUIT);
30671+ sigprocmask(SIG_BLOCK, &mask, NULL);
30672+
30673+ if (RLog_Recv_Sock >= 0)
30674+ {
30675+ close(RLog_Recv_Sock);
30676+ RLog_Recv_Sock = -1;
30677+ }
30678+ if (Replicateion_Log->RLog_Sock_Path != NULL) /* NOTE(review): Replicateion_Log itself is not checked for NULL here */
30679+ {
30680+ unlink(Replicateion_Log->RLog_Sock_Path); /* remove the socket file from the filesystem */
30681+ free(Replicateion_Log->RLog_Sock_Path);
30682+ }
30683+ exit(0);
30684+}
30685+#endif /* USE_REPLICATION */
30686diff -aruN postgresql-8.2.4/src/pgcluster/tool/Makefile pgcluster-1.7.0rc7/src/pgcluster/tool/Makefile
30687--- postgresql-8.2.4/src/pgcluster/tool/Makefile 1970-01-01 01:00:00.000000000 +0100
30688+++ pgcluster-1.7.0rc7/src/pgcluster/tool/Makefile 2007-02-18 22:52:17.000000000 +0100
30689@@ -0,0 +1,32 @@
30690+#-------------------------------------------------------------------------
30691+#
30692+# Makefile for src/pgcluster/tool (builds pgcbench)
30693+#
30694+#-------------------------------------------------------------------------
30695+
30696+subdir = src/pgcluster/tool
30697+top_builddir = ../../..
30698+include $(top_builddir)/src/Makefile.global
30699+
30700+OBJS= pgcbench.o
30701+# EXTRA_OBJS is used by the link rule below; it is not set in this file while the next line stays commented out.
30702+# EXTRA_OBJS = $(top_builddir)/src/backend/libpq/replicate_com.o
30703+
30704+override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) -DBINDIR=\"$(bindir)\"
30705+
30706+all: pgcbench
30707+# Link with the $(X) executable suffix so the output name matches what install, uninstall and clean refer to.
30708+pgcbench: $(OBJS) $(libpq_builddir)/libpq.a
30709+	$(CC) $(CFLAGS) $(OBJS) $(EXTRA_OBJS) $(libpq) $(LDFLAGS) $(LIBS) -o $@$(X)
30710+
30711+install: all installdirs
30712+	$(INSTALL_PROGRAM) pgcbench$(X) $(DESTDIR)$(bindir)/pgcbench$(X)
30713+
30714+installdirs:
30715+	$(mkinstalldirs) $(DESTDIR)$(bindir)
30716+
30717+uninstall:
30718+	rm -f $(addprefix $(DESTDIR)$(bindir)/, pgcbench$(X))
30719+
30720+clean distclean maintainer-clean:
30721+	rm -f pgcbench$(X) $(OBJS)
30722diff -aruN postgresql-8.2.4/src/pgcluster/tool/README.jp pgcluster-1.7.0rc7/src/pgcluster/tool/README.jp
30723--- postgresql-8.2.4/src/pgcluster/tool/README.jp 1970-01-01 01:00:00.000000000 +0100
30724+++ pgcluster-1.7.0rc7/src/pgcluster/tool/README.jp 2007-02-18 22:52:17.000000000 +0100
30725@@ -0,0 +1,296 @@
30726+$Id$
30727+
30728+\e$B"#\e(B pgcbench \e$B$H$O\e(B
30729+
30730+pgcbench \e$B$O\e(B PGCluster \e$B$N%Y%s%A%^!<%/%F%9%H$r9T$&%W%m%0%i%`$G$9!#$b$A$m\e(B
30731+\e$B$s!"\e(BPGCluster \e$B$@$1$G$O$J$/\e(B PostgreSQL \e$B$N%Y%s%A%^!<%/$r<B;\$9$k$3$H$b$G\e(B
30732+\e$B$-$^$9!#\e(B
30733+
30734+pgcbench \e$B$O\e(B SELECT\e$B!"\e(BUPDATE\e$B!"\e(BINSERT \e$B$r4^$`%H%i%s%6%/%7%g%s$r<B9T$7!"A4\e(B
30735+\e$BBN$N<B9T;~4V$H<B:]$K40N;$7$?$G$"$m$&%H%i%s%6%/%7%g%s$N?t$+$i\e(B 1 \e$BIC4V$K\e(B
30736+\e$B<B9T$G$-$?%H%i%s%6%/%7%g%s?t\e(B (tps) \e$B$rI=<($7$^$9!#=hM}$NBP>]$H$J$k%F!<\e(B
30737+\e$B%V%k$O%G%U%)%k%H$G$O\e(B 10 \e$BK|9T$N%G!<%?$r4^$_$^$9!#\e(B
30738+
30739+\e$B<B:]$KI=<($O0J2<$N$h$&$K$J$j$^$9!#\e(B
30740+
30741+ number of clients: 4
30742+ number of transactions actually processed: 100
30743+ run time (sec) = 4.416423
30744+ tps = 22.642759 (including connections establishing)
30745+
30746+\e$B"(\e(B \e$BCm0U\e(B
30747+
30748+ pgcbench \e$B$G$O!"%H%i%s%6%/%7%g%s$,<B:]$K40N;$7$?$+$I$&$+$K4X78$J$/!"\e(B
30749+ \e$B:G=i$K;XDj$5$l$?%H%i%s%6%/%7%g%s$N?t$r$b$H$K\e(B tps \e$B$r5a$a$F$$$k$?$a!"\e(B
30750+ \e$B%Y%s%A%^!<%/$,ESCf$G=*N;$7$F$7$^$C$?>l9g!"I=<($5$l$k\e(B tps \e$B$,?.MQ$G$-\e(B
30751+ \e$B$J$$2DG=@-$,$"$k$3$H$KCm0U$7$F$/$@$5$$!#\e(B
30752+
30753+\e$B$J$*!"\e(Bpgcbench \e$B$O\e(B pgbench \e$B$H$$$&\e(B PostgreSQL \e$BMQ$K=q$+$l$?%Y%s%A%^!<%/%F\e(B
30754+\e$B%9%H$r9T$J$&%W%m%0%i%`$r$b$H$K:n@.$5$l$^$7$?!#\e(B
30755+
30756+
30757+\e$B"#\e(B pgbench \e$B$H$N0c$$\e(B
30758+
30759+\e$B!&\e(B \e$B%^%k%A%f!<%64D6-$N<B8=J}K!\e(B
30760+
30761+ pgbench \e$B$,\e(B libpq \e$B$NHsF14|=hM}5!G=$r;H$C$?%7%s%0%k%W%m%;%9$K$h$C$F%7\e(B
30762+ \e$B%_%e%l!<%H$7$F$$$k$N$KBP$7$F!"\e(Bpgcbench \e$B$G$O\e(B fork \e$B$r;H$C$?%^%k%A%W%m\e(B
30763+ \e$B%;%9$K$h$C$F<B8=$7$F$$$^$9!#$3$l$O!"%7%s%0%k%W%m%;%9$G$O%m%C%/$,H/\e(B
30764+ \e$B@8$9$k$H!"\e(BPGCluster \e$B$,;_$^$C$F$7$^$&$3$H$rHr$1$k$?$a$G$9!#\e(B
30765+
30766+\e$B!&\e(B \e$B%*%W%7%g%s$NJQ99\e(B
30767+
30768+ pgcbench \e$B$K$O!"\e(BPGCluster \e$B$N%Y%s%A%^!<%/%F%9%H$r9T$J$&$N$KJXMx$J!"%H\e(B
30769+ \e$B%i%s%6%/%7%g%s$NFbMF$r;XDj$9$k%*%W%7%g%s$,!"$$$/$D$+DI2C$5$l$F$$$^\e(B
30770+ \e$B$9!%\e(B
30771+
30772+
30773+\e$B"#\e(B pgcbench \e$B$N%$%s%9%H!<%k\e(B
30774+
30775+1. PGCluster \e$B$r\e(B configure\e$B!"\e(Bmake \e$B$7$^$9!#\e(B
30776+
30777+ pgcbench \e$B$N%$%s%9%H!<%k$@$1$,L\E*$G$"$l$P!"\e(BPGCluster \e$B$N$9$Y$F$r%3%s\e(B
30778+ \e$B%Q%$%k$9$kI,MW$O$"$j$^$;$s!#\e(BPGCluster \e$B%=!<%9$N%H%C%W%G%#%l%/%H%j$G\e(B
30779+ configure \e$B$r$7$?8e!"\e(Bsrc/interface/libpq \e$B$G\e(B make all \e$B$r<B9T$9$l$P=`\e(B
30780+ \e$BHw40N;$G$9!#\e(B
30781+
30782+2. \e$B$3$N%G%#%l%/%H%j\e(B (src/pgcluster/tool) \e$B$G\e(B make \e$B$r<B9T$7$^$9!#\e(B
30783+
30784+ \e$B$=$&$9$k$H!"\e(Bpgcbench \e$B$H$$$&<B9T%W%m%0%i%`$,$G$-$^$9!#$=$N$^$^<B9T$7\e(B
30785+ \e$B$F$b9=$$$^$;$s$7!"\e(Bmake install \e$B$r<B9T$7$F\e(B PGCluster \e$B$NI8=`<B9T%W%m\e(B
30786+ \e$B%0%i%`%G%#%l%/%H%j\e(B (\e$B%G%U%)%k%H$G$O\e(B /usr/local/pgsql/bin) \e$B$K%$%s%9%H!<\e(B
30787+ \e$B%k$9$k$3$H$b$G$-$^$9!#\e(B
30788+
30789+
30790+\e$B"#\e(B pgcbench \e$B$N;H$$J}\e(B
30791+
30792+ pgcbench [\e$B%*%W%7%g%s\e(B] [\e$B%G!<%?%Y!<%9L>\e(B]
30793+
30794+\e$B%G!<%?%Y!<%9L>$r>JN,$9$k$H!"%m%0%$%sL>$HF1$8%G!<%?%Y!<%9L>$r;XDj$7$?$b\e(B
30795+\e$B$N$H8+$J$7$^$9!#$J$*!"\e(Bpgcbench \e$B$G%G%U%)%k%H$N%Y%s%A%^!<%/$r<B;\$9$k$?\e(B
30796+\e$B$a$K$O!"8e=R$N\e(B -i \e$B%*%W%7%g%s$r;HMQ$7$F%G!<%?%Y!<%9$r$"$i$+$8$a=i4|2=$7\e(B
30797+\e$B$F$*$/I,MW$,$"$j$^$9!#\e(B
30798+
30799+pgcbench \e$B$K$O$$$m$$$m$J%*%W%7%g%s$,$"$j$^$9!#\e(B
30800+
30801+-h \e$B%[%9%HL>\e(B
30802+
30803+ PostgreSQL\e$B$N\e(B \e$B%G!<%?%Y!<%9%G!<%b%s\e(B postmaster \e$B$NF0:n$7$F$$$k%[%9%HL>\e(B
30804+ \e$B$r;XDj$7$^$9!#>JN,$9$k$H4D6-JQ?t\e(B PGHOST \e$B$K@_Dj$7$?%[%9%HL>$,;XDj$5\e(B
30805+ \e$B$l$^$9!#\e(BPGHOST \e$B$b;XDj$5$l$F$$$J$$$H<+%[%9%H$K\e(B Unix \e$B%I%a%$%s%=%1%C%H\e(B
30806+ \e$B$G@\B3$7$^$9!#\e(B
30807+
30808+-p \e$B%]!<%HHV9f\e(B
30809+
30810+ postmaster \e$B$N;HMQ$9$k%]!<%HHV9f$r;XDj$7$^$9!#>JN,$9$k$H4D6-JQ?t\e(B
30811+ PGPORT \e$B$K@_Dj$7$?%]!<%HHV9f$,;XDj$5$l$^$9!#\e(BPGPORT \e$B$b;XDj$5$l$F$$$J\e(B
30812+ \e$B$$$H\e(B 5432 \e$B$,;XDj$5$l$?$b$N$H8+$J$7$^$9!#\e(B
30813+
30814+-c \e$B%/%i%$%"%s%H?t\e(B
30815+
30816+ \e$BF1;~<B9T%/%i%$%"%s%H?t$r;XDj$7$^$9!#>JN,;~$O\e(B 1 \e$B$H$J$j$^$9!#\e(Bpgcbench
30817+ \e$B$OF1;~<B9T%/%i%$%"%s%H$4$H$K%U%!%$%k%G%#%9%/%j%W%?$r;HMQ$9$k$N$G!"\e(B
30818+ \e$B;HMQ2DG=%U%!%$%k%G%#%9%/%j%W%??t$r1[$($k%/%i%$%"%s%H?t$O;XDj$G$-$^\e(B
30819+ \e$B$;$s!#;HMQ2DG=%U%!%$%k%G%#%9%/%j%W%??t$O\e(B limit \e$B$d\e(B ulimit \e$B%3%^%s%I$G\e(B
30820+ \e$B3NG'$9$k$3$H$,$G$-$^$9!#\e(B
30821+
30822+-t \e$B%H%i%s%6%/%7%g%s?t\e(B
30823+
30824+ \e$B%H%i%s%6%/%7%g%s?t$r;XDj$7$^$9!#3F%/%i%$%"%s%H$,<B9T$9$k%H%i%s%6%/\e(B
30825+ \e$B%7%g%s?t$O$3$l$r%/%i%$%"%s%H?t$G3d$C$??t$H$J$j$^$9!#>JN,;~$O\e(B 10 \e$B$H\e(B
30826+ \e$B$J$j$^$9!#\e(B
30827+
30828+-s \e$B%9%1!<%j%s%0%U%!%/%?!<\e(B
30829+
30830+ -i \e$B%*%W%7%g%s$H$H$b$K;HMQ$7$^$9!#%9%1!<%j%s%0%U%!%/%?!<$O\e(B 1 \e$B0J>e$N\e(B
30831+ \e$B@0?t$G$9!#%9%1!<%j%s%0%U%!%/%?!<$rJQ$($k$3$H$K$h$j!"%F%9%H$NBP>]$H\e(B
30832+ \e$B$J$k%F!<%V%k$NBg$-$5$,\e(B 10 \e$BK|\e(B \e$B!_\e(B \e$B%9%1!<%j%s%0%U%!%/%?!<$K$J$j$^$9!#\e(B
30833+ \e$B%G%U%)%k%H$N%9%1!<%j%s%0%U%!%/%?!<$O\e(B 1 \e$B$G$9!#\e(B
30834+
30835+-u \e$B%m%0%$%sL>\e(B
30836+
30837+ DB \e$B%f!<%6$N%m%0%$%sL>$r;XDj$7$^$9!#>JN,$9$k$H4D6-JQ?t\e(B PGUSER \e$B$K@_Dj\e(B
30838+ \e$B$7$?%m%0%$%sL>$,;XDj$5$l$^$9!#\e(B
30839+
30840+-P \e$B%Q%9%o!<%I\e(B
30841+
30842+ \e$B%Q%9%o!<%I$r;XDj$7$^$9!#$J$*!"$3$N%*%W%7%g%s$r;H$&$H!"%Q%9%o!<%I$r\e(B
30843+ ps \e$B%3%^%s%I$G8+$i$l$k$J$I!"%;%-%e%j%F%#%[!<%k$K$J$k2DG=@-$,$"$k$N$G!"\e(B
30844+ \e$B%F%9%HMQ$K$N$_$*;H$$2<$5$$!#\e(B
30845+
30846+-n
30847+
30848+ \e$B$3$N%*%W%7%g%s$r;XDj$9$k$H!"%Y%s%A%^!<%/3+;OA0$K\e(B VACUUM \e$B$H\e(B history
30849+ \e$B%F!<%V%k$N%/%j%"$r<B9T$7$^$;$s!#\e(B
30850+
30851+-v
30852+
30853+ \e$B$3$N%*%W%7%g%s$r;XDj$9$k$H!"%Y%s%A%^!<%/3+;OA0$K\e(B VACUUM \e$B$H\e(B history
30854+ \e$B%F!<%V%k$N%/%j%"$r<B9T$7$^$9!#\e(B-v \e$B$H\e(B -n \e$B$r>JN,$9$k$H!":G>.8B$N\e(B
30855+ VACUUM \e$B$J$I$r<B9T$7$^$9!#$9$J$o$A!"\e(Bhistory \e$B%F!<%V%k$N%/%j%"$H!"\e(B
30856+ branches\e$B!"\e(Btellers\e$B!"\e(Bhistory \e$B%F!<%V%k$N\e(B VACUUM \e$B$r<B9T$7$^$9!#$3$l$O!"\e(B
30857+ VACUUM \e$B$N;~4V$r:G>.8B$K$7$J$,$i!"%Q%U%)!<%^%s%9$K1F6A$9$k%4%_A]=|$r\e(B
30858+ \e$B8z2LE*$K<B9T$7$^$9!#DL>o$O\e(B -v \e$B$H\e(B -n \e$B$r>JN,$9$k$3$H$r?d>)$7$^$9!#\e(B
30859+
30860+-I
30861+
30862+ \e$BA^F~$N$_$N%H%i%s%6%/%7%g%s$r<B9T$7$^$9!#A^F~%9%T!<%I$rB,Dj$7$?$$$H\e(B
30863+ \e$B$-$K;H$$$^$9!#\e(B
30864+
30865+-U
30866+
30867+ \e$B99?7$N$_$N%H%i%s%6%/%7%g%s$r<B9T$7$^$9!#99?7%9%T!<%I$rB,Dj$7$?$$$H\e(B
30868+ \e$B$-$K;H$$$^$9!#\e(B
30869+
30870+-S
30871+
30872+ \e$B8!:w$N$_$N%H%i%s%6%/%7%g%s$r<B9T$7$^$9!#8!:w%9%T!<%I$rB,Dj$7$?$$$H\e(B
30873+ \e$B$-$K;H$$$^$9!#\e(B
30874+
30875+-f ファイル名
30876+
30877+ \e$B%H%i%s%6%/%7%g%s$NFbMF$,5-=R$5$l$?%U%!%$%kL>$r;XDj$7$^$9!#$3$N%*%W\e(B
30878+ \e$B%7%g%s$r;XDj$9$k$H!"%U%!%$%k$K5-=R$5$l$?FbMF$N%H%i%s%6%/%7%g%s$r<B\e(B
30879+ \e$B9T$7$^$9!#%Y%s%A%^!<%/$G;HMQ$9$k%F!<%V%k$O$"$i$+$8$a=i4|2=$7$F$*$/\e(B
30880+ \e$BI,MW$,$"$j$^$9!#F~NO%U%!%$%k$N%U%)!<%^%C%H$O8e=R$7$^$9!#\e(B
30881+
30882+-T
30883+
30884+ BEGIN と END で囲まれたトランザクションブロック内でトランザクション
30885+ を実行します。
30886+
30887+-C
30888+
30889+ \e$B$3$N%*%W%7%g%s$r;XDj$9$k$H!":G=i$K3NN)$7$?%3%M%/%7%g%s$r;H$$2s$9$N\e(B
30890+ \e$B$G$O$J$/!"3F%H%i%s%6%/%7%g%s$4$H$K\e(B DB \e$B$X$N@\B3$r9T$$$^$9!#%3%M%/%7%g\e(B
30891+ \e$B%s$N%*!<%P!<$X%C%I$rB,Dj$9$k$N$KM-8z$G$9!#\e(B
30892+
30893+-l
30894+
30895+ \e$B8D!9$N%H%i%s%6%/%7%g%s$N<B9T;~4V$r5-O?$7$^$9!#5-O?@h$O%+%l%s%H%G%#\e(B
30896+ \e$B%l%/%H%j0J2<$N\e(B pgbench_log.xxx \e$B$H$$$&%U%!%$%k$G$9!#%U%!%$%k$N%U%)!<\e(B
30897+ \e$B%^%C%H$O0J2<$N$h$&$K$J$j$^$9!#;~4V$O%^%$%/%mICC10L$G$9!#\e(B
30898+
30899+ <クライアント ID> <トランザクション番号> <時間>
30900+
30901+-d
30902+
30903+ デバッグオプション。様々な情報が表示されます。
30904+
30905+
30906+■ データベースの初期化
30907+
30908+pgcbench \e$B$G%G%U%)%k%H$N%Y%s%A%^!<%/%F%9%H$r<B;\$9$k$?$a$K$O!"$"$i$+$8\e(B
30909+\e$B$a%G!<%?%Y!<%9$r=i4|2=$7!"%F%9%H%G!<%?$r:n$kI,MW$,$"$j$^$9!#\e(B
30910+
30911+ pgcbench -i [\e$B%G!<%?%Y!<%9L>\e(B]
30912+
30913+\e$B$3$l$K$h$j0J2<$N%F!<%V%k$,:n@.$5$l$^$9\e(B (\e$B%9%1!<%j%s%0%U%!%/%?!<$,\e(B 1 \e$B$N\e(B
30914+\e$B>l9g\e(B)\e$B!#\e(B
30915+
30916+ テーブル名 | 行数
30917+ ------------+--------
30918+ branches | 1
30919+ tellers | 10
30920+ accounts | 100000
30921+ history | 0
30922+
30923+※ 注意
30924+
30925+ \e$BF1$8L>A0$N%F!<%V%k$,$"$k$H:o=|$5$l$F$7$^$&$N$G$4Cm0U2<$5$$!#\e(B
30926+
30927+\e$B%9%1!<%j%s%0%U%!%/%?!<$r\e(B 10\e$B!"\e(B100\e$B!"\e(B1000 \e$B$J$I$KJQ99$9$k$H!">e5-9T?t$O$=\e(B
30928+\e$B$l$K1~$8$F\e(B 10 \e$BG\!"\e(B100 \e$BG\!"\e(B1000 \e$BG\$K$J$j$^$9!#Nc$($P!"%9%1!<%j%s%0%U%!\e(B
30929+\e$B%/%?!<$r\e(B 100 \e$B$H$9$k$H0J2<$N$h$&$K$J$j$^$9!#\e(B
30930+
30931+ テーブル名 | 行数
30932+ ------------+----------
30933+ branches | 100
30934+ tellers | 1000
30935+ accounts | 10000000
30936+ history | 0
30937+
30938+
30939+■ 入力ファイルのフォーマット
30940+
30941+pgcbench \e$B$G$O!"\e(B-f \e$B%*%W%7%g%s$r;XDj$7$F%H%i%s%6%/%7%g%s$K4^$^$l$k\e(B SQL
30942+\e$B%3%^%s%I$NFbMF$r5-=R$7$?%U%!%$%k$rFI$_9~$`$3$H$,$G$-$^$9!#F~NO%U%!%$%k\e(B
30943+\e$B$K$O\e(B 1 \e$B9T$K$D$-\e(B 1 \e$B$D$N%3%^%s%I$r5-=R$7$^$9!#6u9T$OL5;k$5$l!"Fs=E%O%$%U\e(B
30944+\e$B%s$G;O$^$k9T$O%3%a%s%H$r0UL#$7$^$9!#\e(B
30945+
30946+\e$B%3%^%s%I$K$O!"\e(BSQL \e$B%3%^%s%I$K2C$(!"%P%C%/%9%i%C%7%e$G;O$^$k%a%?%3%^%s%I\e(B
30947+\e$B$r5-=R$9$k$3$H$,$G$-$^$9!#%a%?%3%^%s%I$O\e(B pgcbench \e$B<+?H$K$h$C$F<B9T$5$l\e(B
30948+\e$B$^$9!#%a%?%3%^%s%I$N7A<0$O%P%C%/%9%i%C%7%e!"$=$ND>8e$K%3%^%s%I$NF0;l!"\e(B
30949+\e$B$=$N<!$K0z?t$,B3$-$^$9!#F0;l%3%^%s%I$H0z?t!"$^$?$=$l$>$l$N0z?t$O6uGrJ8\e(B
30950+\e$B;z$K$h$C$F6hJL$5$l$^$9!#\e(B
30951+
30952+\e$B:#$N$H$3$m!"0J2<$N%a%?%3%^%s%I$,Dj5A$5$l$F$$$^$9!#\e(B
30953+
30954+\setrandom name min max
30955+
30956+ \e$B:G>.CM\e(B min \e$B$H:GBgCM\e(B max \e$B$N4V$NCM$r<h$kMp?t$r!"\e(Bname \e$BJQ?t$K@_Dj$7$^$9!#\e(B
30957+
30958+\e$BJQ?t$KMp?t$r@_Dj$9$k$K$O!"\e(B\setrandom \e$B%a%?%3%^%s%I$r;HMQ$7$F0J2<$N$h$&\e(B
30959+\e$B$K5-=R$7$^$9!#\e(B
30960+
30961+ \setrandom aid 1 100000
30962+
30963+\e$B$3$l$O!"JQ?t\e(B aid \e$B$K\e(B 1 \e$B$+$i\e(B 100000 \e$B$N4V$NMp?t$r@_Dj$7$^$9!#$^$?!"JQ?t$N\e(B
30964+\e$BCM$r\e(B SQL \e$B%3%^%s%I$KKd$a9~$`$K$O!"0J2<$N$h$&$K$=$NL>A0$NA0$K%3%m%s$rIU\e(B
30965+\e$B$1$^$9!#\e(B
30966+
30967+ SELECT abalance FROM accounts WHERE aid = :aid
30968+
30969+\e$BNc$($P!"\e(BTPC-B \e$B$K;w$?%Y%s%A%^!<%/$r9T$&$K$O!"0J2<$N$h$&$K%H%i%s%6%/%7%g\e(B
30970+\e$B%s$NFbMF$r%U%!%$%k$K5-=R$7!"\e(B-f \e$B%*%W%7%g%s$K$h$C$F$=$N%U%!%$%k$r;XDj$7\e(B
30971+\e$B$F\e(B pgcbench \e$B$r<B9T$7$^$9!#\e(B
30972+
30973+ \setrandom aid 1 100000
30974+ \setrandom bid 1 1
30975+ \setrandom tid 1 10
30976+ \setrandom delta 1 1000
30977+ BEGIN
30978+ UPDATE accounts SET abalance = abalance + :delta WHERE aid = :aid
30979+ SELECT abalance FROM accounts WHERE aid = :aid
30980+ UPDATE tellers SET tbalance = tbalance + :delta WHERE tid = :tid
30981+ UPDATE branches SET bbalance = bbalance + :delta WHERE bid = :bid
30982+ INSERT INTO history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, 'now')
30983+
30984+\e$B$J$*!"$3$NNc$G$O!"\e(B-i \e$B%*%W%7%g%s$r;H$C$F=i4|2=$7$?%G!<%?%Y!<%9\e(B (\e$B%9%1!<\e(B
30985+\e$B%j%s%0%U%!%/%?!<$,\e(B 1 \e$B$N>l9g\e(B) \e$B$KBP$7$F%Y%s%A%^!<%/$r9T$&$3$H$r2>Dj$7$F\e(B
30986+\e$B$$$^$9!#\e(B
30987+
30988+
30989+■ トランザクションの定義
30990+
30991+pgcbench \e$B$N%G%U%)%k%H$N%Y%s%A%^!<%/$G$O!"0J2<$N\e(B SQL \e$B%3%^%s%I$rA4It40N;\e(B
30992+\e$B$7$F\e(B 1 \e$B%H%i%s%6%/%7%g%s$H?t$($F$$$^$9!#\e(B
30993+
30994+1. SELECT abalance FROM accounts WHERE aid = :aid
30995+
30996+ :aid \e$B$O\e(B 1 \e$B$+$i%9%1!<%j%s%0%U%!%/%?!<\e(B \e$B!_\e(B 10 \e$BK|$^$G$NCM$r<h$kMp?t$G$9!#\e(B
30997+ \e$B$3$3$G$O\e(B 1 \e$B7o$@$18!:w$5$l$^$9!#0J2<!"Mp?t$NCM$O$=$l$>$l$3$N%H%i%s%6\e(B
30998+ \e$B%/%7%g%s$NCf$G$OF1$8CM$r;H$$$^$9!#\e(B
30999+
31000+2. UPDATE accounts SET abalance = abalance + :delta WHERE aid = :aid
31001+
31002+ :delta \e$B$O\e(B 1 \e$B$+$i\e(B 1000 \e$B$^$G$NCM$r<h$kMp?t$G$9!#\e(B
31003+
31004+3. SELECT abalance FROM accounts WHERE aid = :aid
31005+
31006+4. INSERT INTO history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, 'now')
31007+
31008+ :tid \e$B$O\e(B 1 \e$B$+$i%9%1!<%j%s%0%U%!%/%?!<\e(B \e$B!_\e(B 10 \e$B$^$G$NCM$r<h$kMp?t!"\e(B:bid
31009+ \e$B$O\e(B 1 \e$B$+$i%9%1!<%j%s%0%U%!%/%?!<$^$G$NCM$r<h$kMp?t$G$9!#\e(B
31010+
31011+5. INSERT INTO history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, 'now')
31012+
31013+6. INSERT INTO history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, 'now')
31014+
31015+7. SELECT abalance FROM accounts WHERE aid = :aid
31016+
31017+-T \e$B%*%W%7%g%s$r;XDj$7$F%H%i%s%6%/%7%g%s%V%m%C%/Fb$G%H%i%s%6%/%7%g%s$r\e(B
31018+\e$B<B9T$9$k>l9g!"\e(B1 \e$B$r\e(B BEGIN\e$B$K!"\e(B7 \e$B$r\e(B END \e$B$KCV$-49$($?\e(B SQL \e$B%3%^%s%I$,<B9T$5\e(B
31019+\e$B$l$^$9!#$^$?!"%H%i%s%6%/%7%g%s$H$7$F<B9T$5$l$k\e(B SQL \e$B%3%^%s%I$O!"\e(B-I \e$B%*%W\e(B
31020+\e$B%7%g%s\e(B (\e$BA^F~$N$_\e(B) \e$B$G$"$l$P\e(B 4\e$B!"\e(B-U (\e$B99?7$N$_\e(B) \e$B$G$"$l$P\e(B 2\e$B!"\e(B-S (\e$B8!:w$N$_\e(B)
31021+\e$B$G$"$l$P\e(B 1 \e$B$H$J$j$^$9!#\e(B
31022diff -aruN postgresql-8.2.4/src/pgcluster/tool/pgcbench.c pgcluster-1.7.0rc7/src/pgcluster/tool/pgcbench.c
31023--- postgresql-8.2.4/src/pgcluster/tool/pgcbench.c 1970-01-01 01:00:00.000000000 +0100
31024+++ pgcluster-1.7.0rc7/src/pgcluster/tool/pgcbench.c 2007-02-18 22:52:17.000000000 +0100
31025@@ -0,0 +1,1625 @@
31026+/*
31027+ * pgcbench: a simple benchmark program for PGCluster
31028+ * This program was written based on pgbench by Tatsuo Ishii.
31029+ *
31030+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
31031+ * Portions Copyright (c) 2000-2006, Tatsuo Ishii
31032+ *
31033+ * Permission to use, copy, modify, and distribute this software and
31034+ * its documentation for any purpose and without fee is hereby
31035+ * granted, provided that the above copyright notice appear in all
31036+ * copies and that both that copyright notice and this permission
31037+ * notice appear in supporting documentation, and that the name of the
31038+ * author not be used in advertising or publicity pertaining to
31039+ * distribution of the software without specific, written prior
31040+ * permission. The author makes no representations about the
31041+ * suitability of this software for any purpose. It is provided "as
31042+ * is" without express or implied warranty.
31043+ */
31044+#include "postgres_fe.h"
31045+
31046+#include "libpq-fe.h"
31047+
31048+#include <errno.h>
31049+
31050+#ifdef WIN32
31051+#include "win32.h"
31052+#else
31053+#include <sys/time.h>
31054+#include <unistd.h>
31055+
31056+#ifdef HAVE_GETOPT_H
31057+#include <getopt.h>
31058+#endif
31059+
31060+#ifdef HAVE_SYS_SELECT_H
31061+#include <sys/select.h>
31062+#endif
31063+
31064+/* for getrlimit */
31065+#include <sys/resource.h>
31066+#endif /* ! WIN32 */
31067+
31068+#include <sys/types.h>
31069+#include <sys/wait.h>
31070+
31071+#include <ctype.h>
31072+#include <search.h>
31073+
31074+extern char *optarg;
31075+extern int optind;
31076+
31077+#ifdef WIN32
31078+#undef select
31079+#endif
31080+
31081+
31082+/********************************************************************
31083+ * some configurable parameters */
31084+
31085+#define MAXCLIENTS 4096 /* max number of clients allowed */
31086+
31087+int nclients = 1; /* default number of simulated clients */
31088+int nxacts = 10; /* default number of transactions per
31089+ * clients */
31090+
31091+/*
31092+ * scaling factor. for example, tps = 10 will make 1000000 tuples of
31093+ * accounts table.
31094+ */
31095+int tps = 1; /* multiplies nbranches, ntellers and naccounts below */
31096+
31097+/*
31098+ * end of configurable parameters
31099+ *********************************************************************/
31100+/* rows per unit of scaling factor; init() creates each count multiplied by tps */
31101+#define nbranches 1
31102+#define ntellers 10
31103+#define naccounts 100000
31104+/* transaction types; doMix() and doOne() compare their ttype argument against these */
31105+#define SELECT_ONLY (1)
31106+#define INSERT_ONLY (2)
31107+#define UPDATE_ONLY (3)
31108+#define WITH_TRANSACTION (4)
31109+#define TPC_B_LIKE (5)
31110+#define CUSTOM_QUERY (6)
31111+/* values of Command.type, tested by doCustom() */
31112+#define SQL_COMMAND 1
31113+#define META_COMMAND 2
31114+
31115+FILE *LOGFILE = NULL; /* per-transaction latency lines are written here when use_log is set */
31116+
31117+bool use_log = false; /* log transaction latencies to a file */
31118+
31119+int remains; /* number of remaining clients */
31120+
31121+int is_connect; /* establish connection for each
31122+ * transaction */
31123+/* connection parameters handed to PQsetdbLogin() by doConnect() */
31124+char *pghost = "";
31125+char *pgport = NULL;
31126+char *pgoptions = NULL;
31127+char *pgtty = NULL;
31128+char *login = NULL;
31129+char *pwd = NULL;
31130+char *dbName;
31131+
31132+typedef struct /* one script variable, kept in a CState's tsearch() tree */
31133+{
31134+ char *name; /* key; compareVariables() orders entries by strcmp() on it */
31135+ char *value; /* current value as a string (a strdup() copy made by putVariable()) */
31136+} Variable;
31137+
31138+typedef struct /* per-client benchmark state */
31139+{
31140+ PGconn *con; /* connection handle to DB */
31141+ int id; /* client No. */
31142+ int state; /* state No. */
31143+ int cnt; /* xacts count */
31144+ int ecnt; /* error count */
31145+ int maxAct; /* client is finished once cnt reaches this value */
31146+ int listen; /* non-zero indicates that an async query has
31147+ * been sent and its response is awaited */
31148+ int aid; /* account id for this transaction */
31149+ int bid; /* branch id for this transaction */
31150+ int tid; /* teller id for this transaction */
31151+ int delta; /* amount added to the balances in this transaction */
31152+ int abalance; /* NOTE(review): not referenced by the functions visible here */
31153+ void *variables; /* root of the tsearch() tree of Variable entries */
31154+ struct timeval txn_begin; /* used for measuring latencies */
31155+} CState;
31156+
31157+typedef struct /* one command of a custom script */
31158+{
31159+ int type; /* SQL_COMMAND or META_COMMAND */
31160+ int argc; /* number of entries in argv */
31161+ char **argv; /* SQL: argv[0] is the statement text; meta: argv[0] is the command name */
31162+} Command;
31163+
31164+Command **commands = NULL; /* NULL-terminated command list executed by doCustom() */
31165+/* print the command-line synopsis for benchmark mode and for initialize mode to stderr */
31166+static void
31167+usage(void)
31168+{
31169+ fprintf(stderr, "usage: pgcbench [-h hostname][-p port][-c nclients][-t ntransactions][-s scaling_factor][-I(insert only)][-U(update only)][-S(select only)][-f filename][-u login][-P password][-d(debug)][dbname]\n");
31170+ fprintf(stderr, "(initialize mode): pgcbench -i [-h hostname][-p port][-s scaling_factor][-u login][-P password][-d(debug)][dbname]\n");
31171+}
31172+
31173+/* random number generator: returns an integer in [min, max], both inclusive (expects min <= max) */
31174+static int
31175+getrand(int min, int max )
31176+{
31177+ /* scale rand() by the span (max - min + 1); scaling by max alone overshoots max whenever min != 1 */
31178+ return (min + (int) ((max - min + 1.0) * rand() / (RAND_MAX + 1.0)));
31179+}
31180+
31181+/* set up a connection to the backend; returns NULL on connection failure, exits if SET search_path fails */
31182+static PGconn *
31183+doConnect(void)
31184+{
31185+ PGconn *con;
31186+ PGresult *res;
31187+
31188+ con = PQsetdbLogin(pghost, pgport, pgoptions, pgtty, dbName,
31189+ login, pwd);
31190+ if (con == NULL)
31191+ {
31192+ fprintf(stderr, "Connection to database '%s' failed.\n", dbName);
31193+ fprintf(stderr, "Memory allocatin problem?\n");
31194+ return (NULL);
31195+ }
31196+
31197+ if (PQstatus(con) == CONNECTION_BAD)
31198+ {
31199+ fprintf(stderr, "Connection to database '%s' failed.\n", dbName);
31200+
31201+ if (PQerrorMessage(con))
31202+ fprintf(stderr, "%s", PQerrorMessage(con));
31203+ else
31204+ fprintf(stderr, "No explanation from the backend\n");
31205+ PQfinish(con); /* a failed PGconn still owns memory; free it so reconnect attempts do not leak */
31206+ return (NULL);
31207+ }
31208+
31209+ res = PQexec(con, "SET search_path = public");
31210+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
31211+ {
31212+ fprintf(stderr, "%s", PQerrorMessage(con));
31213+ exit(1);
31214+ }
31215+ PQclear(res);
31216+
31217+ return (con);
31218+}
31219+
31220+/* throw away response from backend: fetch and free results until PQgetResult() returns NULL */
31221+static void
31222+discard_response(CState * state)
31223+{
31224+ PGresult *res;
31225+
31226+ do
31227+ {
31228+ res = PQgetResult(state->con);
31229+ if (res)
31230+ PQclear(res);
31231+ } while (res); /* res is only tested for NULL here, never dereferenced after PQclear() */
31232+}
31233+
31234+/* check to see if the SQL result was good; returns 0 if so (or if res is NULL), else frees res, closes st->con and returns -1 */
31235+static int
31236+check(CState * st, PGresult *res, int good)
31237+{
31238+ if (res == NULL || PQresultStatus(res) == good)
31239+ return (0); /* OK */
31240+
31241+ fprintf(stderr, "aborted in state %d: %s", st->state, PQerrorMessage(st->con));
31242+ PQclear(res); /* callers return right away on failure and never free the result themselves */
31243+ PQfinish(st->con);
31244+ st->con = NULL;
31245+ return (-1);
31246+}
31247+/* comparison callback for tsearch()/tfind(): orders two Variable entries by name */
31248+static int
31249+compareVariables(const void *v1, const void *v2)
31250+{
31251+ return strcmp(((Variable *)v1)->name, ((Variable *)v2)->name);
31252+}
31253+/* look up a variable of client st by name; returns its value string, or NULL if no such variable exists */
31254+static char *
31255+getVariable(CState * st, char *name)
31256+{
31257+ Variable key = { name }, *var; /* key carries only the name; value is not used for the lookup */
31258+
31259+ var = tfind(&key, &st->variables, compareVariables);
31260+ if (var != NULL)
31261+ return (*(Variable **)var)->value; /* the tfind() result is dereferenced as a pointer to the stored Variable * */
31262+ else
31263+ return NULL;
31264+}
31265+/* set variable name of client st to a copy of value, creating it if needed; returns true, or false when out of memory */
31266+static int
31267+putVariable(CState * st, char *name, char *value)
31268+{
31269+ Variable key = { name }, *var;
31270+
31271+ var = tfind(&key, &st->variables, compareVariables);
31272+ if (var == NULL)
31273+ { /* not present yet: build a new entry and insert it */
31274+ if ((var = malloc(sizeof(Variable))) == NULL)
31275+ return false;
31276+
31277+ var->name = NULL;
31278+ var->value = NULL;
31279+
31280+ if ((var->name = strdup(name)) == NULL
31281+ || (var->value = strdup(value)) == NULL
31282+ || tsearch(var, &st->variables, compareVariables) == NULL)
31283+ {
31284+ free(var->name); /* either pointer may still be NULL here; free(NULL) is a no-op */
31285+ free(var->value);
31286+ free(var);
31287+ return false;
31288+ }
31289+ }
31290+ else
31291+ { /* already present: replace the stored value string */
31292+ free((*(Variable **)var)->value);
31293+ if (((*(Variable **)var)->value = strdup(value)) == NULL)
31294+ return false; /* the entry stays in the tree with a NULL value */
31295+ }
31296+
31297+ return true;
31298+}
31299+/* replace each ":name" in the malloc'd string sql by the value of variable name; returns the (possibly moved) string, or NULL when out of memory */
31300+static char *
31301+assignVariables(CState * st, char *sql)
31302+{
31303+ int i, j;
31304+ char *p, *name, *val;
31305+ void *tmp;
31306+
31307+ i = 0;
31308+ while ((p = strchr(&sql[i], ':')) != NULL)
31309+ {
31310+ i = j = p - sql; /* j stays on the ':'; i advances to one past the end of the name */
31311+ do
31312+ i++;
31313+ while (isalnum((unsigned char) sql[i]) != 0 || sql[i] == '_'); /* cast: isalnum() is undefined for negative char values */
31314+ if (i == j + 1)
31315+ continue; /* a lone ':' with no name after it */
31316+
31317+ name = malloc(i - j);
31318+ if (name == NULL)
31319+ return NULL; /* NOTE(review): sql is not freed on this path, unlike the realloc failures below */
31320+ memcpy(name, &sql[j + 1], i - (j + 1));
31321+ name[i - (j + 1)] = '\0';
31322+ val = getVariable(st, name);
31323+ free(name);
31324+ if (val == NULL)
31325+ continue; /* unknown variable: leave the text untouched */
31326+
31327+ if (strlen(val) > i - j)
31328+ { /* value is longer than ":name": grow the buffer first */
31329+ tmp = realloc(sql, strlen(sql) - (i - j) + strlen(val) + 1);
31330+ if (tmp == NULL)
31331+ {
31332+ free(sql);
31333+ return NULL;
31334+ }
31335+ sql = tmp;
31336+ }
31337+
31338+ if (strlen(val) != i - j)
31339+ memmove(&sql[j + strlen(val)], &sql[i], strlen(&sql[i]) + 1); /* shift the tail, including its NUL */
31340+
31341+ strncpy(&sql[j], val, strlen(val)); /* copies the value without a terminating NUL */
31342+
31343+ if (strlen(val) < i - j)
31344+ { /* value is shorter than ":name": shrink the buffer */
31345+ tmp = realloc(sql, strlen(sql) + 1);
31346+ if (tmp == NULL)
31347+ {
31348+ free(sql);
31349+ return NULL;
31350+ }
31351+ sql = tmp;
31352+ }
31353+
31354+ i = j + strlen(val); /* resume scanning after the substituted value */
31355+ }
31356+
31357+ return sql;
31358+}
31359+
31360+/* process a transaction: advance client st by one step of the built-in seven-statement (state 0..6) script */
31361+static void
31362+doMix(CState * st, int debug, int ttype)
31363+{
31364+ char sql[256];
31365+ PGresult *res;
31366+
31367+ if (st->listen)
31368+ { /* are we receiver? */
31369+ if (debug)
31370+ fprintf(stderr, "client receiving\n");
31371+ if (!PQconsumeInput(st->con))
31372+ { /* there's something wrong */
31373+ fprintf(stderr, "Client aborted in state %d. Probably the backend died while processing.\n", st->state);
31374+ PQfinish(st->con);
31375+ st->con = NULL;
31376+ return;
31377+ }
31378+ if (PQisBusy(st->con))
31379+ return; /* don't have the whole result yet */
31380+
31381+ switch (st->state)
31382+ {
31383+ case 0: /* response to "begin" */
31384+ res = PQgetResult(st->con);
31385+ if (ttype == WITH_TRANSACTION)
31386+ {
31387+ if (check(st, res, PGRES_COMMAND_OK))
31388+ return;
31389+ }
31390+ else
31391+ {
31392+ if (check(st, res, PGRES_TUPLES_OK))
31393+ return;
31394+ }
31395+ PQclear(res);
31396+ discard_response(st);
31397+ break;
31398+ case 1: /* response to "update accounts..." */
31399+ res = PQgetResult(st->con);
31400+ if (check(st, res, PGRES_COMMAND_OK))
31401+ return;
31402+ PQclear(res);
31403+ discard_response(st);
31404+ break;
31405+ case 2: /* response to "select abalance ..." */
31406+ res = PQgetResult(st->con);
31407+ if (check(st, res, PGRES_TUPLES_OK))
31408+ return;
31409+ PQclear(res);
31410+ discard_response(st);
31411+ break;
31412+ case 3: /* response to "update tellers ..." */
31413+ res = PQgetResult(st->con);
31414+ if (check(st, res, PGRES_COMMAND_OK))
31415+ return;
31416+ PQclear(res);
31417+ discard_response(st);
31418+ break;
31419+ case 4: /* response to "update branches ..." */
31420+ res = PQgetResult(st->con);
31421+ if (check(st, res, PGRES_COMMAND_OK))
31422+ return;
31423+ PQclear(res);
31424+ discard_response(st);
31425+ break;
31426+ case 5: /* response to "insert into history ..." */
31427+ res = PQgetResult(st->con);
31428+ if (check(st, res, PGRES_COMMAND_OK))
31429+ return;
31430+ PQclear(res);
31431+ discard_response(st);
31432+ break;
31433+ case 6: /* response to "end" */
31434+
31435+ /*
31436+ * transaction finished: record the time it took in the
31437+ * log
31438+ */
31439+ if (use_log)
31440+ {
31441+ double diff;
31442+ struct timeval now;
31443+
31444+ gettimeofday(&now, NULL);
31445+ diff = (int) (now.tv_sec - st->txn_begin.tv_sec) * 1000000.0 +
31446+ (int) (now.tv_usec - st->txn_begin.tv_usec);
31447+
31448+ fprintf(LOGFILE, "%d %d %.0f\n", st->id, st->cnt, diff);
31449+ }
31450+
31451+ res = PQgetResult(st->con);
31452+ if (ttype == WITH_TRANSACTION)
31453+ {
31454+ if (check(st, res, PGRES_COMMAND_OK))
31455+ return;
31456+ }
31457+ else
31458+ {
31459+ if (check(st, res, PGRES_TUPLES_OK))
31460+ return;
31461+ }
31462+ PQclear(res);
31463+ discard_response(st);
31464+
31465+ if (is_connect)
31466+ {
31467+ PQfinish(st->con);
31468+ st->con = NULL;
31469+ }
31470+ if (++st->cnt >= st->maxAct)
31471+ {
31472+ remains--; /* I've done */
31473+ if (st->con != NULL)
31474+ {
31475+ PQfinish(st->con);
31476+ st->con = NULL;
31477+ }
31478+ return;
31479+ }
31480+ break;
31481+ }
31482+
31483+ /* increment state counter */
31484+ st->state++;
31485+ if (st->state > 6)
31486+ {
31487+ st->state = 0;
31488+ remains--; /* I've done -- NOTE(review): reached only while cnt < maxAct; confirm this decrement is intended */
31489+ }
31490+ }
31491+
31492+ if (st->con == NULL)
31493+ {
31494+ if ((st->con = doConnect()) == NULL)
31495+ {
31496+ fprintf(stderr, "Client aborted in establishing connection.\n");
31497+ remains--; /* I've aborted */
31498+ PQfinish(st->con); /* st->con is NULL at this point */
31499+ st->con = NULL;
31500+ return;
31501+ }
31502+ }
31503+
31504+ switch (st->state)
31505+ {
31506+ case 0: /* about to start */
31507+ if (ttype == WITH_TRANSACTION)
31508+ {
31509+ strcpy(sql, "begin");
31510+ }
31511+ else
31512+ {
31513+ st->aid = getrand(1, naccounts * tps);
31514+ snprintf(sql, 256, "select abalance from accounts where aid = %d", st->aid);
31515+ }
31516+ st->aid = getrand(1, naccounts * tps); /* drawn again: states 1-5 use this aid, not the one already formatted into sql */
31517+ st->bid = getrand(1, nbranches * tps);
31518+ st->tid = getrand(1, ntellers * tps);
31519+ st->delta = getrand(1, 1000);
31520+ if (use_log)
31521+ gettimeofday(&(st->txn_begin), NULL);
31522+ break;
31523+ case 1:
31524+ snprintf(sql, 256, "update accounts set abalance = abalance + %d where aid = %d\n", st->delta, st->aid);
31525+ break;
31526+ case 2:
31527+ snprintf(sql, 256, "select abalance from accounts where aid = %d", st->aid);
31528+ break;
31529+ case 3:
31530+ if (ttype == 0) /* NOTE(review): 0 is none of the named transaction-type constants; confirm when this branch runs */
31531+ {
31532+ snprintf(sql, 256, "update tellers set tbalance = tbalance + %d where tid = %d\n",
31533+ st->delta, st->tid);
31534+ break;
31535+ } /* no break: falls through to case 4 when ttype != 0 */
31536+ case 4:
31537+ if (ttype == 0)
31538+ {
31539+ snprintf(sql, 256, "update branches set bbalance = bbalance + %d where bid = %d", st->delta, st->bid);
31540+ break;
31541+ } /* no break: falls through to case 5 when ttype != 0 */
31542+ case 5:
31543+ snprintf(sql, 256, "insert into history(tid,bid,aid,delta,mtime) values(%d,%d,%d,%d,'now')",
31544+ st->tid, st->bid, st->aid, st->delta);
31545+ break;
31546+ case 6:
31547+ if (ttype == WITH_TRANSACTION)
31548+ {
31549+ strcpy(sql, "end");
31550+ }
31551+ else
31552+ {
31553+ st->aid = getrand(1, naccounts * tps);
31554+ snprintf(sql, 256, "select abalance from accounts where aid = %d", st->aid);
31555+ }
31556+ break;
31557+ }
31558+
31559+ if (debug)
31560+ fprintf(stderr, "client sending %s\n", sql);
31561+
31562+ if (PQsendQuery(st->con, sql) == 0)
31563+ {
31564+ if (debug)
31565+ fprintf(stderr, "PQsendQuery(%s)failed\n", sql);
31566+ st->ecnt++;
31567+ }
31568+ else
31569+ {
31570+ st->listen++; /* flags that should be listened */
31571+ }
31572+}
31573+
31574+/* process a single-statement transaction: a select, update or insert on its own, chosen by ttype */
31575+static void
31576+doOne(CState * st, int debug, int ttype )
31577+{
31578+ char sql[256]; /* NOTE(review): stays unset if ttype is none of SELECT_ONLY/UPDATE_ONLY/INSERT_ONLY */
31579+ PGresult *res;
31580+
31581+ if (st->listen)
31582+ { /* are we receiver? */
31583+ if (debug)
31584+ fprintf(stderr, "client receiving\n");
31585+ if (!PQconsumeInput(st->con))
31586+ { /* there's something wrong */
31587+ fprintf(stderr, "Client aborted in state %d. Probably the backend died while processing.\n", st->state);
31588+ remains--; /* I've aborted */
31589+ PQfinish(st->con);
31590+ st->con = NULL;
31591+ return;
31592+ }
31593+ if (PQisBusy(st->con))
31594+ return; /* don't have the whole result yet */
31595+
31596+ switch (st->state)
31597+ {
31598+ case 0: /* response to the single statement sent below */
31599+ res = PQgetResult(st->con);
31600+ if (ttype == SELECT_ONLY)
31601+ {
31602+ if (check(st, res, PGRES_TUPLES_OK))
31603+ return;
31604+ }
31605+ else
31606+ {
31607+ if (check(st, res, PGRES_COMMAND_OK))
31608+ return;
31609+ }
31610+ PQclear(res);
31611+ discard_response(st);
31612+
31613+ if (is_connect)
31614+ {
31615+ PQfinish(st->con);
31616+ st->con = NULL;
31617+ }
31618+
31619+ if (++st->cnt >= st->maxAct)
31620+ {
31621+ remains--; /* I've done */
31622+ if (st->con != NULL)
31623+ {
31624+ PQfinish(st->con);
31625+ st->con = NULL;
31626+ }
31627+ return;
31628+ }
31629+ break;
31630+ }
31631+
31632+ /* increment state counter */
31633+ st->state++;
31634+ if (st->state > 0)
31635+ {
31636+ st->state = 0;
31637+ remains--; /* I've done -- NOTE(review): reached only while cnt < maxAct; confirm this decrement is intended */
31638+ }
31639+ }
31640+
31641+ if (st->con == NULL)
31642+ {
31643+ if ((st->con = doConnect()) == NULL)
31644+ {
31645+ fprintf(stderr, "Client aborted in establishing connection.\n");
31646+ PQfinish(st->con); /* st->con is NULL at this point */
31647+ st->con = NULL;
31648+ return;
31649+ }
31650+ }
31651+
31652+ switch (st->state)
31653+ {
31654+ case 0:
31655+ st->aid = getrand(1, naccounts * tps);
31656+ st->bid = getrand(1, nbranches * tps);
31657+ st->tid = getrand(1, ntellers * tps);
31658+ st->delta = getrand(1, 1000);
31659+ if ( ttype == SELECT_ONLY)
31660+ {
31661+ snprintf(sql, 256, "select abalance from accounts where aid = %d", st->aid);
31662+ }
31663+ if ( ttype == UPDATE_ONLY)
31664+ {
31665+ snprintf(sql, 256, "update accounts set abalance = abalance + %d where aid = %d\n", st->delta, st->aid);
31666+ }
31667+ if ( ttype == INSERT_ONLY)
31668+ {
31669+ snprintf(sql, 256, "insert into history(tid,bid,aid,delta,mtime) values(%d,%d,%d,%d,'now')",
31670+ st->tid, st->bid, st->aid, st->delta);
31671+ }
31672+ break;
31673+ }
31674+
31675+ if (debug)
31676+ fprintf(stderr, "client sending %s\n", sql);
31677+
31678+ if (PQsendQuery(st->con, sql) == 0)
31679+ {
31680+ if (debug)
31681+ fprintf(stderr, "PQsendQuery(%s)failed\n", sql);
31682+ st->ecnt++;
31683+ }
31684+ else
31685+ {
31686+ st->listen++; /* flags that should be listened */
31687+ }
31688+}
31689+/* process a custom-script transaction: finish the pending command of client st, if any, then start commands[st->state] */
31690+static void
31691+doCustom(CState * st, int debug, int ttype )
31692+{
31693+ PGresult *res;
31694+
31695+ if (st->listen)
31696+ { /* are we receiver? */
31697+ if (commands[st->state]->type == SQL_COMMAND)
31698+ {
31699+ if (debug)
31700+ fprintf(stderr, "client receiving\n");
31701+ if (!PQconsumeInput(st->con))
31702+ { /* there's something wrong */
31703+ fprintf(stderr, "Client aborted in state %d. Probably the backend died while processing.\n", st->state);
31704+ PQfinish(st->con);
31705+ st->con = NULL;
31706+ return;
31707+ }
31708+ if (PQisBusy(st->con))
31709+ return; /* don't have the whole result yet */
31710+ }
31711+
31712+ /*
31713+ * transaction finished: record the time it took in the
31714+ * log
31715+ */
31716+ if (use_log && commands[st->state + 1] == NULL)
31717+ {
31718+ double diff;
31719+ struct timeval now;
31720+
31721+ gettimeofday(&now, NULL);
31722+ diff = (int) (now.tv_sec - st->txn_begin.tv_sec) * 1000000.0 +
31723+ (int) (now.tv_usec - st->txn_begin.tv_usec);
31724+
31725+ fprintf(LOGFILE, "%d %d %.0f\n", st->id, st->cnt, diff);
31726+ }
31727+
31728+ if (commands[st->state]->type == SQL_COMMAND)
31729+ {
31730+ res = PQgetResult(st->con);
31731+ if (strncasecmp(commands[st->state]->argv[0], "select", 6) != 0)
31732+ { /* anything not starting with "select" must report COMMAND_OK */
31733+ if (check(st, res, PGRES_COMMAND_OK))
31734+ return;
31735+ }
31736+ else
31737+ {
31738+ if (check(st, res, PGRES_TUPLES_OK))
31739+ return;
31740+ }
31741+ PQclear(res);
31742+ discard_response(st);
31743+ }
31744+
31745+ if (commands[st->state + 1] == NULL)
31746+ { /* that was the last command of the script */
31747+ if (is_connect)
31748+ {
31749+ PQfinish(st->con);
31750+ st->con = NULL;
31751+ }
31752+ if (++st->cnt >= st->maxAct)
31753+ {
31754+ remains--; /* I've done */
31755+ if (st->con != NULL)
31756+ {
31757+ PQfinish(st->con);
31758+ st->con = NULL;
31759+ }
31760+ return;
31761+ }
31762+ }
31763+
31764+ /* increment state counter */
31765+ st->state++;
31766+ if (commands[st->state] == NULL)
31767+ {
31768+ st->state = 0;
31769+ remains--; /* I've done -- NOTE(review): reached only while cnt < maxAct; confirm this decrement is intended */
31770+ }
31771+ }
31772+
31773+ if (st->con == NULL)
31774+ {
31775+ if ((st->con = doConnect()) == NULL)
31776+ {
31777+ fprintf(stderr, "Client aborted in establishing connection.\n");
31778+ remains--; /* I've aborted */
31779+ PQfinish(st->con); /* st->con is NULL at this point */
31780+ st->con = NULL;
31781+ return;
31782+ }
31783+ }
31784+
31785+ if (use_log && st->state == 0)
31786+ gettimeofday(&(st->txn_begin), NULL);
31787+
31788+ if (commands[st->state]->type == SQL_COMMAND)
31789+ {
31790+ char *sql;
31791+
31792+ if ((sql = strdup(commands[st->state]->argv[0])) == NULL
31793+ || (sql = assignVariables(st, sql)) == NULL)
31794+ {
31795+ fprintf(stderr, "out of memory\n");
31796+ st->ecnt++;
31797+ return;
31798+ }
31799+
31800+ if (debug)
31801+ fprintf(stderr, "client sending %s\n", sql);
31802+
31803+ if (PQsendQuery(st->con, sql) == 0)
31804+ {
31805+ if (debug)
31806+ fprintf(stderr, "PQsendQuery(%s)failed\n", sql);
31807+ st->ecnt++;
31808+ }
31809+ else
31810+ {
31811+ st->listen++; /* flags that should be listened */
31812+ }
31813+
31814+ free(sql);
31815+ }
31816+ else if (commands[st->state]->type == META_COMMAND)
31817+ {
31818+ int argc = commands[st->state]->argc, i;
31819+ char **argv = commands[st->state]->argv;
31820+
31821+ if (debug)
31822+ {
31823+ fprintf(stderr, "client executing \\%s", argv[0]);
31824+ for (i = 1; i < argc; i++)
31825+ fprintf(stderr, " %s", argv[i]);
31826+ fprintf(stderr, "\n");
31827+ }
31828+
31829+ if (strcasecmp(argv[0], "setrandom") == 0)
31830+ { /* argv[1..3] = name, min, max; presumably argc was validated when the script was parsed -- TODO confirm */
31831+ char *val;
31832+
31833+ if ((val = malloc(32)) == NULL) /* room for any int; sizing by strlen(argv[3]) overflowed, e.g. for a negative min */
31834+ {
31835+ fprintf(stderr, "%s: out of memory\n", argv[0]);
31836+ st->ecnt++;
31837+ return;
31838+ }
31839+
31840+ snprintf(val, 32, "%d", getrand(atoi(argv[2]), atoi(argv[3])));
31841+
31842+ if (putVariable(st, argv[1], val) == false)
31843+ {
31844+ fprintf(stderr, "%s: out of memory\n", argv[0]);
31845+ free(val);
31846+ st->ecnt++;
31847+ return;
31848+ }
31849+
31850+ free(val); /* putVariable() stored its own copy */
31851+ st->listen++; /* nothing was sent; marks the meta-command as ready for the completion path above */
31852+ }
31853+ }
31854+}
31855+
31856+/* discard connections: closes the connection of the given client state if it has one (state->con is not reset) */
31857+static void
31858+disconnect_all(CState * state)
31859+{
31860+ if (state->con)
31861+ PQfinish(state->con);
31862+}
31863+
31864+/* create tables and setup data: recreates and fills the four benchmark tables, adds primary keys, then vacuums; exits on error */
31865+static void
31866+init(void)
31867+{
31868+ PGconn *con;
31869+ PGresult *res;
31870+ static char *DDLs[] = {
31871+ "drop table branches",
31872+ "create table branches(bid int not null,bbalance int,filler char(88))",
31873+ "drop table tellers",
31874+ "create table tellers(tid int not null,bid int,tbalance int,filler char(84))",
31875+ "drop table accounts",
31876+ "create table accounts(aid int not null,bid int,abalance int,filler char(84))",
31877+ "drop table history",
31878+ "create table history(tid int,bid int,aid int,delta int,mtime timestamp,filler char(22))"};
31879+ static char *DDLAFTERs[] = {
31880+ "alter table branches add primary key (bid)",
31881+ "alter table tellers add primary key (tid)",
31882+ "alter table accounts add primary key (aid)"};
31883+
31884+
31885+ char sql[256];
31886+
31887+ int i;
31888+
31889+ if ((con = doConnect()) == NULL)
31890+ exit(1);
31891+
31892+ for (i = 0; i < (sizeof(DDLs) / sizeof(char *)); i++)
31893+ {
31894+ res = PQexec(con, DDLs[i]);
31895+ if (strncmp(DDLs[i], "drop", 4) && PQresultStatus(res) != PGRES_COMMAND_OK) /* failures of the "drop table" statements are ignored */
31896+ {
31897+ fprintf(stderr, "%s", PQerrorMessage(con));
31898+ exit(1);
31899+ }
31900+ PQclear(res);
31901+ }
31902+
31903+ res = PQexec(con, "begin"); /* branches and tellers rows are inserted inside one transaction */
31904+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
31905+ {
31906+ fprintf(stderr, "%s", PQerrorMessage(con));
31907+ exit(1);
31908+ }
31909+ PQclear(res);
31910+
31911+ for (i = 0; i < nbranches * tps; i++)
31912+ {
31913+ snprintf(sql, 256, "insert into branches(bid,bbalance) values(%d,0)", i + 1);
31914+ res = PQexec(con, sql);
31915+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
31916+ {
31917+ fprintf(stderr, "%s", PQerrorMessage(con));
31918+ exit(1);
31919+ }
31920+ PQclear(res);
31921+ }
31922+
31923+ for (i = 0; i < ntellers * tps; i++)
31924+ {
31925+ snprintf(sql, 256, "insert into tellers(tid,bid,tbalance) values (%d,%d,0)"
31926+ ,i + 1, i / ntellers + 1);
31927+ res = PQexec(con, sql);
31928+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
31929+ {
31930+ fprintf(stderr, "%s", PQerrorMessage(con));
31931+ exit(1);
31932+ }
31933+ PQclear(res);
31934+ }
31935+
31936+ res = PQexec(con, "end");
31937+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
31938+ {
31939+ fprintf(stderr, "%s", PQerrorMessage(con));
31940+ exit(1);
31941+ }
31942+ PQclear(res);
31943+
31944+ /*
31945+ * occupy accounts table with some data
31946+ */
31947+ fprintf(stderr, "creating tables...\n");
31948+ for (i = 0; i < naccounts * tps; i++)
31949+ {
31950+ int j = i + 1; /* 1-based row number, used as aid */
31951+
31952+ if (j % 10000 == 1)
31953+ { /* first row of each 10000-row batch: start a new COPY */
31954+ res = PQexec(con, "copy accounts from stdin");
31955+ if (PQresultStatus(res) != PGRES_COPY_IN)
31956+ {
31957+ fprintf(stderr, "%s", PQerrorMessage(con));
31958+ exit(1);
31959+ }
31960+ PQclear(res);
31961+ }
31962+
31963+ snprintf(sql, 256, "%d\t%d\t%d\t\n", j, i / naccounts + 1, 0); /* aid, bid, abalance, then an empty filler field */
31964+ if (PQputline(con, sql))
31965+ {
31966+ fprintf(stderr, "PQputline failed\n");
31967+ exit(1);
31968+ }
31969+
31970+ if (j % 10000 == 0)
31971+ {
31972+ /*
31973+ * every 10000 tuples, we commit the copy command. this should
31974+ * avoid generating too much WAL logs
31975+ */
31976+ fprintf(stderr, "%d tuples done.\n", j);
31977+ if (PQputline(con, "\\.\n"))
31978+ {
31979+ fprintf(stderr, "very last PQputline failed\n");
31980+ exit(1);
31981+ }
31982+
31983+ if (PQendcopy(con))
31984+ {
31985+ fprintf(stderr, "PQendcopy failed\n");
31986+ exit(1);
31987+ }
31988+
31989+#ifdef NOT_USED
31990+
31991+ /*
31992+ * do a checkpoint to purge the old WAL logs
31993+ */
31994+ res = PQexec(con, "checkpoint");
31995+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
31996+ {
31997+ fprintf(stderr, "%s", PQerrorMessage(con));
31998+ exit(1);
31999+ }
32000+ PQclear(res);
32001+#endif /* NOT_USED */
32002+ }
32003+ }
32004+ fprintf(stderr, "set primary key...\n");
32005+ for (i = 0; i < (sizeof(DDLAFTERs) / sizeof(char *)); i++)
32006+ {
32007+ res = PQexec(con, DDLAFTERs[i]);
32008+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
32009+ {
32010+ fprintf(stderr, "%s", PQerrorMessage(con));
32011+ exit(1);
32012+ }
32013+ PQclear(res);
32014+ }
32015+
32016+ /* vacuum */
32017+ fprintf(stderr, "vacuum...");
32018+ res = PQexec(con, "vacuum analyze");
32019+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
32020+ {
32021+ fprintf(stderr, "%s", PQerrorMessage(con));
32022+ exit(1);
32023+ }
32024+ PQclear(res);
32025+ fprintf(stderr, "done.\n");
32026+
32027+ PQfinish(con);
32028+}
32029+
32030+static int
32031+process_file(char *filename)
32032+{
32033+ const char delim[] = " \f\n\r\t\v";
32034+
32035+ FILE *fd;
32036+ int lineno, i, j;
32037+ char buf[BUFSIZ], *p, *tok;
32038+ void *tmp;
32039+
32040+ if (strcmp(filename, "-") == 0)
32041+ fd = stdin;
32042+ else if ((fd = fopen(filename, "r")) == NULL)
32043+ {
32044+ fprintf(stderr, "%s: %s\n", strerror(errno), filename);
32045+ return false;
32046+ }
32047+
32048+ fprintf(stderr, "processing file...\n");
32049+
32050+ lineno = 1;
32051+ i = 0;
32052+ while (fgets(buf, sizeof(buf), fd) != NULL)
32053+ {
32054+ if ((p = strchr(buf, '\n')) != NULL)
32055+ *p = '\0';
32056+ p = buf;
32057+ while (isspace(*p))
32058+ p++;
32059+ if (*p == '\0' || strncmp(p, "--", 2) == 0)
32060+ {
32061+ lineno++;
32062+ continue;
32063+ }
32064+
32065+ if ((tmp = realloc(commands, sizeof(Command *) * (i + 1))) == NULL)
32066+ {
32067+ i--;
32068+ goto error;
32069+ }
32070+ commands = tmp;
32071+
32072+ if ((commands[i] = malloc(sizeof(Command))) == NULL)
32073+ goto error;
32074+
32075+ commands[i]->argv = NULL;
32076+ commands[i]->argc = 0;
32077+
32078+ if (*p == '\\')
32079+ {
32080+ commands[i]->type = META_COMMAND;
32081+
32082+ j = 0;
32083+ tok = strtok(++p, delim);
32084+ while (tok != NULL)
32085+ {
32086+ tmp = realloc(commands[i]->argv, sizeof(char *) * (j + 1));
32087+ if (tmp == NULL)
32088+ goto error;
32089+ commands[i]->argv = tmp;
32090+
32091+ if ((commands[i]->argv[j] = strdup(tok)) == NULL)
32092+ goto error;
32093+
32094+ commands[i]->argc++;
32095+
32096+ j++;
32097+ tok = strtok(NULL, delim);
32098+ }
32099+
32100+ if (strcasecmp(commands[i]->argv[0], "setrandom") == 0)
32101+ {
32102+ int min, max;
32103+
32104+ if (commands[i]->argc < 4)
32105+ {
32106+ fprintf(stderr, "%s: %d: \\%s: missing argument\n", filename, lineno, commands[i]->argv[0]);
32107+ goto error;
32108+ }
32109+
32110+ for (j = 4; j < commands[i]->argc; j++)
32111+ fprintf(stderr, "%s: %d: \\%s: extra argument \"%s\" ignored\n", filename, lineno, commands[i]->argv[0], commands[i]->argv[j]);
32112+
32113+ if ((min = atoi(commands[i]->argv[2])) < 0)
32114+ {
32115+ fprintf(stderr, "%s: %d: \\%s: invalid minimum number %s\n", filename, lineno, commands[i]->argv[0], commands[i]->argv[2]);
32116+ goto error;
32117+ }
32118+
32119+ if ((max = atoi(commands[i]->argv[3])) < min || max > RAND_MAX)
32120+ {
32121+ fprintf(stderr, "%s: %d: \\%s: invalid maximum number %s\n", filename, lineno, commands[i]->argv[0], commands[i]->argv[3]);
32122+ goto error;
32123+ }
32124+ }
32125+ else
32126+ {
32127+ fprintf(stderr, "%s: %d: invalid command \\%s\n", filename, lineno, commands[i]->argv[0]);
32128+ goto error;
32129+ }
32130+ }
32131+ else
32132+ {
32133+ commands[i]->type = SQL_COMMAND;
32134+
32135+ if ((commands[i]->argv = malloc(sizeof(char *))) == NULL)
32136+ goto error;
32137+
32138+ if ((commands[i]->argv[0] = strdup(p)) == NULL)
32139+ goto error;
32140+
32141+ commands[i]->argc++;
32142+ }
32143+
32144+ i++;
32145+ lineno++;
32146+ }
32147+ fclose(fd);
32148+
32149+ if ((tmp = realloc(commands, sizeof(Command *) * (i + 1))) == NULL)
32150+ goto error;
32151+ commands = tmp;
32152+
32153+ commands[i] = NULL;
32154+
32155+ return true;
32156+
32157+error:
32158+ if (errno == ENOMEM)
32159+ fprintf(stderr, "%s: %d: out of memory\n", filename, lineno);
32160+
32161+ fclose(fd);
32162+
32163+ if (commands == NULL)
32164+ return false;
32165+
32166+ while (i >= 0)
32167+ {
32168+ if (commands[i] != NULL)
32169+ {
32170+ for (j = 0; j < commands[i]->argc; j++)
32171+ free(commands[i]->argv[j]);
32172+
32173+ free(commands[i]->argv);
32174+ free(commands[i]);
32175+ }
32176+
32177+ i--;
32178+ }
32179+ free(commands);
32180+
32181+ return false;
32182+}
32183+
32184+/* printResults: print the benchmark summary (transaction type, scaling factor, client count, transactions processed, run time and tps) to stdout. */
32185+static void
32186+printResults(
32187+ int ttype, int normal_xacts,
32188+ struct timeval * tv1, struct timeval * tv2,
32189+ struct timeval * tv3)
32190+{
32191+ double elapsed_usec; /* tv1 -> tv3, in microseconds */
32192+ double run_secs; /* the same interval, in seconds */
32193+ double xacts_per_sec;
32194+ char *label;
32195+
32196+ /* tv2 is part of the signature but is not read here */
32197+ elapsed_usec = (tv3->tv_sec - tv1->tv_sec) * 1000000.0 + (tv3->tv_usec - tv1->tv_usec);
32198+ run_secs = elapsed_usec / 1000000.0 ;
32199+ xacts_per_sec = normal_xacts * 1000000.0 / elapsed_usec;
32200+
32201+#define SELECT_ONLY (1)
32202+#define INSERT_ONLY (2)
32203+#define UPDATE_ONLY (3)
32204+#define WITH_TRANSACTION (4)
32205+ /*
32206+ * Pick the display label. Any type without its own label (for
32207+ * example WITH_TRANSACTION) is reported as "Mix query".
32208+ */
32209+ if (ttype == 0)
32210+ label = "TPC-B (sort of)";
32211+ else if (ttype == SELECT_ONLY)
32212+ label = "SELECT only";
32213+ else if (ttype == INSERT_ONLY)
32214+ label = "INSERT only";
32215+ else if (ttype == UPDATE_ONLY)
32216+ label = "UPDATE only";
32217+ else if (ttype == CUSTOM_QUERY)
32218+ label = "Custom query";
32219+ else
32220+ label = "Mix query";
32221+
32222+ /*
32223+ * Emit the report on stdout. run_secs and xacts_per_sec are both
32224+ * derived from the same tv1..tv3 interval. tps and nclients are
32225+ * variables defined outside this function (scaling factor and client
32226+ * count, per the labels printed below).
32227+ */
32228+ printf("transaction type: %s\n", label);
32229+ printf("scaling factor: %d\n", tps);
32230+ printf("number of clients: %d\n", nclients);
32231+ printf("number of transactions actually processed: %d\n", normal_xacts );
32232+ printf("run time (sec) = %f \n", run_secs);
32233+ printf("tps = %f (including connections establishing)\n", xacts_per_sec);
32234+}
32235+/* doChild: body of one benchmark client process. Connects via doConnect(), starts the handler that matches ttype (doMix / doCustom / doOne), then loops: wait for the backend socket to become readable and call the handler again, until remains < min or the connection pointer is NULL. Always returns 1; fatal errors terminate the process with exit(). NOTE(review): remains is not modified in this function - presumably the handlers decrement it; confirm. */
32236+static int
32237+doChild(int clientId, int min, int max, int debug, int ttype)
32238+{
32239+ CState state; /* status of clients */
32240+
32241+ struct timeval tv1; /* start up time */
32242+ fd_set input_mask;
32243+ int nsocks = 0; /* return from select(2) */
32244+ int sock = 0;
32245+
32246+ gettimeofday(&tv1, NULL);
32247+ srand((unsigned int) tv1.tv_usec + clientId ); /* per-client seed: current microseconds plus the client id */
32248+
32249+ memset((char *)&state,0,sizeof(CState));
32250+ /* make connections to the database */
32251+ state.id = clientId;
32252+ if ((state.con = doConnect()) == NULL)
32253+ exit(1);
32254+
32255+ state.maxAct = max - min + 1; /* size of the inclusive range [min, max] */
32256+ /* send start up queries in async manner */
32257+ switch (ttype)
32258+ {
32259+ case WITH_TRANSACTION :
32260+ case TPC_B_LIKE :
32261+ doMix(&state, debug, ttype);
32262+ break;
32263+ case CUSTOM_QUERY :
32264+ doCustom(&state, debug, ttype);
32265+ break;
32266+ default :
32267+ doOne(&state, debug, ttype);
32268+ break;
32269+ }
32270+
32271+ remains = max; /* set only after the first handler call above; statement order matters here */
32272+ for (;;)
32273+ {
32274+ if (remains < min || !state.con) /* done, or the connection is gone */
32275+ {
32276+ break;
32277+ }
32278+
32279+ FD_ZERO(&input_mask);
32280+
32281+ if (ttype != CUSTOM_QUERY || commands[state.state]->type != META_COMMAND) /* skip the wait when the current custom command is a meta command */
32282+ {
32283+ if (state.con == NULL) /* cannot be true here: the loop already broke on a NULL connection above */
32284+ {
32285+ if ((state.con = doConnect()) == NULL)
32286+ {
32287+ exit(1);
32288+ }
32289+ }
32290+ sock = PQsocket(state.con);
32291+
32292+ if (sock < 0)
32293+ {
32294+ fprintf(stderr, "Client %d: PQsocket failed\n", clientId);
32295+ disconnect_all(&state);
32296+ exit(1);
32297+ }
32298+ FD_SET(sock, &input_mask);
32299+
32300+ if ((nsocks = select(sock + 1, &input_mask, (fd_set *) NULL,
32301+ (fd_set *) NULL, (struct timeval *) NULL)) < 0) /* NULL timeout: block until the socket is readable */
32302+ {
32303+ if (errno == EINTR)
32304+ continue;
32305+ /* must be something wrong */
32306+ disconnect_all(&state);
32307+ fprintf(stderr, "select failed: %s\n", strerror(errno));
32308+ exit(1);
32309+ }
32310+ else if (nsocks == 0)
32311+ { /* timeout */
32312+ fprintf(stderr, "select timeout\n");
32313+ fprintf(stderr, "client %d:state %d cnt %d ecnt %d listen %d\n",
32314+ clientId, state.state, state.cnt, state.ecnt, state.listen);
32315+ exit(0);
32316+ }
32317+ }
32318+
32319+ /* ok, backend returns reply */
32320+ if (state.con && (FD_ISSET(PQsocket(state.con), &input_mask)
32321+ || (ttype == CUSTOM_QUERY
32322+ && commands[state.state]->type == META_COMMAND))) /* meta commands run without waiting on the socket */
32323+ {
32324+ switch (ttype)
32325+ {
32326+ case WITH_TRANSACTION :
32327+ case TPC_B_LIKE :
32328+ doMix(&state, debug, ttype);
32329+ break;
32330+ case CUSTOM_QUERY :
32331+ doCustom(&state, debug, ttype);
32332+ break;
32333+ default :
32334+ doOne(&state, debug, ttype);
32335+ break;
32336+ }
32337+ }
32338+ }
32339+ disconnect_all(&state);
32340+ return 1;
32341+}
32342+/* doClient: fork nclients child processes, giving each a contiguous inclusive slice [min, max] of the 1..nxacts transaction numbers, then wait for all of them. A failed fork() is reported on stderr and the remaining clients are still started. Always returns 1. */
32343+static int
32344+doClient(int debug, int ttype)
32345+{
32346+ pid_t pid;
32347+ int i;
32348+ int min = 0, max = 0, base, mo;
32349+
32350+ base = nxacts / nclients; /* transactions every client gets */
32351+ mo = nxacts % nclients; /* leftover, handed out one each to the first clients */
32352+ for (i = 0; i < nclients; i++)
32353+ {
32354+ min = max + 1;
32355+ max += base;
32356+ if (mo > 0)
32357+ {
32358+ max += 1;
32359+ mo--;
32360+ }
32361+ pid = fork();
32362+ if (pid < 0) /* previously ignored silently; this client's slice will not be run */
32363+ fprintf(stderr, "fork failed for client %d: %s\n", i, strerror(errno));
32364+ else if (pid == 0)
32365+ {
32366+ doChild(i, min, max, debug, ttype);
32367+ exit(0);
32368+ }
32369+ }
32370+ while (wait(NULL) > 0) /* reap every child before returning */
32371+ ;
32372+ return 1;
32373+}
32374+/* main: read PGHOST/PGPORT/PGUSER and the command line, then either initialize the tables (-i) or prepare the run (parse the -f script, or read the scaling factor and vacuum), fork the clients via doClient() and print the results. Returns 0 on success; every failure path exits with status 1. */
32375+int
32376+main(int argc, char **argv)
32377+{
32378+ int c;
32379+ int is_init_mode = 0; /* initialize mode? */
32380+ int is_no_vacuum = 0; /* no vacuum at all before
32381+ * testing? */
32382+ int is_full_vacuum = 0; /* do full vacuum before testing? */
32383+ int debug = 0; /* debug flag */
32384+ int ttype = TPC_B_LIKE; /* transaction type */
32385+ char *filename = NULL;
32386+
32387+ struct timeval tv1; /* start up time */
32388+ struct timeval tv2; /* after establishing all connections to
32389+ * the backend */
32390+ struct timeval tv3; /* end time */
32391+
32392+#if !(defined(__CYGWIN__) || defined(__MINGW32__))
32393+ struct rlimit rlim;
32394+#endif
32395+
32396+ PGconn *con;
32397+ PGresult *res;
32398+ char *env;
32399+
32400+ if ((env = getenv("PGHOST")) != NULL && *env != '\0')
32401+ pghost = env;
32402+ if ((env = getenv("PGPORT")) != NULL && *env != '\0')
32403+ pgport = env;
32404+ if ((env = getenv("PGUSER")) != NULL && *env != '\0') /* was "else if": PGUSER was ignored whenever PGPORT was set */
32405+ login = env;
32406+
32407+ while ((c = getopt(argc, argv, "ih:nvp:dc:t:s:u:P:CNSlTUIf:")) != -1)
32408+ {
32409+ switch (c)
32410+ {
32411+ case 'i':
32412+ is_init_mode++;
32413+ break;
32414+ case 'h':
32415+ pghost = optarg;
32416+ break;
32417+ case 'n':
32418+ is_no_vacuum++;
32419+ break;
32420+ case 'v':
32421+ is_full_vacuum++;
32422+ break;
32423+ case 'p':
32424+ pgport = optarg;
32425+ break;
32426+ case 'd':
32427+ debug++;
32428+ break;
32429+ case 'S':
32430+ ttype = SELECT_ONLY;
32431+ break;
32432+ case 'I':
32433+ ttype = INSERT_ONLY;
32434+ break;
32435+ case 'U':
32436+ ttype = UPDATE_ONLY;
32437+ break;
32438+ case 'T':
32439+ ttype = WITH_TRANSACTION;
32440+ break;
32441+ case 'c':
32442+ nclients = atoi(optarg);
32443+ if (nclients <= 0 || nclients > MAXCLIENTS)
32444+ {
32445+ fprintf(stderr, "invalid number of clients: %d\n", nclients);
32446+ exit(1);
32447+ }
32448+#if !(defined(__CYGWIN__) || defined(__MINGW32__))
32449+#ifdef RLIMIT_NOFILE /* most platform uses RLIMIT_NOFILE */
32450+ if (getrlimit(RLIMIT_NOFILE, &rlim) == -1)
32451+ {
32452+#else /* but BSD doesn't ... */
32453+ if (getrlimit(RLIMIT_OFILE, &rlim) == -1)
32454+ {
32455+#endif /* HAVE_RLIMIT_NOFILE */
32456+ fprintf(stderr, "getrlimit failed. reason: %s\n", strerror(errno));
32457+ exit(1);
32458+ }
32459+ if (rlim.rlim_cur <= (nclients + 2))
32460+ {
32461+ fprintf(stderr, "You need at least %d open files resource but you are only allowed to use %ld.\n", nclients + 2, (long) rlim.rlim_cur);
32462+ fprintf(stderr, "Use limit/ulimt to increase the limit before using pgbench.\n");
32463+ exit(1);
32464+ }
32465+#endif /* #if !(defined(__CYGWIN__) || defined(__MINGW32__)) */
32466+ break;
32467+ case 'C':
32468+ is_connect = 1;
32469+ break;
32470+ case 's':
32471+ tps = atoi(optarg);
32472+ if (tps <= 0)
32473+ {
32474+ fprintf(stderr, "invalid scaling factor: %d\n", tps);
32475+ exit(1);
32476+ }
32477+ break;
32478+ case 't':
32479+ nxacts = atoi(optarg);
32480+ if (nxacts <= 0)
32481+ {
32482+ fprintf(stderr, "invalid number of transactions: %d\n", nxacts);
32483+ exit(1);
32484+ }
32485+ break;
32486+ case 'u':
32487+ login = optarg;
32488+ break;
32489+ case 'P':
32490+ pwd = optarg;
32491+ break;
32492+ case 'l':
32493+ use_log = true;
32494+ break;
32495+ case 'f':
32496+ ttype = CUSTOM_QUERY;
32497+ filename = optarg;
32498+ break;
32499+ default: /* also reached for -N, which is in the option string but has no case */
32500+ usage();
32501+ exit(1);
32502+ break;
32503+ }
32504+ }
32505+
32506+ if (argc > optind) /* database name: first non-option argument, else PGDATABASE, else the login name */
32507+ dbName = argv[optind];
32508+ else
32509+ {
32510+ if ((env = getenv("PGDATABASE")) != NULL && *env != '\0')
32511+ dbName = env;
32512+ else if (login != NULL && *login != '\0')
32513+ dbName = login;
32514+ else
32515+ dbName = "";
32516+ }
32517+
32518+ if (is_init_mode)
32519+ {
32520+ init();
32521+ exit(0);
32522+ }
32523+
32524+ if (use_log)
32525+ {
32526+ char logpath[64];
32527+
32528+ snprintf(logpath, 64, "pgbench_log.%d", getpid());
32529+ LOGFILE = fopen(logpath, "w");
32530+
32531+ if (LOGFILE == NULL)
32532+ {
32533+ fprintf(stderr, "Couldn't open logfile \"%s\": %s\n", logpath, strerror(errno)); /* trailing newline was missing */
32534+ exit(1);
32535+ }
32536+ }
32537+
32538+ if (debug)
32539+ {
32540+ printf("pghost: %s pgport: %s nclients: %d nxacts: %d dbName: %s\n",
32541+ pghost, pgport, nclients, nxacts, dbName);
32542+ }
32543+
32544+ /* opening connection... */
32545+ con = doConnect();
32546+ if (con == NULL)
32547+ exit(1);
32548+
32549+ if (PQstatus(con) == CONNECTION_BAD)
32550+ {
32551+ fprintf(stderr, "Connection to database '%s' failed.\n", dbName);
32552+ fprintf(stderr, "%s", PQerrorMessage(con));
32553+ exit(1);
32554+ }
32555+
32556+ if (ttype == CUSTOM_QUERY)
32557+ {
32558+ PQfinish(con); /* the connection only served as a reachability check in this mode */
32559+ if (process_file(filename) == false)
32560+ exit(1);
32561+ }
32562+ else
32563+ {
32564+ /*
32565+ * get the scaling factor that should be same as count(*) from
32566+ * branches...
32567+ */
32568+ res = PQexec(con, "select count(*) from branches");
32569+ if (PQresultStatus(res) != PGRES_TUPLES_OK)
32570+ {
32571+ fprintf(stderr, "%s", PQerrorMessage(con));
32572+ exit(1);
32573+ }
32574+ tps = atoi(PQgetvalue(res, 0, 0)); /* overrides any -s value given on the command line */
32575+ if (tps < 0)
32576+ {
32577+ fprintf(stderr, "count(*) from branches invalid (%d)\n", tps);
32578+ exit(1);
32579+ }
32580+ PQclear(res);
32581+
32582+ if (!is_no_vacuum)
32583+ {
32584+ fprintf(stderr, "starting vacuum...");
32585+ res = PQexec(con, "vacuum branches");
32586+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
32587+ {
32588+ fprintf(stderr, "%s", PQerrorMessage(con));
32589+ exit(1);
32590+ }
32591+ PQclear(res);
32592+
32593+ res = PQexec(con, "vacuum tellers");
32594+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
32595+ {
32596+ fprintf(stderr, "%s", PQerrorMessage(con));
32597+ exit(1);
32598+ }
32599+ PQclear(res);
32600+
32601+ res = PQexec(con, "delete from history");
32602+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
32603+ {
32604+ fprintf(stderr, "%s", PQerrorMessage(con));
32605+ exit(1);
32606+ }
32607+ PQclear(res);
32608+ res = PQexec(con, "vacuum history");
32609+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
32610+ {
32611+ fprintf(stderr, "%s", PQerrorMessage(con));
32612+ exit(1);
32613+ }
32614+ PQclear(res);
32615+
32616+ fprintf(stderr, "end.\n");
32617+
32618+ if (is_full_vacuum)
32619+ {
32620+ fprintf(stderr, "starting full vacuum...");
32621+ res = PQexec(con, "vacuum analyze accounts");
32622+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
32623+ {
32624+ fprintf(stderr, "%s", PQerrorMessage(con));
32625+ exit(1);
32626+ }
32627+ PQclear(res);
32628+ fprintf(stderr, "end.\n");
32629+ }
32630+ }
32631+ PQfinish(con);
32632+ }
32633+
32634+ /* set random seed */
32635+ gettimeofday(&tv1, NULL);
32636+ srand((unsigned int) tv1.tv_usec);
32637+ /* get start up time */
32638+ gettimeofday(&tv1, NULL);
32639+ /* time after connections set up */
32640+ gettimeofday(&tv2, NULL);
32641+
32642+ doClient(debug, ttype); /* forks the clients and waits for all of them */
32643+
32644+ /* get end time */
32645+ gettimeofday(&tv3, NULL);
32646+ printResults(ttype, nxacts, &tv1, &tv2, &tv3);
32647+ if (LOGFILE)
32648+ fclose(LOGFILE);
32649+ return 0; /* was 1, which made success indistinguishable from the exit(1) failure paths */
32650+}
32651diff -aruN postgresql-8.2.4/src/pgcluster/tool/pgcbench.sh pgcluster-1.7.0rc7/src/pgcluster/tool/pgcbench.sh
32652--- postgresql-8.2.4/src/pgcluster/tool/pgcbench.sh 1970-01-01 01:00:00.000000000 +0100
32653+++ pgcluster-1.7.0rc7/src/pgcluster/tool/pgcbench.sh 2007-02-18 22:52:17.000000000 +0100
32654@@ -0,0 +1,30 @@
32655+#! /bin/bash
32656+
32657+set -e
32658+
32659+while getopts ih:nvp:dc:t:s:u:P:CNSlTUIf: opt; do
32660+ case $opt in
32661+ f)
32662+ filename=$OPTARG
32663+ ;;
32664+ *)
32665+ opts=(${opts[@]} -$opt $OPTARG)
32666+ ;;
32667+ esac
32668+done
32669+shift $(($OPTIND - 1))
32670+dbname=$1
32671+
32672+tps=$(psql -At -c "SELECT count(*) FROM branches" $dbname)
32673+
32674+vacuumdb -t branches $dbname
32675+vacuumdb -t tellers $dbname
32676+psql -c "DELETE FROM history" $dbname
32677+vacuumdb -t history $dbname
32678+
32679+if [ -z $filename ]; then
32680+ pgcbench ${opts[@]} $@
32681+else
32682+ perl -pe "BEGIN { \$tps = $tps } s/\`([^\`]+)\`/eval \$1/eg" $filename \
32683+ | pgcbench ${opts[@]} -f - $@
32684+fi
32685diff -aruN postgresql-8.2.4/src/pgcluster/tool/tpc-b_like.sql pgcluster-1.7.0rc7/src/pgcluster/tool/tpc-b_like.sql
32686--- postgresql-8.2.4/src/pgcluster/tool/tpc-b_like.sql 1970-01-01 01:00:00.000000000 +0100
32687+++ pgcluster-1.7.0rc7/src/pgcluster/tool/tpc-b_like.sql 2007-02-18 22:52:17.000000000 +0100
32688@@ -0,0 +1,11 @@
32689+\setrandom aid 1 `100000 * $tps`
32690+\setrandom bid 1 `1 * $tps`
32691+\setrandom tid 1 `10 * $tps`
32692+\setrandom delta 1 1000
32693+BEGIN -- one TPC-B-like transaction using the aid, bid, tid and delta values drawn by the setrandom lines above
32694+UPDATE accounts SET abalance = abalance + :delta WHERE aid = :aid -- apply the delta to the chosen account
32695+SELECT abalance FROM accounts WHERE aid = :aid -- read back the balance just updated
32696+UPDATE tellers SET tbalance = tbalance + :delta WHERE tid = :tid -- apply the same delta to the chosen teller
32697+UPDATE branches SET bbalance = bbalance + :delta WHERE bid = :bid -- apply the same delta to the chosen branch
32698+INSERT INTO history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, current_timestamp) -- record the transaction
32699+END
This page took 4.14757 seconds and 4 git commands to generate.