diff -aruN postgresql-8.2.4/GNUmakefile.in pgcluster-1.7.0rc7/GNUmakefile.in
--- postgresql-8.2.4/GNUmakefile.in 2006-08-18 21:58:05.000000000 +0200
+++ pgcluster-1.7.0rc7/GNUmakefile.in 2007-02-18 22:52:16.000000000 +0100
@@ -63,13 +63,13 @@
##########################################################################
-distdir := postgresql-$(VERSION)
+distdir := pgcluster-$(PGCLUSTER_VERSION)
dummy := =install=
-garbage := =* "#"* ."#"* *~* *.orig *.rej core postgresql-*
+garbage := =* "#"* ."#"* *~* *.orig *.rej core pgcluster-*
dist: $(distdir).tar.gz
ifeq ($(split-dist), yes)
-dist: postgresql-base-$(VERSION).tar.gz postgresql-docs-$(VERSION).tar.gz postgresql-opt-$(VERSION).tar.gz postgresql-test-$(VERSION).tar.gz
+dist: pgcluster-base-$(PGCLUSTER_VERSION).tar.gz pgcluster-docs-$(PGCLUSTER_VERSION).tar.gz pgcluster-opt-$(PGCLUSTER_VERSION).tar.gz pgcluster-test-$(PGCLUSTER_VERSION).tar.gz
endif
dist:
-rm -rf $(distdir)
@@ -81,19 +81,19 @@
src/tools src/tutorial \
$(addprefix src/pl/, plperl plpython tcl)
-docs_files := doc/postgres.tar.gz doc/src doc/TODO.detail
+docs_files := doc/pgcluster.tar.gz doc/src doc/TODO.detail
-postgresql-base-$(VERSION).tar: distdir
+pgcluster-base-$(PGCLUSTER_VERSION).tar: distdir
$(TAR) -c $(addprefix --exclude $(distdir)/, $(docs_files) $(opt_files) src/test) \
-f $@ $(distdir)
-postgresql-docs-$(VERSION).tar: distdir
+pgcluster-docs-$(PGCLUSTER_VERSION).tar: distdir
$(TAR) cf $@ $(addprefix $(distdir)/, $(docs_files))
-postgresql-opt-$(VERSION).tar: distdir
+pgcluster-opt-$(PGCLUSTER_VERSION).tar: distdir
$(TAR) cf $@ $(addprefix $(distdir)/, $(opt_files))
-postgresql-test-$(VERSION).tar: distdir
+pgcluster-test-$(PGCLUSTER_VERSION).tar: distdir
$(TAR) cf $@ $(distdir)/src/test
distdir:
diff -aruN postgresql-8.2.4/INSTALL_PGCLUSTER pgcluster-1.7.0rc7/INSTALL_PGCLUSTER
--- postgresql-8.2.4/INSTALL_PGCLUSTER 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/INSTALL_PGCLUSTER 2007-02-19 00:59:13.000000000 +0100
@@ -0,0 +1,392 @@
+PGCluster Installation Instructions
+
+=============================================================
+1. Installation
+=============================================================
+
+1-1. Install Cluster DB Server, Replication Server & Load Balancer
+----------------------------------------------------------------
+$ cd $source_dir
+$ ./configure
+$ gmake
+$ su
+# gmake install
+# chown -R postgres /usr/local/pgsql
+----------------------------------------------------------------
+
+=============================================================
+2. Initialize DB
+=============================================================
+$ su
+# adduser postgres
+# mkdir /usr/local/pgsql/data
+# chown postgres /usr/local/pgsql/data
+# su - postgres
+$ /usr/local/pgsql/bin/initdb -D /usr/local/pgsql/data
+
+
+=============================================================
+3. Configuration
+=============================================================
+(EX.System Composition)
+
+ |
+ ((Load Balance Server))
+ ( hostname: lb.pgcluster.org)
+ ( receive port:5432 )
+ ( recovery port:6001 )
+ |
+----------+-------------+------------+----------
+ | |
+ (( Cluster DB 1 )) (( Cluster DB 2 ))
+ ( hostname:c1.pgcluster.org) ( hostname:c2.pgcluster.org)
+ ( receive port: 5432 ) ( receive port:5432 )
+ ( recovery port:7001 ) ( recovery port:7002 )
+ | |
+----------+-------------+------------+----------
+ |
+ ((Replication Server))
+ ( hostname:pgr.pgcluster.org)
+ ( receive port:8001 )
+ ( recovery port:8101 )
+
+
+3-1. Load Balance Server
+
+The setup file of the load balance server is copied from the sample file and then edited.
+(the sample file is installed in '/usr/local/pgsql/share' by default)
+----------------------------------------------------------------
+$cd /usr/local/pgsql/share
+$cp pglb.conf.sample pglb.conf
+----------------------------------------------------------------
+
+For the system composition example above,
+a sample pglb.conf file looks like the following:
+
+#============================================================
+# Load Balance Server configuration file
+#-------------------------------------------------------------
+# file: pglb.conf
+#-------------------------------------------------------------
+# This file controls:
+# o which hosts are db cluster server
+# o which port use connect to db cluster server
+# o how many connections are allowed on each DB server
+#============================================================
+#-------------------------------------------------------------
+# set cluster DB server information
+# o Host_Name : hostname
+# o Port : Connection for postmaster
+# o Max_Connection : Maximum number of connection to postmaster
+#-------------------------------------------------------------
+
+ c1.pgcluster.org
+ 5432
+ 32
+
+
+ c2.pgcluster.org
+ 5432
+ 32
+
+#-------------------------------------------------------------
+# set Load Balance server information
+# o Host_Name : The host name of this load balance server.
+# -- please write a host name by FQDN or IP address.
+# o Backend_Socket_Dir : Unix domain socket path for the backend
+# o Receive_Port : Connection from client
+# o Recovery_Port : Connection for recovery process
+# o Max_Cluster_Num : Maximum number of cluster DB servers
+# o Use_Connection_Pooling : Use connection pool [yes/no]
+# o Lifecheck_Timeout : Timeout of the lifecheck response
+# o Lifecheck_Interval : Interval time of the lifecheck
+# (range 1s - 1h)
+# 10s -- 10 seconds
+# 10min -- 10 minutes
+# 1h -- 1 hours
+#-------------------------------------------------------------
+ lb.pgcluster.org
+ /tmp
+ 5432
+ 6001
+ 128
+ no
+ 3s
+ 15s
+#-------------------------------------------------------------
+# A setup of a log files
+#
+# o File_Name : Log file name with full path
+# o File_Size : Maximum size of each log files
+# Please specify in a number and unit(K or M)
+# 10 -- 10 Byte
+# 10K -- 10 KByte
+# 10M -- 10 MByte
+# o Rotate : Rotation times
+# If specified 0, old versions are removed.
+#-------------------------------------------------------------
+
+ /tmp/pglb.log
+ 1M
+ 3
+
+
+3-2. Cluster DB Server
+
+The Cluster DB server needs two configuration files to be edited
+('pg_hba.conf' and 'cluster.conf').
+These files are created under the $PG_DATA directory after 'initdb'.
+
+A. pg_hba.conf
+Permission to connect to the DB via IP connections is needed for this system.
+
+B. cluster.conf
+For the system composition example above,
+a sample cluster.conf file looks like the following:
+
+#============================================================
+# Cluster DB Server configuration file
+#-------------------------------------------------------------
+# file: cluster.conf
+#-------------------------------------------------------------
+# This file controls:
+# o which hosts & port are replication server
+# o which port use for replication request to replication server
+# o which command use for recovery function
+#
+#============================================================
+#-------------------------------------------------------------
+# set cluster DB server information
+# o Host_Name : hostname
+# o Port : Connection port for postmaster
+# o Recovery_Port : Connection for recovery process
+#-------------------------------------------------------------
+
+ pgr.pgcluster.org
+ 8001
+ 8101
+
+#-------------------------------------------------------------
+# set Cluster DB Server information
+# o Host_Name : Host name which connect with replication server
+# o Recovery_Port : Connection port for recovery
+# o Rsync_Path : Path of rsync command
+# o Rsync_Option : File transfer option for rsync
+# o Rsync_Compress : Use compression option for rsync
+# [yes/no]. default : yes
+# o Pg_Dump_Path : path of pg_dump
+# o When_Stand_Alone : When all replication servers have failed,
+# you can set one of two kinds of permission,
+# "read_only" or "read_write".
+# o Replication_Timeout : Timeout of each replication request
+# o Lifecheck_Timeout : Timeout of the lifecheck response
+# o Lifecheck_Interval : Interval time of the lifecheck
+# (range 1s - 1h)
+# 10s -- 10 seconds
+# 10min -- 10 minutes
+# 1h -- 1 hours
+#-------------------------------------------------------------
+ c1.pgcluster.org
+ 7001
+ /usr/bin/rsync
+ ssh -1
+ yes
+ /usr/local/pgsql/bin/pg_dump
+ read_only
+ 1min
+ 3s
+ 11s
+#-------------------------------------------------------------
+# set partial replication control information
+# set DB name and Table name to stop replication
+# o DB_Name : DB name
+# o Table_Name : Table name
+#-------------------------------------------------------------
+#
+# test_db
+# log_table
+#
+
+3-3. Replication Server
+
+The setup file of the replication server is copied from the sample file and then edited.
+(the sample file is installed in '/usr/local/pgsql/share' by default)
+----------------------------------------------------------------
+$cd /usr/local/pgsql/share
+$cp pgreplicate.conf.sample pgreplicate.conf
+----------------------------------------------------------------
+For the system composition example above,
+a sample pgreplicate.conf file looks like the following:
+
+#============================================================
+# PGReplicate configuration file
+#-------------------------------------------------------------
+# file: pgreplicate.conf
+#-------------------------------------------------------------
+# This file controls:
+# o which hosts & port are cluster server
+# o which port use for replication request from cluster server
+#============================================================
+#-------------------------------------------------------------
+# set cluster DB server information
+# o Host_Name : hostname
+# o Port : Connection port for postmaster
+# o Recovery_Port : Connection port for recovery
+#-------------------------------------------------------------
+
+ c1.pgcluster.org
+ 5432
+ 7001
+
+
+ c2.pgcluster.org
+ 5432
+ 7002
+
+#-------------------------------------------------------------
+# set Load Balance server information
+# o Host_Name : hostname
+# o Recovery_Port : Connection port for recovery
+#-------------------------------------------------------------
+
+ lb.pgcluster.org
+ 6001
+
+#------------------------------------------------------------
+# A setup of the cascade connection between replication servers.
+# When you do not use RLOG recovery, you can skip this setup
+#
+# o Host_Name : The host name of the upper replication server.
+# Please write a host name by FQDN or IP address.
+# o Port : The connection port with postmaster.
+# o Recovery_Port : The connection port at the time of
+# a recovery sequence .
+#------------------------------------------------------------
+#
+# upper_replicate.pgcluster.org
+# 8002
+# 8102
+#
+#
+#-------------------------------------------------------------
+# A setup of a replication server
+#
+# o Host_Name : The host name of the this replication server.
+# Please write a host name by FQDN or IP address.
+# o Replicate_Port : Connection port for replication
+# o Recovery_Port : Connection port for recovery
+# o RLOG_Port : Connection port for replication log
+# o Response_mode : Timing which returns a response
+# - normal -- return result of DB which received the query
+# - reliable -- return result after waiting for response of
+# all Cluster DBs.
+# o Use_Replication_Log : Use replication log
+# [yes/no]. default : no
+# o Replication_Timeout : Timeout of each replication response
+# o Lifecheck_Timeout : Timeout of the lifecheck response
+# o Lifecheck_Interval : Interval time of the lifecheck
+# (range 1s - 1h)
+# 10s -- 10 seconds
+# 10min -- 10 minutes
+# 1h -- 1 hours
+#-------------------------------------------------------------
+ pgr.pgcluster.org
+ 8001
+ 8101
+ 8301
+ normal
+ no
+ 1min
+ 3s
+ 15s
+#-------------------------------------------------------------
+# A setup of a log files
+#
+# o File_Name : Log file name with full path
+# o File_Size : maximum size of each log files
+# Please specify in a number and unit(K or M)
+# 10 -- 10 Byte
+# 10K -- 10 KByte
+# 10M -- 10 MByte
+# o Rotate : Rotation times
+# If specified 0, old versions are removed.
+#-------------------------------------------------------------
+
+ /tmp/pgreplicate.log
+ 1M
+ 3
+
+
+=============================================================
+4. Start Up / Stop
+=============================================================
+
+4-1. replication server
+
+A. Start replication server
+----------------------------------------------------------------
+$ /usr/local/pgsql/bin/pgreplicate -D /usr/local/pgsql/share
+----------------------------------------------------------------
+
+B. Stop replication server
+----------------------------------------------------------------
+$ /usr/local/pgsql/bin/pgreplicate -D /usr/local/pgsql/share stop
+----------------------------------------------------------------
+
+usage: pgreplicate [-D path_of_config_file] [-W path_of_work_files]
+[-w wait time before fork process][-U login user][-l][-n][-v][-h][stop]
+ -l: print error logs in the log file.
+ -n: don't run in daemon mode.
+ -v: debug mode. need '-n' flag
+ -h: print this help
+ stop: stop pgreplicate
+(config file default path: ./pgreplicate.conf)
+
+4-2. cluster DB server
+$PG_HOME = /usr/local/pgsql
+$PG_DATA = /usr/local/pgsql/data
+
+A. Start cluster DB server
+----------------------------------------------------------------
+$ /usr/local/pgsql/bin/pg_ctl start -D /usr/local/pgsql/data
+----------------------------------------------------------------
+
+B. Stop cluster DB server
+----------------------------------------------------------------
+$ /usr/local/pgsql/bin/pg_ctl stop -D /usr/local/pgsql/data
+----------------------------------------------------------------
+
+C-1. Restart (recovery) cluster DB server with backup
+----------------------------------------------------------------
+$ /usr/local/pgsql/bin/pg_ctl start -D /usr/local/pgsql/data -o "-R"
+----------------------------------------------------------------
+
+C-2. Restart (recovery) cluster DB server without backup
+----------------------------------------------------------------
+$ /usr/local/pgsql/bin/pg_ctl start -D /usr/local/pgsql/data -o "-r"
+----------------------------------------------------------------
+
+D. Upgrade cluster DB server with pg_dump
+----------------------------------------------------------------
+$ /usr/local/pgsql/bin/pg_ctl start -D /usr/local/pgsql/data -o "-U"
+----------------------------------------------------------------
+
+4-3. load balance server
+
+A. Start load balance server
+----------------------------------------------------------------
+$ /usr/local/pgsql/bin/pglb -D /usr/local/pgsql/share
+----------------------------------------------------------------
+
+B. Stop load balance server
+----------------------------------------------------------------
+$ /usr/local/pgsql/bin/pglb -D /usr/local/pgsql/share stop
+----------------------------------------------------------------
+
+usage: pglb [-D path_of_config_file] [-W path_of_work_files] [-l][-n][-v][-h][stop]
+ -l: print error logs in the log file.
+ -n: don't run in daemon mode.
+ -v: debug mode. need '-n' flag
+ -h: print this help
+ stop: stop pglb
+ (config file default path: ./pglb.conf)
diff -aruN postgresql-8.2.4/README_PGCLUSTER pgcluster-1.7.0rc7/README_PGCLUSTER
--- postgresql-8.2.4/README_PGCLUSTER 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/README_PGCLUSTER 2007-02-19 01:00:40.000000000 +0100
@@ -0,0 +1,118 @@
+PGCluster: Multi-Master Synchronous Replication System for PostgreSQL
+===========================================================
+
+PGCluster is a multi-master and synchronous replication system that supports load balancing of PostgreSQL.
+
+Changed:
+ $INSTALL_DIR/GNUmakefile.in
+ $INSTALL_DIR/INSTALL_PGCLUSTER
+ $INSTALL_DIR/README_PGCLUSTER
+ $INSTALL_DIR/configure
+ $INSTALL_DIR/configure.in
+ $INSTALL_DIR/pgcluster.sh.tmpl
+ $INSTALL_DIR/src/Makefile
+ $INSTALL_DIR/src/Makefile.global.in
+ $INSTALL_DIR/src/backend/Makefile
+ $INSTALL_DIR/src/backend/access/transam/clog.c
+ $INSTALL_DIR/src/backend/access/transam/xact.c
+ $INSTALL_DIR/src/backend/catalog/catalog.c
+ $INSTALL_DIR/src/backend/commands/analyze.c
+ $INSTALL_DIR/src/backend/commands/copy.c
+ $INSTALL_DIR/src/backend/commands/sequence.c
+ $INSTALL_DIR/src/backend/executor/functions.c
+ $INSTALL_DIR/src/backend/libpq/Makefile
+ $INSTALL_DIR/src/backend/libpq/be-fsstubs.c
+ $INSTALL_DIR/src/backend/libpq/cluster.conf.sample
+ $INSTALL_DIR/src/backend/libpq/recovery.c
+ $INSTALL_DIR/src/backend/libpq/lifecheck.c
+ $INSTALL_DIR/src/backend/libpq/replicate.c
+ $INSTALL_DIR/src/backend/libpq/replicate_com.c
+ $INSTALL_DIR/src/backend/main/main.c
+ $INSTALL_DIR/src/backend/parser/gram.y
+ $INSTALL_DIR/src/backend/parser/keywords.c
+ $INSTALL_DIR/src/backend/parser/parse_clause.c
+ $INSTALL_DIR/src/backend/parser/parse_relation.c
+ $INSTALL_DIR/src/backend/postmaster/postmaster.c
+ $INSTALL_DIR/src/backend/storage/large_object/inv_api.c
+ $INSTALL_DIR/src/backend/storage/lmgr/deadlock.c
+ $INSTALL_DIR/src/backend/storage/lmgr/lmgr.c
+ $INSTALL_DIR/src/backend/storage/lmgr/lock.c
+ $INSTALL_DIR/src/backend/storage/lmgr/proc.c
+ $INSTALL_DIR/src/backend/tcop/postgres.c
+ $INSTALL_DIR/src/backend/tcop/pquery.c
+ $INSTALL_DIR/src/backend/tcop/utility.c
+ $INSTALL_DIR/src/backend/utils/adt/float.c
+ $INSTALL_DIR/src/backend/utils/adt/nabstime.c
+ $INSTALL_DIR/src/backend/utils/adt/ri_triggers.c
+ $INSTALL_DIR/src/backend/utils/adt/timestamp.c
+ $INSTALL_DIR/src/backend/utils/error/assert.c
+ $INSTALL_DIR/src/backend/utils/error/elog.c
+ $INSTALL_DIR/src/backend/utils/fmgr/fmgr.c
+ $INSTALL_DIR/src/backend/utils/mb/mbutils.c
+ $INSTALL_DIR/src/backend/utils/misc/guc.c
+ $INSTALL_DIR/src/backend/utils/misc/postgresql.conf.sample
+ $INSTALL_DIR/src/bin/initdb/initdb.c
+ $INSTALL_DIR/src/bin/pg_dump/pg_dump.c
+ $INSTALL_DIR/src/bin/pg_dump/pg_dumpall.c
+ $INSTALL_DIR/src/include/pg_config.h.in
+ $INSTALL_DIR/src/include/replicate.h
+ $INSTALL_DIR/src/include/replicate_com.h
+ $INSTALL_DIR/src/include/storage/lmgr.h
+ $INSTALL_DIR/src/include/storage/proc.h
+ $INSTALL_DIR/src/interfaces/libpq/Makefile
+ $INSTALL_DIR/src/makefiles/Makefile.aix
+ $INSTALL_DIR/src/makefiles/Makefile.freebsd
+ $INSTALL_DIR/src/makefiles/Makefile.hpux
+ $INSTALL_DIR/src/makefiles/Makefile.linux
+ $INSTALL_DIR/src/makefiles/Makefile.netbsd
+ $INSTALL_DIR/src/makefiles/Makefile.openbsd
+ $INSTALL_DIR/src/makefiles/Makefile.solaris
+ $INSTALL_DIR/src/makefiles/Makefile.sunos4
+Added:
+ $INSTALL_DIR/src/pgcluster/Makefile
+ $INSTALL_DIR/src/pgcluster/libpgc/Makefile
+ $INSTALL_DIR/src/pgcluster/libpgc/libpgc.h
+ $INSTALL_DIR/src/pgcluster/libpgc/sem.c
+ $INSTALL_DIR/src/pgcluster/libpgc/show.c
+ $INSTALL_DIR/src/pgcluster/libpgc/signal.c
+ $INSTALL_DIR/src/pgcluster/pglb/AUTHORS
+ $INSTALL_DIR/src/pgcluster/pglb/COPYING
+ $INSTALL_DIR/src/pgcluster/pglb/Makefile
+ $INSTALL_DIR/src/pgcluster/pglb/child.c
+ $INSTALL_DIR/src/pgcluster/pglb/cluster_table.c
+ $INSTALL_DIR/src/pgcluster/pglb/lifecheck.c
+ $INSTALL_DIR/src/pgcluster/pglb/load_balance.c
+ $INSTALL_DIR/src/pgcluster/pglb/main.c
+ $INSTALL_DIR/src/pgcluster/pglb/pglb.conf.sample
+ $INSTALL_DIR/src/pgcluster/pglb/pglb.h
+ $INSTALL_DIR/src/pgcluster/pglb/pool_auth.c
+ $INSTALL_DIR/src/pgcluster/pglb/pool_connection_pool.c
+ $INSTALL_DIR/src/pgcluster/pglb/pool_params.c
+ $INSTALL_DIR/src/pgcluster/pglb/pool_process_query.c
+ $INSTALL_DIR/src/pgcluster/pglb/pool_stream.c
+ $INSTALL_DIR/src/pgcluster/pglb/recovery.c
+ $INSTALL_DIR/src/pgcluster/pglb/socket.c
+ $INSTALL_DIR/src/pgcluster/pgrp/AUTHORS
+ $INSTALL_DIR/src/pgcluster/pgrp/COPYING
+ $INSTALL_DIR/src/pgcluster/pgrp/Makefile
+ $INSTALL_DIR/src/pgcluster/pgrp/cascade.c
+ $INSTALL_DIR/src/pgcluster/pgrp/conf.c
+ $INSTALL_DIR/src/pgcluster/pgrp/lifecheck.c
+ $INSTALL_DIR/src/pgcluster/pgrp/main.c
+ $INSTALL_DIR/src/pgcluster/pgrp/pgreplicate.conf.sample
+ $INSTALL_DIR/src/pgcluster/pgrp/pgreplicate.h
+ $INSTALL_DIR/src/pgcluster/pgrp/pqformat.c
+ $INSTALL_DIR/src/pgcluster/pgrp/recovery.c
+ $INSTALL_DIR/src/pgcluster/pgrp/replicate.c
+ $INSTALL_DIR/src/pgcluster/pgrp/rlog.c
+ $INSTALL_DIR/src/pgcluster/tool/Makefile
+ $INSTALL_DIR/src/pgcluster/tool/README.jp
+ $INSTALL_DIR/src/pgcluster/tool/pgcbench.c
+ $INSTALL_DIR/src/pgcluster/tool/pgcbench.sh
+ $INSTALL_DIR/src/pgcluster/tool/tpc-b_like.sql
+
+The latest version of this software may be obtained at
+http://pgfoundry.org/projects/pgcluster/
+
+For more information, see the pgFoundry web site located at
+http://pgcluster.projects.postgresql.org/
diff -aruN postgresql-8.2.4/configure pgcluster-1.7.0rc7/configure
--- postgresql-8.2.4/configure 2007-02-07 04:48:58.000000000 +0100
+++ pgcluster-1.7.0rc7/configure 2007-03-01 16:27:35.000000000 +0100
@@ -275,6 +275,8 @@
PACKAGE_STRING='PostgreSQL 8.2.4'
PACKAGE_BUGREPORT='pgsql-bugs@postgresql.org'
+PGCLUSTER_VERSION='1.7.0rc7'
+
ac_unique_file="src/backend/access/common/heaptuple.c"
ac_default_prefix=/usr/local/pgsql
# Factoring default headers for most tests.
@@ -314,7 +316,7 @@
# include
#endif"
-ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS configure_args build build_cpu build_vendor build_os host host_cpu host_vendor host_os PORTNAME docdir enable_nls WANTED_LANGUAGES default_port enable_shared enable_rpath enable_debug DTRACE DTRACEFLAGS enable_dtrace CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP GCC TAS autodepend INCLUDES enable_thread_safety with_tcl with_perl with_python with_krb5 krb_srvtab with_pam with_ldap with_bonjour with_openssl with_zlib EGREP ELF_SYS LDFLAGS_SL AWK FLEX FLEXFLAGS LN_S LD with_gnu_ld ld_R_works RANLIB ac_ct_RANLIB TAR STRIP ac_ct_STRIP STRIP_STATIC_LIB STRIP_SHARED_LIB YACC YFLAGS PERL perl_archlibexp perl_privlibexp perl_useshrplib perl_embed_ldflags PYTHON python_version python_configdir python_includespec python_libdir python_libspec python_additional_libs HAVE_IPV6 LIBOBJS acx_pthread_config PTHREAD_CC PTHREAD_LIBS PTHREAD_CFLAGS LDAP_LIBS_FE LDAP_LIBS_BE HAVE_POSIX_SIGNALS MSGFMT MSGMERGE XGETTEXT localedir TCLSH TCL_CONFIG_SH TCL_INCLUDE_SPEC TCL_LIB_FILE TCL_LIBS TCL_LIB_SPEC TCL_SHARED_BUILD TCL_SHLIB_LD_LIBS NSGMLS JADE have_docbook DOCBOOKSTYLE COLLATEINDEX SGMLSPL vpath_build LTLIBOBJS'
+ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS configure_args build build_cpu build_vendor build_os host host_cpu host_vendor host_os PORTNAME docdir enable_nls WANTED_LANGUAGES default_port enable_shared enable_rpath enable_debug DTRACE DTRACEFLAGS enable_dtrace CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP GCC TAS autodepend INCLUDES enable_thread_safety with_tcl with_perl with_python with_krb5 krb_srvtab with_pam with_ldap with_bonjour with_openssl with_zlib EGREP ELF_SYS LDFLAGS_SL AWK FLEX FLEXFLAGS LN_S LD with_gnu_ld ld_R_works RANLIB ac_ct_RANLIB TAR STRIP ac_ct_STRIP STRIP_STATIC_LIB STRIP_SHARED_LIB YACC YFLAGS PERL perl_archlibexp perl_privlibexp perl_useshrplib perl_embed_ldflags PYTHON python_version python_configdir python_includespec python_libdir python_libspec python_additional_libs HAVE_IPV6 LIBOBJS acx_pthread_config PTHREAD_CC PTHREAD_LIBS PTHREAD_CFLAGS LDAP_LIBS_FE LDAP_LIBS_BE HAVE_POSIX_SIGNALS MSGFMT MSGMERGE XGETTEXT localedir TCLSH TCL_CONFIG_SH TCL_INCLUDE_SPEC TCL_LIB_FILE TCL_LIBS TCL_LIB_SPEC TCL_SHARED_BUILD TCL_SHLIB_LD_LIBS NSGMLS JADE have_docbook DOCBOOKSTYLE COLLATEINDEX SGMLSPL vpath_build LTLIBOBJS PGCLUSTER_VERSION'
ac_subst_files=''
# Initialize some variables set by options.
@@ -1241,6 +1243,10 @@
#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
_ACEOF
+cat >>confdefs.h <<_ACEOF
+#define PGCLUSTER_VERSION "$PGCLUSTER_VERSION"
+_ACEOF
+
# Let the site file select an alternate cache file if it wants to.
# Prefer explicitly selected file to automatically selected ones.
@@ -23555,6 +23561,7 @@
s,@host_os@,$host_os,;t t
s,@PORTNAME@,$PORTNAME,;t t
s,@docdir@,$docdir,;t t
+s,@PGCLUSTER_VERSION@,$PGCLUSTER_VERSION,;t t
s,@enable_nls@,$enable_nls,;t t
s,@WANTED_LANGUAGES@,$WANTED_LANGUAGES,;t t
s,@default_port@,$default_port,;t t
diff -aruN postgresql-8.2.4/configure.in pgcluster-1.7.0rc7/configure.in
--- postgresql-8.2.4/configure.in 2007-02-07 04:48:58.000000000 +0100
+++ pgcluster-1.7.0rc7/configure.in 2007-02-18 22:52:16.000000000 +0100
@@ -27,6 +27,7 @@
AC_SUBST(configure_args, [$ac_configure_args])
AC_DEFINE_UNQUOTED(PG_VERSION, "$PACKAGE_VERSION", [PostgreSQL version as a string])
+AC_DEFINE_UNQUOTED(PGCLUSTER_VERSION, "$PGCLUSTER_VERSION", [PGCluster version])
AC_CANONICAL_HOST
diff -aruN postgresql-8.2.4/pgcluster.sh.tmpl pgcluster-1.7.0rc7/pgcluster.sh.tmpl
--- postgresql-8.2.4/pgcluster.sh.tmpl 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/pgcluster.sh.tmpl 2007-02-18 22:52:16.000000000 +0100
@@ -0,0 +1,56 @@
+#!/bin/sh
+#
+# $FreeBSD: ports/databases/pgcluster/files/pgcluster.sh.tmpl,v 1.1 2004/01/26 09:02:45 kuriyama Exp $
+#
+# PROVIDE: pgcluster
+# REQUIRE: DAEMON
+# BEFORE: pgreplicate
+# KEYWORD: FreeBSD
+#
+# Add the following line to /etc/rc.conf to enable pgcluster:
+#
+# pgcluster_enable="YES"
+# # optional
+# pgcluster_data="/home/pgsql/data"
+# pgcluster_flags="-w -s"
+#
+
+pgcluster_enable="NO"
+pgcluster_data="%%PREFIX%%/pgsql/data"
+pgcluster_flags="-w -s"
+
+. %%RC_SUBR%%
+
+load_rc_config pgcluster
+
+name=pgcluster
+command=%%PREFIX%%/bin/pg_ctl
+pgcluster_user=pgsql
+extra_commands="initdb recover"
+initdb_cmd="pgcluster_initdb"
+recover_cmd="pgcluster_recover"
+start_cmd="pgcluster_start"
+stop_cmd="pgcluster_stop"
+
+pgcluster_flags="${pgcluster_flags} -D ${pgcluster_data}"
+pidfile="${pgcluster_data}/postmaster.pid"
+
+pgcluster_start()
+{
+ su -m ${pgcluster_user} -c "exec ${command} start ${pgcluster_flags} -o '-i'"
+}
+pgcluster_stop()
+{
+ su -m ${pgcluster_user} -c "exec ${command} stop ${pgcluster_flags} -m i"
+}
+pgcluster_recover()
+{
+ su -m ${pgcluster_user} -c "exec ${command} start ${pgcluster_flags} -o '-i -R'"
+}
+pgcluster_initdb()
+{
+ su -m ${pgcluster_user} -c "exec %%PREFIX%%/bin/initdb -D ${pgcluster_data}"
+}
+
+rcvar=pgcluster_enable	# make run_rc_command honour pgcluster_enable; the rc config was already loaded by load_rc_config above
+run_rc_command "$1"
diff -aruN postgresql-8.2.4/src/Makefile pgcluster-1.7.0rc7/src/Makefile
--- postgresql-8.2.4/src/Makefile 2006-06-23 01:50:35.000000000 +0200
+++ pgcluster-1.7.0rc7/src/Makefile 2007-02-18 22:52:16.000000000 +0100
@@ -16,14 +16,15 @@
all install installdirs uninstall dep depend distprep:
$(MAKE) -C port $@
$(MAKE) -C timezone $@
+ $(MAKE) -C interfaces $@
$(MAKE) -C backend $@
$(MAKE) -C backend/utils/mb/conversion_procs $@
$(MAKE) -C include $@
- $(MAKE) -C interfaces $@
$(MAKE) -C bin $@
$(MAKE) -C pl $@
$(MAKE) -C makefiles $@
$(MAKE) -C test/regress $@
+ $(MAKE) -C pgcluster $@
install: install-local
@@ -44,6 +45,7 @@
rm -f $(addprefix '$(DESTDIR)$(pgxsdir)/$(subdir)'/, Makefile.global Makefile.port Makefile.shlib nls-global.mk)
clean:
+ $(MAKE) -C pgcluster $@
$(MAKE) -C port $@
$(MAKE) -C timezone $@
$(MAKE) -C backend $@
@@ -57,6 +59,7 @@
$(MAKE) -C test/thread $@
distclean maintainer-clean:
+ -$(MAKE) -C pgcluster $@
-$(MAKE) -C port $@
-$(MAKE) -C timezone $@
-$(MAKE) -C backend $@
diff -aruN postgresql-8.2.4/src/Makefile.global.in pgcluster-1.7.0rc7/src/Makefile.global.in
--- postgresql-8.2.4/src/Makefile.global.in 2006-10-08 19:15:33.000000000 +0200
+++ pgcluster-1.7.0rc7/src/Makefile.global.in 2007-02-18 22:52:16.000000000 +0100
@@ -31,6 +31,9 @@
# PostgreSQL version number
VERSION = @PACKAGE_VERSION@
+# PGCluster version number
+PGCLUSTER_VERSION = @PGCLUSTER_VERSION@
+
# Support for VPATH builds
vpath_build = @vpath_build@
abs_top_srcdir = @abs_top_srcdir@
@@ -207,6 +210,7 @@
GCC = @GCC@
CFLAGS = @CFLAGS@
+CFLAGS += -DUSE_REPLICATION -DPRINT_DEBUG
# Kind-of compilers
YACC = @YACC@
diff -aruN postgresql-8.2.4/src/backend/Makefile pgcluster-1.7.0rc7/src/backend/Makefile
--- postgresql-8.2.4/src/backend/Makefile 2006-10-08 19:15:33.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/Makefile 2007-02-18 22:52:16.000000000 +0100
@@ -39,7 +39,7 @@
ifneq ($(PORTNAME), win32)
ifneq ($(PORTNAME), aix)
-postgres: $(OBJS)
+postgres: $(OBJS) $(libpq_srcdir)/libpq.a
$(CC) $(CFLAGS) $(LDFLAGS) $(export_dynamic) $^ $(LIBS) -o $@
endif
@@ -169,6 +169,7 @@
$(INSTALL_DATA) $(srcdir)/libpq/pg_ident.conf.sample '$(DESTDIR)$(datadir)/pg_ident.conf.sample'
$(INSTALL_DATA) $(srcdir)/utils/misc/postgresql.conf.sample '$(DESTDIR)$(datadir)/postgresql.conf.sample'
$(INSTALL_DATA) $(srcdir)/access/transam/recovery.conf.sample '$(DESTDIR)$(datadir)/recovery.conf.sample'
+ $(INSTALL_DATA) $(srcdir)/libpq/cluster.conf.sample '$(DESTDIR)$(datadir)/cluster.conf.sample'
install-bin: postgres $(POSTGRES_IMP) installdirs
$(INSTALL_PROGRAM) postgres$(X) '$(DESTDIR)$(bindir)/postgres$(X)'
@@ -221,8 +222,9 @@
$(MAKE) -C catalog uninstall-data
rm -f '$(DESTDIR)$(datadir)/pg_hba.conf.sample' \
'$(DESTDIR)$(datadir)/pg_ident.conf.sample' \
- '$(DESTDIR)$(datadir)/postgresql.conf.sample' \
- '$(DESTDIR)$(datadir)/recovery.conf.sample'
+ '$(DESTDIR)$(datadir)/postgresql.conf.sample' \
+ '$(DESTDIR)$(datadir)/recovery.conf.sample' \
+ '$(DESTDIR)$(datadir)/cluster.conf.sample'
##########################################################################
diff -aruN postgresql-8.2.4/src/backend/access/transam/clog.c pgcluster-1.7.0rc7/src/backend/access/transam/clog.c
--- postgresql-8.2.4/src/backend/access/transam/clog.c 2006-11-05 23:42:07.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/access/transam/clog.c 2007-02-18 22:52:16.000000000 +0100
@@ -57,6 +57,9 @@
#define TransactionIdToByte(xid) (TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE)
#define TransactionIdToBIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_BYTE)
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/*
* Link to shared-memory data structures for CLOG control
@@ -335,7 +338,16 @@
/* Check to see if there's any files that could be removed */
if (!SlruScanDirectory(ClogCtl, cutoffPage, false))
+#ifdef USE_REPLICATION
+ {
+ /* Perform a forced CHECKPOINT */
+ /* CreateCheckPoint(false, true); */
+ RequestCheckpoint(true, false);
+#endif /* USE_REPLICATION */
return; /* nothing to remove */
+#ifdef USE_REPLICATION
+ }
+#endif /* USE_REPLICATION */
/* Write XLOG record and flush XLOG to disk */
WriteTruncateXlogRec(cutoffPage);
diff -aruN postgresql-8.2.4/src/backend/access/transam/xact.c pgcluster-1.7.0rc7/src/backend/access/transam/xact.c
--- postgresql-8.2.4/src/backend/access/transam/xact.c 2006-11-23 02:14:59.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/access/transam/xact.c 2007-02-18 22:52:16.000000000 +0100
@@ -44,6 +44,9 @@
#include "utils/relcache.h"
#include "utils/guc.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/*
* User-tweakable parameters
@@ -4335,3 +4338,11 @@
else
appendStringInfo(buf, "UNKNOWN");
}
+
+#ifdef USE_REPLICATION
+void
+PGR_Reload_Start_Time(void)
+{
+ xactStartTimestamp = GetCurrentTimestamp();
+}
+#endif /* USE_REPLICATION */
diff -aruN postgresql-8.2.4/src/backend/catalog/catalog.c pgcluster-1.7.0rc7/src/backend/catalog/catalog.c
--- postgresql-8.2.4/src/backend/catalog/catalog.c 2006-10-04 02:29:50.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/catalog/catalog.c 2007-02-18 22:52:16.000000000 +0100
@@ -38,6 +38,9 @@
#include "utils/fmgroids.h"
#include "utils/relcache.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
#define OIDCHARS 10 /* max chars printed by %u */
@@ -360,7 +363,7 @@
Oid
GetNewOidWithIndex(Relation relation, Relation indexrel)
{
- Oid newOid;
+ Oid newOid = 0;
IndexScanDesc scan;
ScanKeyData key;
bool collides;
@@ -368,8 +371,18 @@
/* Generate new OIDs until we find one not in the table */
do
{
+#ifdef USE_REPLICATION
+ if (PGR_Is_Sync_OID == true)
+ {
+ newOid = PGRGetNewObjectId(newOid);
+ }
+ else
+ {
+ newOid = GetNewObjectId();
+ }
+#else
newOid = GetNewObjectId();
-
+#endif /* USE_REPLICATION */
ScanKeyInit(&key,
(AttrNumber) 1,
BTEqualStrategyNumber, F_OIDEQ,
@@ -454,3 +467,4 @@
return rnode.relNode;
}
+
diff -aruN postgresql-8.2.4/src/backend/commands/analyze.c pgcluster-1.7.0rc7/src/backend/commands/analyze.c
--- postgresql-8.2.4/src/backend/commands/analyze.c 2006-11-05 23:42:08.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/commands/analyze.c 2007-02-18 22:52:16.000000000 +0100
@@ -36,6 +36,9 @@
#include "utils/syscache.h"
#include "utils/tuplesort.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/* Data structure for Algorithm S from Knuth 3.4.2 */
typedef struct
@@ -934,7 +937,11 @@
static double
random_fract(void)
{
+#ifdef USE_REPLICATION
+ return ((double) PGR_Random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
+#else
return ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
+#endif /* USE_REPLICATION */
}
/*
diff -aruN postgresql-8.2.4/src/backend/commands/copy.c pgcluster-1.7.0rc7/src/backend/commands/copy.c
--- postgresql-8.2.4/src/backend/commands/copy.c 2006-10-06 19:13:58.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/commands/copy.c 2007-02-18 22:52:16.000000000 +0100
@@ -41,6 +41,9 @@
#include "utils/lsyscache.h"
#include "utils/memutils.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
#define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
#define OCTVALUE(c) ((c) - '0')
@@ -488,6 +491,9 @@
CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
{
int bytesread = 0;
+#ifdef USE_REPLICATION
+ char * ptr = (char *)databuf;
+#endif
switch (cstate->copy_dest)
{
@@ -578,6 +584,9 @@
}
break;
}
+#ifdef USE_REPLICATION
+ PGR_Set_Copy_Data(PGRCopyData,ptr,bytesread,0);
+#endif /* USE_REPLICATION */
return bytesread;
}
@@ -2093,6 +2102,13 @@
}
}
+#ifdef USE_REPLICATION
+ if (done)
+ {
+ PGR_Set_Copy_Data(PGRCopyData,(char *)NULL,0,1);
+ }
+#endif /* USE_REPLICATION */
+
/* Done, clean up */
error_context_stack = errcontext.previous;
@@ -2201,6 +2217,11 @@
break;
}
}
+#ifdef USE_REPLICATION
+ /*
+ PGR_Set_Copy_Data(PGRCopyData,cstate->line_buf.data,cstate->line_buf.len,0);
+ */
+#endif
/* Done reading the line. Convert it to server encoding. */
if (cstate->need_transcoding)
diff -aruN postgresql-8.2.4/src/backend/commands/prepare.c pgcluster-1.7.0rc7/src/backend/commands/prepare.c
--- postgresql-8.2.4/src/backend/commands/prepare.c 2006-10-04 02:29:51.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/commands/prepare.c 2007-02-18 22:52:16.000000000 +0100
@@ -29,6 +29,9 @@
#include "utils/builtins.h"
#include "utils/memutils.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/*
* The hash table in which prepared queries are stored. This is
@@ -793,3 +796,27 @@
result = construct_array(tmp_ary, len, REGTYPEOID, 4, true, 'i');
return PointerGetDatum(result);
}
+
+
+#ifdef USE_REPLICATION
+/*
+ * PGR_is_select_prepared_statement
+ *
+ * Report whether the prepared statement named by stmt->name carries the
+ * command tag "SELECT".
+ *
+ * Returns false when stmt or stmt->name is NULL, when the lookup yields
+ * no entry, or when the entry has no command tag; returns true only on
+ * an exact "SELECT" match.
+ */
+bool
+PGR_is_select_prepared_statement(PrepareStmt *stmt)
+{
+    PreparedStatement *entry;
+
+    if ((stmt == NULL) || (stmt->name == NULL))
+    {
+        return false;
+    }
+    entry = FetchPreparedStatement(stmt->name, true);
+    /* a missing command tag must not be handed to strcmp() */
+    if ((entry == NULL) || (entry->commandTag == NULL))
+    {
+        return false;
+    }
+    return (strcmp(entry->commandTag, "SELECT") == 0);
+}
+#endif /* USE_REPLICATION */
+
diff -aruN postgresql-8.2.4/src/backend/commands/sequence.c pgcluster-1.7.0rc7/src/backend/commands/sequence.c
--- postgresql-8.2.4/src/backend/commands/sequence.c 2006-10-06 19:13:58.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/commands/sequence.c 2007-02-18 22:52:16.000000000 +0100
@@ -31,6 +31,9 @@
#include "utils/resowner.h"
#include "utils/syscache.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/*
* We don't want to log each fetching of a value from a sequence,
@@ -396,6 +399,9 @@
RangeVar *sequence;
Oid relid;
+#ifdef USE_REPLICATION
+ Xlog_Check_Replicate(CMD_UTILITY);
+#endif /* USE_REPLICATION */
sequence = makeRangeVarFromNameList(textToQualifiedNameList(seqin));
relid = RangeVarGetRelid(sequence, false);
@@ -622,6 +628,10 @@
SeqTable elm;
Relation seqrel;
+#ifdef USE_REPLICATION
+ Xlog_Check_Replicate(CMD_UTILITY);
+#endif /* USE_REPLICATION */
+
/* open and AccessShareLock sequence */
init_sequence(relid, &elm, &seqrel);
diff -aruN postgresql-8.2.4/src/backend/executor/functions.c pgcluster-1.7.0rc7/src/backend/executor/functions.c
--- postgresql-8.2.4/src/backend/executor/functions.c 2007-02-02 01:03:17.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/executor/functions.c 2007-02-18 22:52:16.000000000 +0100
@@ -30,6 +30,9 @@
#include "utils/syscache.h"
#include "utils/typcache.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/*
* We have an execution_state record for each query in a function. Each
@@ -454,6 +457,13 @@
Datum value;
MemoryContext oldcontext;
+#ifdef USE_REPLICATION
+ if ((es != NULL) && (es->qd != NULL))
+ {
+ Xlog_Check_Replicate(es->qd->operation);
+ }
+#endif /* USE_REPLICATION */
+
if (es->status == F_EXEC_START)
postquel_start(es, fcache);
diff -aruN postgresql-8.2.4/src/backend/libpq/Makefile pgcluster-1.7.0rc7/src/backend/libpq/Makefile
--- postgresql-8.2.4/src/backend/libpq/Makefile 2003-11-29 20:51:49.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/libpq/Makefile 2007-02-18 22:52:16.000000000 +0100
@@ -15,7 +15,8 @@
# be-fsstubs is here for historical reasons, probably belongs elsewhere
OBJS = be-fsstubs.o be-secure.o auth.o crypt.o hba.o ip.o md5.o pqcomm.o \
- pqformat.o pqsignal.o
+ pqformat.o pqsignal.o \
+ replicate.o replicate_com.o recovery.o lifecheck.o
all: SUBSYS.o
diff -aruN postgresql-8.2.4/src/backend/libpq/auth.c pgcluster-1.7.0rc7/src/backend/libpq/auth.c
--- postgresql-8.2.4/src/backend/libpq/auth.c 2006-11-06 02:27:52.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/libpq/auth.c 2007-02-18 22:52:16.000000000 +0100
@@ -31,6 +31,9 @@
#include "libpq/pqformat.h"
#include "storage/ipc.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
static void sendAuthRequest(Port *port, AuthRequest areq);
static void auth_failed(Port *port, int status);
@@ -888,6 +891,12 @@
{
StringInfoData buf;
+#ifdef USE_REPLICATION
+ if (PGR_password == NULL)
+ {
+ return NULL;
+ }
+#endif /* USE_REPLICATION */
if (PG_PROTOCOL_MAJOR(port->proto) >= 3)
{
/* Expect 'p' message type */
@@ -939,6 +948,19 @@
ereport(DEBUG5,
(errmsg("received password packet")));
+#ifdef USE_REPLICATION
+ if (strncmp(buf.data,"md5",3) == 0)
+ {
+ char * ptr = NULL;
+ ptr = strchr(buf.data,'(');
+ if (ptr != NULL)
+ {
+ PGR_get_md5salt(PGR_password->md5Salt,ptr);
+ *ptr='\0';
+ }
+ }
+ strncpy(PGR_password->password,buf.data, PASSWORD_MAX_LENGTH );
+#endif /* USE_REPLICATION */
/*
* Return the received string. Note we do not attempt to do any
* character-set conversion on it; since we don't yet know the client's
diff -aruN postgresql-8.2.4/src/backend/libpq/be-fsstubs.c pgcluster-1.7.0rc7/src/backend/libpq/be-fsstubs.c
--- postgresql-8.2.4/src/backend/libpq/be-fsstubs.c 2006-09-07 17:37:25.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/libpq/be-fsstubs.c 2007-02-18 22:52:16.000000000 +0100
@@ -49,6 +49,9 @@
#include "storage/large_object.h"
#include "utils/memutils.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/*#define FSDB 1*/
#define BUFSIZE 8192
@@ -93,6 +96,19 @@
LargeObjectDesc *lobjDesc;
int fd;
+#ifdef USE_REPLICATION
+ if ((PGR_Stand_Alone != NULL) &&
+ (PGR_lo_open(lobjId,mode) != STATUS_OK))
+ {
+ if ((mode & INV_WRITE) &&
+ (PGR_Is_Stand_Alone() == true) &&
+ (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
+ {
+ elog(WARNING, "This query is not permitted when all replication servers fell down ");
+ PG_RETURN_INT32(-1);
+ }
+ }
+#endif /* USE_REPLICATION */
#if FSDB
elog(DEBUG4, "lo_open(%u,%d)", lobjId, mode);
#endif
@@ -126,6 +142,9 @@
errmsg("invalid large-object descriptor: %d", fd)));
PG_RETURN_INT32(-1);
}
+#ifdef USE_REPLICATION
+ PGR_lo_close(fd);
+#endif
#if FSDB
elog(DEBUG4, "lo_close(%d)", fd);
#endif
@@ -183,6 +202,18 @@
errmsg("large object descriptor %d was not opened for writing",
fd)));
+#ifdef USE_REPLICATION
+ if ((PGR_Stand_Alone != NULL) &&
+ (PGR_lo_write(fd, buf, len) != STATUS_OK))
+ {
+ if ((PGR_Is_Stand_Alone() == true) &&
+ (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
+ {
+ elog(WARNING, "This query is not permitted when all replication servers fell down ");
+ return -1;
+ }
+ }
+#endif
status = inv_write(cookies[fd], buf, len);
return status;
@@ -205,6 +236,10 @@
PG_RETURN_INT32(-1);
}
+#ifdef USE_REPLICATION
+ PGR_lo_lseek(fd, offset, whence);
+#endif /* USE_REPLICATION */
+
status = inv_seek(cookies[fd], offset, whence);
PG_RETURN_INT32(status);
@@ -221,6 +256,18 @@
*/
CreateFSContext();
+#ifdef USE_REPLICATION
+ if ((PGR_Stand_Alone != NULL) &&
+ (PGR_lo_create(InvalidOid) != STATUS_OK))
+ {
+ if ((PGR_Is_Stand_Alone() == true) &&
+ (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
+ {
+ elog(WARNING, "This query is not permitted when all replication servers fell down ");
+ PG_RETURN_INT32(-1);
+ }
+ }
+#endif /* USE_REPLICATION */
lobjId = inv_create(InvalidOid);
PG_RETURN_OID(lobjId);
@@ -231,6 +278,18 @@
{
Oid lobjId = PG_GETARG_OID(0);
+#ifdef USE_REPLICATION
+ if ((PGR_Stand_Alone != NULL) &&
+ (PGR_lo_create(lobjId) != STATUS_OK))
+ {
+ if ((PGR_Is_Stand_Alone() == true) &&
+ (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
+ {
+ elog(WARNING, "This query is not permitted when all replication servers fell down ");
+ PG_RETURN_INT32(-1);
+ }
+ }
+#endif /* USE_REPLICATION */
/*
* We don't actually need to store into fscxt, but create it anyway to
* ensure that AtEOXact_LargeObject knows there is state to clean up
@@ -263,6 +322,18 @@
{
Oid lobjId = PG_GETARG_OID(0);
+#ifdef USE_REPLICATION
+ if ((PGR_Stand_Alone != NULL) &&
+ (PGR_lo_unlink(lobjId) != STATUS_OK))
+ {
+ if ((PGR_Is_Stand_Alone() == true) &&
+ (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
+ {
+ elog(WARNING, "This query is not permitted when all replication servers fell down ");
+ return -1;
+ }
+ }
+#endif /* USE_REPLICATION */
/*
* If there are any open LO FDs referencing that ID, close 'em.
*/
@@ -360,6 +431,19 @@
nbytes = MAXPGPATH - 1;
memcpy(fnamebuf, VARDATA(filename), nbytes);
fnamebuf[nbytes] = '\0';
+
+#ifdef USE_REPLICATION
+ if ((PGR_Stand_Alone != NULL) &&
+ (PGR_lo_import((char*)fnamebuf) != STATUS_OK))
+ {
+ if ((PGR_Is_Stand_Alone() == true) &&
+ (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) )
+ {
+ elog(WARNING, "This query is not permitted when all replication servers fell down ");
+ return -1;
+ }
+ }
+#endif
fd = PathNameOpenFile(fnamebuf, O_RDONLY | PG_BINARY, 0666);
if (fd < 0)
ereport(ERROR,
@@ -372,6 +456,7 @@
*/
lobjOid = inv_create(InvalidOid);
+
/*
* read in from the filesystem and write to the inversion object
*/
diff -aruN postgresql-8.2.4/src/backend/libpq/cluster.conf.sample pgcluster-1.7.0rc7/src/backend/libpq/cluster.conf.sample
--- postgresql-8.2.4/src/backend/libpq/cluster.conf.sample 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/libpq/cluster.conf.sample 2007-02-18 22:52:16.000000000 +0100
@@ -0,0 +1,71 @@
+#============================================================
+# Cluster DB Server configuration file
+#------------------------------------------------------------
+# file: cluster.conf
+#------------------------------------------------------------
+# This file controls:
+# o which hosts & ports are replication servers
+# o which port to use for replication requests to the replication server
+# o which commands to use for the recovery function
+#============================================================
+#------------------------------------------------------------
+# set Replication Server information
+# o Host_Name : hostname
+# o Port : Connection port for postmaster
+# o Recovery_Port : Connection port for recovery process
+#------------------------------------------------------------
+
+ replicate1.pgcluster.org
+ 8001
+ 8101
+
+#
+# replicate2.pgcluster.org
+# 8002
+# 8102
+#
+#
+# replicate3.pgcluster.org
+# 8003
+# 8103
+#
+#-------------------------------------------------------------
+# set Cluster DB Server information
+# o Host_Name : Host name which connect with replication server
+# o Recovery_Port : Connection port for recovery
+# o Rsync_Path : Path of rsync command
+# o Rsync_Option : File transfer option for rsync
+# o Rsync_Compress : Use compression option for rsync
+# [yes/no]. default : yes
+# o Pg_Dump_Path : Path of pg_dump
+# o When_Stand_Alone : Permission applied when all replication
+# servers are down; one of two values,
+# "read_only" or "read_write".
+# o Replication_Timeout : Timeout of each replication request
+# o Lifecheck_Timeout : Timeout of the lifecheck response
+# o Lifecheck_Interval : Interval time of the lifecheck
+# (range 1s - 1h)
+# 10s -- 10 seconds
+# 10min -- 10 minutes
+# 1h -- 1 hour
+#-------------------------------------------------------------
+ cluster1.pgcluster.org
+ 7001
+ /usr/bin/rsync
+ ssh -1
+ yes
+ /usr/local/pgsql/bin/pg_dump
+ read_only
+ 1 min
+ 3s
+ 11s
+#-------------------------------------------------------------
+# set partitional replicate control information
+# set DB name and Table name to stop replication
+# o DB_Name : DB name
+# o Table_Name : Table name
+#-------------------------------------------------------------
+#
+# test_db
+# log_table
+#
diff -aruN postgresql-8.2.4/src/backend/libpq/crypt.c pgcluster-1.7.0rc7/src/backend/libpq/crypt.c
--- postgresql-8.2.4/src/backend/libpq/crypt.c 2006-07-14 16:52:19.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/libpq/crypt.c 2007-02-18 22:52:16.000000000 +0100
@@ -23,6 +23,9 @@
#include "libpq/crypt.h"
#include "libpq/md5.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
int
md5_crypt_verify(const Port *port, const char *role, char *client_pass)
@@ -72,13 +75,34 @@
if (isMD5(shadow_pass))
{
/* stored password already encrypted, only do salt */
- if (!pg_md5_encrypt(shadow_pass + strlen("md5"),
- (char *) port->md5Salt,
+#ifdef USE_REPLICATION
+ if ((PGR_password != NULL) &&
+ ((PGR_password->md5Salt[0] |
+ PGR_password->md5Salt[1] |
+ PGR_password->md5Salt[2] |
+ PGR_password->md5Salt[3]) != 0 ))
+ {
+ if (!pg_md5_encrypt(shadow_pass + strlen("md5"),
+ (char *) PGR_password->md5Salt,
sizeof(port->md5Salt), crypt_pwd))
+ {
+ pfree(crypt_pwd);
+ return STATUS_ERROR;
+ }
+ }
+ else
{
- pfree(crypt_pwd);
- return STATUS_ERROR;
+#endif /* USE_REPLICATION */
+ if (!pg_md5_encrypt(shadow_pass + strlen("md5"),
+ (char *) port->md5Salt,
+ sizeof(port->md5Salt), crypt_pwd))
+ {
+ pfree(crypt_pwd);
+ return STATUS_ERROR;
+ }
+#ifdef USE_REPLICATION
}
+#endif /* USE_REPLICATION */
}
else
{
@@ -134,6 +158,16 @@
if (strcmp(crypt_client_pass, crypt_pwd) == 0)
{
+#ifdef USE_REPLICATION
+ /*
+ if (*(PGR_password->password) != '\0')
+ {
+ memset(PGR_password->password,0,PASSWORD_MAX_LENGTH);
+ memset(PGR_password->md5Salt,0,sizeof(PGR_password->md5Salt));
+ memset(PGR_password->cryptSalt,0,sizeof(PGR_password->cryptSalt));
+ }
+ */
+#endif /* USE_REPLICATION */
/*
* Password OK, now check to be sure we are not past valuntil
*/
diff -aruN postgresql-8.2.4/src/backend/libpq/lifecheck.c pgcluster-1.7.0rc7/src/backend/libpq/lifecheck.c
--- postgresql-8.2.4/src/backend/libpq/lifecheck.c 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/libpq/lifecheck.c 2007-03-01 16:27:15.000000000 +0100
@@ -0,0 +1,281 @@
+/*--------------------------------------------------------------------
+ * FILE:
+ * lifecheck.c
+ *
+ * NOTE:
+ * This file contains the functions that the backend source calls
+ * to perform the lifecheck.
+ * The low-level I/O functions called by these functions are
+ * contained in 'replicate_com.c'.
+ *
+ *--------------------------------------------------------------------
+ */
+
+#ifdef USE_REPLICATION
+
+#include "postgres.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "libpq/pqsignal.h"
+#include "utils/guc.h"
+#include "miscadmin.h"
+#include "nodes/nodes.h"
+#include "nodes/parsenodes.h"
+#include "access/xact.h"
+#include "access/xlog.h"
+#include "tcop/tcopprot.h"
+#include "postmaster/postmaster.h"
+
+#include "replicate.h"
+
+#ifdef WIN32
+#include "win32.h"
+#else
+#ifdef HAVE_NETINET_TCP_H
+#include
+#endif
+#include
+#endif
+
+#ifndef HAVE_STRDUP
+#include "strdup.h"
+#endif
+#ifdef HAVE_CRYPT_H
+#include
+#endif
+
+#ifdef MULTIBYTE
+#include "mb/pg_wchar.h"
+#endif
+
+static void set_replication_server_status(int status);
+static int send_lifecheck(int sock);
+static int recv_lifecheck(int sock);
+static void set_timeout(SIGNAL_ARGS);
+static void exit_lifecheck(SIGNAL_ARGS);
+
+ReplicateServerInfo * PGR_Replicator_4_Lifecheck = NULL;
+
+/*
+ * PGR_Lifecheck_Main
+ *
+ * Fork a child process that repeatedly sends a lifecheck to the
+ * replication server and records the outcome.
+ *
+ * The calling process gets fork()'s return value back immediately (any
+ * non-zero value is returned as-is).  The child installs its signal
+ * handlers and then loops: look up the replication server, obtain its
+ * query socket, send a lifecheck, wait for the response, report DATA_USE
+ * and sleep PGR_Lifecheck_Interval seconds.  Each failed step reports
+ * DATA_ERR through set_replication_server_status() and starts the next
+ * pass.
+ *
+ * NOTE(review): the child leaves the loop with "return STATUS_ERROR" when
+ * PGR_get_replicate_server_info() yields NULL, so the child returns into
+ * the caller's code path -- confirm the caller copes with that.
+ * NOTE(review): the failure branches "continue" without sleeping, and the
+ * send/receive ones leave the alarm armed -- confirm this is intended.
+ */
+int
+PGR_Lifecheck_Main(void)
+{
+ int status = STATUS_OK;
+ int sock = -1;
+ int pid = 0;
+
+ /* non-zero fork() result: hand it straight back to the caller */
+ if ((pid = fork()) != 0 )
+ {
+ return pid;
+ }
+
+ /* child only from here on: termination signals end the process,
+ * SIGALRM is the lifecheck timeout */
+ pqsignal(SIGHUP, exit_lifecheck);
+ pqsignal(SIGTERM, exit_lifecheck);
+ pqsignal(SIGINT, exit_lifecheck);
+ pqsignal(SIGQUIT, exit_lifecheck);
+ pqsignal(SIGALRM, set_timeout);
+ PG_SETMASK(&UnBlockSig);
+
+ for (;;)
+ {
+
+ /* nothing to check yet: cancel any alarm, wait one interval, retry */
+ PGR_Replicator_4_Lifecheck = PGR_check_replicate_server_info();
+ if (PGR_Replicator_4_Lifecheck == NULL)
+ {
+ alarm(0);
+ sleep(PGR_Lifecheck_Interval);
+ continue;
+ }
+ /* get replication server information */
+ PGR_Replicator_4_Lifecheck = PGR_get_replicate_server_info();
+ if (PGR_Replicator_4_Lifecheck == NULL)
+ {
+ if (Debug_pretty_print)
+ {
+ elog(DEBUG1,"not found replication server");
+ }
+ return STATUS_ERROR;
+ }
+ sock = PGR_get_replicate_server_socket( PGR_Replicator_4_Lifecheck , PGR_QUERY_SOCKET );
+ if (sock < 0)
+ {
+ set_replication_server_status(DATA_ERR);
+ if (Debug_pretty_print)
+ elog(DEBUG1,"get_replicate_server_socket failed");
+ continue;
+ }
+
+ /* set alarm as lifecheck timeout */
+ alarm(PGR_Lifecheck_Timeout * 2);
+
+ /* send lifecheck to replication server */
+ status = send_lifecheck(sock);
+ if (status != STATUS_OK)
+ {
+ set_replication_server_status(DATA_ERR);
+ close(sock);
+ sock = -1;
+ if (Debug_pretty_print)
+ elog(DEBUG1,"send life check failed");
+ continue;
+ }
+
+ /* receive lifecheck response */
+ status = recv_lifecheck(sock);
+ if (status != STATUS_OK)
+ {
+ set_replication_server_status(DATA_ERR);
+ close(sock);
+ sock = -1;
+ if (Debug_pretty_print)
+ elog(DEBUG1,"receive life check failed");
+ continue;
+ }
+
+ /* stop alarm */
+ alarm(0);
+ set_replication_server_status(DATA_USE);
+
+ /* wait next lifecheck as interval */
+ sleep(PGR_Lifecheck_Interval);
+ }
+}
+
+/*
+ * set_replication_server_status
+ *
+ * Record a lifecheck outcome for PGR_Replicator_4_Lifecheck.
+ *
+ * A DATA_ERR outcome only bumps the server's retry counter; the status is
+ * published through PGR_Set_Replication_Server_Status() once the counter
+ * exceeds MAX_RETRY_TIMES.  Any other outcome resets the counter and is
+ * published at once.
+ */
+static void
+set_replication_server_status(int status)
+{
+    ReplicateServerInfo * server = PGR_Replicator_4_Lifecheck;
+
+    if (status != DATA_ERR)
+    {
+        server->retry_count = 0;
+    }
+    else
+    {
+        server->retry_count ++;
+        if (server->retry_count <= MAX_RETRY_TIMES)
+        {
+            /* still within the retry budget: do not publish yet */
+            return;
+        }
+    }
+    PGR_Set_Replication_Server_Status(server, status);
+}
+
+/*
+ * send_lifecheck
+ *
+ * Send one lifecheck request over 'sock': a zeroed ReplicateHeader with
+ * cmdSys = CMD_SYS_LIFECHECK and cmdSts = CMD_STS_CLUSTER.
+ *
+ * Each pass waits with select() for the socket to become writable and
+ * then send()s the bytes that are still outstanding, until the whole
+ * header has gone out.
+ *
+ * Returns STATUS_OK when the header was sent completely, and also when
+ * select() or send() was interrupted by a signal (EINTR).  Returns
+ * STATUS_ERROR on any other select()/send() failure or when send()
+ * reports 0 bytes.
+ */
+static int
+send_lifecheck(int sock)
+{
+    ReplicateHeader header;
+    fd_set wmask;
+    struct timeval timeout;
+    int send_size = 0;
+    int buf_size = 0;
+    char * send_ptr = (char *)&header;
+    int s = 0;
+    int rtn = 0;
+
+    memset(&header,0,sizeof(ReplicateHeader));
+    header.cmdSys = CMD_SYS_LIFECHECK;
+    header.cmdSts = CMD_STS_CLUSTER;
+    buf_size = sizeof(ReplicateHeader);
+
+    for (;;)
+    {
+        /*
+         * select() may overwrite the timeout, so it is re-armed on every
+         * pass; otherwise a timed-out pass would be followed by
+         * zero-length waits.
+         */
+        timeout.tv_sec = PGR_Lifecheck_Timeout;
+        timeout.tv_usec = 0;
+
+        FD_ZERO(&wmask);
+        FD_SET(sock,&wmask);
+        rtn = select(sock+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
+        if (rtn < 0)
+        {
+            if (errno == EINTR)
+            {
+                return STATUS_OK;
+            }
+            else
+            {
+                elog(DEBUG1, "send_lifecheck():select() failed");
+                return STATUS_ERROR;
+            }
+        }
+        else if (rtn && FD_ISSET(sock, &wmask))
+        {
+            s = send(sock,send_ptr + send_size,buf_size - send_size ,0);
+            if (s < 0)
+            {
+                if (errno == EINTR)
+                {
+                    return STATUS_OK;
+                }
+                if (errno == EAGAIN)
+                {
+                    continue;
+                }
+                /* message names this function, not send_replicate_packet */
+                elog(DEBUG1, "send_lifecheck():send error");
+
+                /* EPIPE || ENCONNREFUSED || ENSOCK || EHOSTUNREACH */
+                return STATUS_ERROR;
+            }
+            else if (s == 0)
+            {
+                elog(DEBUG1, "send_lifecheck():unexpected EOF");
+                return STATUS_ERROR;
+            }
+            else /*if (s > 0)*/
+            {
+                send_size += s;
+                if (send_size == buf_size)
+                {
+                    return STATUS_OK;
+                }
+            }
+        }
+    }
+}
+
+/*
+ * recv_lifecheck
+ *
+ * Wait for the lifecheck response on 'sock' by calling
+ * PGR_recv_replicate_result() with a zeroed scratch buffer and
+ * PGR_Lifecheck_Timeout.  The buffer contents are not inspected.
+ *
+ * Returns STATUS_OK when that call returns a non-negative value,
+ * STATUS_ERROR otherwise.
+ */
+static int
+recv_lifecheck(int sock)
+{
+    char reply[PGR_MESSAGE_BUFSIZE];
+    int rc;
+
+    memset(reply, 0, sizeof(reply));
+    rc = PGR_recv_replicate_result(sock, reply, PGR_Lifecheck_Timeout);
+    if (rc < 0)
+    {
+        return STATUS_ERROR;
+    }
+    return STATUS_OK;
+}
+
+/*
+ * set_timeout
+ *
+ * Signal handler installed for SIGALRM by PGR_Lifecheck_Main().  When a
+ * replication server is currently selected, the expiry is recorded as a
+ * DATA_ERR outcome and, if Debug_pretty_print is set, logged at DEBUG1.
+ */
+static void
+set_timeout(SIGNAL_ARGS)
+{
+    if (PGR_Replicator_4_Lifecheck == NULL)
+    {
+        /* no server selected: nothing to mark */
+        return;
+    }
+    set_replication_server_status(DATA_ERR);
+    if (Debug_pretty_print)
+    {
+        elog(DEBUG1,"time out is occured in life check");
+    }
+}
+
+/*
+ * exit_lifecheck
+ *
+ * Signal handler installed for SIGHUP, SIGTERM, SIGINT and SIGQUIT by
+ * PGR_Lifecheck_Main(): prints a notice on stderr and ends the process
+ * with exit status 0.
+ *
+ * NOTE(review): fprintf() and exit() are not async-signal-safe -- confirm
+ * that calling them from a handler is acceptable here.
+ */
+static void
+exit_lifecheck(SIGNAL_ARGS)
+{
+ fprintf(stderr,"lifecheck stopped\n");
+ exit(0);
+}
+
+#endif /* USE_REPLICATION */
diff -aruN postgresql-8.2.4/src/backend/libpq/recovery.c pgcluster-1.7.0rc7/src/backend/libpq/recovery.c
--- postgresql-8.2.4/src/backend/libpq/recovery.c 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/libpq/recovery.c 2007-02-18 22:52:16.000000000 +0100
@@ -0,0 +1,1566 @@
+/*--------------------------------------------------------------------
+ * FILE:
+ * recovery.c
+ *
+ * NOTE:
+ * This file contains the functions that the backend source calls
+ * to perform the recovery.
+ * The low-level I/O functions called by these functions are
+ * contained in 'replicate_com.c'.
+ *
+ *--------------------------------------------------------------------
+ */
+
+/*--------------------------------------
+ * INTERFACE ROUTINES
+ *
+ * I/O call:
+ * PGR_recovery_finish_send
+ * master module:
+ * PGR_Master_Main(void);
+ * recovery module:
+ * PGR_Recovery_Main
+ *-------------------------------------
+ */
+#ifdef USE_REPLICATION
+
+#include "postgres.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "libpq/pqsignal.h"
+#include "utils/guc.h"
+#include "miscadmin.h"
+#include "nodes/nodes.h"
+#include "nodes/parsenodes.h"
+#include "access/xact.h"
+#include "access/xlog.h"
+#include "tcop/tcopprot.h"
+#include "postmaster/postmaster.h"
+
+#include "../interfaces/libpq/libpq-fe.h"
+#include "../interfaces/libpq/libpq-int.h"
+#include "../interfaces/libpq/fe-auth.h"
+
+#include "replicate.h"
+
+#ifdef WIN32
+#include "win32.h"
+#else
+#ifdef HAVE_NETINET_TCP_H
+#include
+#endif
+#include
+#endif
+
+#ifndef HAVE_STRDUP
+#include "strdup.h"
+#endif
+#ifdef HAVE_CRYPT_H
+#include
+#endif
+
+#ifdef MULTIBYTE
+#include "mb/pg_wchar.h"
+#endif
+
+#define RECOVERY_LOOP_END (0)
+#define RECOVERY_LOOP_CONTINUE (1)
+#define RECOVERY_LOOP_FAIL (2)
+char Local_Host_Name[HOSTNAME_MAX_LENGTH];
+int PGR_Recovery_Mode = 0;
+
+static int read_packet(int sock,RecoveryPacket * packet);
+static int send_recovery_packet(int sock, RecoveryPacket * packet);
+static int send_packet(int * sock, RecoveryPacket * packet );
+static void master_loop(int fd);
+static int start_recovery_send(int * sock, ReplicateServerInfo * host);
+static int stop_recovery_send(int * sock, ReplicateServerInfo * host);
+static int rsync_pg_data(char * src , char * dest);
+static int remove_dir(char * dir_name);
+static int clear_bkup_dir(char * dir_name);
+static int bkup_dir(char * dir_name);
+static int restore_dir(char * dir_name);
+static int rsync_global_dir(char * src, char * dest, int stage);
+static int first_recovery(char * src, char * dest, char * dir);
+static int second_recovery(char * src, char * dest, char * dir);
+static int recovery_rsync(char * src , char * dest, int stage);
+static int recovery_loop(int fd, int mode);
+static void show_recovery_packet(RecoveryPacket * packet);
+static int direct_send_packet(int packet_no);
+static void set_recovery_packet(RecoveryPacket * packet, int packet_no);
+static int cold_recovery(char * src, RecoveryPacket *packet, bool need_sync_table_space, int stage);
+static int hot_recovery(RecoveryPacket *packet, int stage);
+static int restore_from_dumpall( char * hostName, uint16_t portNum, char * userName);
+static int restore_from_dump( char * hostName, uint16_t portNum, char * userName, char * dbName);
+static int restore_from_each_dump( char * hostName, uint16_t portNum, char * userName);
+static PGresult * get_dbName(char * hostName, uint16_t portNum, char * userName);
+
+static int sync_table_space(char * hostName, uint16_t portNum, char * userName, int stage);
+static PGresult * get_table_space_location(char * hostName, uint16_t portNum, char * userName);
+static int rsync_table_space(char * hostName, char * location, int stage);
+
+int PGR_recovery_error_send(void);
+int PGR_recovery_finish_send(void);
+int PGR_recovery_queue_data_req(void);
+int PGR_Master_Main(void);
+int PGR_Recovery_Main(int mode);
+
+/*
+ * read_packet
+ *
+ * Read exactly one RecoveryPacket from 'sock' into '*packet'.
+ *
+ * recv() is retried on EINTR/EAGAIN and repeated after short reads until
+ * sizeof(RecoveryPacket) bytes have arrived; the completed packet is then
+ * passed to show_recovery_packet().
+ *
+ * Returns the number of bytes read (sizeof(RecoveryPacket)) on success,
+ * or -1 on a recv() error or when the peer closes the connection first.
+ */
+static int
+read_packet(int sock,RecoveryPacket * packet)
+{
+    char * read_ptr = (char *)packet;
+    int packet_size = sizeof(RecoveryPacket);
+    int read_size = 0;
+    int r;
+
+    for (;;)
+    {
+        /*
+         * Ask only for the bytes still missing.  Requesting the full
+         * packet size again after a short read would let recv() write
+         * past the end of *packet.
+         */
+        r = recv(sock, read_ptr + read_size, packet_size - read_size, MSG_WAITALL);
+        if (r < 0)
+        {
+            if (errno == EINTR || errno == EAGAIN)
+            {
+                continue;
+            }
+            elog(DEBUG1, "read_packet():recv failed");
+            return -1;
+        }
+        if (r == 0)
+        {
+            elog(DEBUG1, "read_packet():unexpected EOF");
+            return -1;
+        }
+        read_size += r;
+        if (read_size == packet_size)
+        {
+            show_recovery_packet(packet);
+            return read_size;
+        }
+    }
+    return -1;    /* not reached */
+}
+
+/*
+ * send_recovery_packet
+ *
+ * Write one complete RecoveryPacket to 'sock'.
+ *
+ * Each pass waits up to RECOVERY_TIMEOUT seconds for the socket to become
+ * writable and then sends whatever part of the packet is still pending.
+ * A select() timeout, and EINTR/EAGAIN from select() or send(), simply
+ * start another pass.
+ *
+ * Returns STATUS_OK once every byte has been sent, STATUS_ERROR on any
+ * other select()/send() failure or when send() reports 0 bytes.
+ */
+static int
+send_recovery_packet(int sock, RecoveryPacket * packet)
+{
+    char * out = (char *)packet;
+    int total = sizeof(RecoveryPacket);
+    int sent = 0;
+    int n;
+    int ready;
+    fd_set writable;
+    struct timeval tv;
+
+    for (;;)
+    {
+        tv.tv_sec = RECOVERY_TIMEOUT;
+        tv.tv_usec = 0;
+
+        FD_ZERO(&writable);
+        FD_SET(sock, &writable);
+        ready = select(sock + 1, (fd_set *)NULL, &writable, (fd_set *)NULL, &tv);
+        if (ready < 0)
+        {
+            if (errno == EINTR || errno == EAGAIN)
+            {
+                continue;
+            }
+            return STATUS_ERROR;
+        }
+        if (ready == 0 || !FD_ISSET(sock, &writable))
+        {
+            /* timed out, or not writable yet: wait again */
+            continue;
+        }
+
+        n = send(sock, out + sent, total - sent, 0);
+        if (n < 0)
+        {
+            if (errno == EINTR || errno == EAGAIN)
+            {
+                continue;
+            }
+            elog(DEBUG1, "send_recovery_packet():send error");
+
+            /* EPIPE || ENCONNREFUSED || ENSOCK || EHOSTUNREACH */
+            return STATUS_ERROR;
+        }
+        if (n == 0)
+        {
+            elog(DEBUG1, "send_recovery_packet():unexpected EOF");
+            return STATUS_ERROR;
+        }
+        sent += n;
+        if (sent == total)
+        {
+            return STATUS_OK;
+        }
+    }
+}
+
+/*
+ * send_packet
+ *
+ * Send 'packet' over '*sock', retrying and failing over as needed.
+ *
+ * A failed send_recovery_packet() is retried up to MAX_RETRY_TIMES times
+ * on the same socket.  After that the socket is closed, the current
+ * replication server is marked DATA_ERR, the server information is
+ * fetched again and marked DATA_USE, and '*sock' is reconnected to that
+ * server's recovery port before sending is attempted again.
+ *
+ * Returns STATUS_OK once the packet has been sent, STATUS_ERROR when no
+ * replication server information is available.
+ */
+static int
+send_packet(int * sock, RecoveryPacket * packet )
+{
+    ReplicateServerInfo * server = PGR_get_replicate_server_info();
+    int failures = 0;
+
+    if (server == (ReplicateServerInfo*)NULL)
+    {
+        return STATUS_ERROR;
+    }
+    for (;;)
+    {
+        if (send_recovery_packet(*sock, packet) == STATUS_OK)
+        {
+            return STATUS_OK;
+        }
+        if (failures < MAX_RETRY_TIMES)
+        {
+            failures ++;
+            continue;
+        }
+
+        /* retry budget used up: give up on this server and reconnect */
+        failures = 0;
+        close(*sock);
+        PGR_Set_Replication_Server_Status(server, DATA_ERR);
+        server = PGR_get_replicate_server_info();
+        if (server == (ReplicateServerInfo*)NULL)
+        {
+            return STATUS_ERROR;
+        }
+        PGR_Set_Replication_Server_Status(server, DATA_USE);
+        PGR_Create_Socket_Connect(sock, server->hostName, server->recoveryPortNumber);
+    }
+}
+
+/*
+ * master_loop
+ *
+ * Accept one connection on the listening socket 'fd' and serve the
+ * recovery packets arriving on it.
+ *
+ * Accepting is retried while PGR_Create_Acception() fails; the function
+ * gives up and returns once the retry counter has passed
+ * MAX_RETRY_TIMES.  Afterwards each packet read from the connection is
+ * dispatched on its packet_no: requests are answered through
+ * send_packet() and the cluster status is updated.  The loop ends on a
+ * read error, a select() failure other than EINTR/EAGAIN, or any packet
+ * type that sets loop_end below.  The accepted socket is closed on exit.
+ */
+static void
+master_loop(int fd)
+{
+    int count = 0;
+    int sock = -1;    /* defined value for PGR_Close_Sock() if accept fails */
+    int status = STATUS_OK;
+    RecoveryPacket packet;
+    int r_size = 0;
+    bool loop_end = false;
+
+    while ((status = PGR_Create_Acception(fd,&sock,"",RecoveryPortNumber)) != STATUS_OK)
+    {
+        PGR_Close_Sock(&sock);
+        sock = -1;
+        if ( count > MAX_RETRY_TIMES)
+        {
+            return;
+        }
+        count ++;
+    }
+    for(;;)
+    {
+        int rtn;
+        fd_set rmask;
+        struct timeval timeout;
+
+        timeout.tv_sec = RECOVERY_TIMEOUT;
+        timeout.tv_usec = 0;
+
+        /*
+         * Wait for something to happen.
+         */
+        FD_ZERO(&rmask);
+        FD_SET(sock,&rmask);
+        memset(&packet,0,sizeof(RecoveryPacket));
+        rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
+        if (rtn < 0)
+        {
+            /*
+             * After a failed select() the descriptor set must not be
+             * consulted: retry when interrupted, otherwise stop serving.
+             */
+            if (errno == EINTR || errno == EAGAIN)
+            {
+                continue;
+            }
+            break;
+        }
+        if (rtn == 0 || !FD_ISSET(sock, &rmask))
+        {
+            /* timed out without data */
+            continue;
+        }
+        r_size = read_packet(sock,&packet);
+        if (r_size == 0)
+        {
+            continue;
+        }
+        else if (r_size < 0)
+        {
+            break;
+        }
+        switch (ntohs(packet.packet_no))
+        {
+            case RECOVERY_PGDATA_REQ :
+                /*
+                 * PGDATA information request:
+                 * answer with the master server information
+                 */
+                memset(&packet,0,sizeof(packet));
+                set_recovery_packet(&packet, RECOVERY_PGDATA_ANS) ;
+                status = send_packet(&sock,&packet);
+                PGR_Set_Cluster_Status(STATUS_RECOVERY);
+                break;
+            case RECOVERY_FSYNC_REQ :
+                /*
+                 * answer with the master server information
+                 */
+                memset(&packet,0,sizeof(packet));
+                set_recovery_packet(&packet, RECOVERY_FSYNC_ANS );
+                status = send_packet(&sock,&packet);
+                PGR_Set_Cluster_Status(STATUS_RECOVERY);
+                loop_end = true;
+                break;
+            case RECOVERY_ERROR_TARGET_ONLY:
+                memset(&packet,0,sizeof(packet));
+                set_recovery_packet(&packet, RECOVERY_ERROR_ANS );
+                status = send_packet(&sock,&packet);
+                PGR_Set_Cluster_Status(STATUS_REPLICATED);
+                break;
+            case RECOVERY_ERROR_CONNECTION:
+                memset(&packet,0,sizeof(packet));
+                set_recovery_packet(&packet, RECOVERY_ERROR_ANS );
+                status = send_packet(&sock,&packet);
+                PGR_Set_Cluster_Status(STATUS_REPLICATED);
+                /*
+                 * Kill the broken cluster db.
+                 * FIXME: the target should be the postmaster's pid.
+                 * MyProcPid is not initialized properly in this process,
+                 * so it happens to equal the postmaster's pid.  To fix
+                 * this, keep the postmaster's pid in a variable of its
+                 * own.
+                 */
+                kill(MyProcPid,SIGQUIT);
+                loop_end = true;
+                break;
+            case RECOVERY_ERROR_ANS:
+                /* TODO: recovery failed. close this postmaster */
+                loop_end = true;
+                break;
+            case RECOVERY_FINISH:
+                PGR_Set_Cluster_Status(STATUS_REPLICATED);
+                loop_end = true;
+                break;
+            default:
+                loop_end = true;
+                break;
+        }
+        if (loop_end)
+        {
+            break;
+        }
+    }
+    PGR_Close_Sock(&sock);
+}
+
+/*
+ * PGR_Master_Main
+ *
+ * Fork the process that listens for recovery requests.
+ *
+ * The calling process gets fork()'s return value back immediately (any
+ * non-zero value is returned as-is).  The child stores the local host
+ * name in Local_Host_Name, installs authdie for HUP/TERM/INT/QUIT/ALRM,
+ * binds a socket on RecoveryPortNumber and then loops forever, handing
+ * the listening socket to master_loop() whenever select() reports it
+ * readable.
+ *
+ * NOTE(review): when the bind fails the child returns 'pid' (0 in the
+ * child) to its caller instead of exiting -- confirm the caller handles
+ * that.
+ */
+int
+PGR_Master_Main(void)
+{
+    int status;
+    int fd = -1;
+    int rtn;
+    int pid;
+
+    if ((pid = fork()) != 0 )
+    {
+        return pid;
+    }
+
+    /*
+     * The buffer is zeroed and the last byte is withheld from
+     * gethostname(), so the name stays NUL-terminated even if it is
+     * truncated.
+     */
+    memset(Local_Host_Name,0,sizeof(Local_Host_Name));
+    gethostname(Local_Host_Name,sizeof(Local_Host_Name) - 1);
+    pqsignal(SIGHUP, authdie);
+    pqsignal(SIGTERM, authdie);
+    pqsignal(SIGINT, authdie);
+    pqsignal(SIGQUIT, authdie);
+    pqsignal(SIGALRM, authdie);
+    PG_SETMASK(&UnBlockSig);
+
+    status = PGR_Create_Socket_Bind(&fd, "", RecoveryPortNumber);
+
+    if (status != STATUS_OK)
+    {
+        return pid;
+    }
+    for (;;)
+    {
+        fd_set rmask;
+        struct timeval timeout;
+
+        timeout.tv_sec = 60;
+        timeout.tv_usec = 0;
+
+        /*
+         * Wait for something to happen.
+         */
+        FD_ZERO(&rmask);
+        FD_SET(fd,&rmask);
+        rtn = select(fd+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
+        /* rtn > 0 only: after a failed select() the set is not valid */
+        if (rtn > 0 && FD_ISSET(fd, &rmask))
+        {
+            master_loop(fd);
+        }
+    }
+    return pid;
+}
+
+/*
+ * start_recovery_send
+ *
+ * Connect '*sock' to the recovery port of 'host' and send a
+ * RECOVERY_PREPARE_REQ packet over it.
+ *
+ * Returns STATUS_ERROR when the connection cannot be made (logged at
+ * DEBUG1 if Debug_pretty_print is set), otherwise send_packet()'s result.
+ */
+static int
+start_recovery_send(int * sock, ReplicateServerInfo * host)
+{
+    RecoveryPacket request;
+
+    if (PGR_Create_Socket_Connect(sock, host->hostName, host->recoveryPortNumber) != STATUS_OK)
+    {
+        if (Debug_pretty_print)
+        {
+            elog(DEBUG1,"connection error to replication server");
+        }
+        return STATUS_ERROR;
+    }
+
+    memset(&request, 0, sizeof(request));
+    set_recovery_packet(&request, RECOVERY_PREPARE_REQ );
+    return send_packet(sock, &request);
+}
+
+/*
+ * stop_recovery_send()
+ *	Send a RECOVERY_ERROR_ANS packet on an already connected socket.
+ *
+ * ARGS
+ *	sock: connected socket
+ *	host: not used by this function
+ * RETURN
+ *	result of send_packet()
+ */
+static int
+stop_recovery_send(int * sock, ReplicateServerInfo * host)
+{
+	RecoveryPacket abort_packet;
+
+	memset(&abort_packet,0,sizeof(abort_packet));
+	set_recovery_packet(&abort_packet, RECOVERY_ERROR_ANS );
+	return send_packet(sock,&abort_packet);
+}
+
+/*
+ * direct_send_packet()
+ *	Open a fresh connection to the current replication server's
+ *	recovery port, send one recovery packet carrying packet_no, and
+ *	close the connection again.
+ *
+ * RETURN
+ *	STATUS_ERROR when no replication server is available or the
+ *	connection fails (the server is then marked DATA_ERR); otherwise
+ *	the result of send_packet().
+ */
+static int
+direct_send_packet(int packet_no)
+{
+	RecoveryPacket packet;
+	int sock = -1;
+	int result;
+	ReplicateServerInfo * server = PGR_get_replicate_server_info();
+
+	if (server == NULL)
+	{
+		return STATUS_ERROR;
+	}
+	if (PGR_Create_Socket_Connect(&sock, server->hostName, server->recoveryPortNumber) != STATUS_OK)
+	{
+		PGR_Set_Replication_Server_Status(server,DATA_ERR);
+		return STATUS_ERROR;
+	}
+
+	memset(&packet,0,sizeof(packet));
+	set_recovery_packet(&packet, packet_no );
+	result = send_packet(&sock,&packet);
+
+	close(sock);
+
+	return result;
+}
+
+/*
+ * PGR_recovery_error_send()
+ *	Send a RECOVERY_ERROR_ANS packet to the replication server.
+ *	Returns the result of direct_send_packet().
+ */
+int
+PGR_recovery_error_send(void)
+{
+	int result;
+
+	result = direct_send_packet(RECOVERY_ERROR_ANS);
+	return result;
+}
+
+/*
+ * PGR_recovery_finish_send()
+ *	Send a RECOVERY_FINISH packet to the replication server.
+ *	Returns the result of direct_send_packet().
+ */
+int
+PGR_recovery_finish_send(void)
+{
+	int result;
+
+	result = direct_send_packet(RECOVERY_FINISH);
+	return result;
+}
+
+/*
+ * PGR_recovery_queue_data_req()
+ *	Connect to the replication server's recovery port, send a
+ *	RECOVERY_QUEUE_DATA_REQ packet and wait for the answer.
+ *
+ *	The cluster status is set back to STATUS_REPLICATED on every path
+ *	that got as far as trying to connect.
+ *
+ * RETURN
+ *	STATUS_OK only when a RECOVERY_QUEUE_DATA_ANS packet was read
+ *	successfully; STATUS_ERROR otherwise.
+ */
+int
+PGR_recovery_queue_data_req(void)
+{
+	int status = STATUS_OK;
+	int r_size = 0;
+	int rtn = STATUS_ERROR;
+	int fd = -1;
+	ReplicateServerInfo * host = NULL;
+	RecoveryPacket packet;
+
+	host = PGR_get_replicate_server_info();
+	if (host == NULL)
+	{
+		return STATUS_ERROR;
+	}
+	status = PGR_Create_Socket_Connect(&fd, host->hostName, host->recoveryPortNumber);
+	if (status != STATUS_OK)
+	{
+		PGR_Set_Replication_Server_Status(host,DATA_ERR);
+		PGR_Set_Cluster_Status(STATUS_REPLICATED);
+		if (fd >= 0)
+		{
+			close(fd);
+		}
+		return STATUS_ERROR;
+	}
+
+	memset(&packet,0,sizeof(packet));
+	PGRset_recovery_packet_no(&packet, RECOVERY_QUEUE_DATA_REQ );
+	status = send_packet(&fd,&packet);
+	if (status != STATUS_OK)
+	{
+		/* best effort: we are failing anyway, so the result is not used */
+		(void) stop_recovery_send(&fd,host);
+		PGR_Set_Cluster_Status(STATUS_REPLICATED);
+		close(fd);
+		return STATUS_ERROR;
+	}
+
+	/*
+	 * Look at the answer only if it was actually read.  Previously the
+	 * read-failure result was overwritten by the packet dispatch below,
+	 * so a failed read was judged by whatever was left in the buffer.
+	 */
+	memset(&packet,0,sizeof(RecoveryPacket));
+	r_size = read_packet(fd,&packet);
+	if ((r_size > 0) &&
+		(ntohs(packet.packet_no) == RECOVERY_QUEUE_DATA_ANS))
+	{
+		rtn = STATUS_OK;
+	}
+
+	PGR_Set_Cluster_Status(STATUS_REPLICATED);
+	close(fd);
+	return rtn;
+}
+
+/*
+ * rsync_pg_data()
+ *	Run "rsync -a -r [-z] --delete [-e RsyncOption] src dest" using
+ *	the program at RsyncPath and wait for it to finish.
+ *
+ * ARGS
+ *	src:  rsync source
+ *	dest: rsync destination
+ * RETURN
+ *	STATUS_OK if rsync exited normally with status 0, STATUS_ERROR in
+ *	every other case (bad arguments, fork/exec/wait failure, non-zero
+ *	exit, death by signal).
+ */
+static int
+rsync_pg_data(char * src, char * dest)
+{
+	int status = 0;
+	char *args[12];
+	pid_t pid;
+	int i = 0;
+
+	if ((RsyncPath == NULL) || (src == NULL) || (dest == NULL))
+	{
+		return STATUS_ERROR;
+	}
+
+	args[i++] = "rsync";
+	args[i++] = "-a";
+	args[i++] = "-r";
+	if (RsyncCompress)
+		args[i++] = "-z";
+	args[i++] = "--delete";
+	/*
+	 * A NULL RsyncOption placed after "-e" would terminate argv early
+	 * and silently drop src and dest, so "-e" is passed only together
+	 * with its value.
+	 */
+	if (RsyncOption != NULL)
+	{
+		args[i++] = "-e";
+		args[i++] = RsyncOption;
+	}
+	args[i++] = src;
+	args[i++] = dest;
+	args[i++] = NULL;
+
+	pid = fork();
+	if (pid < 0)
+	{
+		return STATUS_ERROR;
+	}
+	if (pid == 0)
+	{
+		execv(RsyncPath,args);
+		/*
+		 * Only reached when execv() failed.  The child must stop here;
+		 * falling through would let it carry on running the caller's
+		 * recovery code as a second process.
+		 */
+		_exit(127);
+	}
+
+	/* wait for this particular child, retrying when interrupted */
+	for (;;)
+	{
+		if (waitpid(pid, &status, 0) < 0)
+		{
+			if (errno == EINTR)
+				continue;
+			return STATUS_ERROR;
+		}
+		break;
+	}
+	if (WIFEXITED(status) == 0 || WEXITSTATUS(status) != 0)
+	{
+		return STATUS_ERROR;
+	}
+	return STATUS_OK;
+}
+
+/*
+ * remove_dir()
+ *	Remove a directory and everything below it.
+ *
+ *	Each entry is first passed to remove(); if that fails the entry is
+ *	assumed to be a non-empty directory and remove_dir() recurses into
+ *	it.  Failures on individual entries are not reported directly, but
+ *	they leave the directory non-empty so the final remove() fails.
+ *
+ * RETURN
+ *	STATUS_OK if dir_name itself was removed, STATUS_ERROR otherwise.
+ */
+static int
+remove_dir(char * dir_name)
+{
+	DIR * dp = NULL;
+	struct dirent *dirp = NULL;
+	char fname[256];
+	int len;
+
+	if (dir_name == NULL)
+	{
+		return STATUS_ERROR;
+	}
+	if ((dp = opendir(dir_name)) == NULL)
+	{
+		return STATUS_ERROR;
+	}
+	while ((dirp = readdir(dp)) != NULL)
+	{
+		if ((!strcmp(dirp->d_name,".")) ||
+			(!strcmp(dirp->d_name,"..")))
+		{
+			continue;
+		}
+		len = snprintf(fname,sizeof(fname),"%s/%s",dir_name,dirp->d_name);
+		if ((len < 0) || (len >= (int) sizeof(fname)))
+		{
+			/*
+			 * The path does not fit into fname.  Acting on a truncated
+			 * path could remove the wrong file, so the entry is left
+			 * alone and the final remove() reports the failure.
+			 */
+			continue;
+		}
+		if (remove(fname) < 0)
+		{
+			remove_dir(fname);
+		}
+	}
+	closedir(dp);
+	if (remove(dir_name) < 0)
+	{
+		return STATUS_ERROR;
+	}
+	return STATUS_OK;
+}
+
+/*
+ * clear_bkup_dir()
+ *	Remove the backup copy of dir_name, i.e. the directory named
+ *	"<dir_name>_<pid of this process>".
+ *
+ * RETURN
+ *	result of remove_dir(), or STATUS_ERROR if the backup name does not
+ *	fit into the local buffer.
+ */
+static int
+clear_bkup_dir(char * dir_name)
+{
+	char bkp_dir[256];
+	int len;
+
+	len = snprintf(bkp_dir,sizeof(bkp_dir),"%s_%d",dir_name,(int) getpid());
+	if ((len < 0) || (len >= (int) sizeof(bkp_dir)))
+	{
+		/* a truncated name could point at a different directory */
+		return STATUS_ERROR;
+	}
+	return (remove_dir(bkp_dir));
+}
+
+/*
+ * bkup_dir()
+ *	Back up dir_name by renaming it to "<dir_name>_<pid of this
+ *	process>".
+ *
+ * RETURN
+ *	STATUS_OK if the rename succeeded; STATUS_ERROR if it failed or the
+ *	backup name does not fit into the local buffer.
+ */
+static int
+bkup_dir(char * dir_name)
+{
+	char bkp_dir[256];
+	int len;
+
+	len = snprintf(bkp_dir,sizeof(bkp_dir),"%s_%d",dir_name,(int) getpid());
+	if ((len < 0) || (len >= (int) sizeof(bkp_dir)))
+	{
+		return STATUS_ERROR;
+	}
+	if (rename(dir_name,bkp_dir) < 0)
+	{
+		return STATUS_ERROR;
+	}
+	return STATUS_OK;
+}
+
+/*
+ * restore_dir()
+ *	Put the backup "<dir_name>_<pid of this process>" back in place of
+ *	dir_name.
+ *
+ *	If a plain rename fails, the current dir_name is removed and the
+ *	rename is tried once more.  The removal is done only when the
+ *	backup really exists: without that check a call made when no backup
+ *	had been taken would delete the live directory and then have
+ *	nothing to put in its place.
+ *
+ * RETURN
+ *	STATUS_OK if the backup was moved back, STATUS_ERROR otherwise.
+ */
+static int
+restore_dir(char * dir_name)
+{
+	char bkp_dir[256];
+	struct stat bkp_stat;
+	int len;
+
+	len = snprintf(bkp_dir,sizeof(bkp_dir),"%s_%d",dir_name,(int) getpid());
+	if ((len < 0) || (len >= (int) sizeof(bkp_dir)))
+	{
+		return STATUS_ERROR;
+	}
+	if (rename(bkp_dir,dir_name) == 0)
+	{
+		return STATUS_OK;
+	}
+
+	/* nothing to restore from: leave dir_name untouched */
+	if (stat(bkp_dir,&bkp_stat) < 0)
+	{
+		return STATUS_ERROR;
+	}
+
+	/* dir_name is in the way (e.g. partially synced); replace it */
+	remove_dir(dir_name);
+	if (rename(bkp_dir,dir_name) < 0)
+	{
+		return STATUS_ERROR;
+	}
+	return STATUS_OK;
+}
+
+/*
+ * rsync_global_dir()
+ *	Synchronise the "global" directory from src into dest.
+ *
+ *	In the 1st stage the existing <dest>/global is backed up first and
+ *	in the 2nd stage the backup is removed after a successful sync,
+ *	both unless PGR_Recovery_Mode is PGR_WITHOUT_BACKUP.  After rsync
+ *	the function waits (retrying once per second) until
+ *	<dest>/global/pg_control can be stat()ed.
+ *
+ * RETURN
+ *	STATUS_OK on success.  STATUS_ERROR if a path does not fit into the
+ *	local buffers, the backup or rsync failed, or pg_control did not
+ *	show up; in the last two cases restore_dir() is called first.
+ */
+static int
+rsync_global_dir(char * src, char * dest, int stage)
+{
+	int status;
+	char control_file[256];
+	char org_dir[256];
+	char src_dir[256];
+	struct stat fstat;
+	int cnt;
+	int len;
+
+	/* build every path up front so an over-long one fails before any change */
+	len = snprintf(org_dir,sizeof(org_dir),"%s/global",dest);
+	if ((len < 0) || (len >= (int) sizeof(org_dir)))
+	{
+		return STATUS_ERROR;
+	}
+	len = snprintf(control_file,sizeof(control_file),"%s/global/pg_control",dest);
+	if ((len < 0) || (len >= (int) sizeof(control_file)))
+	{
+		return STATUS_ERROR;
+	}
+	len = snprintf(src_dir,sizeof(src_dir),"%s/global",src);
+	if ((len < 0) || (len >= (int) sizeof(src_dir)))
+	{
+		return STATUS_ERROR;
+	}
+
+	if ((stage == PGR_1ST_RECOVERY) && (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP))
+	{
+		if (bkup_dir(org_dir) != STATUS_OK)
+		{
+			return STATUS_ERROR;
+		}
+	}
+	status = rsync_pg_data(src_dir, dest);
+	if (status != STATUS_OK )
+	{
+		restore_dir(org_dir);
+		return STATUS_ERROR;
+	}
+	/* check pg_control file */
+	cnt = 0;
+	while (stat(control_file, &fstat) < 0)
+	{
+		if (cnt > MAX_RETRY_TIMES )
+		{
+			restore_dir(org_dir);
+			return STATUS_ERROR;
+		}
+		cnt ++;
+		sleep(1);
+	}
+	if ((stage == PGR_2ND_RECOVERY) && (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP))
+	{
+		clear_bkup_dir(org_dir);
+	}
+	return STATUS_OK;
+}
+
+/*
+ * first_recovery()
+ *	1st recovery step for one sub directory of the data directory:
+ *	back up <dest>/<dir> (unless PGR_Recovery_Mode is
+ *	PGR_WITHOUT_BACKUP) and rsync <src>/<dir> into dest.
+ *
+ * RETURN
+ *	STATUS_OK on success.  STATUS_ERROR if a path does not fit into the
+ *	local buffers, or the backup or rsync failed; after a failed rsync
+ *	restore_dir() is called for <dest>/<dir>.
+ */
+static int
+first_recovery(char * src, char * dest, char * dir)
+{
+	int status = STATUS_OK;
+	char src_dir[256];
+	char dest_dir[256];
+	int len;
+
+	len = snprintf(src_dir,sizeof(src_dir),"%s/%s",src,dir);
+	if ((len < 0) || (len >= (int) sizeof(src_dir)))
+	{
+		return STATUS_ERROR;
+	}
+	len = snprintf(dest_dir,sizeof(dest_dir),"%s/%s",dest,dir);
+	if ((len < 0) || (len >= (int) sizeof(dest_dir)))
+	{
+		return STATUS_ERROR;
+	}
+
+	if (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP)
+	{
+		/* bkup_dir() reports STATUS_OK / STATUS_ERROR; test it that way */
+		status = bkup_dir(dest_dir);
+		if (status != STATUS_OK)
+		{
+			return STATUS_ERROR;
+		}
+	}
+	status = rsync_pg_data(src_dir, dest);
+	if (status != STATUS_OK )
+	{
+		restore_dir(dest_dir);
+		return STATUS_ERROR;
+	}
+	return STATUS_OK;
+}
+
+/*
+ * second_recovery()
+ *	2nd recovery step for one sub directory of the data directory:
+ *	rsync <src>/<dir> into dest again and, on success, remove the
+ *	backup of <dest>/<dir> (unless PGR_Recovery_Mode is
+ *	PGR_WITHOUT_BACKUP).
+ *
+ * RETURN
+ *	STATUS_OK on success.  STATUS_ERROR if a path does not fit into the
+ *	local buffers or rsync failed; after a failed rsync restore_dir()
+ *	is called for <dest>/<dir>.
+ */
+static int
+second_recovery(char * src, char * dest, char * dir)
+{
+	int status = STATUS_OK;
+	char src_dir[256];
+	char dest_dir[256];
+	int len;
+
+	len = snprintf(src_dir,sizeof(src_dir),"%s/%s",src,dir);
+	if ((len < 0) || (len >= (int) sizeof(src_dir)))
+	{
+		return STATUS_ERROR;
+	}
+	len = snprintf(dest_dir,sizeof(dest_dir),"%s/%s",dest,dir);
+	if ((len < 0) || (len >= (int) sizeof(dest_dir)))
+	{
+		return STATUS_ERROR;
+	}
+
+	status = rsync_pg_data(src_dir, dest);
+	if (status != STATUS_OK )
+	{
+		restore_dir(dest_dir);
+		return STATUS_ERROR;
+	}
+	if (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP)
+	{
+		clear_bkup_dir(dest_dir);
+	}
+
+	return STATUS_OK;
+}
+
+/*
+ * recovery_rsync()
+ *	Run one recovery stage over the data directory: first the
+ *	"global" directory, then "base", "pg_clog" and "pg_xlog" in that
+ *	order.  Progress is written to stderr as "<step>...OK" or
+ *	"<step>...NG"; the first failing step ends the stage.
+ *
+ * ARGS
+ *	src:   rsync source of the master's data directory
+ *	dest:  local data directory
+ *	stage: PGR_1ST_RECOVERY selects first_recovery(), any other value
+ *	       selects second_recovery()
+ * RETURN
+ *	STATUS_OK if every step succeeded, STATUS_ERROR otherwise (also
+ *	when src or dest is NULL).
+ */
+static int
+recovery_rsync(char * src , char * dest, int stage)
+{
+	static char * sub_dirs[] = { "base", "pg_clog", "pg_xlog" };
+	static char * banner_1st[] = {
+		"1st recovery step of [base] directory...",
+		"1st recovery step of [pg_clog] directory...",
+		"1st recovery step of [pg_xlog] directory..."
+	};
+	static char * banner_2nd[] = {
+		"2nd recovery step of [base] directory...",
+		"2nd recovery step of [pg_clog] directory...",
+		"2nd recovery step of [pg_xlog] directory..."
+	};
+	char ** banners;
+	int (*step)(char *, char *, char *);
+	int idx;
+
+	if (!src || !dest)
+	{
+		return STATUS_ERROR;
+	}
+
+	/* the "global" directory is handled the same way in both stages */
+	fprintf(stderr,"%s recovery step of [global] directory...",
+			((stage == 1)?"1st":"2nd"));
+	if (rsync_global_dir(src, dest, stage) != STATUS_OK)
+	{
+		fprintf(stderr,"NG\n");
+		return STATUS_ERROR;
+	}
+	fprintf(stderr,"OK\n");
+
+	/* pick the per-directory routine and its messages for this stage */
+	if (stage == PGR_1ST_RECOVERY)
+	{
+		banners = banner_1st;
+		step = first_recovery;
+	}
+	else
+	{
+		banners = banner_2nd;
+		step = second_recovery;
+	}
+
+	for (idx = 0 ; idx < 3 ; idx ++)
+	{
+		fprintf(stderr,"%s",banners[idx]);
+		if (step(src,dest,sub_dirs[idx]) != STATUS_OK)
+		{
+			fprintf(stderr,"NG\n");
+			return STATUS_ERROR;
+		}
+		fprintf(stderr,"OK\n");
+	}
+
+	return STATUS_OK;
+}
+
+/*
+ * build_recovery_src()
+ *	Work out the rsync source for a cold recovery from the master's
+ *	packet: just the data directory when the master's host name starts
+ *	with Local_Host_Name, "<host>:<data directory>" otherwise.
+ *	*need_sync_table_space is set to true only in the second case.
+ *
+ *	The packet fields come from the peer, so they are read with an
+ *	explicit length limit and the result is bounded by src_size.
+ *
+ * RETURN
+ *	STATUS_OK, or STATUS_ERROR if the result does not fit into src.
+ */
+static int
+build_recovery_src(RecoveryPacket * packet, char * src, size_t src_size, bool * need_sync_table_space)
+{
+	int len;
+	int host_max = (int) sizeof(packet->hostName);
+	int data_max = (int) sizeof(packet->pg_data);
+
+	/*
+	 * NOTE(review): this is a prefix comparison over
+	 * strlen(Local_Host_Name) characters, kept as it was.  A local name
+	 * that is a prefix of the master's name is treated as "same host";
+	 * confirm that this is intended.
+	 */
+	if (!strncmp(Local_Host_Name,packet->hostName,strlen(Local_Host_Name)))
+	{
+		len = snprintf(src,src_size,"%.*s",data_max,packet->pg_data);
+		*need_sync_table_space = false;
+	}
+	else
+	{
+		len = snprintf(src,src_size,"%.*s:%.*s",
+					   host_max,packet->hostName,
+					   data_max,packet->pg_data);
+		*need_sync_table_space = true;
+	}
+	if ((len < 0) || ((size_t) len >= src_size))
+	{
+		return STATUS_ERROR;
+	}
+	return STATUS_OK;
+}
+
+/*
+ * recovery_loop()
+ *	Read one recovery packet from fd and act on it.
+ *
+ *	RECOVERY_PREPARE_ANS:    run the 1st recovery step, then send
+ *	                         RECOVERY_START_REQ.
+ *	RECOVERY_START_ANS:      run the 2nd recovery step; when mode is
+ *	                         PGR_HOT_RECOVERY also send
+ *	                         RECOVERY_QUEUE_DATA_REQ.
+ *	RECOVERY_QUEUE_DATA_ANS: recovery is complete.
+ *	anything else:           failure.
+ *
+ *	Each step is a cold recovery when PGR_Recovery_Mode is
+ *	PGR_COLD_RECOVERY and a hot recovery otherwise.
+ *
+ * RETURN
+ *	RECOVERY_LOOP_CONTINUE when another packet is expected,
+ *	RECOVERY_LOOP_END when recovery is finished,
+ *	RECOVERY_LOOP_FAIL on any error.
+ */
+static int
+recovery_loop(int fd, int mode)
+{
+
+	int status = STATUS_OK;
+	RecoveryPacket packet;
+	int r_size = 0;
+	int rtn = RECOVERY_LOOP_END;
+	char src[256];
+	bool need_sync_table_space = false;
+
+	memset(&packet,0,sizeof(RecoveryPacket));
+	r_size = read_packet(fd,&packet);
+	if (r_size <= 0)
+	{
+		/*
+		 * Nothing usable was read.  Return at once instead of
+		 * dispatching on the contents of a packet we never received.
+		 */
+		return RECOVERY_LOOP_FAIL;
+	}
+	switch (ntohs(packet.packet_no))
+	{
+		case RECOVERY_PREPARE_ANS :
+			/*
+			 * get master information and
+			 * sync master data before recovery
+			 */
+			if (Debug_pretty_print)
+			{
+				elog(DEBUG1,"local host : %s  master:%s",Local_Host_Name,packet.hostName);
+			}
+			if (PGR_Recovery_Mode == PGR_COLD_RECOVERY)
+			{
+				/* src is only needed for the rsync based recovery */
+				if (build_recovery_src(&packet,src,sizeof(src),&need_sync_table_space) != STATUS_OK)
+				{
+					rtn = RECOVERY_LOOP_FAIL;
+					break;
+				}
+				rtn = cold_recovery(src,&packet,need_sync_table_space,PGR_1ST_RECOVERY);
+			}
+			else
+			{
+				rtn = hot_recovery(&packet,PGR_1ST_RECOVERY);
+			}
+			if (rtn != STATUS_OK)
+			{
+				rtn = RECOVERY_LOOP_FAIL;
+				break;
+			}
+
+			/*
+			 * send recovery start request
+			 */
+			PGRset_recovery_packet_no(&packet, RECOVERY_START_REQ );
+			status = send_packet(&fd,&packet);
+			if (status != STATUS_OK)
+			{
+				fprintf(stderr,"RECOVERY_START_REQ send error\n");
+				rtn = RECOVERY_LOOP_FAIL;
+				break;
+			}
+			rtn = RECOVERY_LOOP_CONTINUE;
+			break;
+		case RECOVERY_START_ANS :
+			/*
+			 * sync master data for recovery
+			 */
+			if (PGR_Recovery_Mode == PGR_COLD_RECOVERY)
+			{
+				if (build_recovery_src(&packet,src,sizeof(src),&need_sync_table_space) != STATUS_OK)
+				{
+					fprintf(stderr,"2nd hot recovery failed\n");
+					rtn = RECOVERY_LOOP_FAIL;
+					break;
+				}
+				rtn = cold_recovery(src,&packet,need_sync_table_space,PGR_2ND_RECOVERY);
+			}
+			else
+			{
+				rtn = hot_recovery(&packet,PGR_2ND_RECOVERY);
+			}
+
+			if (rtn == STATUS_OK)
+			{
+				fprintf(stderr,"2nd recovery successed\n");
+				if (mode == PGR_HOT_RECOVERY)
+				{
+					rtn = RECOVERY_LOOP_CONTINUE;
+					/*
+					 * send recovery queued data request
+					 */
+					PGRset_recovery_packet_no(&packet, RECOVERY_QUEUE_DATA_REQ );
+					status = send_packet(&fd,&packet);
+					if (status != STATUS_OK)
+					{
+						fprintf(stderr,"RECOVERY_QUEUE_DATA_REQ send error\n");
+						rtn = RECOVERY_LOOP_FAIL;
+						break;
+					}
+				}
+				else
+				{
+					rtn = RECOVERY_LOOP_END;
+				}
+			}
+			else
+			{
+				fprintf(stderr,"2nd hot recovery failed\n");
+				rtn = RECOVERY_LOOP_FAIL;
+			}
+			break;
+		case RECOVERY_QUEUE_DATA_ANS:
+			rtn = RECOVERY_LOOP_END;
+			break;
+		case RECOVERY_ERROR_OCCUPIED:
+			fprintf(stderr,"already in use for another recovery\n");
+			rtn = RECOVERY_LOOP_FAIL;
+			break;
+		case RECOVERY_ERROR_CONNECTION:
+			fprintf(stderr,"connection failed\n");
+			rtn = RECOVERY_LOOP_FAIL;
+			break;
+		default:
+			fprintf(stderr,"unknown packet received\n");
+			rtn = RECOVERY_LOOP_FAIL;
+			break;
+	}
+
+	return rtn;
+}
+
+/*
+ * PGR_Recovery_Main()
+ *	Drive a recovery of this cluster db.
+ *
+ *	A RECOVERY_PREPARE_REQ is sent to a replication server; a server
+ *	that cannot be reached is marked DATA_ERR and the next one returned
+ *	by PGR_get_replicate_server_info() is tried.  Answers are then
+ *	handled by recovery_loop() until it reports the end or a failure.
+ *	The cluster status is STATUS_RECOVERY while this runs and is set
+ *	back to STATUS_REPLICATED before returning.
+ *
+ * ARGS
+ *	mode: recovery mode, stored in PGR_Recovery_Mode
+ * RETURN
+ *	STATUS_OK when recovery completed, STATUS_ERROR otherwise.
+ */
+int
+PGR_Recovery_Main(int mode)
+{
+	int status;
+	int fd = -1;
+	int rtn;
+	ReplicateServerInfo * host;
+
+	/* zeroed buffer, one byte held back: the name is always terminated */
+	memset(Local_Host_Name,0,sizeof(Local_Host_Name));
+	gethostname(Local_Host_Name,sizeof(Local_Host_Name) - 1);
+	PGR_Recovery_Mode = mode;
+
+	/* find a replication server that accepts the prepare request */
+	for (;;)
+	{
+		host = PGR_get_replicate_server_info();
+		if (host == NULL)
+		{
+			if (Debug_pretty_print)
+			{
+				elog(DEBUG1,"not found replication server");
+			}
+			PGR_Set_Cluster_Status(STATUS_REPLICATED);
+			return STATUS_ERROR;
+		}
+
+		PGR_Set_Cluster_Status(STATUS_RECOVERY);
+		status = start_recovery_send(&fd,host);
+		if (status == STATUS_OK)
+		{
+			break;
+		}
+		PGR_Set_Replication_Server_Status(host,DATA_ERR);
+		if (fd >= 0)
+		{
+			close(fd);
+		}
+		/* forget the descriptor so a later failure cannot close it again */
+		fd = -1;
+		if (Debug_pretty_print)
+		{
+			elog(DEBUG1,"start recovery packet send error");
+		}
+	}
+
+	for (;;)
+	{
+		fd_set rmask;
+		struct timeval timeout;
+
+		timeout.tv_sec = RECOVERY_TIMEOUT;
+		timeout.tv_usec = 0;
+
+		/*
+		 * Wait for something to happen.
+		 */
+		FD_ZERO(&rmask);
+		FD_SET(fd,&rmask);
+		rtn = select(fd+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
+		if (rtn < 0)
+		{
+			/*
+			 * select() failed; rmask is still as we set it, so it must
+			 * not be taken as "data available".  Retry when merely
+			 * interrupted, give up on anything else.
+			 */
+			if (errno == EINTR)
+			{
+				continue;
+			}
+			(void) stop_recovery_send(&fd,host);
+			PGR_Set_Cluster_Status(STATUS_REPLICATED);
+			close(fd);
+			return STATUS_ERROR;
+		}
+		if (rtn == 0 || !FD_ISSET(fd, &rmask))
+		{
+			/* timeout: keep waiting */
+			continue;
+		}
+
+		status = recovery_loop(fd, mode);
+		if (status == RECOVERY_LOOP_CONTINUE)
+		{
+			continue;
+		}
+		if (status == RECOVERY_LOOP_END)
+		{
+			close(fd);
+			break;
+		}
+		if (status == RECOVERY_LOOP_FAIL)
+		{
+			/* tell the peer; we fail whether or not this gets through */
+			(void) stop_recovery_send(&fd,host);
+			PGR_Set_Cluster_Status(STATUS_REPLICATED);
+			close(fd);
+			return STATUS_ERROR;
+		}
+
+		/* unexpected result from recovery_loop() */
+		close(fd);
+		PGR_Set_Cluster_Status(STATUS_REPLICATED);
+		return STATUS_ERROR;
+	}
+	PGR_Set_Cluster_Status(STATUS_REPLICATED);
+	return STATUS_OK;
+}
+
+/*
+ * show_recovery_packet()
+ *	Write the fields of a recovery packet to the log at DEBUG1 level.
+ *	Does nothing unless Debug_pretty_print is set.  The numeric fields
+ *	are converted with ntohs() before printing.
+ */
+static void
+show_recovery_packet(RecoveryPacket * packet)
+{
+	if (!Debug_pretty_print)
+	{
+		return;
+	}
+
+	elog(DEBUG1,"no = %d",ntohs(packet->packet_no));
+	elog(DEBUG1,"max_connect = %d",ntohs(packet->max_connect));
+	elog(DEBUG1,"port = %d",ntohs(packet->port));
+	elog(DEBUG1,"recoveryPort = %d",ntohs(packet->recoveryPort));
+	if (packet->hostName != NULL)
+	{
+		elog(DEBUG1,"hostName = %s",packet->hostName);
+	}
+	if (packet->pg_data != NULL)
+	{
+		elog(DEBUG1,"pg_data = %s",packet->pg_data);
+	}
+}
+
+/*
+ * set_recovery_packet()
+ *	Fill a recovery packet with the packet number and this server's
+ *	own settings: MaxBackends, PostPortNumber, RecoveryPortNumber, host
+ *	name, data directory and the name of the effective user.  Numeric
+ *	fields are stored with htons().
+ *
+ *	Every string field is cleared first and filled with at most
+ *	sizeof(field) - 1 bytes, so it is always NUL terminated.  Does
+ *	nothing when packet is NULL.
+ */
+static void
+set_recovery_packet(RecoveryPacket * packet, int packet_no)
+{
+	struct passwd * pw = NULL;
+
+	if (packet == NULL)
+	{
+		return;
+	}
+	PGRset_recovery_packet_no(packet, packet_no );
+	packet->max_connect = htons(MaxBackends);
+	packet->port = htons(PostPortNumber);
+	packet->recoveryPort = htons(RecoveryPortNumber);
+
+	memset(packet->hostName,0,sizeof(packet->hostName));
+	gethostname(packet->hostName,sizeof(packet->hostName) - 1);
+
+	/*
+	 * Copy DataDir as a string.  A memcpy() of sizeof(packet->pg_data)
+	 * bytes would read past the end of DataDir whenever the path is
+	 * shorter than the field.
+	 */
+	memset(packet->pg_data,0,sizeof(packet->pg_data));
+	if (DataDir != NULL)
+	{
+		strncpy(packet->pg_data,DataDir,sizeof(packet->pg_data) - 1);
+	}
+
+	memset(packet->userName,0,sizeof(packet->userName));
+	if ((pw = getpwuid(geteuid())) != NULL)
+	{
+		strncpy(packet->userName,pw->pw_name,sizeof(packet->userName) - 1);
+	}
+	else
+	{
+		/* NOTE(review): cuserid() does not know the size of userName */
+		cuserid(packet->userName);
+	}
+}
+
+/*
+ * sync_table_space()
+ *	Ask the master for the location of every tablespace whose name does
+ *	not start with "pg_" and rsync each non-empty location from the
+ *	master.  The outcome for each tablespace is written to stderr.
+ *
+ *	All tablespaces are attempted even after one has failed.
+ *
+ * RETURN
+ *	STATUS_OK if the list could be fetched and every tablespace was
+ *	synchronised; STATUS_ERROR otherwise.  (The result of the
+ *	individual syncs used to be discarded, so a tablespace that failed
+ *	to sync was reported as success.)
+ */
+static int
+sync_table_space(char * hostName, uint16_t portNum, char * userName, int stage)
+{
+	PGresult * res = (PGresult *)NULL;
+	int i = 0;
+	int row_num = 0;
+	char * location = NULL;
+	int rtn = STATUS_OK;
+	int result = STATUS_OK;
+
+	res = get_table_space_location(hostName, portNum, userName);
+	if (res == (PGresult *)NULL)
+	{
+		return STATUS_ERROR;
+	}
+	row_num = PQntuples(res);
+	for ( i = 0 ; i < row_num ; i ++)
+	{
+		location = PQgetvalue(res,i,0);
+		if (strlen(location) > 0 )
+		{
+			fprintf(stderr,"sync tablespace[%s]...",location);
+			rtn = rsync_table_space(hostName, location, stage);
+			fprintf(stderr,"%s\n", (rtn == STATUS_OK)?"OK":"NG");
+			if (rtn != STATUS_OK)
+			{
+				/* remember the failure, but still try the others */
+				result = STATUS_ERROR;
+			}
+		}
+	}
+	PQclear(res);
+
+	return result;
+}
+
+/*
+ * get_table_space_location()
+ *	Connect to database "template1" on the given server and run
+ *	"select spclocation from pg_tablespace where spcname not like 'pg_%'".
+ *
+ *	A connection that comes back as CONNECTION_BAD is closed and
+ *	reopened until it works or the retry counter exceeds
+ *	MAX_RETRY_TIMES.
+ *
+ * RETURN
+ *	The query result (the caller must PQclear() it), or NULL on bad
+ *	arguments, connection failure or a query that did not return
+ *	PGRES_TUPLES_OK.
+ */
+static PGresult *
+get_table_space_location(char * hostName, uint16_t portNum, char * userName)
+{
+	char * sql = "select spclocation from pg_tablespace where spcname not like 'pg_%'";
+	char * dbname = "template1";
+	char port_str[8];
+	PGconn * master = (PGconn *)NULL;
+	PGresult * result = (PGresult *)NULL;
+	int attempt;
+
+	if (!hostName || !userName || (portNum == 0))
+	{
+		return (PGresult *)NULL;
+	}
+	snprintf(port_str,sizeof(port_str),"%d", portNum);
+
+	/* create connection to master */
+	master = PQsetdbLogin(hostName, port_str, NULL, NULL, dbname, userName, NULL);
+	if (!master)
+	{
+		return (PGresult *)NULL;
+	}
+
+	/* reconnect for as long as the connection is bad and retries remain */
+	for (attempt = 0 ; PQstatus(master) == CONNECTION_BAD ; attempt ++)
+	{
+		if (master)
+		{
+			PQfinish(master);
+		}
+		if (attempt > MAX_RETRY_TIMES )
+		{
+			return (PGresult *)NULL;
+		}
+		master = PQsetdbLogin(hostName, port_str, NULL, NULL, dbname, userName, NULL);
+	}
+
+	result = PQexec(master , sql);
+	if (!result || (PQresultStatus(result) != PGRES_TUPLES_OK))
+	{
+		PQclear(result);
+		result = (PGresult *)NULL;
+	}
+	if (master)
+	{
+		PQfinish(master);
+	}
+
+	return result;
+}
+
+/*
+ * rsync_table_space()
+ *	Synchronise one tablespace directory from the master.  rsync is
+ *	run with source "<hostName>:<location>" and destination
+ *	"<location>".
+ *
+ *	In the 1st stage the local directory is backed up first and in the
+ *	2nd stage the backup is removed after a successful sync, both
+ *	unless PGR_Recovery_Mode is PGR_WITHOUT_BACKUP.  After rsync the
+ *	function waits (retrying once per second) until location can be
+ *	stat()ed.
+ *
+ * RETURN
+ *	STATUS_OK on success.  STATUS_ERROR if a path does not fit into the
+ *	local buffers, rsync failed or location did not show up; in the
+ *	last two cases restore_dir() is called first.
+ */
+static int
+rsync_table_space(char * hostName, char * location, int stage)
+{
+	int status = STATUS_OK;
+	char src_dir[256];
+	char dest_dir[256];
+	struct stat fstat;
+	int cnt = 0;
+	int len;
+
+	len = snprintf(src_dir,sizeof(src_dir),"%s:%s",hostName,location);
+	if ((len < 0) || (len >= (int) sizeof(src_dir)))
+	{
+		return STATUS_ERROR;
+	}
+	len = snprintf(dest_dir,sizeof(dest_dir),"%s",location);
+	if ((len < 0) || (len >= (int) sizeof(dest_dir)))
+	{
+		return STATUS_ERROR;
+	}
+
+	if ((stage == PGR_1ST_RECOVERY) && (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP))
+	{
+		/* as before, a failed backup does not stop the sync */
+		(void) bkup_dir(location);
+	}
+	/*
+	 * NOTE(review): source and destination name the same directory and
+	 * the source has no trailing slash.  rsync may then create the
+	 * directory inside the destination (<location>/<basename>) rather
+	 * than filling <location> itself.  Verify the resulting layout.
+	 */
+	status = rsync_pg_data(src_dir, dest_dir);
+	if (status != STATUS_OK )
+	{
+		restore_dir(location);
+		return STATUS_ERROR;
+	}
+	/* check file status */
+	cnt = 0;
+	while (stat(location,&fstat) < 0)
+	{
+		if (cnt > MAX_RETRY_TIMES )
+		{
+			restore_dir(location);
+			return STATUS_ERROR;
+		}
+		cnt ++;
+		sleep(1);
+	}
+	if ((stage == PGR_2ND_RECOVERY) && (PGR_Recovery_Mode != PGR_WITHOUT_BACKUP))
+	{
+		clear_bkup_dir(location);
+	}
+	return STATUS_OK;
+}
+
+/*
+ * cold_recovery()
+ *	One stage of an rsync based recovery: synchronise the data
+ *	directory from src into DataDir and, when requested, the
+ *	tablespaces of the master described by packet as well.
+ *
+ * ARGS
+ *	src:                   rsync source of the master's data directory
+ *	packet:                packet holding the master's host name, port
+ *	                       and user name
+ *	need_sync_table_space: also synchronise the tablespaces
+ *	stage:                 PGR_1ST_RECOVERY or the 2nd stage
+ * RETURN
+ *	STATUS_OK on success, STATUS_ERROR if either part failed.
+ */
+static int
+cold_recovery(char * src, RecoveryPacket *packet, bool need_sync_table_space, int stage)
+{
+	int result;
+
+	result = recovery_rsync(src,DataDir,stage);
+	if (result != STATUS_OK)
+	{
+		if (Debug_pretty_print)
+		{
+			elog(DEBUG1,"%s rsync error",
+				 ((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
+		}
+		return STATUS_ERROR;
+	}
+
+	/* data directory is done; nothing more unless tablespaces are wanted */
+	if (need_sync_table_space != true)
+	{
+		return STATUS_OK;
+	}
+
+	result = sync_table_space(packet->hostName, ntohs(packet->port), packet->userName, stage);
+	fprintf(stderr,"%s sync_table_space ",
+			((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
+	if (result == STATUS_OK)
+	{
+		fprintf(stderr,"OK\n");
+		return STATUS_OK;
+	}
+
+	if (Debug_pretty_print)
+	{
+		elog(DEBUG1,"%s sync table space error",
+			 ((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
+	}
+	fprintf(stderr,"NG\n");
+	return STATUS_ERROR;
+}
+
+/*
+ * hot_recovery()
+ *	One stage of a dump based recovery from the master described by
+ *	packet: restore_from_dumpall() in the 1st stage,
+ *	restore_from_each_dump() otherwise.  Progress is written to stderr.
+ *
+ * RETURN
+ *	STATUS_OK on success, STATUS_ERROR if the restore failed.
+ */
+static int
+hot_recovery(RecoveryPacket *packet, int stage)
+{
+	int status = STATUS_OK;
+
+	fprintf(stderr,"%s restore from pg_dump ",
+			((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
+	if (stage == PGR_1ST_RECOVERY)
+	{
+		status = restore_from_dumpall(packet->hostName, ntohs(packet->port), packet->userName );
+	}
+	else
+	{
+		status = restore_from_each_dump(packet->hostName, ntohs(packet->port), packet->userName );
+	}
+	if (status != STATUS_OK)
+	{
+		if (Debug_pretty_print)
+		{
+			/*
+			 * This message used to read "sync table space error", copied
+			 * from cold_recovery(); it now names the step that failed.
+			 */
+			elog(DEBUG1,"%s restore from pg_dump error",
+				 ((stage == PGR_1ST_RECOVERY)?"1st":"2nd"));
+		}
+		fprintf(stderr,"->NG\n");
+		return STATUS_ERROR;
+	}
+	fprintf(stderr,"->OK\n");
+	return STATUS_OK;
+}
+
+/*
+ * restore_from_dumpall()
+ *	Run "pg_dumpall ... | psql ... template1" through the shell to copy
+ *	everything from the given master into the local server listening on
+ *	PostPortNumber.  pg_dumpall and psql are taken from the directory
+ *	part of PgDumpPath.
+ *
+ *	NOTE(review): hostName and userName are pasted into a shell command
+ *	line without quoting; they must not contain shell metacharacters.
+ *
+ * RETURN
+ *	STATUS_ERROR if PgDumpPath is unset or has no directory part, a
+ *	path or the command line does not fit into the local buffers, or
+ *	the child could not be started or waited for.  Otherwise STATUS_OK;
+ *	as before, the exit status of the shell command itself is not
+ *	evaluated.
+ */
+static int
+restore_from_dumpall( char * hostName, uint16_t portNum, char * userName)
+{
+	int status = 0;
+	char exec_command[512];
+	pid_t pid;
+	char pg_dumpall[256];
+	char psql[256];
+	char *p=NULL;
+	int dir_len;
+	int len;
+
+	if (PgDumpPath == NULL)
+	{
+		return STATUS_ERROR;
+	}
+	/* directory part of PgDumpPath, including the trailing '/' */
+	p = strrchr(PgDumpPath,'/');
+	if (p == NULL)
+	{
+		return STATUS_ERROR;
+	}
+	dir_len = (int) (p - PgDumpPath) + 1;
+
+	/* set pg_dumpall path */
+	len = snprintf(pg_dumpall,sizeof(pg_dumpall),"%.*s%s",dir_len,PgDumpPath,"pg_dumpall");
+	if ((len < 0) || (len >= (int) sizeof(pg_dumpall)))
+	{
+		return STATUS_ERROR;
+	}
+
+	/* set psql path */
+	len = snprintf(psql,sizeof(psql),"%.*s%s",dir_len,PgDumpPath,"psql");
+	if ((len < 0) || (len >= (int) sizeof(psql)))
+	{
+		return STATUS_ERROR;
+	}
+
+	len = snprintf(exec_command,sizeof(exec_command),"%s -i -o -c -h %s -p %d -U %s | %s -p %d template1",
+			pg_dumpall,
+			hostName,
+			portNum,
+			userName,
+			psql,
+			PostPortNumber
+			);
+	if ((len < 0) || (len >= (int) sizeof(exec_command)))
+	{
+		/* never hand a cut-off command line to the shell */
+		return STATUS_ERROR;
+	}
+	fprintf(stderr,"1st exec:[%s]\n",exec_command);
+
+	pid = fork();
+	if (pid < 0)
+	{
+		return STATUS_ERROR;
+	}
+	if (pid == 0)
+	{
+		system(exec_command);
+		/* _exit(): do not run the parent's exit handlers in the child */
+		_exit(0);
+	}
+
+	/* wait for this particular child, retrying when interrupted */
+	for (;;)
+	{
+		if (waitpid(pid, &status, 0) < 0)
+		{
+			if (errno == EINTR)
+				continue;
+			return STATUS_ERROR;
+		}
+		break;
+	}
+	if (WIFEXITED(status) == 0 || WEXITSTATUS(status) != 0)
+	{
+		return STATUS_ERROR;
+	}
+	return STATUS_OK;
+}
+
+/*
+ * restore_from_dump()
+ *	Run "pg_dump ... dbName | pg_restore ... -d dbName" through the
+ *	shell to copy one database from the given master into the local
+ *	server listening on PostPortNumber.  pg_dump is PgDumpPath itself;
+ *	pg_restore is taken from the same directory.
+ *
+ *	NOTE(review): hostName, userName and dbName are pasted into a shell
+ *	command line without quoting.  dbName comes from the master's
+ *	database list and may contain characters the shell interprets;
+ *	this should be quoted or run without a shell.
+ *
+ * RETURN
+ *	STATUS_ERROR if PgDumpPath is unset or has no directory part, a
+ *	path or the command line does not fit into the local buffers, or
+ *	the child could not be started or waited for.  Otherwise STATUS_OK;
+ *	as before, the exit status of the shell command itself is not
+ *	evaluated.
+ */
+static int
+restore_from_dump( char * hostName, uint16_t portNum, char * userName, char * dbName)
+{
+	int status = 0;
+	char exec_command[512];
+	pid_t pid;
+	char pg_restore[256];
+	char *p=NULL;
+	int dir_len;
+	int len;
+
+	if (PgDumpPath == NULL)
+	{
+		return STATUS_ERROR;
+	}
+
+	/* set pg_restore path: directory part of PgDumpPath + "pg_restore" */
+	p = strrchr(PgDumpPath,'/');
+	if (p == NULL)
+	{
+		return STATUS_ERROR;
+	}
+	dir_len = (int) (p - PgDumpPath) + 1;
+	len = snprintf(pg_restore,sizeof(pg_restore),"%.*s%s",dir_len,PgDumpPath,"pg_restore");
+	if ((len < 0) || (len >= (int) sizeof(pg_restore)))
+	{
+		return STATUS_ERROR;
+	}
+
+	len = snprintf(exec_command,sizeof(exec_command),"%s -i -Fc -o -b -h %s -p %d -U %s %s | %s -i -c -p %d -d %s",
+			PgDumpPath,
+			hostName,
+			portNum,
+			userName,
+			dbName,
+			pg_restore,
+			PostPortNumber,
+			dbName
+			);
+	if ((len < 0) || (len >= (int) sizeof(exec_command)))
+	{
+		/* never hand a cut-off command line to the shell */
+		return STATUS_ERROR;
+	}
+
+	fprintf(stderr,"2nd exec:[%s]\n",exec_command);
+	pid = fork();
+	if (pid < 0)
+	{
+		return STATUS_ERROR;
+	}
+	if (pid == 0)
+	{
+		system(exec_command);
+		/* _exit(): do not run the parent's exit handlers in the child */
+		_exit(0);
+	}
+
+	/* wait for this particular child, retrying when interrupted */
+	for (;;)
+	{
+		if (waitpid(pid, &status, 0) < 0)
+		{
+			if (errno == EINTR)
+				continue;
+			return STATUS_ERROR;
+		}
+		break;
+	}
+	if (WIFEXITED(status) == 0 || WEXITSTATUS(status) != 0)
+	{
+		return STATUS_ERROR;
+	}
+	return STATUS_OK;
+}
+
+/*
+ * restore_from_each_dump()
+ *	Fetch the list of databases from the master and restore each one
+ *	with restore_from_dump(), skipping "template0", "template1" and
+ *	empty names.  A "." is written to stderr per restored database.
+ *
+ *	All databases are attempted even after one has failed.
+ *
+ * RETURN
+ *	STATUS_OK if the list could be fetched and every restore returned
+ *	STATUS_OK; STATUS_ERROR otherwise.  (The result of the individual
+ *	restores used to be discarded.)
+ */
+static int
+restore_from_each_dump( char * hostName, uint16_t portNum, char * userName)
+{
+	PGresult * res = (PGresult *)NULL;
+	int i = 0;
+	int row_num = 0;
+	char * dbName = NULL;
+	int rtn = STATUS_OK;
+	int result = STATUS_OK;
+
+	res = get_dbName(hostName, portNum, userName);
+	if (res == (PGresult *)NULL)
+	{
+		return STATUS_ERROR;
+	}
+	row_num = PQntuples(res);
+	for ( i = 0 ; i < row_num ; i ++)
+	{
+		dbName = PQgetvalue(res,i,0);
+		if (strlen(dbName) > 0 )
+		{
+			if ((strcmp("template0",dbName)) &&
+				(strcmp("template1",dbName)))
+			{
+				rtn = restore_from_dump(hostName, portNum, userName, dbName);
+				fprintf(stderr,".");
+				if (rtn != STATUS_OK)
+				{
+					/* remember the failure, but still try the others */
+					result = STATUS_ERROR;
+				}
+			}
+		}
+	}
+	PQclear(res);
+
+	return result;
+}
+
+/*
+ * get_dbName()
+ *	Connect to database "template1" on the given server and run
+ *	"SELECT datname FROM pg_database".
+ *
+ *	A connection that comes back as CONNECTION_BAD is closed and
+ *	reopened until it works or the retry counter exceeds
+ *	MAX_RETRY_TIMES.
+ *
+ * RETURN
+ *	The query result (the caller must PQclear() it), or NULL on bad
+ *	arguments, connection failure or a query that did not return
+ *	PGRES_TUPLES_OK.
+ */
+static PGresult *
+get_dbName(char * hostName, uint16_t portNum, char * userName)
+{
+	char * sql = "SELECT datname FROM pg_database";
+	char * dbname = "template1";
+	char port_str[8];
+	PGconn * master = (PGconn *)NULL;
+	PGresult * names = (PGresult *)NULL;
+	int tries = 0;
+
+	if (!hostName || !userName || (portNum == 0))
+	{
+		return (PGresult *)NULL;
+	}
+	snprintf(port_str,sizeof(port_str),"%d", portNum);
+
+	/* create connection to master */
+	master = PQsetdbLogin(hostName, port_str, NULL, NULL, dbname, userName, NULL);
+	if (!master)
+	{
+		return (PGresult *)NULL;
+	}
+
+	/* check to see that the backend Connection was successfully made */
+	for (;;)
+	{
+		if (PQstatus(master) != CONNECTION_BAD)
+		{
+			break;
+		}
+		if (master)
+		{
+			PQfinish(master);
+		}
+		if (tries > MAX_RETRY_TIMES )
+		{
+			return (PGresult *)NULL;
+		}
+		master = PQsetdbLogin(hostName, port_str, NULL, NULL, dbname, userName, NULL);
+		tries ++;
+	}
+
+	names = PQexec(master , sql);
+	if (!names || (PQresultStatus(names) != PGRES_TUPLES_OK))
+	{
+		PQclear(names);
+		names = (PGresult *)NULL;
+	}
+	if (master)
+	{
+		PQfinish(master);
+	}
+
+	return names;
+}
+
+#endif /* USE_REPLICATION */
diff -aruN postgresql-8.2.4/src/backend/libpq/replicate.c pgcluster-1.7.0rc7/src/backend/libpq/replicate.c
--- postgresql-8.2.4/src/backend/libpq/replicate.c 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/libpq/replicate.c 2007-02-18 22:52:16.000000000 +0100
@@ -0,0 +1,4021 @@
+/*--------------------------------------------------------------------
+ * FILE:
+ * replicate.c
+ *
+ * NOTE:
+ *      This file contains the functions that the backend source calls
+ *      to perform replication.
+ *      The low-level I/O functions called by these functions are
+ *      contained in 'replicate_com.c'.
+ *
+ *--------------------------------------------------------------------
+ */
+
+/*--------------------------------------
+ * INTERFACE ROUTINES
+ *
+ * setup/teardown:
+ * PGR_Init_Replicate_Server_Data
+ * PGR_Set_Replicate_Server_Socket
+ * PGR_delete_shm
+ * I/O call:
+ * PGR_Send_Replicate_Command
+ * table handling:
+ * PGR_get_replicate_server_info
+ * status distinction:
+ * PGR_Is_Replicated_Command
+ * Xlog_Check_Replicatec
+ *      replication main:
+ * PGR_replication
+ *-------------------------------------
+ */
+#ifdef USE_REPLICATION
+
+#include "postgres.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#ifdef HAVE_NETINET_TCP_H
+#include
+#endif
+#include
+#include
+#include
+
+#include "access/transam.h"
+#include "bootstrap/bootstrap.h"
+#include "libpq/libpq.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "commands/prepare.h"
+#include "nodes/nodes.h"
+#include "nodes/print.h"
+#include "utils/guc.h"
+#include "parser/parser.h"
+#include "access/xact.h"
+#include "storage/proc.h"
+#include "tcop/tcopprot.h"
+#include "tcop/utility.h"
+#include "postmaster/postmaster.h"
+#include "replicate.h"
+
+/* the source of this value is 'access/transam/varsup.c' */
+#define VAR_OID_PREFETCH (8192)
+
+PGR_ReplicationLog_Info ReplicationLog_Info;
+bool pgr_skip_in_prepared_query = false;
+
+/*--------------------------------------
+ * PROTOTYPE DECLARATION
+ *--------------------------------------
+ */
+static int set_command_args(char argv[PGR_CMD_ARG_NUM][256],char *str);
+static bool is_same_replication_server(ReplicateServerInfo * sp1, ReplicateServerInfo * sp2 );
+static ReplicateServerInfo * search_new_replication_server ( ReplicateServerInfo * sp , int socket_type );
+
+static int close_replicate_server_socket ( ReplicateServerInfo * sp , int socket_type );
+static int recv_message(int sock,char * buf,int flag);
+static int send_replicate_packet(int sock,ReplicateHeader * header, char * query_string);
+static bool is_copy_from(char * query);
+static int get_words( char words[MAX_WORDS][MAX_WORD_LETTERS] ,char * string,int length,int upper);
+static int get_table_name(char * table_name, char * query, int position );
+static bool is_not_replication_query(char * query_string, int query_len, char cmdType);
+static int Comp_Not_Replicate(PGR_Not_Replicate_Type * nrp1,PGR_Not_Replicate_Type* nrp2);
+static bool is_serial_control_query(char cmdType,char * query);
+static bool is_select_into_query(char cmdType,char * query);
+static int send_response_to_replication_server(const char * notice);
+static bool do_not_replication_command(const char * commandTag);
+static bool is_create_temp_table(char * query);
+static int add_replication_server(char * hostname,char * port, char * recovery_port);
+static int change_replication_server(char * hostname,char * port, char * recovery_port);
+static int get_new_replication_socket( ReplicateServerInfo * base, ReplicateServerInfo * sp, int socket_type);
+static char * get_hostName(char * str);
+static void set_response_mode(char * mode);
+static void PGR_Set_Current_Replication_Query_ID(char *id);
+#ifdef CONTROL_LOCK_CONFLICT
+static int wait_lock_answer(void);
+static int read_trigger(char * result, int buf_size);
+#endif /* CONTROL_LOCK_CONFLICT */
+static int check_conf_data(void);
+
+static unsigned int get_next_request_id(void);
+static bool is_this_query_replicated(char * id);
+static int set_replication_id(char * id);
+static int return_current_oid(void);
+static int sync_oid(char * oid);
+static bool is_concerned_with_prepared_select(char cmdType, char * query_string);
+static int skip_non_blank(char * ptr, int max);
+static int skip_blank(char * ptr, int max);
+static int parse_message(char * query_string);
+static bool is_prepared_as_select(char * query_string);
+static bool is_statement_as_select(char * query_string);
+
+extern ssize_t secure_read(Port *, void *, size_t);
+/*--------------------------------------------------------------------
+ * SYMBOL
+ * PGR_Init_Replicate_Server_Data()
+ * NOTES
+ * Read Configuration file and create ReplicateServerData table
+ * ARGS
+ * void
+ * RETURN
+ * OK: STATUS_OK
+ * NG: STATUS_ERROR
+ *--------------------------------------------------------------------
+ */
+int
+PGR_Init_Replicate_Server_Data(void)
+{
+ int table_size,str_size;
+ ReplicateServerInfo *sp;
+ PGR_Not_Replicate_Type * nrp;
+ ConfDataType * conf;
+ int rec_no,cnt;
+ unsigned int ip;
+ char HostName[HOSTNAME_MAX_LENGTH];
+
+ memset (HostName,0,sizeof(HostName));
+ if (ConfData_Top == (ConfDataType *)NULL)
+ {
+ return STATUS_ERROR;
+ }
+
+ /* allocate replication server information table */
+ table_size = sizeof(ReplicateServerInfo) * MAX_SERVER_NUM;
+ ReplicateServerShmid = shmget(IPC_PRIVATE,table_size,IPC_CREAT | IPC_EXCL | 0600);
+ if (ReplicateServerShmid < 0)
+ {
+ return STATUS_ERROR;
+ }
+ ReplicateServerData = (ReplicateServerInfo *)shmat(ReplicateServerShmid,0,0);
+ if (ReplicateServerData == (ReplicateServerInfo *)-1)
+ {
+ return STATUS_ERROR;
+ }
+ memset(ReplicateServerData,0,table_size);
+ sp = ReplicateServerData;
+
+ /* allocate cluster db information table */
+ ClusterDBShmid = shmget(IPC_PRIVATE,sizeof(ClusterDBInfo),IPC_CREAT | IPC_EXCL | 0600);
+ if (ClusterDBShmid < 0)
+ {
+ return STATUS_ERROR;
+ }
+ ClusterDBData = (ClusterDBInfo *)shmat(ClusterDBShmid,0,0);
+ if (ClusterDBData == (ClusterDBInfo *)-1)
+ {
+ return STATUS_ERROR;
+ }
+ memset(ClusterDBData,0,sizeof(ClusterDBInfo));
+ PGR_Set_Cluster_Status(STATUS_REPLICATED);
+
+ /* allocate partial replicate table */
+ table_size = sizeof(PGR_Not_Replicate_Type) * MAX_SERVER_NUM;
+ PGR_Not_Replicate = malloc(table_size);
+ if (PGR_Not_Replicate == (PGR_Not_Replicate_Type*)NULL)
+ {
+ return STATUS_ERROR;
+ }
+ memset(PGR_Not_Replicate, 0, table_size);
+ nrp = PGR_Not_Replicate;
+ cnt = 0;
+ conf = ConfData_Top;
+ while ((conf != (ConfDataType *)NULL) && (cnt < MAX_SERVER_NUM))
+ {
+ /* set replication server table */
+ if (!strcmp(conf->table,REPLICATION_SERVER_INFO_TAG))
+ {
+ rec_no = conf->rec_no;
+ cnt = rec_no;
+ if (!strcmp(conf->key,HOST_NAME_TAG))
+ {
+ strncpy((sp + rec_no)->hostName,conf->value,sizeof(sp->hostName));
+ conf = (ConfDataType *)conf->next;
+ continue;
+ }
+ if (!strcmp(conf->key,PORT_TAG))
+ {
+ (sp + rec_no)->portNumber = atoi(conf->value);
+ (sp + rec_no)->sock = -1;
+ if ((sp + rec_no)->useFlag != DATA_USE)
+ {
+ PGR_Set_Replication_Server_Status((sp+rec_no), DATA_INIT);
+ }
+ memset((sp + rec_no + 1)->hostName,0,sizeof(sp->hostName));
+ (sp + rec_no + 1)->useFlag = DATA_END;
+ conf = (ConfDataType *)conf->next;
+ continue;
+ }
+ if (!strcmp(conf->key,RECOVERY_PORT_TAG))
+ {
+ (sp + rec_no)->recoveryPortNumber = atoi(conf->value);
+ if ((sp + rec_no)->useFlag != DATA_USE)
+ {
+ PGR_Set_Replication_Server_Status((sp+rec_no), DATA_INIT);
+ }
+ memset((sp + rec_no + 1)->hostName,0,sizeof(sp->hostName));
+ (sp + rec_no + 1)->useFlag = DATA_END;
+ conf = (ConfDataType *)conf->next;
+ continue;
+ }
+ }
+ /* set part replication table */
+ if (!strcmp(conf->table,NOT_REPLICATE_INFO_TAG))
+ {
+ rec_no = conf->rec_no;
+ cnt = rec_no;
+ if (PGR_Not_Replicate_Rec_Num < rec_no +1)
+ {
+ PGR_Not_Replicate_Rec_Num = rec_no +1;
+ }
+ if (!strcmp(conf->key,DB_NAME_TAG))
+ {
+ strncpy((nrp + rec_no)->db_name,conf->value,sizeof(nrp->db_name));
+ conf = (ConfDataType *)conf->next;
+ continue;
+ }
+ if (!strcmp(conf->key,TABLE_NAME_TAG))
+ {
+ strncpy((nrp + rec_no)->table_name,conf->value,sizeof(nrp->table_name));
+ conf = (ConfDataType *)conf->next;
+ continue;
+ }
+ }
+ if (!strcmp(conf->key,HOST_NAME_TAG))
+ {
+ str_size = sizeof(HostName) ;
+ memset(HostName,0,str_size);
+ strncpy(HostName,conf->value,str_size-1);
+ }
+ else if (!strcmp(conf->key,RECOVERY_PORT_TAG))
+ {
+ RecoveryPortNumber = atoi(conf->value);
+ }
+ else if (!strcmp(conf->key,RSYNC_PATH_TAG))
+ {
+ str_size = strlen(conf->value) ;
+ RsyncPath = malloc(str_size + 1);
+ if (RsyncPath == NULL)
+ {
+ return STATUS_ERROR;
+ }
+ memset(RsyncPath,0,str_size + 1);
+ strncpy(RsyncPath,conf->value,str_size);
+ }
+ else if (!strcmp(conf->key,RSYNC_OPTION_TAG))
+ {
+ str_size = strlen(conf->value) ;
+ RsyncOption = malloc(str_size + 1);
+ if (RsyncOption == NULL)
+ {
+ return STATUS_ERROR;
+ }
+ memset(RsyncOption,0,str_size + 1);
+ strncpy(RsyncOption,conf->value,str_size);
+ }
+ else if (!strcmp(conf->key,RSYNC_COMPRESS_TAG))
+ {
+ if (!strcmp(conf->value, "yes"))
+ RsyncCompress = true;
+ else if (!strcmp(conf->value, "no"))
+ RsyncCompress = false;
+ }
+ else if (!strcmp(conf->key,PG_DUMP_PATH_TAG))
+ {
+ str_size = strlen(conf->value) ;
+ PgDumpPath = malloc(str_size + 1);
+ if (PgDumpPath == NULL)
+ {
+ return STATUS_ERROR;
+ }
+ memset(PgDumpPath,0,str_size + 1);
+ strncpy(PgDumpPath,conf->value,str_size);
+ }
+ else if (!strcmp(conf->key,STAND_ALONE_TAG))
+ {
+ PGR_Stand_Alone = (PGR_Stand_Alone_Type*)malloc(sizeof(PGR_Stand_Alone_Type));
+ if (PGR_Stand_Alone == (PGR_Stand_Alone_Type *)NULL)
+ {
+ return STATUS_ERROR;
+ }
+ PGR_Stand_Alone->is_stand_alone = false;
+ if (!strcmp(conf->value,READ_WRITE_IF_STAND_ALONE))
+ {
+ PGR_Stand_Alone->permit = PERMIT_READ_WRITE;
+ }
+ else
+ {
+ PGR_Stand_Alone->permit = PERMIT_READ_ONLY;
+ }
+ }
+ else if (!strcmp(conf->key,TIMEOUT_TAG))
+ {
+ /* get repliaction timeout */
+ PGR_Replication_Timeout = PGRget_time_value(conf->value);
+ if ((PGR_Replication_Timeout < 1) || (PGR_Replication_Timeout > 3600))
+ {
+ fprintf(stderr,"%s is out of range. It should be between 1sec-1hr.\n",TIMEOUT_TAG);
+ return STATUS_ERROR;
+ }
+ }
+ else if (!strcmp(conf->key,LIFECHECK_TIMEOUT_TAG))
+ {
+ /* get lifecheck timeout */
+ PGR_Lifecheck_Timeout = PGRget_time_value(conf->value);
+ if ((PGR_Lifecheck_Timeout < 1) || (PGR_Lifecheck_Timeout > 3600))
+ {
+ fprintf(stderr,"%s is out of range. It should be between 1sec-1hr.\n",LIFECHECK_TIMEOUT_TAG);
+ return STATUS_ERROR;
+ }
+ }
+ else if (!strcmp(conf->key,LIFECHECK_INTERVAL_TAG))
+ {
+ /* get lifecheck interval */
+ PGR_Lifecheck_Interval = PGRget_time_value(conf->value);
+ if ((PGR_Lifecheck_Interval < 1) || (PGR_Lifecheck_Interval > 3600))
+ {
+ fprintf(stderr,"%s is out of range. It should between 1sec-1hr.\n",LIFECHECK_INTERVAL_TAG);
+ return STATUS_ERROR;
+ }
+ }
+ conf = (ConfDataType *)conf->next;
+ }
+ TransactionSock = -1;
+ ReplicateCurrentTime = (ReplicateNow *)malloc(sizeof(ReplicateNow));
+ if (ReplicateCurrentTime == (ReplicateNow *)NULL)
+ {
+ return STATUS_ERROR;
+ }
+ memset(ReplicateCurrentTime,0,sizeof(ReplicateNow));
+
+ PGRCopyData = (CopyData *)malloc(sizeof(CopyData));
+ if (PGRCopyData == (CopyData *)NULL)
+ {
+ return STATUS_ERROR;
+ }
+ memset(PGRCopyData,0,sizeof(CopyData));
+
+ if (PGR_Not_Replicate_Rec_Num == 0)
+ {
+ free(PGR_Not_Replicate);
+ PGR_Not_Replicate = NULL;
+ }
+ else
+ {
+ qsort((char *)PGR_Not_Replicate,PGR_Not_Replicate_Rec_Num,sizeof(PGR_Not_Replicate_Type), (int (*)(const void*,const void*))Comp_Not_Replicate);
+ }
+
+ PGRSelfHostName = malloc(HOSTNAME_MAX_LENGTH);
+ if (PGRSelfHostName == NULL)
+ {
+ return STATUS_ERROR;
+ }
+ memset(PGRSelfHostName,0,HOSTNAME_MAX_LENGTH);
+
+ PGR_password = malloc(sizeof(PGR_Password_Info));
+ if (PGR_password == NULL)
+ {
+ return STATUS_ERROR;
+ }
+ memset(PGR_password,0,sizeof(PGR_Password_Info));
+ PGR_password->password = malloc(PASSWORD_MAX_LENGTH);
+ if (PGR_password->password == NULL)
+ {
+ return STATUS_ERROR;
+ }
+ memset(PGR_password->password,0,PASSWORD_MAX_LENGTH);
+
+ if (HostName[0] == 0)
+ {
+ if (gethostname(HostName,HOSTNAME_MAX_LENGTH) < 0)
+ {
+ return STATUS_ERROR;
+ }
+ }
+ ip=PGRget_ip_by_name(HostName);
+
+ sprintf(PGRSelfHostName,
+ "%d.%d.%d.%d",
+ (ip ) & 0xff ,
+ (ip >> 8) & 0xff ,
+ (ip >> 16) & 0xff ,
+ (ip >> 24) & 0xff );
+ if (RsyncPath == NULL)
+ {
+ RsyncPath = strdup(DEFAULT_RSYNC);
+ }
+ if (PgDumpPath == NULL)
+ {
+ PgDumpPath = strdup(DEFAULT_PG_DUMP);
+ }
+
+ return (check_conf_data());
+}
+
+/* check_conf_data: validate the values loaded from the configuration.
+ * Returns STATUS_OK when every check passes; otherwise reports the first
+ * problem on stderr and returns STATUS_ERROR. */
+static int
+check_conf_data(void)
+{
+	ReplicateServerInfo *sp;
+	for (sp = ReplicateServerData; sp->useFlag != DATA_END; sp++)
+	{
+		if (*(sp->hostName) == 0)
+		{
+			fprintf(stderr,"Hostname of replication server is not valid.\n");
+			return STATUS_ERROR;
+		}
+		if (sp->portNumber < 1024)
+		{
+			fprintf(stderr,"Replication Port of replication server is not valid. It must be 1024 or larger.\n");
+			return STATUS_ERROR;
+		}
+		if (sp->recoveryPortNumber < 1024)
+		{
+			fprintf(stderr,"RecoveryPort of replication server is not valid. It must be 1024 or larger.\n");
+			return STATUS_ERROR;
+		}
+		if (sp->portNumber == sp->recoveryPortNumber)
+		{
+			fprintf(stderr,"Replication Port and RecoveryPort is conflicted.\n");
+			return STATUS_ERROR;
+		}
+	}
+	if (RecoveryPortNumber < 1024)
+	{
+		fprintf(stderr,"RecoveryPort of Cluster DB is not valid. It must be 1024 or larger.\n");
+		return STATUS_ERROR;
+	}
+	if (PGR_Stand_Alone == NULL)
+	{
+		fprintf(stderr,"Stand Alone Mode is not specified.\n");
+		return STATUS_ERROR;
+	}
+	if (RsyncOption == NULL)
+	{
+		fprintf(stderr,"Option of rsync command is not specified.\n");
+		return STATUS_ERROR;
+	}
+	if (*PGRSelfHostName == '\0')
+	{
+		fprintf(stderr,"Hostname of Cluster DB is not valid.\n");
+		return STATUS_ERROR;
+	}
+	if (PGR_Lifecheck_Timeout > PGR_Lifecheck_Interval)
+	{
+		fprintf(stderr,"The lifecheck timeout(%d) should not be longer than interval(%d).\n",PGR_Lifecheck_Timeout,PGR_Lifecheck_Interval);
+		return STATUS_ERROR;
+	}
+	return STATUS_OK;
+}
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *	PGR_Set_Replicate_Server_Socket()
+ * NOTES
+ *	Try to open a socket to every entry of the ReplicateServerData
+ *	table, stopping at the DATA_END sentinel
+ * ARGS
+ *	void
+ * RETURN
+ *	OK: STATUS_OK
+ *	NG: STATUS_ERROR
+ *--------------------------------------------------------------------
+ */
+int
+PGR_Set_Replicate_Server_Socket(void)
+{
+	ReplicateServerInfo * server;
+
+	if (ReplicateServerData == NULL)
+		return STATUS_ERROR;
+	for (server = ReplicateServerData; server->useFlag != DATA_END; server++)
+	{
+		/* start from "no socket"; the result of the connect call is not checked */
+		server->sock = -1;
+		PGR_Create_Socket_Connect(&(server->sock),server->hostName,server->portNumber);
+	}
+	return STATUS_OK;
+}
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ * PGR_get_replicate_server_socket()
+ * NOTES
+ * return the cached TransactionSock, or connect it to a replication server
+ * ARGS
+ * ReplicateServerInfo * sp: replication server data (I)
+ * int socket_type: socket type (I); not referenced in the function body
+ * -PGR_TRANSACTION_SOCKET:
+ * -PGR_QUERY_SOCKET:
+ * RETURN
+ * OK: >0(socket)
+ * NG: -1
+ *--------------------------------------------------------------------
+ */
+int
+PGR_get_replicate_server_socket ( ReplicateServerInfo * sp , int socket_type )
+{
+ ReplicateServerInfo * tmp;
+ tmp = sp;
+ if (tmp == (ReplicateServerInfo *) NULL)
+ {
+ return -1;
+ }
+ if (tmp->hostName[0] == '\0') /* an entry without a host name cannot be connected */
+ {
+ return -1;
+ }
+ /* an already opened transaction socket is reused as it is */
+ if (TransactionSock != -1)
+ {
+ return TransactionSock;
+ }
+ /* connect; after each failure mark the server DATA_ERR and try the one PGR_get_replicate_server_info() offers */
+ while(PGR_Create_Socket_Connect(&TransactionSock,tmp->hostName,tmp->portNumber) != STATUS_OK)
+ {
+ close(TransactionSock);
+ TransactionSock = -1;
+ PGR_Set_Replication_Server_Status(tmp, DATA_ERR);
+ usleep(20);
+ tmp = PGR_get_replicate_server_info();
+ if (tmp == (ReplicateServerInfo *)NULL) /* no candidate left */
+ {
+ return -1;
+ }
+ PGR_Set_Replication_Server_Status(tmp, DATA_USE);
+ usleep(10);
+ }
+ return TransactionSock;
+}
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ * close_replicate_server_socket()
+ * NOTES
+ * close the socket(s) to the replication server and set its status to DATA_INIT
+ * ARGS
+ * ReplicateServerInfo * sp: replication server data (I)
+ * int socket_type: socket type (I)
+ * -PGR_TRANSACTION_SOCKET:
+ * -PGR_QUERY_SOCKET:
+ * RETURN
+ * OK: STATUS_OK
+ * NG: STATUS_ERROR
+ *--------------------------------------------------------------------
+ */
+static int
+close_replicate_server_socket ( ReplicateServerInfo * sp , int socket_type )
+{
+ if (sp == (ReplicateServerInfo *)NULL )
+ {
+ return STATUS_ERROR;
+ }
+ if (sp->hostName[0] == '\0')
+ {
+ return STATUS_ERROR;
+ }
+ if (TransactionSock != -1) /* the shared transaction socket is closed whatever socket_type is */
+ {
+ PGR_Close_Sock(&(TransactionSock));
+ TransactionSock = -1;
+ }
+ switch (socket_type)
+ {
+ case PGR_TRANSACTION_SOCKET:
+ if (TransactionSock != -1) /* always false here: TransactionSock was reset to -1 just above */
+ {
+ PGR_Close_Sock(&(TransactionSock));
+ }
+ TransactionSock = -1;
+ sp->sock = -1; /* only forgotten, not closed, in this case */
+ break;
+ case PGR_QUERY_SOCKET:
+ if (sp->sock != -1)
+ {
+ PGR_Close_Sock(&(sp->sock));
+ }
+ sp->sock = -1;
+ break;
+ }
+ PGR_Set_Replication_Server_Status(sp, DATA_INIT);
+ return STATUS_OK;
+}
+
+/* Two entries are the same server when host name, port and recovery port all match. */
+static bool
+is_same_replication_server(ReplicateServerInfo * sp1, ReplicateServerInfo * sp2 )
+{
+	bool same_host;
+	bool same_ports;
+
+	/* a missing entry never matches anything */
+	if (sp1 == NULL || sp2 == NULL)
+		return false;
+	same_host = (strcmp(sp1->hostName,sp2->hostName) == 0);
+	same_ports = (sp1->portNumber == sp2->portNumber) &&
+				 (sp1->recoveryPortNumber == sp2->recoveryPortNumber);
+	return (same_host && same_ports) ? true : false;
+}
+
+static ReplicateServerInfo *
+search_new_replication_server ( ReplicateServerInfo * sp , int socket_type )
+{
+ ReplicateHeader dummy_header;
+ ReplicateServerInfo * rs_tbl;
+ char command[256];
+ int sock = -1;
+ int cnt = 0;
+ /* Close sp's socket, then walk the server table (wrapping at DATA_END) for a server that accepts the change-server notice; NULL if none does */
+ if ((ReplicateServerData == NULL) || ( sp == NULL))
+ {
+ return NULL;
+ }
+ rs_tbl = sp; /* remember where the scan started */
+ close_replicate_server_socket ( sp , socket_type);
+ sp ++; /* begin with the entry after the starting one */
+ while (is_same_replication_server(sp,rs_tbl) != true) /* stop once the scan is back at the starting entry */
+ {
+ if (sp->useFlag == DATA_END) /* past the last entry: wrap to the top of the table */
+ {
+ sp = ReplicateServerData;
+ }
+ sock = PGR_get_replicate_server_socket( sp , socket_type);
+ if (sock < 0 )
+ {
+ if (is_same_replication_server(sp,rs_tbl) == true)
+ {
+ return NULL;
+ }
+ else
+ {
+ sp++;
+ }
+ continue;
+ }
+ memset(&dummy_header, 0, sizeof(ReplicateHeader));
+ memset(command,0,sizeof(command));
+ snprintf(command,sizeof(command)-1,"SELECT %s(%d,%s,%d,%d)", /* system call naming this server's host, port and recovery port */
+ PGR_SYSTEM_COMMAND_FUNC,
+ PGR_CHANGE_REPLICATION_SERVER_FUNC_NO,
+ sp->hostName,
+ sp->portNumber,
+ sp->recoveryPortNumber);
+ dummy_header.cmdSys = CMD_SYS_CALL;
+ dummy_header.cmdSts = CMD_STS_NOTICE;
+ dummy_header.query_size = htonl(strlen(command));
+ if (send_replicate_packet(sock,&dummy_header,command) != STATUS_OK)
+ {
+ cnt ++;
+ close_replicate_server_socket ( sp , socket_type);
+ PGR_Set_Replication_Server_Status(sp, DATA_ERR);
+ }
+ else
+ {
+ PGR_Set_Replication_Server_Status(sp, DATA_USE); /* this server took the notice: it is the new one */
+ return sp;
+ }
+ if (cnt > MAX_RETRY_TIMES ) /* too many failed sends to this server: move on to the next entry */
+ {
+ sp++;
+ cnt = 0;
+ }
+ else
+ {
+ continue; /* try the same entry again */
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Copy the position-th (1-based) word of query into table_name; words are
+ * separated by white space and also end at ';' or '('. The caller must
+ * supply a buffer large enough for that word: no bound is checked here.
+ */
+static int
+get_table_name(char * table_name, char * query, int position )
+{
+	int i,wc;
+	char * p;
+	char * sp;
+	int length;
+
+	if ((table_name == NULL) || (query == NULL) || (position < 1))
+		return STATUS_ERROR;
+	length = strlen(query);
+	p = query;
+	wc = 1;
+	sp = table_name;
+	for (i = 0 ; i < length ; i ++)
+	{
+		/* <ctype.h> wants an unsigned char value: a negative plain char (byte >= 0x80) is undefined there */
+		while(isspace((unsigned char)*p))
+		{
+			p++;
+			i++;
+		}
+		while((*p != '\0') && (! isspace((unsigned char)*p)))
+		{
+			if ((*p == ';') || (*p == '('))
+				break;
+			if (wc == position)
+				*sp++ = *p;
+			p++;
+			i++;
+		}
+		if (wc == position)
+		{
+			*sp = '\0';
+			break;
+		}
+		wc++;
+	}
+	return STATUS_OK;
+}
+
+/*
+ * True when the table named by this statement is listed in PGR_Not_Replicate
+ * for the current database. The table word is first extracted into a scratch
+ * buffer as long as the query, so an over-long word cannot overrun key.table_name.
+ */
+static bool
+is_not_replication_query(char * query_string, int query_len, char cmdType)
+{
+	PGR_Not_Replicate_Type key;
+	char * word;
+	int position;
+	bool found = false;
+	if (PGR_Not_Replicate_Rec_Num <= 0)
+		return false;
+	if (query_string == NULL)
+		return true;
+	/* word number of the table name: INSERT INTO t / DELETE FROM t -> 3, UPDATE t / COPY t -> 2 */
+	if ((cmdType == CMD_TYPE_INSERT) || (cmdType == CMD_TYPE_DELETE))
+		position = 3;
+	else if ((cmdType == CMD_TYPE_UPDATE) || (cmdType == CMD_TYPE_COPY))
+		position = 2;
+	else
+		return false;
+	word = malloc(strlen(query_string) + 1);	/* no word is longer than the whole query */
+	if (word == NULL)
+		return false;
+	word[0] = '\0';
+	memset(&key,0,sizeof(PGR_Not_Replicate_Type));
+	strncpy(key.db_name ,(char *)(MyProcPort->database_name),sizeof(key.db_name)-1);
+	if ((get_table_name(word,query_string,position) == STATUS_OK) && (strlen(word) < sizeof(key.table_name)))
+	{
+		strcpy(key.table_name,word);	/* fits: length was checked just above */
+		found = (bsearch((void*)&key,(void*)PGR_Not_Replicate,PGR_Not_Replicate_Rec_Num,sizeof(PGR_Not_Replicate_Type), (int (*)(const void*,const void*))Comp_Not_Replicate) != NULL);
+	}
+	free(word);
+	return found;
+}
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ * PGR_Send_Replicate_Command()
+ * NOTES
+ * send a query to the replication server and return the server's answer
+ * ARGS
+ * char * query_string, int query_len: query text and its length (I)
+ * char cmdSts: command status, copied into the packet header (I)
+ * char cmdType: command type, copied into the packet header (I)
+ * RETURN
+ * OK: malloc'ed answer buffer (the caller must free it)
+ * NG: NULL (also returned for a query that must not be replicated)
+ *--------------------------------------------------------------------
+ */
+char *
+PGR_Send_Replicate_Command(char * query_string, int query_len, char cmdSts ,char cmdType)
+{
+ int sock = -1;
+ int cnt = 0;
+ ReplicateHeader header;
+ char * serverName = NULL;
+ int portNumber=0;
+ char * result = NULL;
+ ReplicateServerInfo * sp = NULL;
+ ReplicateServerInfo * base = NULL;
+ int socket_type = 0;
+ char argv[ PGR_CMD_ARG_NUM ][256];
+ int argc = 0;
+ int func_no = 0;
+ int check_flag =0;
+ bool in_transaction = false;
+ /* The packet is sent twice: first as CMD_SYS_PREREPLICATE and, once the server */
+ /* answered with a non-zero function number, again as CMD_SYS_REPLICATE. */
+ /*
+ * check query string
+ */
+ if ((query_string == NULL) ||
+ (query_len < 0))
+ {
+ return NULL;
+ }
+ /* check not replication query */
+ if (is_not_replication_query(query_string, query_len, cmdType) == true)
+ {
+ PGR_Copy_Data_Need_Replicate = false;
+ return NULL;
+ }
+
+ if ((cmdSts == CMD_STS_TRANSACTION ) ||
+ (cmdSts == CMD_STS_SET_SESSION_AUTHORIZATION ) ||
+ (cmdSts == CMD_STS_TEMP_TABLE ))
+ {
+ socket_type = PGR_TRANSACTION_SOCKET ;
+ }
+ else
+ {
+ socket_type = PGR_QUERY_SOCKET ;
+ }
+ /* inside a transaction every command except BEGIN and ROLLBACK counts as "in transaction" */
+ if(cmdSts==CMD_STS_TRANSACTION
+ && (cmdType!=CMD_TYPE_BEGIN && cmdType!=CMD_TYPE_ROLLBACK))
+ {
+ in_transaction = true;
+ }
+
+ sp = PGR_get_replicate_server_info();
+ if (sp == NULL)
+ {
+ if (Debug_pretty_print)
+ elog(DEBUG1,"PGR_get_replicate_server_info get error");
+ return NULL;
+ }
+ sock = PGR_get_replicate_server_socket( sp , socket_type);
+ if (sock < 0)
+ {
+ if (Debug_pretty_print)
+ elog(DEBUG1,"PGR_get_replicate_server_socket fail");
+ return NULL;
+ }
+ result = malloc(PGR_MESSAGE_BUFSIZE + 4);
+ if (result == NULL)
+ {
+ return NULL;
+ }
+
+ serverName = sp->hostName;
+ portNumber = (int)sp->portNumber;
+ memset(&header,0,sizeof(ReplicateHeader));
+ /* multi-byte header fields are stored in network byte order */
+ header.cmdSts = cmdSts;
+ header.cmdType = cmdType;
+ header.port = htons(PostPortNumber);
+ header.pid = htons(getpid());
+ header.query_size = htonl(query_len);
+ strncpy(header.dbName ,(char *)(MyProcPort->database_name),sizeof(header.dbName)-1);
+ strncpy(header.userName , (char *)(MyProcPort->user_name),sizeof(header.userName)-1);
+ strncpy(header.password , PGR_password->password, PASSWORD_MAX_LENGTH );
+ memcpy(header.md5Salt ,MyProcPort->md5Salt, sizeof(header.md5Salt));
+ memcpy(header.cryptSalt ,MyProcPort->cryptSalt, sizeof(header.cryptSalt));
+ header.request_id = htonl(get_next_request_id());
+ header.rlog = 0;
+
+ if (PGRSelfHostName != NULL)
+ {
+ strncpy(header.from_host, PGRSelfHostName, HOSTNAME_MAX_LENGTH);
+ }
+
+ base = sp; /* starting point handed to get_new_replication_socket() on failover */
+ PGR_Sock_To_Replication_Server = sock;
+
+retry_send_prereplicate_packet:
+
+ memset(result,0,PGR_MESSAGE_BUFSIZE + 4);
+ cnt = 0;
+ header.cmdSys=CMD_SYS_PREREPLICATE;
+
+ while (send_replicate_packet(sock,&header,query_string) != STATUS_OK)
+ {
+ cnt++;
+ if (cnt >= MAX_RETRY_TIMES ) /* give up on this server and look for another one */
+ {
+ sock = get_new_replication_socket( base, sp, socket_type);
+ if (sock < 0)
+ {
+ if (Debug_pretty_print)
+ elog(DEBUG1,"all replication servers may be down");
+ PGR_Stand_Alone->is_stand_alone = true;
+ if (cmdSts == CMD_STS_TRANSACTION )
+ {
+ strcpy(result,PGR_REPLICATION_ABORT_MSG);
+ return result;
+ }
+ free(result);
+ result = NULL;
+ return NULL;
+
+ }
+ if(in_transaction)
+ {
+ free(result); /* elog(ERROR) never returns, so the buffer must be released first */
+ elog(ERROR,"replicate server down during replicating transaction. aborted.");
+ return NULL; /* not reached */
+ }
+ PGR_Sock_To_Replication_Server = sock;
+ cnt = 0;
+ }
+ }
+
+ memset(result,0,PGR_MESSAGE_BUFSIZE);
+ if (PGR_recv_replicate_result(sock,result,0) < 0)
+ {
+
+ sock = get_new_replication_socket( base, sp, socket_type);
+ if (sock < 0)
+ {
+ if (Debug_pretty_print)
+ elog(DEBUG1,"all replication servers may be down");
+ PGR_Stand_Alone->is_stand_alone = true;
+
+ if (cmdSts == CMD_STS_TRANSACTION )
+ {
+ strcpy(result,PGR_REPLICATION_ABORT_MSG);
+ return result;
+ }
+ if(result!=NULL) {
+ free(result);
+ result = NULL;
+ }
+ return NULL;
+ }
+ PGR_Sock_To_Replication_Server = sock;
+ /* replication server should be down */
+
+ if(in_transaction)
+ {
+ free(result); /* elog(ERROR) never returns, so the buffer must be released first */
+ elog(ERROR,"replicate server down during replicating transaction. aborted.");
+ return NULL; /* not reached */
+ }
+
+ goto retry_send_prereplicate_packet;
+ }
+
+ /* the first argument of the answer is a function number */
+ argc = set_command_args(argv,result);
+ func_no=atoi(argv[0]);
+ if(func_no==0) {
+ /* this server is not primary replicate server*/
+ sock=-1; /* makes the next send fail at once, which leads to the failover path above */
+ goto retry_send_prereplicate_packet;
+ }
+retry_send_replicate_packet:
+
+ memset(result,0,PGR_MESSAGE_BUFSIZE + 4);
+ cnt = 0;
+ header.cmdSys = CMD_SYS_REPLICATE;
+ while (send_replicate_packet(sock,&header,query_string) != STATUS_OK)
+ {
+ if (cnt > MAX_RETRY_TIMES )
+ {
+ sock = get_new_replication_socket( base, sp, socket_type);
+ if (sock < 0)
+ {
+ if (Debug_pretty_print)
+ elog(DEBUG1,"all replication servers may be down");
+ PGR_Stand_Alone->is_stand_alone = true;
+ if (cmdSts == CMD_STS_TRANSACTION )
+ {
+ strcpy(result,PGR_REPLICATION_ABORT_MSG);
+ return result;
+ }
+ free(result);
+ result = NULL;
+ return NULL;
+
+ }
+ PGR_Sock_To_Replication_Server = sock;
+ header.rlog = CONNECTION_SUSPENDED_TYPE;
+ cnt = 0;
+ }
+ cnt ++;
+ }
+
+ memset(result,0,PGR_MESSAGE_BUFSIZE);
+ if (PGR_recv_replicate_result(sock,result,0) < 0)
+ {
+ /* replication server should be down */
+ sock = get_new_replication_socket( base, sp, socket_type);
+ if (sock < 0)
+ {
+ if (Debug_pretty_print)
+ elog(DEBUG1,"all replication servers may be down");
+ PGR_Stand_Alone->is_stand_alone = true;
+
+ if (cmdSts == CMD_STS_TRANSACTION )
+ {
+ strcpy(result,PGR_REPLICATION_ABORT_MSG);
+ return result;
+ }
+ if(result!=NULL) {
+ free(result);
+ result = NULL;
+ }
+ return NULL;
+ }
+ PGR_Sock_To_Replication_Server = sock;
+ header.rlog = CONNECTION_SUSPENDED_TYPE;
+
+ goto retry_send_replicate_packet;
+ }
+ /* act on the function number carried by the answer */
+ argc = set_command_args(argv,result);
+ if (argc >= 1)
+ {
+ func_no = atoi(argv[0]);
+ if (func_no == PGR_SET_CURRENT_TIME_FUNC_NO)
+ {
+ if(! in_transaction)
+ PGR_Set_Current_Time(argv[1],argv[2]);
+ set_replication_id(argv[3]);
+ set_response_mode(argv[4]);
+ PGR_Set_Current_Replication_Query_ID(argv[5]);
+ }
+ else if (func_no == PGR_NOTICE_DEADLOCK_DETECTION_FUNC_NO)
+ {
+ memset(result,0,PGR_MESSAGE_BUFSIZE);
+ strcpy(result,PGR_DEADLOCK_DETECTION_MSG);
+ }
+ else if (func_no == PGR_SET_CURRENT_REPLICATION_QUERY_ID_NO)
+ {
+ PGR_Set_Current_Replication_Query_ID(argv[1]);
+ }
+ else if (func_no == PGR_QUERY_CONFIRM_ANSWER_FUNC_NO)
+ {
+ check_flag = atoi(argv[1]);
+ if (check_flag == PGR_ALREADY_COMMITTED )
+ {
+ if(! in_transaction)
+ PGR_Set_Current_Time(argv[2],argv[3]);
+ set_replication_id(argv[4]);
+ }
+ else
+ {
+ if(! in_transaction)
+ PGR_Set_Current_Time(argv[1],argv[2]);
+ set_replication_id(argv[3]);
+ /* this query is not replicated */
+ /*
+ free(result);
+ return NULL;
+ */
+ }
+ }
+ }
+ return result;
+}
+/* PGRget_replication_id: return the replication id currently stored in ReplicationLog_Info. */
+uint32_t
+PGRget_replication_id(void)
+{
+ return (ReplicationLog_Info.PGR_Replicate_ID);
+}
+
+/*
+ * set_replication_id: record the replication id received from the
+ * replication server.
+ *
+ * id is a decimal string. NULL yields STATUS_ERROR; a value that parses
+ * to 0 is ignored and yields STATUS_OK. Otherwise the id is stored in
+ * ReplicationLog_Info and in the current replication server entry (when
+ * there is one) and STATUS_OK is returned.
+ */
+static int
+set_replication_id(char * id)
+{
+	uint32_t rid;
+	uint32_t saved_id;
+
+	if (id == NULL)
+		return STATUS_ERROR;
+
+	rid = (uint32_t)atol(id);
+	if (rid == 0)
+		return STATUS_OK;
+
+	/* set replicate id in this process */
+	needToUpdateReplicateIdOnNextQueryIsDone = true;
+	ReplicationLog_Info.PGR_Replicate_ID = rid;
+
+	if (CurrentReplicateServer == NULL)
+		PGR_get_replicate_server_info();
+	if (CurrentReplicateServer != NULL)
+	{
+		/* set replicate id in this system */
+		saved_id = CurrentReplicateServer->replicate_id;
+		/* both ids are uint32_t: print them unsigned, not with %d */
+		elog(DEBUG1, "replication id set from %u to %u", saved_id, rid);
+		CurrentReplicateServer->replicate_id = rid;
+	}
+	return STATUS_OK;
+}
+
+
+/* get_next_request_id: step the request counter kept in
+ * ReplicationLog_Info.PGR_Request_ID and return the new value. */
+static unsigned int
+get_next_request_id(void)
+{
+	/* start over from 0 rather than let the counter reach PGR_MAX_COUNTER */
+	if (ReplicationLog_Info.PGR_Request_ID +1 >= PGR_MAX_COUNTER)
+	{
+		ReplicationLog_Info.PGR_Request_ID = 0;
+		return ReplicationLog_Info.PGR_Request_ID ;
+	}
+	ReplicationLog_Info.PGR_Request_ID ++;
+	return ReplicationLog_Info.PGR_Request_ID ;
+}
+
+static bool
+is_this_query_replicated(char * id)
+{
+ uint32_t replicate_id = 0;
+ uint32_t saved_id = 0;
+ int32_t diff=0;
+ ReplicateServerInfo * replicate_server_info = NULL;
+ /* true when id is logically older than the newest replication id known here; false for a NULL id or when no server entry is available */
+ if (id == NULL)
+ {
+ return false;
+ }
+ replicate_id = (uint32_t)atol(id);
+ elog(DEBUG1, "check for replication id , input=%u", replicate_id);
+
+ if (CurrentReplicateServer == NULL)
+ {
+ PGR_get_replicate_server_info();
+ }
+ /* prefer the current server entry, fall back to the last one */
+ if (CurrentReplicateServer != NULL)
+ {
+ replicate_server_info = CurrentReplicateServer;
+ }
+ else if (LastReplicateServer != NULL)
+ {
+ replicate_server_info = LastReplicateServer;
+ }
+ if (replicate_server_info != NULL)
+ {
+ /* use the larger of the server entry's id and the id in ReplicationLog_Info */
+ saved_id=replicate_server_info->replicate_id;
+ saved_id = saved_id < ReplicationLog_Info.PGR_Replicate_ID
+ ? ReplicationLog_Info.PGR_Replicate_ID
+ : saved_id;
+
+ elog(DEBUG1, "check for replication id , now=%u", saved_id);
+ /* check replicate_id < saved_id logically
+ *
+ * see also:
+ * backend/transam/transam.c#TransactionIdPrecedes
+ */
+
+ diff = (int32) (saved_id-replicate_id); /* unsigned difference reinterpreted as signed */
+ return (diff > 0);
+ }
+ elog(DEBUG1, "check for replication id check failed. no replication server");
+ return false;
+}
+
+
+static int
+get_new_replication_socket( ReplicateServerInfo * base, ReplicateServerInfo * sp, int socket_type)
+{
+ int sock = -1;
+ /* Give up on sp (close its socket, mark it DATA_ERR) and return a socket to the server found by search_new_replication_server(base, ...); -1 if there is none */
+ if (( base == NULL) ||
+ ( sp == NULL))
+ {
+ return -1;
+ }
+ close_replicate_server_socket ( sp , socket_type);
+ PGR_Set_Replication_Server_Status(sp, DATA_ERR);
+ sp = search_new_replication_server(base, socket_type); /* the search starts from base, not from the failed sp */
+ if (sp == NULL)
+ {
+ if (Debug_pretty_print)
+ elog(DEBUG1,"all replication servers may be down");
+ PGR_Stand_Alone->is_stand_alone = true; /* no server left: switch to stand-alone */
+ return -1;
+ }
+ sock = PGR_get_replicate_server_socket( sp , socket_type);
+ return sock;
+}
+
+
+/*
+ * PGR_recv_replicate_result: wait until sock is readable, then read one
+ * answer into result with recv_message().
+ *
+ * user_timeout is the wait in seconds; 0 selects PGR_Replication_Timeout.
+ * Returns the value of recv_message(), or -1 for a bad argument, a
+ * select() error or a timeout.
+ */
+int
+PGR_recv_replicate_result(int sock,char * result,int user_timeout)
+{
+	fd_set rmask;
+	struct timeval timeout;
+	int rtn;
+
+	/* FD_SET() on a negative descriptor is undefined */
+	if ((result == NULL) || (sock < 0))
+		return -1;
+
+	/* Wait for something to happen. */
+	for (;;)
+	{
+		timeout.tv_sec = (user_timeout == 0) ? PGR_Replication_Timeout : user_timeout;
+		timeout.tv_usec = 0;
+		FD_ZERO(&rmask);
+		FD_SET(sock,&rmask);
+		rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
+		if (rtn == 0)
+			return -1;	/* timed out: select() does not set errno here, so a stale EINTR must not cause a retry */
+		if (rtn < 0)
+		{
+			if (errno != EINTR)
+				return -1;
+			continue;	/* interrupted by a signal: wait again */
+		}
+		if (FD_ISSET(sock, &rmask))
+			return (recv_message(sock, result,0));
+	}
+	return -1;
+}
+
+/*
+ * recv_message: read exactly PGR_MESSAGE_BUFSIZE bytes from sock into buf
+ * (flag goes to recv() unchanged; EINTR/EAGAIN are retried). Returns the
+ * byte count, or -1 on a recv() error or when the peer closes early.
+ */
+static int
+recv_message(int sock,char * buf,int flag)
+{
+	int r;
+	int read_size = 0;
+
+	for (;;)
+	{
+		r = recv(sock,buf + read_size ,PGR_MESSAGE_BUFSIZE - read_size, flag);
+		if (r > 0)
+		{
+			read_size += r;
+			if (read_size == PGR_MESSAGE_BUFSIZE)
+				return read_size;
+			continue;		/* short read: keep filling the buffer */
+		}
+		if (r == 0)
+		{
+			elog(DEBUG1, "recv_message():unexpected EOF");
+			return -1;
+		}
+		if (errno == EINTR || errno == EAGAIN)
+			continue;
+		elog(DEBUG1, "recv_message():recv failed");
+		return -1;
+	}
+	return -1;
+}
+
+/*
+ * send_replicate_packet: send header followed by the query text to sock.
+ *
+ * The number of query bytes is taken from header->query_size (network
+ * byte order). Returns STATUS_OK once the whole packet has been written;
+ * STATUS_ERROR for a bad argument, a failed allocation, a select() or
+ * send() error, or a send() that transfers nothing.
+ * The packet buffer is released on every return path.
+ */
+static int
+send_replicate_packet(int sock,ReplicateHeader * header, char * query_string)
+{
+	int s = 0;
+	char * buf = NULL;
+	int send_size = 0;
+	int buf_size = 0;
+	int header_size = 0;
+	int rtn = 0;
+	int status = STATUS_ERROR;
+	fd_set wmask;
+	struct timeval timeout;
+	int query_size = 0;
+
+	/* check parameter */
+	if ((sock < 0) || (header == NULL))
+	{
+		return STATUS_ERROR;
+	}
+
+	query_size = ntohl(header->query_size);
+	header_size = sizeof(ReplicateHeader);
+	/* 4 spare bytes are allocated behind the packet; they are not sent */
+	buf_size = header_size + query_size + 4;
+	buf = malloc(buf_size);
+	if (buf == NULL)
+	{
+		return STATUS_ERROR;
+	}
+	memset(buf,0,buf_size);
+	buf_size -= 4;
+	memcpy(buf,header,header_size);
+	if (query_string != NULL)
+	{
+		/* query_size+1: the byte after the query text is copied as well */
+		memcpy((char *)(buf+header_size),query_string,query_size+1);
+	}
+
+	/*
+	 * Wait until the socket is writable, then send what is still left.
+	 */
+	for (;;)
+	{
+		timeout.tv_sec = PGR_Replication_Timeout;
+		timeout.tv_usec = 0;
+
+		FD_ZERO(&wmask);
+		FD_SET(sock,&wmask);
+		rtn = select(sock+1, (fd_set *)NULL, &wmask, (fd_set *)NULL, &timeout);
+		if (rtn < 0)
+		{
+			if (errno == EINTR)
+				continue;
+			elog(DEBUG1, "send_replicate_packet():select() failed");
+			break;
+		}
+		if ((rtn == 0) || !FD_ISSET(sock, &wmask))
+		{
+			/* not writable yet: wait again */
+			continue;
+		}
+
+		s = send(sock,buf + send_size,buf_size - send_size ,0);
+		if (s < 0)
+		{
+			if (errno == EINTR || errno == EAGAIN)
+				continue;
+			/* EPIPE || ENCONNREFUSED || ENSOCK || EHOSTUNREACH */
+			elog(DEBUG1, "send_replicate_packet():send error");
+			break;
+		}
+		if (s == 0)
+		{
+			elog(DEBUG1, "send_replicate_packet():unexpected EOF");
+			break;
+		}
+		send_size += s;
+		if (send_size == buf_size)
+		{
+			status = STATUS_OK;
+			break;
+		}
+	}
+	/* single exit: the select() and send() error paths used to leak buf */
+	free(buf);
+	return status;
+}
+
+bool
+PGR_Is_Replicated_Command(char * query)
+{
+ /* thin wrapper: the answer is whatever PGR_Is_System_Command() returns for this query */
+ return (PGR_Is_System_Command(query));
+}
+
+int
+Xlog_Check_Replicate(int operation)
+{
+	/* while the cluster is in recovery nothing is handed to replication */
+	if (PGR_Get_Cluster_Status() == STATUS_RECOVERY)
+		return STATUS_OK;
+	switch (operation)
+	{
+		case CMD_UTILITY:
+		case CMD_INSERT:
+		case CMD_UPDATE:
+		case CMD_DELETE:
+			return PGR_Replicate_Function_Call();
+		default:
+			return STATUS_OK;
+	}
+}
+
+int
+PGR_Replicate_Function_Call(void)
+{
+ char *result = NULL;
+ int status = STATUS_OK;
+ /* Send the pending Query_String (if any) to the replication server; returns STATUS_OK, STATUS_ERROR or STATUS_DEADLOCK_DETECT */
+ if ((PGR_Get_Cluster_Status() == STATUS_RECOVERY) ||
+ (PGR_Stand_Alone == NULL))
+ {
+ return STATUS_OK;
+ }
+ if (Query_String != NULL)
+ {
+ if (PGR_Is_Stand_Alone() == true)
+ {
+ if (PGR_Stand_Alone->permit == PERMIT_READ_ONLY) /* stand-alone and read-only: the query is refused */
+ {
+ Query_String = NULL;
+ return STATUS_ERROR;
+ }
+ }
+ PGR_Need_Notice = true;
+ PGR_Check_Lock.check_lock_conflict = true;
+ result = PGR_Send_Replicate_Command(Query_String,strlen(Query_String), CMD_STS_QUERY,CMD_TYPE_SELECT);
+ if (result != NULL)
+ {
+ PGR_Reload_Start_Time();
+ if (!strncmp(result,PGR_DEADLOCK_DETECTION_MSG,strlen(PGR_DEADLOCK_DETECTION_MSG)))
+ {
+ status = STATUS_DEADLOCK_DETECT;
+ }
+ free(result); /* the answer buffer is released here */
+ result = NULL;
+ }
+ else
+ {
+ status = STATUS_ERROR;
+ }
+ Query_String = NULL; /* the pending query is consumed whatever the outcome */
+ }
+ return status;
+}
+
+/*
+ * PGR_delete_shm: detach and remove the shared memory segments, close the
+ * transaction socket and free the heap data held by the global pointers.
+ */
+void
+PGR_delete_shm(void)
+{
+	if (ReplicateServerData != NULL)
+	{
+		shmdt(ReplicateServerData);
+		ReplicateServerData = NULL;
+		shmctl(ReplicateServerShmid,IPC_RMID,(struct shmid_ds *)NULL);
+	}
+	if (ClusterDBData != NULL)
+	{
+		shmdt(ClusterDBData);
+		ClusterDBData = NULL;
+		shmctl(ClusterDBShmid,IPC_RMID,(struct shmid_ds *)NULL);
+	}
+	if (TransactionSock != -1)
+	{
+		close(TransactionSock);
+		TransactionSock = -1;		/* do not leave a closed descriptor number behind */
+	}
+	if (RsyncPath != NULL)
+	{
+		free(RsyncPath);
+		RsyncPath = NULL;
+	}
+	if (RsyncOption != NULL)
+	{
+		free(RsyncOption);
+		RsyncOption = NULL;
+	}
+	/* PgDumpPath is heap allocated like the two rsync strings above */
+	if (PgDumpPath != NULL)
+	{
+		free(PgDumpPath);
+		PgDumpPath = NULL;
+	}
+	if (ReplicateCurrentTime != NULL)
+	{
+		free(ReplicateCurrentTime);
+		ReplicateCurrentTime = NULL;
+	}
+	if (PGRCopyData != NULL)
+	{
+		free (PGRCopyData);
+		PGRCopyData = NULL;
+	}
+	if (PGR_Stand_Alone != NULL)
+	{
+		free(PGR_Stand_Alone);
+		PGR_Stand_Alone = NULL;
+	}
+	if (PGR_Not_Replicate != NULL)
+	{
+		free(PGR_Not_Replicate);
+		PGR_Not_Replicate = NULL;
+	}
+	if (PGRSelfHostName != NULL)
+	{
+		free(PGRSelfHostName);
+		PGRSelfHostName = NULL;
+	}
+	if (PGR_password != NULL)
+	{
+		free(PGR_password->password);	/* free(NULL) is a no-op */
+		free(PGR_password);
+		PGR_password = NULL;
+	}
+}
+
+ReplicateServerInfo *
+PGR_get_replicate_server_info(void)
+{
+ /* Return the server to use: the entry already marked DATA_USE, else the first entry not marked DATA_ERR (it becomes DATA_USE); NULL plus stand-alone mode if none */
+ ReplicateServerInfo * sp;
+
+ if (ReplicateServerData == NULL)
+ {
+ return (ReplicateServerInfo *)NULL;
+ }
+ /* check current using replication server */
+ sp = PGR_check_replicate_server_info();
+ if (sp != NULL)
+ {
+ if (CurrentReplicateServer != NULL)
+ {
+ LastReplicateServer = CurrentReplicateServer;
+ CurrentReplicateServer->replicate_id = LastReplicateServer->replicate_id; /* NOTE(review): both pointers name the same entry here, so this copies the id onto itself - confirm the intent */
+ }
+ CurrentReplicateServer = sp;
+ return sp;
+ }
+ /* there is no used replication server */
+ /* however it may exist still in initial status */
+ sp = ReplicateServerData;
+ while (sp->useFlag != DATA_END)
+ {
+ if (sp->useFlag != DATA_ERR )
+ {
+ if (CurrentReplicateServer != NULL)
+ {
+ LastReplicateServer = CurrentReplicateServer;
+ CurrentReplicateServer->replicate_id = LastReplicateServer-> replicate_id; /* NOTE(review): self-assignment, see above */
+ }
+ CurrentReplicateServer = sp;
+ PGR_Set_Replication_Server_Status(sp, DATA_USE);
+ return sp;
+ }
+ sp++;
+ }
+ PGR_Stand_Alone->is_stand_alone = true; /* every entry is DATA_ERR: run stand-alone */
+ if (CurrentReplicateServer != NULL)
+ {
+ LastReplicateServer = CurrentReplicateServer;
+ CurrentReplicateServer->replicate_id = LastReplicateServer-> replicate_id; /* NOTE(review): self-assignment, see above */
+ }
+ CurrentReplicateServer = NULL;
+ return (ReplicateServerInfo *)NULL;
+}
+
+/*
+ * PGR_check_replicate_server_info: scan ReplicateServerData up to the
+ * DATA_END sentinel and return the first entry flagged DATA_USE.
+ * Returns NULL when the table is missing or no entry is in use.
+ */
+ReplicateServerInfo *
+PGR_check_replicate_server_info(void)
+{
+	ReplicateServerInfo * entry = ReplicateServerData;
+
+	if (entry == NULL)
+		return (ReplicateServerInfo *)NULL;
+	for ( ; entry->useFlag != DATA_END ; entry++)
+	{
+		if (entry->useFlag != DATA_USE )
+			continue;
+		return entry;
+	}
+	/* reached the sentinel without finding a server in use */
+	return NULL;
+}
+
+int
+PGR_Send_Copy(CopyData * copy,int end )
+{
+ /* Send the buffered COPY data as one replicate command and zero the CopyData afterwards; STATUS_OK if an answer came back, else STATUS_ERROR */
+ char cmdSts,cmdType;
+ char * p = NULL;
+ char *result = NULL;
+ char term[8];
+ /*int status = 0; */
+
+ if (copy == NULL)
+ {
+ return STATUS_ERROR;
+ }
+
+ cmdSts = CMD_STS_COPY;
+ /* transaction mode and session authorization override the plain COPY status; the latter wins */
+ if (Transaction_Mode > 0)
+ {
+ cmdSts = CMD_STS_TRANSACTION ;
+ }
+ if (Session_Authorization_Mode)
+ {
+ cmdSts = CMD_STS_SET_SESSION_AUTHORIZATION ;
+ }
+ cmdType = CMD_TYPE_COPY_DATA;
+
+ copy->copy_data[copy->cnt] = '\0';
+ if (end)
+ {
+ memset(term,0,sizeof(term));
+ term[0]='\\';
+ term[1]='.';
+ term[2]='\n';
+ /* term is now the three bytes backslash, dot, newline */
+ cmdType = CMD_TYPE_COPY_DATA_END;
+ p = NULL;
+ if (copy->cnt > 0)
+ {
+ copy->copy_data[copy->cnt] = '\0';
+ p = strstr(copy->copy_data,term);
+ if (p == NULL) /* terminator not in the data yet: it will overwrite the last buffered byte */
+ {
+ p = &(copy->copy_data[copy->cnt-1]);
+ copy->cnt--;
+ }
+ else
+ {
+ p = NULL; /* already terminated: nothing to add */
+ }
+ }
+ if (p != NULL)
+ {
+ strncpy(p,term,sizeof(term)); /* writes sizeof(term) = 8 bytes: the 3 terminator bytes plus NUL padding */
+ copy->cnt += 4;
+ }
+ }
+ result = PGR_Send_Replicate_Command(copy->copy_data, copy->cnt, cmdSts, cmdType);
+ memset(copy,0,sizeof(CopyData)); /* the buffer is emptied whether or not the send worked */
+
+ if (result != NULL)
+ {
+ PGR_Reload_Start_Time();
+ free(result);
+ result = NULL;
+ return STATUS_OK;
+ }
+ else
+ {
+ return STATUS_ERROR;
+ }
+}
+
+CopyData *
+PGR_Set_Copy_Data(CopyData * copy, char *str, int len,int end)
+{
+ CopyData save;
+ int save_len = 0;
+ int read_index = 0;
+ int send_size = 0;
+ int buf_size = 0;
+ int rest_len = 0;
+ int rest_buf_size = 0;
+ int status = STATUS_OK;
+ char * ep = NULL;
+ char term[4];
+
+ #define BUFF_OFFSET (8) /* bytes of copy_data left unused at the end of the buffer */
+ /* Append len bytes of str to copy, sending the buffer with PGR_Send_Copy() whenever it fills up and once more if end is set; NULL when nothing is to be replicated or the last send failed */
+ if ((PGR_Copy_Data_Need_Replicate == false) ||
+ (copy == NULL))
+ {
+ return (CopyData *)NULL;
+ }
+ memset(term,0,sizeof(term));
+ term[0]='\n';
+ term[1]='\\';
+ term[2]='.';
+ buf_size = COPYBUFSIZ - BUFF_OFFSET;
+ read_index = 0;
+ rest_len = len;
+ rest_buf_size = buf_size - copy->cnt;
+ while ((rest_len > 0) && (rest_buf_size > 0))
+ {
+ if (rest_buf_size < rest_len) /* copy only as much as still fits */
+ {
+ send_size = rest_buf_size;
+ rest_len -= send_size;
+ }
+ else
+ {
+ send_size = rest_len;
+ rest_len = 0;
+ }
+ memcpy(&(copy->copy_data[copy->cnt]) ,str + read_index ,send_size);
+ copy->cnt += send_size;
+ read_index += send_size;
+ rest_buf_size = buf_size - copy->cnt;
+ if (strstr(copy->copy_data,term) != NULL) /* stop once newline, backslash, dot is in the buffer */
+ {
+ break;
+ }
+ if (rest_buf_size <= 0) /* buffer full: send the complete lines, keep the unfinished one */
+ {
+ ep = strrchr(copy->copy_data,'\n');
+ if (ep != NULL)
+ {
+ *ep = '\0';
+ save_len = copy->cnt - strlen(copy->copy_data) -1; /* bytes after the last newline */
+ copy->cnt -= save_len ;
+ memset(&save,0,sizeof(CopyData));
+ memcpy(save.copy_data,(ep+1),save_len+1);
+ save.cnt = save_len;
+ *ep = '\n'; /* put the newline back and cut the buffer right after it */
+ *(ep+1) = '\0';
+ status = PGR_Send_Copy(copy,0);
+ memset(copy,0,sizeof(CopyData));
+ if (save_len > 0) /* restart the buffer with the saved tail */
+ {
+ memcpy(copy,&save,sizeof(CopyData));
+ }
+ rest_buf_size = buf_size - copy->cnt;
+
+ }
+ else
+ {
+ /* one record is bigger than COPYBUFSIZ */
+ /* buffer would be over flow*/
+ status = PGR_Send_Copy(copy,0);
+ memset(copy,0,sizeof(CopyData));
+ rest_buf_size = buf_size - copy->cnt;
+ }
+ }
+ }
+ if (end)
+ {
+ status = PGR_Send_Copy(copy,end);
+ memset(copy,0,sizeof(CopyData));
+ }
+ if (status != STATUS_OK) /* only the status of the most recent PGR_Send_Copy() call is examined */
+ {
+ return (CopyData *)NULL;
+ }
+ return copy;
+}
+/* PGR_replication: classify one statement by its command tag and the session state and, where required, send it to the replication server. dest and parsetree are not used here. Returns STATUS_ERROR (NULL argument or failed send), STATUS_NOT_REPLICATE, STATUS_DEADLOCK_DETECT, STATUS_REPLICATION_ABORT, STATUS_CONTINUE, or STATUS_CONTINUE_SELECT for a statement that was not sent. */
+int
+PGR_replication(char * query_string, CommandDest dest, Node *parsetree, const char * commandTag)
+{
+ char *result = NULL; /* malloc'ed reply of PGR_Send_Replicate_Command(), freed here */
+ char cmdSts = CMD_STS_OTHER;
+ char cmdType = CMD_TYPE_OTHER;
+ int query_len = 0;
+
+ if ((query_string == NULL) ||
+ (commandTag == NULL))
+ {
+ return STATUS_ERROR;
+ }
+
+ Query_String = NULL;
+ query_len = strlen(query_string);
+
+ /* save query data for retry */
+ PGR_Retry_Query.query_string = query_string;
+ PGR_Retry_Query.query_len = query_len;
+ PGR_Retry_Query.cmdSts = cmdSts; /* still CMD_STS_OTHER here */
+ PGR_Retry_Query.cmdType = cmdType; /* NOTE(review): still CMD_TYPE_OTHER; both fields are refreshed only on the transaction path below */
+ PGR_Retry_Query.useFlag = DATA_USE;
+ /* set cmdType */
+ if (!strcmp(commandTag,"BEGIN")) cmdType = CMD_TYPE_BEGIN ;
+ else if (!strcmp(commandTag,"COMMIT")) cmdType = CMD_TYPE_COMMIT ;
+ else if (!strcmp(commandTag,"SELECT")) cmdType = CMD_TYPE_SELECT ;
+ else if (!strcmp(commandTag,"INSERT")) cmdType = CMD_TYPE_INSERT ;
+ else if (!strcmp(commandTag,"UPDATE")) cmdType = CMD_TYPE_UPDATE ;
+ else if (!strcmp(commandTag,"DELETE")) cmdType = CMD_TYPE_DELETE ;
+ else if (!strcmp(commandTag,"VACUUM")) cmdType = CMD_TYPE_VACUUM ;
+ else if (!strcmp(commandTag,"ANALYZE")) cmdType = CMD_TYPE_ANALYZE ;
+ else if (!strcmp(commandTag,"REINDEX")) cmdType = CMD_TYPE_REINDEX ;
+ else if (!strcmp(commandTag,"ROLLBACK")) cmdType = CMD_TYPE_ROLLBACK ;
+ else if (!strcmp(commandTag,"RESET")) cmdType = CMD_TYPE_RESET ;
+ else if (!strcmp(commandTag,"START TRANSACTION")) cmdType = CMD_TYPE_BEGIN ;
+
+ /* only "replication_server" statement-name is replicated for SHOW. */
+ /* see CreateCommandTag() @ backend/tcop/postgres.c */
+
+ else if (!strcmp(commandTag,"COPY"))
+ {
+ cmdType = CMD_TYPE_COPY ;
+ if (is_copy_from(query_string))
+ {
+ PGR_Copy_Data_Need_Replicate = true; /* the COPY data that follows must be replicated too */
+ }
+ else
+ {
+ PGR_Copy_Data_Need_Replicate = false; /* not a COPY ... FROM: nothing is replicated */
+ return STATUS_NOT_REPLICATE;
+ }
+ }
+ else if (!strcmp(commandTag,"SET"))
+ {
+ cmdType = CMD_TYPE_SET;
+ /*
+ VariableSetStmt *stmt = (VariableSetStmt *)parsetree;
+ if (strcmp(stmt->name, "TRANSACTION ISOLATION LEVEL") &&
+ strcmp(stmt->name, "datestyle") &&
+ strcmp(stmt->name, "autocommit") &&
+ strcmp(stmt->name, "client_encoding") &&
+ strcmp(stmt->name, "password_encryption") &&
+ strcmp(stmt->name, "search_path") &&
+ strcmp(stmt->name, "session_authorization") &&
+ strcmp(stmt->name, "timezone"))
+
+ return STATUS_NOT_REPLICATE;
+ */
+ if (strstr(query_string,SYS_QUERY_1) != NULL) /* a SET containing SYS_QUERY_1 stays local */
+ {
+ return STATUS_NOT_REPLICATE;
+ }
+ }
+ else if (!strcmp(commandTag,"CREATE TABLE"))
+ {
+ if (is_create_temp_table(query_string))
+ {
+ Create_Temp_Table_Mode = true; /* not reset anywhere in this function */
+ }
+ }
+ if (Create_Temp_Table_Mode) /* the checks below run in order; a later one overrides cmdSts */
+ {
+ cmdSts = CMD_STS_TEMP_TABLE ;
+ }
+ if (Transaction_Mode > 0)
+ {
+ cmdSts = CMD_STS_TRANSACTION ;
+ }
+ else
+ {
+ if ((cmdType == CMD_TYPE_COMMIT ) ||
+ (cmdType == CMD_TYPE_ROLLBACK ))
+ {
+ cmdSts = CMD_STS_TRANSACTION ;
+ if (ReplicateCurrentTime != NULL)
+ {
+ ReplicateCurrentTime->useFlag = DATA_INIT; /* stop using the replicated time stamp */
+ ReplicateCurrentTime->use_seed = 0;
+ }
+ }
+ }
+ if (Session_Authorization_Mode)
+ {
+ cmdSts = CMD_STS_SET_SESSION_AUTHORIZATION ;
+ if (cmdType == CMD_TYPE_SESSION_AUTHORIZATION_END) /* NOTE(review): cmdType is never given this value in this function */
+ {
+ Session_Authorization_Mode = false;
+ }
+ }
+ if ((cmdSts == CMD_STS_TRANSACTION ) ||
+ (cmdSts == CMD_STS_SET_SESSION_AUTHORIZATION ) ||
+ (cmdSts == CMD_STS_TEMP_TABLE ))
+ {
+ /* check partitional replication table */
+ if (is_not_replication_query(query_string, query_len, cmdType)== true )
+ {
+ PGR_Copy_Data_Need_Replicate = false;
+ return STATUS_NOT_REPLICATE;
+ }
+ Query_String = NULL;
+ if (( do_not_replication_command(commandTag) == true) &&
+ (strcmp(commandTag,"SELECT"))) /* SELECT is the one such tag that is still sent on this path */
+ {
+ return STATUS_NOT_REPLICATE;
+ }
+
+ if (Debug_pretty_print)
+ elog(DEBUG1,"transaction query send :%s",(char *)query_string);
+ PGR_Retry_Query.cmdSts = cmdSts; /* refresh the retry record with the final values */
+ PGR_Retry_Query.cmdType = cmdType;
+ result = PGR_Send_Replicate_Command(query_string,query_len, cmdSts,cmdType);
+ if (result != NULL)
+ {
+ if (!strncmp(result,PGR_DEADLOCK_DETECTION_MSG,strlen(PGR_DEADLOCK_DETECTION_MSG)))
+ {
+ /*
+ PGR_Send_Message_To_Frontend(result);
+ */
+ free(result);
+ result = NULL;
+ return STATUS_DEADLOCK_DETECT;
+ }
+ else if (!strncmp(result,PGR_REPLICATION_ABORT_MSG,strlen(PGR_REPLICATION_ABORT_MSG)))
+ {
+ free(result);
+ result = NULL;
+ return STATUS_REPLICATION_ABORT;
+ }
+ free(result);
+ result = NULL;
+ return STATUS_CONTINUE;
+ }
+ else
+ {
+ return STATUS_ERROR; /* no reply buffer came back */
+ }
+ }
+ else
+ {
+ cmdSts = CMD_STS_QUERY ; /* plain statement outside any special mode */
+ if ( do_not_replication_command(commandTag) == false)
+ {
+ Query_String = NULL;
+ /* check partitional replication table */
+ if (is_not_replication_query(query_string, query_len, cmdType)== true )
+ {
+ PGR_Copy_Data_Need_Replicate = false;
+ return STATUS_NOT_REPLICATE;
+ }
+ result = PGR_Send_Replicate_Command(query_string,query_len,cmdSts,cmdType);
+ if (result != NULL)
+ {
+ if (!strncmp(result,PGR_DEADLOCK_DETECTION_MSG,strlen(PGR_DEADLOCK_DETECTION_MSG)))
+ {
+ free(result);
+ result = NULL;
+ return STATUS_DEADLOCK_DETECT;
+ }
+ else if (!strncmp(result,PGR_REPLICATION_ABORT_MSG,strlen(PGR_REPLICATION_ABORT_MSG)))
+ {
+ free(result);
+ result = NULL;
+ return STATUS_REPLICATION_ABORT;
+ }
+ /*
+ PGR_Send_Message_To_Frontend(result);
+ */
+ free(result);
+ result = NULL;
+ return STATUS_CONTINUE;
+ }
+ else
+ {
+ return STATUS_ERROR;
+ }
+ }
+ else
+ {
+ if (( is_serial_control_query(cmdType,query_string) == true) ||
+ ( is_select_into_query(cmdType,query_string) == true)) /* a SELECT matching either test is sent after all */
+ {
+ Query_String = NULL;
+ PGR_Need_Notice = true;
+ PGR_Check_Lock.check_lock_conflict = true;
+ result = PGR_Send_Replicate_Command(query_string,query_len,cmdSts,cmdType);
+ if (result != NULL)
+ {
+ /*
+ PGR_Send_Message_To_Frontend(result);
+ */
+ if (!strncmp(result,PGR_DEADLOCK_DETECTION_MSG,strlen(PGR_DEADLOCK_DETECTION_MSG))) /* unlike the paths above, the abort message is not tested here */
+ {
+ free(result);
+ return STATUS_DEADLOCK_DETECT;
+ }
+ free(result);
+ result = NULL;
+ return STATUS_CONTINUE;
+ }
+ else
+ {
+ return STATUS_ERROR;
+ }
+ }
+ else
+ {
+ Query_String = query_string; /* not sent: the query text is kept in Query_String */
+ /*PGR_Sock_To_Replication_Server = -1;*/
+ }
+ return STATUS_CONTINUE_SELECT;
+ }
+ }
+ return STATUS_CONTINUE; /* not reached: every branch above returns */
+}
+
+
+/*
+ * PGR_Is_System_Command
+ *
+ * Report whether query contains the PGR_SYSTEM_COMMAND_FUNC marker followed,
+ * somewhere later in the string, by an opening parenthesis.  A NULL query
+ * is never a system command.
+ */
+bool
+PGR_Is_System_Command(char * query)
+{
+	const char *marker;
+
+	if (query == NULL)
+		return false;
+
+	/* look for the '(' only after the function-name marker */
+	marker = strstr(query, PGR_SYSTEM_COMMAND_FUNC);
+	return ((marker != NULL) && (strchr(marker, '(') != NULL)) ? true : false;
+}
+
+/*
+ * set_command_args
+ *
+ * Split str at every ',' or ')' into argv[0..PGR_CMD_ARG_NUM-1] and return
+ * the number of slots used (at least 1 for a non-NULL str, never more than
+ * PGR_CMD_ARG_NUM).  All slots are cleared first, so absent arguments read
+ * as "".  An argument longer than 255 characters is truncated.
+ */
+static int
+set_command_args(char argv[ PGR_CMD_ARG_NUM ][256],char *str)
+{
+	int			cnt = 0;	/* slot currently being filled */
+	int			j = 0;		/* write position inside that slot */
+
+	if (str == NULL)
+		return 0;
+	memset(argv, 0, sizeof(argv[0]) * PGR_CMD_ARG_NUM);
+	for (; (*str != '\0') && (cnt < PGR_CMD_ARG_NUM); str++)
+	{
+		if ((*str == ',') || (*str == ')'))
+		{
+			cnt++;				/* separator: close this slot */
+			j = 0;
+		}
+		else if (j < 255)		/* keep room for the terminating NUL */
+			argv[cnt][j++] = *str;
+	}
+	/* the text after the last separator is an argument too */
+	if (cnt < PGR_CMD_ARG_NUM)
+		cnt++;
+	return cnt;
+}
+
+/*
+ * add_replication_server
+ *
+ * Register the replication server hostname:port/recovery_port in the
+ * ReplicateServerData table.  An entry that is already listed is reset to
+ * DATA_INIT unless it is the one in use (DATA_USE); a new one is written
+ * into the DATA_END sentinel slot and a new sentinel is placed after it.
+ * The stored host name is always NUL-terminated (over-long names are cut).
+ *
+ * Returns STATUS_OK on success, STATUS_ERROR for a NULL argument, a
+ * missing table, or when MAX_SERVER_NUM entries are already present.
+ */
+static int
+add_replication_server(char * hostname,char * port, char * recovery_port)
+{
+	int			cnt = 0;
+	int			portNumber;
+	int			recoveryPortNumber;
+	size_t		name_max;
+	ReplicateServerInfo *sp;
+
+	if ((hostname == NULL) || (port == NULL) || (recovery_port == NULL))
+		return STATUS_ERROR;
+	if (ReplicateServerData == NULL)
+		return STATUS_ERROR;
+	portNumber = atoi(port);
+	recoveryPortNumber = atoi(recovery_port);
+	/* longest name that fits with its NUL (hostName taken to be a char array) */
+	name_max = sizeof(sp->hostName) - 1;
+	for (sp = ReplicateServerData; sp->useFlag != DATA_END; sp++, cnt++)
+	{
+		if ((strncmp(sp->hostName, hostname, name_max) == 0) &&
+			(sp->portNumber == portNumber) &&
+			(sp->recoveryPortNumber == recoveryPortNumber))
+		{
+			if (sp->useFlag != DATA_USE)
+				PGR_Set_Replication_Server_Status(sp, DATA_INIT);
+			return STATUS_OK;
+		}
+	}
+	if (cnt >= MAX_SERVER_NUM)
+		return STATUS_ERROR;
+	strncpy(sp->hostName, hostname, name_max);
+	sp->hostName[name_max] = '\0';	/* strncpy does not terminate on truncation */
+	sp->portNumber = portNumber;
+	sp->recoveryPortNumber = recoveryPortNumber;
+	PGR_Set_Replication_Server_Status(sp, DATA_INIT);
+	memset((sp+1),0,sizeof(ReplicateServerInfo));
+	(sp + 1)->useFlag = DATA_END;
+	return STATUS_OK;
+}
+
+/*
+ * change_replication_server
+ *
+ * Make hostname:port/recovery_port the replication server in use: every
+ * matching entry of ReplicateServerData is flagged DATA_USE and every other
+ * entry that was flagged DATA_USE is put back to DATA_INIT.
+ *
+ * Returns STATUS_ERROR for a NULL argument or a missing table, otherwise
+ * STATUS_OK (even when no entry matched).
+ */
+static int
+change_replication_server(char * hostname,char * port, char * recovery_port)
+{
+	ReplicateServerInfo *entry;
+	int			wanted_port;
+	int			wanted_recovery_port;
+
+	if (hostname == NULL || port == NULL || recovery_port == NULL)
+		return STATUS_ERROR;
+	if (ReplicateServerData == NULL)
+		return STATUS_ERROR;
+
+	wanted_port = atoi(port);
+	wanted_recovery_port = atoi(recovery_port);
+
+	for (entry = ReplicateServerData; entry->useFlag != DATA_END; entry++)
+	{
+		bool		selected;
+
+		selected = (strcmp(entry->hostName, hostname) == 0) &&
+			(entry->portNumber == wanted_port) &&
+			(entry->recoveryPortNumber == wanted_recovery_port);
+
+		if (selected)
+			PGR_Set_Replication_Server_Status(entry, DATA_USE);
+		else if (entry->useFlag == DATA_USE)
+			PGR_Set_Replication_Server_Status(entry, DATA_INIT);
+	}
+
+	return STATUS_OK;
+}
+
+/*
+ * PGR_Set_Current_Time: adopt the replication server's time stamp (decimal
+ * strings) and store the local clock's offset from it, normalized to
+ * 0 <= offset_usec < 1000000.  STATUS_ERROR if any input is missing.
+ */
+int
+PGR_Set_Current_Time(char * sec, char * usec)
+{
+	struct timeval now;
+	long		off_sec, off_usec;
+
+	if ((sec == NULL) || (usec == NULL) || (ReplicateCurrentTime == NULL))
+		return STATUS_ERROR;
+	gettimeofday(&now, NULL);
+	ReplicateCurrentTime->tp.tv_sec = atol(sec);
+	ReplicateCurrentTime->tp.tv_usec = atol(usec);
+	off_sec = now.tv_sec - ReplicateCurrentTime->tp.tv_sec;
+	off_usec = now.tv_usec - ReplicateCurrentTime->tp.tv_usec;
+	/* a negative usec difference borrows one second */
+	ReplicateCurrentTime->offset_sec = (off_usec < 0) ? off_sec - 1 : off_sec;
+	ReplicateCurrentTime->offset_usec = (off_usec < 0) ? off_usec + 1000000 : off_usec;
+	ReplicateCurrentTime->useFlag = DATA_USE;
+	ReplicateCurrentTime->use_seed = 0;
+	return STATUS_OK;
+}
+/* Parse id as a decimal number and store it in MyProc->replicationId. */
+static void
+PGR_Set_Current_Replication_Query_ID(char *id)
+{
+	MyProc->replicationId = atol(id);
+}
+
+/*
+ * set_response_mode
+ *
+ * Parse mode as a decimal integer and store it as the response mode of
+ * CurrentReplicateServer.  NULL or negative input is ignored.  When no
+ * current server is set, PGR_get_replicate_server_info() is called first;
+ * if there is still none afterwards, nothing is changed.
+ */
+static void
+set_response_mode(char * mode)
+{
+	int			wanted;
+
+	if ((mode == NULL) || ((wanted = atoi(mode)) < 0))
+		return;
+	if (CurrentReplicateServer == NULL)
+		PGR_get_replicate_server_info();
+	if (CurrentReplicateServer == NULL)
+		return;
+	/* write only on change */
+	if (CurrentReplicateServer->response_mode != wanted)
+		CurrentReplicateServer->response_mode = wanted;
+}
+
+/*
+ * PGR_Call_System_Command
+ *
+ * Execute a control command sent by the replication server.  The command
+ * text contains PGR_SYSTEM_COMMAND_FUNC followed by "(func_no,arg,...)";
+ * the argument list is cut out (the ')' in command is overwritten with a
+ * NUL), split with set_command_args() and dispatched on func_no.
+ *
+ * argv is cleared before parsing, so an argument the sender left out reads
+ * as an empty string (0 for the numeric ones) instead of stack garbage.
+ *
+ * Returns STATUS_ERROR when command is NULL, ReplicateCurrentTime is not
+ * set or the text is malformed; STATUS_SKIP_QUERY when a confirmed query
+ * was already replicated; STATUS_OK otherwise (unknown func_no included).
+ */
+int
+PGR_Call_System_Command(char * command)
+{
+	char	   *ptr;
+	char	   *args;
+	char		argv[ PGR_CMD_ARG_NUM ][256];
+	int			argc;
+	int			func_no;
+	char	   *hostName = NULL;
+
+	if ((command == NULL) || (ReplicateCurrentTime == NULL))
+		return STATUS_ERROR;
+	ptr = strstr(command,PGR_SYSTEM_COMMAND_FUNC);
+	if (ptr == NULL)
+		return STATUS_ERROR;
+	ptr = strchr(ptr,'(');
+	if (ptr == NULL)
+		return STATUS_ERROR;
+	args = ptr+1;
+	ptr = strchr(ptr,')');
+	if (ptr == NULL)
+		return STATUS_ERROR;
+	*ptr = '\0';
+
+	/* unset slots must not hold stack garbage: the cases read them blindly */
+	memset(argv,0,sizeof(argv));
+	argc = set_command_args(argv,args);
+	if (argc < 1)
+		return STATUS_ERROR;
+	func_no = atoi(argv[0]);
+	switch (func_no)
+	{
+		/* set current system time */
+		case PGR_SET_CURRENT_TIME_FUNC_NO:
+			if (atol(argv[1]) == 0)
+				CreateCheckPoint(false,true);	/* time 0: run a checkpoint instead */
+			else
+			{
+				PGR_Set_Current_Time(argv[1],argv[2]);
+				set_replication_id(argv[3]);
+				set_response_mode(argv[4]);
+				PGR_Set_Current_Replication_Query_ID(argv[5]);
+			}
+			break;
+		/* add new replication server data */
+		case PGR_STARTUP_REPLICATION_SERVER_FUNC_NO:
+			hostName = get_hostName(argv[1]);
+			add_replication_server(hostName,argv[2],argv[3]);
+			break;
+		/* change new replication server */
+		case PGR_CHANGE_REPLICATION_SERVER_FUNC_NO:
+			hostName = get_hostName(argv[1]);
+			change_replication_server(hostName,argv[2],argv[3]);
+			break;
+		case PGR_SET_CURRENT_REPLICATION_QUERY_ID_NO:
+			PGR_Set_Current_Replication_Query_ID(argv[1]);
+			break;
+		case PGR_QUERY_CONFIRM_ANSWER_FUNC_NO:
+			if ((atoi(argv[3]) > 0) &&
+				(is_this_query_replicated(argv[3]) == true))
+				return STATUS_SKIP_QUERY;	/* skip this query */
+			PGR_Set_Current_Time(argv[1],argv[2]);
+			set_replication_id(argv[3]);
+			break;
+		/* get current oid */
+		case PGR_GET_OID_FUNC_NO:
+			return_current_oid();
+			break;
+		/* set current oid */
+		case PGR_SET_OID_FUNC_NO:
+			sync_oid(argv[1]);
+			break;
+		/* set noticed session abort */
+		case PGR_NOTICE_ABORT_FUNC_NO:
+			PGR_Noticed_Abort = true;
+			break;
+	}
+	return STATUS_OK;
+}
+
+/*
+ * PGR_GetTimeOfDay
+ *
+ * gettimeofday() wrapper.  While ReplicateCurrentTime is in use it returns
+ * 0 and reports either the stored time stamp (use_seed == 0) or the local
+ * clock minus the stored offset.  The usec field is carried in both
+ * directions, so a negative offset_usec cannot push it past 999999.
+ */
+int
+PGR_GetTimeOfDay(struct timeval *tp, struct timezone *tpz)
+{
+	int			rtn = gettimeofday(tp, tpz);
+
+	if ((ReplicateCurrentTime == NULL) || (ReplicateCurrentTime->useFlag != DATA_USE))
+		return rtn;
+	if (ReplicateCurrentTime->use_seed == 0)
+	{
+		tp->tv_sec = ReplicateCurrentTime->tp.tv_sec;
+		tp->tv_usec = ReplicateCurrentTime->tp.tv_usec;
+		return 0;
+	}
+	tp->tv_sec -= ReplicateCurrentTime->offset_sec;
+	tp->tv_usec -= ReplicateCurrentTime->offset_usec;
+	if (tp->tv_usec < 0)
+	{
+		tp->tv_usec += 1000000;
+		tp->tv_sec -= 1;
+	}
+	else if (tp->tv_usec >= 1000000)
+	{
+		tp->tv_usec -= 1000000;
+		tp->tv_sec += 1;
+	}
+	return 0;
+}
+
+/*
+ * PGR_Random: random(), re-seeded from the replicated time stamp's tv_usec
+ * on the first call after use_seed was reset.  Seeding uses srandom(), the
+ * function POSIX pairs with random(); srand() only seeds rand().
+ */
+long
+PGR_Random(void)
+{
+	if ((ReplicateCurrentTime != NULL) && (ReplicateCurrentTime->use_seed == 0))
+	{
+		srandom((unsigned int) ReplicateCurrentTime->tp.tv_usec);
+		ReplicateCurrentTime->use_seed = 1;
+	}
+	return random();
+}
+
+/*
+ * PGR_scan_terminate
+ *
+ * Return a pointer to the first ';' in str that lies outside single
+ * quotes, double quotes and $tag$ dollar quoting, or NULL when there is
+ * none or str is NULL.  "--" and "//" start a comment that runs to the
+ * end of the line; a backslash hides the character after it.
+ *
+ * The scan never moves past the terminating NUL (not even after a
+ * trailing backslash or '$'), and the character that ends a '$' scan is
+ * examined like any other, so the ';' in "SELECT $1;" is found.
+ */
+char *
+PGR_scan_terminate( char * str)
+{
+	char	   *p;
+	int			sflag = 0;		/* inside '...' */
+	int			dflag = 0;		/* inside "..." */
+	int			lflag = 0;		/* inside $tag$ ... $tag$ */
+	int			i;
+	char		tag[256];		/* tag of the dollar quote being opened */
+
+	if (str == NULL)
+		return NULL;
+	memset(tag,0,sizeof(tag));
+	p = str;
+
+	while ( *p != '\0' )
+	{
+		if ((!strncmp(p,"--",2)) ||
+			(!strncmp(p,"//",2)))
+		{
+			/* comment: skip to the end of the line */
+			while (( *p != '\n') && (*p != '\0'))
+				p++;
+			continue;
+		}
+
+		switch (*p)
+		{
+			case '\'':
+				sflag ^= 1;
+				break;
+			case '\"':
+				dflag ^= 1;
+				break;
+			case '$':
+				/* scan the alphanumeric tag: record it when a quote opens, */
+				/* compare it with the recorded one when a quote closes */
+				i = 0;
+				p++;
+				while (isalnum((unsigned char) *p) && (i < (int) sizeof(tag)))
+				{
+					if (lflag == 0)
+						tag[i] = *p;
+					else if (tag[i] != *p)
+						break;
+					i++;
+					p++;
+				}
+				/* only a '$' right after the tag makes this a delimiter */
+				if (*p != '$')
+					continue;	/* examine *p again as an ordinary character */
+				lflag ^= 1;
+				break;
+			case '\\':
+				/* a backslash at the very end has nothing to hide */
+				if (*(p + 1) == '\0')
+					return NULL;
+				p += 2;
+				continue;
+			case ';':
+				if ((!sflag) && (!dflag) && (!lflag))
+					return p;
+				break;
+		}
+		p++;
+	}
+	return NULL;
+}
+
+/*
+ * is_copy_from
+ *
+ * Tell whether a COPY statement is a "COPY ... FROM".  The first two
+ * blank-separated words (normally COPY and the target) and an optional
+ * parenthesised list after them are skipped; the next word, upper-cased,
+ * must then be exactly FROM.  Returns false for NULL.
+ */
+static bool
+is_copy_from(char * query)
+{
+	const char *cur = query;
+	char		keyword[12];
+	int			depth;
+	int			word;
+	int			n;
+
+	if (query == NULL)
+		return false;
+
+	/* step over the first two words; a word also ends at '(' */
+	for (word = 0; word < 2; word++)
+	{
+		while (isspace(*cur))
+			cur++;
+		while ((*cur != '\0') && (*cur != '(') && (!isspace(*cur)))
+			cur++;
+	}
+	while (isspace(*cur))
+		cur++;
+
+	/* step over a parenthesised list, keeping track of nesting */
+	if (*cur == '(')
+	{
+		depth = 1;
+		for (cur++; *cur != '\0'; cur++)
+		{
+			if (*cur == '(')
+				depth++;
+			if (*cur == ')')
+				depth--;
+			if (depth == 0)
+			{
+				cur++;		/* move past the closing ')' */
+				break;
+			}
+		}
+		while (isspace(*cur))
+			cur++;
+	}
+
+	/* collect the next word in upper case (at most 11 characters) */
+	memset(keyword, 0, sizeof(keyword));
+	for (n = 0; (*cur != '\0') && (!isspace(*cur)) &&
+			(n < sizeof(keyword) - 1); n++, cur++)
+		keyword[n] = (char) toupper(*cur);
+
+	return (strcmp(keyword, "FROM") == 0) ? true : false;
+}
+
+/*
+ * is_create_temp_table: true when query has at least four words and
+ * starts with CREATE [LOCAL | GLOBAL] TEMP[ORARY] TABLE (compared on the
+ * upper-cased words from get_words()).  LOCAL and GLOBAL are the optional
+ * keywords the CREATE TABLE grammar allows in front of TEMP.
+ */
+static bool
+is_create_temp_table(char * query)
+{
+	char		buf[MAX_WORDS][MAX_WORD_LETTERS];
+	int			t = 1;			/* index of the TEMP / TEMPORARY word */
+
+	if ((query == NULL) || (get_words(buf,query,strlen(query),1) < 4))
+		return false;
+	if ((!strcmp(buf[1],"LOCAL")) || (!strcmp(buf[1],"GLOBAL")))
+		t = 2;
+	return ((!strncmp(buf[0],"CREATE", strlen("CREATE"))) &&
+			(!strncmp(buf[t],"TEMP",strlen("TEMP"))) &&
+			(!strncmp(buf[t + 1],"TABLE",strlen("TABLE")))) ? true : false;
+}
+
+/*
+ * get_words
+ *
+ * Split string (at most length bytes, stopping early at a NUL) into
+ * whitespace-separated words and store up to MAX_WORDS of them in words[],
+ * each NUL-terminated; when upper is non-zero the letters are upper-cased.
+ * Returns the number of words stored, or STATUS_ERROR if string is NULL.
+ *
+ * The words are written straight into their slots, so no scratch buffer
+ * is allocated.  A word longer than MAX_WORD_LETTERS - 1 characters is
+ * truncated rather than copied past the end of its slot, and the scan is
+ * driven by a single position so that every word inside length is seen.
+ * Trailing whitespace does not produce an empty word.
+ */
+static int
+get_words( char words[MAX_WORDS][MAX_WORD_LETTERS] ,char * string,int length,int upper)
+{
+	int			pos = 0;		/* read position in string */
+	int			wc = 0;			/* words stored so far */
+	int			lc;				/* letters stored in the current word */
+
+	if (string == NULL)
+		return STATUS_ERROR;
+	while (wc < MAX_WORDS)
+	{
+		while ((pos < length) && isspace((unsigned char) string[pos]))
+			pos++;
+		if ((pos >= length) || (string[pos] == '\0'))
+			break;				/* nothing but blanks left */
+		memset(words[wc],0,MAX_WORD_LETTERS);
+		lc = 0;
+		while ((pos < length) && (string[pos] != '\0') &&
+			   (!isspace((unsigned char) string[pos])))
+		{
+			if (lc < MAX_WORD_LETTERS - 1)
+			{
+				words[wc][lc] = upper ?
+					(char) toupper((unsigned char) string[pos]) : string[pos];
+				lc++;
+			}
+			pos++;
+		}
+		wc++;
+	}
+	return wc;
+}
+
+/*
+ * Comp_Not_Replicate
+ *
+ * Order two PGR_Not_Replicate_Type entries case-insensitively by
+ * table_name, then by db_name; a NULL entry compares equal to anything.
+ */
+static int
+Comp_Not_Replicate(PGR_Not_Replicate_Type * nrp1,PGR_Not_Replicate_Type* nrp2)
+{
+	int			diff;
+
+	if (nrp1 == NULL || nrp2 == NULL)
+		return 0;
+
+	diff = strcasecmp(nrp1->table_name, nrp2->table_name);
+	return (diff != 0) ? diff : strcasecmp(nrp1->db_name, nrp2->db_name);
+}
+
+/*
+ * PGR_Is_Stand_Alone
+ *
+ * True when the PGR_Stand_Alone area is missing, or when it is flagged
+ * stand-alone and PGR_get_replicate_server_info() returns NULL as well.
+ */
+bool
+PGR_Is_Stand_Alone(void)
+{
+	if (PGR_Stand_Alone == NULL)
+		return true;
+	if (PGR_Stand_Alone->is_stand_alone != true)
+		return false;
+
+	/* flagged stand-alone: confirm that no server info is available */
+	return (PGR_get_replicate_server_info() == NULL) ? true : false;
+}
+/* PGR_Send_Message_To_Frontend: send msg to the client as a NOTICE in an 'N' message and flush the connection; a NULL msg is sent as "missing error text". The layout depends on the major version of FrontendProtocol. */
+void
+PGR_Send_Message_To_Frontend(char * msg)
+{
+ StringInfoData msgbuf;
+
+ pq_beginmessage(&msgbuf, 'N');
+
+ if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
+ {
+ /* New style with separate fields */
+ char tbuf[12]; /* receives the five SQLSTATE characters plus a NUL */
+ int ssval;
+ int i;
+
+ pq_sendbyte(&msgbuf, PG_DIAG_SEVERITY);
+ pq_sendstring(&msgbuf, "NOTICE" );
+
+ /* unpack MAKE_SQLSTATE code */
+ ssval = ERRCODE_WARNING ;
+ for (i = 0; i < 5; i++)
+ {
+ tbuf[i] = PGUNSIXBIT(ssval); /* one character per 6-bit group, lowest group first */
+ ssval >>= 6;
+ }
+ tbuf[i] = '\0';
+
+ pq_sendbyte(&msgbuf, PG_DIAG_SQLSTATE);
+ pq_sendstring(&msgbuf, tbuf);
+
+ /* M field is required per protocol, so always send something */
+ pq_sendbyte(&msgbuf, PG_DIAG_MESSAGE_PRIMARY);
+ if (msg)
+ pq_sendstring(&msgbuf, msg);
+ else
+ pq_sendstring(&msgbuf, _("missing error text"));
+
+ pq_sendbyte(&msgbuf, '\0'); /* terminator */
+ }
+ else
+ {
+ /* Old style --- gin up a backwards-compatible message */
+ StringInfoData buf; /* holds "NOTICE: <msg>\n" until it is copied into msgbuf */
+
+ initStringInfo(&buf);
+
+ appendStringInfo(&buf, "%s: ", "NOTICE");
+
+ if (msg)
+ appendStringInfoString(&buf, msg);
+ else
+ appendStringInfoString(&buf, _("missing error text"));
+
+ appendStringInfoChar(&buf, '\n');
+
+ pq_sendstring(&msgbuf, buf.data);
+
+ pfree(buf.data);
+ }
+
+ pq_endmessage(&msgbuf);
+
+ /*
+ * This flush is normally not necessary, since postgres.c will flush out
+ * waiting data when control returns to the main loop. But it seems best
+ * to leave it here, so that the client has some clue what happened if the
+ * backend dies before getting back to the main loop ... error/notice
+ * messages should not be a performance-critical path anyway, so an extra
+ * flush won't hurt much ...
+ */
+ pq_flush();
+}
+
+/*
+ * is_serial_control_query
+ *
+ * For a SELECT (cmdType == CMD_TYPE_SELECT), report whether the query text
+ * mentions NEXTVAL or SETVAL in any letter case.  The search is a plain
+ * substring match on an upper-cased copy of the query, so the words are
+ * also found inside identifiers or literals.
+ *
+ * Returns false for other command types, for a NULL query, and when the
+ * temporary copy cannot be allocated.
+ */
+static bool
+is_serial_control_query(char cmdType,char * query)
+{
+	char	   *upper_copy;
+	int			size;
+	int			k;
+	bool		found;
+
+	if ((cmdType != CMD_TYPE_SELECT) || (query == NULL))
+		return false;
+
+	size = strlen(query) + 1;		/* include the terminating NUL */
+	upper_copy = malloc(size);
+	if (upper_copy == NULL)
+		return false;
+
+	for (k = 0; k < size; k++)
+		upper_copy[k] = toupper(query[k]);
+
+	found = ((strstr(upper_copy,"NEXTVAL") != NULL) ||
+			 (strstr(upper_copy,"SETVAL") != NULL)) ? true : false;
+
+	free(upper_copy);
+	return found;
+}
+
+/*
+ * is_select_into_query
+ *
+ * For a SELECT (cmdType == CMD_TYPE_SELECT), report whether the query text
+ * contains INTO or CREATE in any letter case.  This is a plain substring
+ * match on an upper-cased copy of the query; it does not check where in
+ * the statement the word occurs.
+ *
+ * Returns false for other command types, for a NULL query, and when the
+ * temporary copy cannot be allocated.
+ */
+static bool
+is_select_into_query(char cmdType,char * query)
+{
+	static const char *const keywords[] = {"INTO", "CREATE"};
+	char	   *upper_copy;
+	int			size;
+	int			k;
+	bool		found = false;
+
+	if ((cmdType != CMD_TYPE_SELECT) || (query == NULL))
+		return false;
+
+	size = strlen(query) + 1;		/* include the terminating NUL */
+	upper_copy = malloc(size);
+	if (upper_copy == NULL)
+		return false;
+
+	for (k = 0; k < size; k++)
+		upper_copy[k] = toupper(query[k]);
+
+	/* either keyword anywhere in the text is enough */
+	for (k = 0; (k < 2) && (!found); k++)
+	{
+		if (strstr(upper_copy, keywords[k]) != NULL)
+			found = true;
+	}
+
+	free(upper_copy);
+	return found;
+}
+
+/*
+ * send_response_to_replication_server
+ *
+ * Send notice to the replication server as a CMD_SYS_CALL/CMD_STS_RESPONSE
+ * packet; PGR_QUERY_ABORTED_NOTICE_CMD is also typed CMD_TYPE_FRONTEND_CLOSED.
+ * While PGR_Lock_Noticed is set nothing is sent and STATUS_OK is returned.
+ * Returns STATUS_ERROR for a NULL notice or a negative socket, otherwise
+ * the result of send_replicate_packet().
+ */
+static int
+send_response_to_replication_server(const char * notice)
+{
+	ReplicateHeader header;
+
+	if (PGR_Lock_Noticed)
+		return STATUS_OK;
+	if ((notice == NULL) || (PGR_Sock_To_Replication_Server < 0))
+		return STATUS_ERROR;
+
+	memset(&header,0,sizeof(ReplicateHeader));
+	header.cmdSys = CMD_SYS_CALL;
+	header.cmdSts = CMD_STS_RESPONSE;
+	header.query_size = htonl(strlen(notice));
+	if (strcmp(notice,PGR_QUERY_ABORTED_NOTICE_CMD) == 0)
+		header.cmdType = CMD_TYPE_FRONTEND_CLOSED;
+	return send_replicate_packet(PGR_Sock_To_Replication_Server,&header,(char *)notice);
+}
+/* Send the PGR_QUERY_DONE_NOTICE_CMD response to the replication server; the send status is ignored. */
+void
+PGR_Notice_Transaction_Query_Done(void)
+{
+	(void) send_response_to_replication_server(PGR_QUERY_DONE_NOTICE_CMD);
+}
+/* Send the PGR_QUERY_ABORTED_NOTICE_CMD response to the replication server; the send status is ignored. */
+void
+PGR_Notice_Transaction_Query_Aborted(void)
+{
+	(void) send_response_to_replication_server(PGR_QUERY_ABORTED_NOTICE_CMD);
+}
+
+/*
+ * PGR_Notice_Conflict: send the lock-conflict notice (the deadlock notice
+ * when PGR_Check_Lock.deadlock is set) to the frontend or the replication
+ * server, as selected by PGR_Check_Lock.dest.  With CONTROL_LOCK_CONFLICT
+ * the answer is awaited and its status returned; otherwise STATUS_OK.
+ */
+int
+PGR_Notice_Conflict(void)
+{
+	int			rtn = STATUS_OK;
+	const char *msg = (PGR_Check_Lock.deadlock == true) ?
+		PGR_DEADLOCK_DETECT_NOTICE_CMD : PGR_LOCK_CONFLICT_NOTICE_CMD;
+
+	if (PGR_Check_Lock.dest != TO_FRONTEND)
+	{
+		send_response_to_replication_server(msg);
+#ifdef CONTROL_LOCK_CONFLICT
+		rtn = PGR_Recv_Trigger (PGR_Replication_Timeout);
+#endif /* CONTROL_LOCK_CONFLICT */
+		return rtn;
+	}
+	ReadyForQuery(DestRemote);
+	EndCommand(msg,DestRemote);
+#ifdef CONTROL_LOCK_CONFLICT
+	rtn = wait_lock_answer();
+#endif /* CONTROL_LOCK_CONFLICT */
+	return rtn;
+}
+
+#ifdef CONTROL_LOCK_CONFLICT
+/*
+ * wait_lock_answer: read a reply through read_trigger() and discard its
+ * text; STATUS_ERROR if the read result is negative, else STATUS_OK.
+ */
+static int
+wait_lock_answer(void)
+{
+	char		answer[PGR_MESSAGE_BUFSIZE+4];
+
+	memset(answer,0,sizeof(answer));
+	return (read_trigger(answer, PGR_MESSAGE_BUFSIZE) < 0) ? STATUS_ERROR : STATUS_OK;
+}
+
+/*
+ * read_trigger
+ *
+ * Read bytes one at a time with pq_getbytes() into result until a NUL has
+ * been stored, buf_size - 1 bytes have been stored, or pq_getbytes()
+ * returns non-zero.  Returns EOF for a NULL buffer or a non-positive
+ * size, otherwise the last pq_getbytes() result.
+ */
+static int
+read_trigger(char * result, int buf_size)
+{
+	int			stored = 0;
+	int			rc;
+	char		ch;
+
+	if ((result == NULL) || (buf_size <= 0))
+		return EOF;
+
+	for (;;)
+	{
+		rc = pq_getbytes(&ch,1);
+		/* on a full buffer the byte just read is dropped */
+		if ((rc != 0) || (stored >= buf_size - 1))
+			break;
+		result[stored] = ch;
+		if (ch == '\0')
+			break;
+		stored++;
+	}
+	return rc;
+}
+#endif /* CONTROL_LOCK_CONFLICT */
+
+/*
+ * PGR_Recv_Trigger
+ *
+ * Wait for a reply from the replication server and return the number it
+ * carries.  The reply text is parsed with atoi(); a positive value is
+ * returned as is, anything else becomes STATUS_OK.
+ *
+ * STATUS_OK is returned at once while PGR_Lock_Noticed is set.  Returns
+ * STATUS_ERROR when there is no socket or the receive call reports no
+ * data (result <= 0); in the latter case, if user_timeout is 0,
+ * CurrentReplicateServer is also marked DATA_ERR.
+ */
+int
+PGR_Recv_Trigger (int user_timeout)
+{
+	char		reply[PGR_MESSAGE_BUFSIZE];
+	int			code;
+
+	if (PGR_Lock_Noticed)
+		return STATUS_OK;
+	if (PGR_Sock_To_Replication_Server < 0)
+		return STATUS_ERROR;
+
+	memset(reply,0,sizeof(reply));
+	if (PGR_recv_replicate_result(PGR_Sock_To_Replication_Server,reply,user_timeout) <= 0)
+	{
+		if (user_timeout == 0)
+			PGR_Set_Replication_Server_Status(CurrentReplicateServer, DATA_ERR);
+		return STATUS_ERROR;
+	}
+
+	/* a reply arrived: hand back its numeric code */
+	code = atoi(reply);
+	return (code > 0) ? code : STATUS_OK;
+}
+
+
+/*
+ * PGR_Set_Transaction_Mode
+ *
+ * Return the transaction nesting counter that follows from running the
+ * command commandTag at counter value mode: BEGIN and START TRANSACTION
+ * add one; a tag starting with COMMIT or ROLLBACK subtracts one, but only
+ * while the counter is positive; any other tag, or NULL, leaves it alone.
+ */
+int
+PGR_Set_Transaction_Mode(int mode,const char * commandTag)
+{
+	if (commandTag == NULL)
+		return mode;
+
+	if (strcmp(commandTag,"BEGIN") == 0 || strcmp(commandTag,"START TRANSACTION") == 0)
+		return mode + 1;
+	if (mode > 0 &&
+		(strncmp(commandTag,"COMMIT",strlen("COMMIT")) == 0 ||
+		 strncmp(commandTag,"ROLLBACK",strlen("ROLLBACK")) == 0))
+		return mode - 1;
+	return mode;
+}
+
+/*
+ * do_not_replication_command
+ *
+ * True when commandTag is exactly SELECT, CLOSE CURSOR, MOVE, FETCH or
+ * EXPLAIN, and also for a NULL tag; false for every other tag.
+ */
+static bool
+do_not_replication_command(const char * commandTag)
+{
+	static const char *const local_only[] = {"SELECT", "CLOSE CURSOR", "MOVE", "FETCH", "EXPLAIN"};
+	int			k;
+
+	if (commandTag == NULL)
+		return true;
+
+	for (k = 0; k < 5; k++)
+		if (strcmp(commandTag, local_only[k]) == 0)
+			return true;
+	return false;
+}
+
+/*
+ * PGR_Set_Replication_Server_Status: store status in sp->useFlag.  A NULL
+ * sp is ignored, and the field is written only when the value changes.
+ */
+void
+PGR_Set_Replication_Server_Status( ReplicateServerInfo * sp, int status)
+{
+	if ((sp == NULL) || (sp->useFlag == status))
+		return;
+
+	sp->useFlag = status;
+}
+
+/*
+ * PGR_Is_Skip_Replication: classify query by its leading text.  Returns 3
+ * if it starts with SKIP_QUERY_1 or with SKIP_QUERY_2 formatted with the
+ * session user name, 1 if it starts with SKIP_QUERY_3 or SKIP_QUERY_4,
+ * 0 otherwise, and -1 when query or MyProcPort is NULL.
+ */
+int
+PGR_Is_Skip_Replication(char * query)
+{
+	char		user_query[256];
+
+	if ((query == NULL) || (MyProcPort == NULL))
+		return -1;
+	if (strncmp(query,SKIP_QUERY_1,strlen(SKIP_QUERY_1)) == 0)
+		return 3;
+	snprintf(user_query,sizeof(user_query),SKIP_QUERY_2,MyProcPort->user_name);
+	if (strncmp(query,user_query,strlen(user_query)) == 0)
+		return 3;
+	if ((strncmp(query,SKIP_QUERY_3,strlen(SKIP_QUERY_3)) == 0) ||
+		(strncmp(query,SKIP_QUERY_4,strlen(SKIP_QUERY_4)) == 0))
+		return 1;
+	return 0;
+}
+
+/*
+ * PGR_Did_Commit_Transaction
+ *
+ * Ask the replication server (CMD_TYPE_COMMIT_CONFIRM) whether the
+ * transaction identified by the current replicated time stamp has already
+ * been committed on another server.
+ *
+ * Returns true only when the answer is
+ * PGR_TRANSACTION_CONFIRM_ANSWER_FUNC_NO with PGR_ALREADY_COMMITTED.
+ * Everything else yields false: no replicated time in use, no server or
+ * socket, out of memory, send/receive failure, or a different answer.
+ *
+ * The header and the argument array are zeroed before use, so no
+ * uninitialized stack bytes are sent to the server and a short answer is
+ * never compared against garbage.
+ */
+bool
+PGR_Did_Commit_Transaction(void)
+{
+	int			sock = -1;
+	int			cnt = 0;
+	ReplicateHeader header;
+	char	   *result = NULL;
+	ReplicateServerInfo *sp = NULL;
+	ReplicateServerInfo *base = NULL;
+	int			socket_type = 0;
+	char		argv[ PGR_CMD_ARG_NUM ][256];
+	int			argc = 0;
+	bool		committed = false;
+
+	if ((ReplicateCurrentTime == NULL) ||
+		(ReplicateCurrentTime->useFlag != DATA_USE))
+		return false;
+	sp = PGR_get_replicate_server_info();
+	if (sp == NULL)
+	{
+		if (Debug_pretty_print)
+			elog(DEBUG1,"PGR_get_replicate_server_info get error");
+		return false;
+	}
+	sock = PGR_get_replicate_server_socket( sp , PGR_QUERY_SOCKET);
+	if (sock < 0)
+	{
+		if (Debug_pretty_print)
+			elog(DEBUG1,"PGR_get_replicate_server_socket fail");
+		return false;
+	}
+	result = malloc(PGR_MESSAGE_BUFSIZE);
+	if (result == NULL)
+		return false;
+	memset(result,0,PGR_MESSAGE_BUFSIZE);
+
+	/* build the confirmation request */
+	memset(&header,0,sizeof(ReplicateHeader));
+	header.cmdSys = CMD_SYS_CALL;
+	header.cmdSts = CMD_STS_TRANSACTION_ABORT;
+	header.cmdType = CMD_TYPE_COMMIT_CONFIRM;
+	header.port = htons(PostPortNumber);
+	header.pid = htons(getpid());
+	header.tv.tv_sec = htonl(ReplicateCurrentTime->tp.tv_sec);
+	header.tv.tv_usec = htonl(ReplicateCurrentTime->tp.tv_usec);
+	header.query_size = htonl(0);
+	strncpy(header.dbName ,(char *)(MyProcPort->database_name),sizeof(header.dbName)-1);
+	strncpy(header.userName , (char *)(MyProcPort->user_name),sizeof(header.userName)-1);
+	strncpy(header.password , PGR_password->password, PASSWORD_MAX_LENGTH );
+	memcpy(header.md5Salt ,MyProcPort->md5Salt, sizeof(header.md5Salt));
+	memcpy(header.cryptSalt ,MyProcPort->cryptSalt, sizeof(header.cryptSalt));
+	if (PGRSelfHostName != NULL)
+		strncpy(header.from_host, PGRSelfHostName, HOSTNAME_MAX_LENGTH);
+	header.replicate_id = htonl(ReplicationLog_Info.PGR_Replicate_ID);
+	header.request_id = 0;
+
+	base = sp;
+	PGR_Sock_To_Replication_Server = sock;
+
+	/* after MAX_RETRY_TIMES failed sends, ask for a new replication socket */
+	cnt = 0;
+	while (send_replicate_packet(sock,&header,"") != STATUS_OK)
+	{
+		if (cnt > MAX_RETRY_TIMES )
+		{
+			sock = get_new_replication_socket( base, sp, socket_type);
+			if (sock < 0)
+			{
+				if (Debug_pretty_print)
+					elog(DEBUG1,"all replication servers may be down");
+				PGR_Stand_Alone->is_stand_alone = true;
+				free(result);
+				return false;
+			}
+			PGR_Sock_To_Replication_Server = sock;
+			cnt = 0;
+		}
+		cnt ++;
+	}
+
+	if (PGR_recv_replicate_result(sock,result,6) < 0)
+	{
+		free(result);
+		return false;
+	}
+	/* read answer: func_no first, then the commit state */
+	memset(argv,0,sizeof(argv));
+	argc = set_command_args(argv,result);
+	if ((argc >= 2) &&
+		(atoi(argv[0]) == PGR_TRANSACTION_CONFIRM_ANSWER_FUNC_NO) &&
+		(atoi(argv[1]) == PGR_ALREADY_COMMITTED))
+	{
+		/* the transaction was committed on another server */
+		committed = true;
+	}
+	free(result);
+	return committed;
+}
+
+/*
+ * PGRsend_system_command
+ *
+ * Send a body-less CMD_SYS_CALL packet with the given cmdSts / cmdType to
+ * the replication server, stamped with the current replicated time.
+ * After MAX_RETRY_TIMES failed sends a new replication socket is
+ * requested; when none can be had the node is flagged stand-alone.
+ *
+ * Returns STATUS_OK once the packet was sent, STATUS_ERROR when no
+ * replicated time, server or socket is available.  No reply is read.
+ *
+ * The header is zeroed before it is filled in so that no uninitialized
+ * stack bytes go out on the wire.
+ */
+int
+PGRsend_system_command(char cmdSts, char cmdType)
+{
+	ReplicateServerInfo *sp = NULL;
+	ReplicateServerInfo *base = NULL;
+	int			sock = -1;
+	int			socket_type = 0;
+	int			cnt = 0;
+	ReplicateHeader header;
+
+	if (ReplicateCurrentTime == NULL)
+		return STATUS_ERROR;
+	sp = PGR_get_replicate_server_info();
+	if (sp == NULL)
+	{
+		if (Debug_pretty_print)
+			elog(DEBUG1,"PGR_get_replicate_server_info get error");
+		return STATUS_ERROR;
+	}
+	sock = PGR_get_replicate_server_socket( sp , PGR_QUERY_SOCKET);
+	if (sock < 0)
+	{
+		if (Debug_pretty_print)
+			elog(DEBUG1,"PGR_get_replicate_server_socket fail");
+		return STATUS_ERROR;
+	}
+
+	memset(&header,0,sizeof(ReplicateHeader));
+	header.cmdSys = CMD_SYS_CALL;
+	header.cmdSts = cmdSts;
+	header.cmdType = cmdType;
+	header.port = htons(PostPortNumber);
+	header.pid = htons(getpid());
+	header.tv.tv_sec = htonl(ReplicateCurrentTime->tp.tv_sec);
+	header.tv.tv_usec = htonl(ReplicateCurrentTime->tp.tv_usec);
+	header.query_size = htonl(0);
+	strncpy(header.dbName ,(char *)(MyProcPort->database_name),sizeof(header.dbName)-1);
+	strncpy(header.userName , (char *)(MyProcPort->user_name),sizeof(header.userName)-1);
+	strncpy(header.password , PGR_password->password, PASSWORD_MAX_LENGTH );
+	memcpy(header.md5Salt ,MyProcPort->md5Salt, sizeof(header.md5Salt));
+	memcpy(header.cryptSalt ,MyProcPort->cryptSalt, sizeof(header.cryptSalt));
+	if (PGRSelfHostName != NULL)
+		strncpy(header.from_host, PGRSelfHostName, HOSTNAME_MAX_LENGTH);
+	header.replicate_id = htonl(ReplicationLog_Info.PGR_Replicate_ID);
+	header.request_id = 0;
+
+	base = sp;
+	PGR_Sock_To_Replication_Server = sock;
+	/* keep sending; after MAX_RETRY_TIMES failures ask for a new socket */
+	while (send_replicate_packet(sock,&header,"") != STATUS_OK)
+	{
+		if (cnt > MAX_RETRY_TIMES )
+		{
+			sock = get_new_replication_socket( base, sp, socket_type);
+			if (sock < 0)
+			{
+				if (Debug_pretty_print)
+					elog(DEBUG1,"all replication servers may be down");
+				PGR_Stand_Alone->is_stand_alone = true;
+				return STATUS_ERROR;
+			}
+			PGR_Sock_To_Replication_Server = sock;
+			cnt = 0;
+		}
+		cnt ++;
+	}
+	return STATUS_OK;
+}
+
+/*
+ * get_hostName: strip the quotes of a quoted host name in place.  Every
+ * '\'' in str becomes a NUL and the character after the first one is
+ * returned ("'host'" -> "host"); NULL if str is NULL or has no quote.
+ */
+static char *
+get_hostName(char * str)
+{
+	char	   *top = NULL;
+	char	   *p;
+
+	if (str == NULL)
+		return NULL;
+	for (p = str; *p != '\0'; p++)
+		if (*p == '\'')
+		{
+			*p = '\0';
+			if (top == NULL)
+				top = p + 1;	/* the name starts right after the opening quote */
+		}
+	return top;
+}
+
+/*
+ * PGR_Remove_Comment: return a pointer to the first character of str that
+ * is neither leading white space nor part of a leading "--" or "//" line
+ * comment (str is not modified); NULL for NULL.  The comment test uses
+ * strncmp(), which stops at the NUL, so nothing past the end is read.
+ */
+char *
+PGR_Remove_Comment(char * str)
+{
+	char	   *p = str;
+
+	if (str == NULL)
+		return NULL;
+
+	for (;;)
+	{
+		while (isspace((unsigned char) *p))
+			p++;
+		if ((strncmp(p,"--",2) != 0) && (strncmp(p,"//",2) != 0))
+			return p;
+		while ((*p != '\n') && (*p != '\0'))
+			p++;
+	}
+}
+
+/*
+ * PGR_Force_Replicate_Query: resend the query saved in PGR_Retry_Query.
+ * The malloc'ed reply of PGR_Send_Replicate_Command() is freed, not leaked.
+ */
+void
+PGR_Force_Replicate_Query(void)
+{
+	if (PGR_Retry_Query.useFlag == DATA_USE)
+		free(PGR_Send_Replicate_Command(PGR_Retry_Query.query_string,
+				PGR_Retry_Query.query_len, PGR_Retry_Query.cmdSts, PGR_Retry_Query.cmdType));
+}
+
+/* PGR_Notice_DeadLock: send a body-less CMD_SYS_CALL / CMD_STS_NOTICE packet */
+/* of type CMD_TYPE_DEADLOCK_DETECT on PGR_Sock_To_Replication_Server. */
+void
+PGR_Notice_DeadLock(void)
+{
+	ReplicateHeader notice;
+	memset(&notice,0,sizeof(notice));	/* also leaves query_size at 0 */
+	notice.cmdType = CMD_TYPE_DEADLOCK_DETECT;
+	notice.cmdSts = CMD_STS_NOTICE;
+	notice.cmdSys = CMD_SYS_CALL;
+	send_replicate_packet(PGR_Sock_To_Replication_Server,&notice,(char *)NULL);
+}
+
+/*
+ * PGR_Set_Cluster_Status: store status in ClusterDBData->status; nothing
+ * happens when ClusterDBData is NULL or the value is already stored.
+ */
+void
+PGR_Set_Cluster_Status(int status)
+{
+	if ((ClusterDBData == NULL) || (ClusterDBData->status == status))
+		return;
+	ClusterDBData->status = status;
+}
+
+/*
+ * PGR_Get_Cluster_Status: return ClusterDBData->status, or 0 when
+ * ClusterDBData is NULL.
+ */
+int
+PGR_Get_Cluster_Status(void)
+{
+	return (ClusterDBData == NULL) ? 0 : ClusterDBData->status;
+}
+
+/*
+ * PGR_Check_Replicate_Server_Status: life check of replication server sp.
+ * A new connection is opened, a body-less CMD_SYS_PREREPLICATE packet is
+ * sent and the reply read.  fdP starts as -1 so PGR_Close_Sock() never
+ * gets an uninitialized descriptor; a NULL sp gives STATUS_ERROR.
+ */
+int
+PGR_Check_Replicate_Server_Status(ReplicateServerInfo * sp)
+{
+	ReplicateHeader header;
+	char	   *result = NULL;
+	int			status;
+	int			fdP = -1;
+
+	if (sp == NULL)
+		return STATUS_ERROR;
+	result = malloc(PGR_MESSAGE_BUFSIZE + 4);
+	if (result == NULL)
+	{
+		if (Debug_pretty_print)
+			elog(DEBUG1,"malloc failed in PGR_Check_Replicate_Server_Status()");
+		return STATUS_ERROR;
+	}
+	memset(&header, 0, sizeof(ReplicateHeader));
+	memset(result, 0, PGR_MESSAGE_BUFSIZE + 4);
+
+	header.cmdSys = CMD_SYS_PREREPLICATE;
+	header.cmdSts = CMD_STS_OTHER;
+	header.cmdType = CMD_TYPE_OTHER;
+	header.port = htons(PostPortNumber);
+	header.pid = htons(getpid());
+	header.query_size = 0;
+	strncpy(header.dbName ,(char *)(MyProcPort->database_name),sizeof(header.dbName)-1);
+	strncpy(header.userName , (char *)(MyProcPort->user_name),sizeof(header.userName)-1);
+	strncpy(header.password , PGR_password->password, PASSWORD_MAX_LENGTH );
+	memcpy(header.md5Salt ,MyProcPort->md5Salt, sizeof(header.md5Salt));
+	memcpy(header.cryptSalt ,MyProcPort->cryptSalt, sizeof(header.cryptSalt));
+	header.request_id = htonl(get_next_request_id());
+	header.rlog = 0;
+	if (PGRSelfHostName != NULL)
+		strncpy(header.from_host, PGRSelfHostName, HOSTNAME_MAX_LENGTH);
+	/* open a new socket for lifecheck */
+	status = PGR_Create_Socket_Connect(&fdP, sp->hostName, sp->portNumber);
+	if (status != STATUS_ERROR)
+	{
+		status = send_replicate_packet(fdP, &header, (char *)NULL);
+		/* NOTE(review): a receive result <= 0 maps to STATUS_OK -- confirm intent */
+		if (status == STATUS_OK)
+			status = (0 >= PGR_recv_replicate_result(fdP, result ,0)) ? STATUS_OK : STATUS_ERROR;
+	}
+	else if (Debug_pretty_print)
+		elog(DEBUG1,"create socket failed in PGR_Check_Replicate_Server_Status()");
+	free(result);
+	PGR_Close_Sock(&fdP);
+	return status;
+}
+
+/* Ensure a batch of OIDs is WAL-logged, then report the shared next-OID
+ * counter to the frontend or to the replication server. */
+static int
+return_current_oid(void)
+{
+    char msg[PGR_MESSAGE_BUFSIZE];
+    Oid current_oid;
+
+    LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
+    if (ShmemVariableCache->nextOid < ((Oid) FirstBootstrapObjectId))
+    {
+        ShmemVariableCache->nextOid = FirstBootstrapObjectId;
+        ShmemVariableCache->oidCount = 0;
+    }
+    if (ShmemVariableCache->oidCount == 0)
+    {
+        XLogPutNextOid(ShmemVariableCache->nextOid + VAR_OID_PREFETCH);
+        ShmemVariableCache->oidCount = VAR_OID_PREFETCH;
+    }
+    /* read the counter while OidGenLock is still held, not after release */
+    current_oid = ShmemVariableCache->nextOid;
+    LWLockRelease(OidGenLock);
+    memset(msg,0,sizeof(msg));
+    snprintf(msg, sizeof(msg), "%u", current_oid);
+    if (PGR_Check_Lock.dest == TO_FRONTEND)
+    {
+        pq_puttextmessage('C',msg);
+        pq_flush();
+    }
+    else
+        send_response_to_replication_server(msg);
+    return STATUS_OK;
+}
+
+/* Advance the shared OID counter past the OID given as decimal text and
+ * report the new counter value.  Returns STATUS_ERROR if the OID is zero,
+ * unparsable or not ahead of the current counter. */
+static int
+sync_oid(char * oid)
+{
+    uint32_t next_oid = 0;
+    int offset = 0;
+    Oid synced_oid;
+    char msg[PGR_MESSAGE_BUFSIZE];
+    /* parse before locking so that a bad argument never holds OidGenLock */
+    next_oid = (oid != NULL) ? strtoul(oid, NULL, 10) : 0;
+    if (next_oid == 0)
+        return STATUS_ERROR;
+    next_oid ++;
+    LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
+    offset = next_oid - ShmemVariableCache->nextOid ;
+    if (offset <= 0)
+    {
+        LWLockRelease(OidGenLock);    /* the lock was leaked on this path */
+        return STATUS_ERROR;
+    }
+    if (next_oid < FirstBootstrapObjectId)
+    {
+        ShmemVariableCache->nextOid = FirstBootstrapObjectId;
+        ShmemVariableCache->oidCount = 0;
+    }
+    /* log more OIDs while the jump reaches past the logged ones (signed compare) */
+    while (offset >= (int) ShmemVariableCache->oidCount)
+    {
+        offset -= (ShmemVariableCache->oidCount) ;
+        (ShmemVariableCache->nextOid) += (ShmemVariableCache->oidCount);
+        XLogPutNextOid(ShmemVariableCache->nextOid + VAR_OID_PREFETCH);
+        ShmemVariableCache->oidCount = VAR_OID_PREFETCH;
+    }
+    (ShmemVariableCache->nextOid) += offset;
+    (ShmemVariableCache->oidCount) -= offset;
+    synced_oid = ShmemVariableCache->nextOid;    /* snapshot under the lock */
+    LWLockRelease(OidGenLock);
+    memset(msg,0,sizeof(msg));
+    snprintf(msg, sizeof(msg), "%u", synced_oid);
+    if (PGR_Check_Lock.dest == TO_FRONTEND)
+    {
+        pq_puttextmessage('C',msg);
+        pq_flush();
+    }
+    else
+        send_response_to_replication_server(msg);
+    return STATUS_OK;
+}
+
+/*
+ * Send an LO_IMPORT large-object command that carries the file name
+ * (its length in arg1, its bytes in buf) via PGR_Send_Replicate_Command().
+ * Returns STATUS_OK when nothing is sent or a reply comes back,
+ * STATUS_ERROR on allocation failure or when no reply is returned.
+ */
+int
+PGR_lo_import(char * filename)
+{
+    LOArgs *request;
+    char *reply;
+    int name_len;
+    int request_size;
+
+    /* skip: query is marked replicated, or retry status is TRANSACTION */
+    if (PGR_Is_Replicated_Query == true ||
+        PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION)
+        return STATUS_OK;
+    /* skip: PGR_Retry_Query is not a CMD_STS_QUERY of type SELECT */
+    if (!(PGR_Retry_Query.cmdSts == CMD_STS_QUERY &&
+          PGR_Retry_Query.cmdType == CMD_TYPE_SELECT))
+        return STATUS_OK;
+
+    name_len = strlen(filename);
+    request_size = sizeof(LOArgs) + name_len;
+    request = (LOArgs *) malloc(request_size + 4);
+    if (request == NULL)
+        return STATUS_ERROR;
+    memset(request, 0, request_size + 4);
+    request->arg1 = htonl((uint32_t) name_len);
+    memcpy(request->buf, filename, name_len);
+
+    reply = PGR_Send_Replicate_Command((char *) request,
+                                       request_size,
+                                       CMD_STS_LARGE_OBJECT,
+                                       CMD_TYPE_LO_IMPORT);
+    free(request);
+
+    /* the reply text itself is not examined, only its presence */
+    if (reply == NULL)
+        return STATUS_ERROR;
+    free(reply);
+    return STATUS_OK;
+}
+
+/*
+ * Send an LO_CREATE large-object command (flags in arg1) through
+ * PGR_Send_Replicate_Command().  STATUS_OK if skipped or answered.
+ */
+int
+PGR_lo_create(int flags)
+{
+    LOArgs request;
+    char *reply;
+
+    /* skip: query is marked replicated, or retry status is TRANSACTION */
+    if (PGR_Is_Replicated_Query == true ||
+        PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION)
+        return STATUS_OK;
+    /* skip: PGR_Retry_Query is not a CMD_STS_QUERY of type SELECT */
+    if (!(PGR_Retry_Query.cmdSts == CMD_STS_QUERY &&
+          PGR_Retry_Query.cmdType == CMD_TYPE_SELECT))
+        return STATUS_OK;
+
+    /* arguments are sent in network byte order; unused ones stay zero */
+    memset(&request, 0, sizeof(request));
+    request.arg1 = htonl(flags);
+
+    reply = PGR_Send_Replicate_Command((char *) &request,
+                                       sizeof(request),
+                                       CMD_STS_LARGE_OBJECT, CMD_TYPE_LO_CREATE);
+    if (reply == NULL)
+        return STATUS_ERROR;
+
+    free(reply);
+    return STATUS_OK;
+}
+
+/*
+ * Send an LO_OPEN large-object command (object id in arg1, mode in arg2)
+ * through PGR_Send_Replicate_Command().  STATUS_OK if skipped or answered.
+ */
+int
+PGR_lo_open(Oid lobjId,int32 mode)
+{
+    LOArgs request;
+    char *reply;
+
+    /* skip: query is marked replicated, or retry status is TRANSACTION */
+    if (PGR_Is_Replicated_Query == true ||
+        PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION)
+        return STATUS_OK;
+    /* skip: PGR_Retry_Query is not a CMD_STS_QUERY of type SELECT */
+    if (!(PGR_Retry_Query.cmdSts == CMD_STS_QUERY &&
+          PGR_Retry_Query.cmdType == CMD_TYPE_SELECT))
+        return STATUS_OK;
+
+    /* arguments are sent in network byte order; unused ones stay zero */
+    memset(&request, 0, sizeof(request));
+    request.arg1 = htonl((uint32_t) lobjId);
+    request.arg2 = htonl((uint32_t) mode);
+
+    reply = PGR_Send_Replicate_Command((char *) &request,
+                                       sizeof(request),
+                                       CMD_STS_LARGE_OBJECT, CMD_TYPE_LO_OPEN);
+    if (reply == NULL)
+        return STATUS_ERROR;
+
+    free(reply);
+    return STATUS_OK;
+}
+
+/*
+ * Send an LO_CLOSE large-object command (descriptor in arg1) through
+ * PGR_Send_Replicate_Command().  STATUS_OK if skipped or answered.
+ */
+int
+PGR_lo_close(int32 fd)
+{
+    LOArgs request;
+    char *reply;
+
+    /* skip: query is marked replicated, or retry status is TRANSACTION */
+    if (PGR_Is_Replicated_Query == true ||
+        PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION)
+        return STATUS_OK;
+    /* skip: PGR_Retry_Query is not a CMD_STS_QUERY of type SELECT */
+    if (!(PGR_Retry_Query.cmdSts == CMD_STS_QUERY &&
+          PGR_Retry_Query.cmdType == CMD_TYPE_SELECT))
+        return STATUS_OK;
+
+    /* arguments are sent in network byte order; unused ones stay zero */
+    memset(&request, 0, sizeof(request));
+    request.arg1 = htonl((uint32_t) fd);
+
+    reply = PGR_Send_Replicate_Command((char *) &request,
+                                       sizeof(request),
+                                       CMD_STS_LARGE_OBJECT, CMD_TYPE_LO_CLOSE);
+    if (reply == NULL)
+        return STATUS_ERROR;
+
+    free(reply);
+    return STATUS_OK;
+}
+
+/*
+ * Send an LO_WRITE large-object command (descriptor in arg1, byte count in
+ * arg2, the bytes themselves in buf) via PGR_Send_Replicate_Command().
+ * STATUS_OK if skipped or answered, STATUS_ERROR otherwise.
+ */
+int
+PGR_lo_write(int fd, char *buf, int len)
+{
+    LOArgs *request;
+    char *reply;
+    int request_size;
+
+    /* skip: query is marked replicated, or retry status is TRANSACTION */
+    if (PGR_Is_Replicated_Query == true ||
+        PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION)
+        return STATUS_OK;
+    /* skip: PGR_Retry_Query is not a CMD_STS_QUERY of type SELECT */
+    if (!(PGR_Retry_Query.cmdSts == CMD_STS_QUERY &&
+          PGR_Retry_Query.cmdType == CMD_TYPE_SELECT))
+        return STATUS_OK;
+
+    request_size = sizeof(LOArgs) + len;
+    request = malloc(request_size + 4);
+    if (request == NULL)
+        return STATUS_ERROR;
+    memset(request, 0, request_size + 4);
+    request->arg1 = htonl((uint32_t) fd);
+    request->arg2 = htonl((uint32_t) len);
+    memcpy(request->buf, buf, len);
+
+    reply = PGR_Send_Replicate_Command((char *) request,
+                                       request_size,
+                                       CMD_STS_LARGE_OBJECT,
+                                       CMD_TYPE_LO_WRITE);
+    free(request);
+    if (reply == NULL)
+        return STATUS_ERROR;
+
+    free(reply);
+    return STATUS_OK;
+}
+
+/*
+ * Send an LO_LSEEK large-object command (descriptor, offset and whence in
+ * arg1..arg3) through PGR_Send_Replicate_Command().  STATUS_OK if skipped or answered.
+ */
+int
+PGR_lo_lseek(int32 fd, int32 offset, int32 whence)
+{
+    LOArgs request;
+    char *reply;
+
+    /* skip: query is marked replicated, or retry status is TRANSACTION */
+    if (PGR_Is_Replicated_Query == true ||
+        PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION)
+        return STATUS_OK;
+    /* skip: PGR_Retry_Query is not a CMD_STS_QUERY of type SELECT */
+    if (!(PGR_Retry_Query.cmdSts == CMD_STS_QUERY &&
+          PGR_Retry_Query.cmdType == CMD_TYPE_SELECT))
+        return STATUS_OK;
+
+    /* arguments are sent in network byte order; unused ones stay zero */
+    memset(&request, 0, sizeof(request));
+    request.arg1 = htonl((uint32_t) fd);
+    request.arg2 = htonl((uint32_t) offset);
+    request.arg3 = htonl((uint32_t) whence);
+
+    reply = PGR_Send_Replicate_Command((char *) &request,
+                                       sizeof(request),
+                                       CMD_STS_LARGE_OBJECT, CMD_TYPE_LO_LSEEK);
+    if (reply == NULL)
+        return STATUS_ERROR;
+
+    free(reply);
+    return STATUS_OK;
+}
+
+/*
+ * Send an LO_UNLINK large-object command (object id in arg1) through
+ * PGR_Send_Replicate_Command().  STATUS_OK if skipped or answered.
+ */
+int
+PGR_lo_unlink(Oid lobjId)
+{
+    LOArgs request;
+    char *reply;
+
+    /* skip: query is marked replicated, or retry status is TRANSACTION */
+    if (PGR_Is_Replicated_Query == true ||
+        PGR_Retry_Query.cmdSts == CMD_STS_TRANSACTION)
+        return STATUS_OK;
+    /* skip: PGR_Retry_Query is not a CMD_STS_QUERY of type SELECT */
+    if (!(PGR_Retry_Query.cmdSts == CMD_STS_QUERY &&
+          PGR_Retry_Query.cmdType == CMD_TYPE_SELECT))
+        return STATUS_OK;
+
+    /* arguments are sent in network byte order; unused ones stay zero */
+    memset(&request, 0, sizeof(request));
+    request.arg1 = htonl((uint32_t) lobjId);
+
+    reply = PGR_Send_Replicate_Command((char *) &request,
+                                       sizeof(request),
+                                       CMD_STS_LARGE_OBJECT, CMD_TYPE_LO_UNLINK);
+    if (reply == NULL)
+        return STATUS_ERROR;
+
+    free(reply);
+    return STATUS_OK;
+}
+
+/*
+ * Choose the object id that follows last_id.
+ *
+ * When last_id is 0 the id is taken from PGRget_replication_id();
+ * otherwise it is simply last_id + 1.
+ */
+Oid
+PGRGetNewObjectId(Oid last_id)
+{
+    if (last_id != 0)
+        return last_id + 1;
+
+    /* no previous id to continue from */
+    return (Oid) PGRget_replication_id();
+}
+
+/*
+ * Hand an input message to PGR_Send_Replicate_Command() as a
+ * CMD_STS_PREPARE command of type cmdType.
+ *
+ * Returns STATUS_OK when the message is skipped (already replicated, or
+ * filtered out as a prepared SELECT) or when a reply is received;
+ * STATUS_ERROR when input_message is NULL or no reply is returned.
+ */
+int
+PGR_Send_Input_Message(char cmdType,StringInfo input_message)
+{
+    char *payload;
+    char *reply;
+    int payload_len;
+
+    if (input_message == NULL)
+        return STATUS_ERROR;
+    if (PGR_Is_Replicated_Query == true)
+        return STATUS_OK;
+
+    payload = input_message->data;
+    payload_len = input_message->len + 1;
+
+    /* check setting of configuration value */
+    /* NOTE(review): the first byte of the message is skipped before parsing - confirm why */
+    if (PGRnotReplicatePreparedSelect == true &&
+        is_concerned_with_prepared_select(cmdType, payload + 1) == true)
+        return STATUS_OK;
+
+    reply = PGR_Send_Replicate_Command(payload, payload_len,
+                                       CMD_STS_PREPARE, cmdType);
+    if (reply == NULL)
+        return STATUS_ERROR;
+
+    /* a reply arrived; its text is not needed */
+    PGR_Reload_Start_Time();
+    free(reply);
+    return STATUS_OK;
+}
+
+/*
+ * Decide whether a message should be skipped as part of a prepared SELECT.
+ *
+ * A CMD_TYPE_P_PARSE message whose query is a SELECT, a PREPARE of a
+ * SELECT, or an EXECUTE/DEALLOCATE of a SELECT statement raises the
+ * pgr_skip_in_prepared_query flag.  While the flag is raised every message
+ * yields true; a CMD_TYPE_P_SYNC message lowers it again (still true).
+ */
+static bool
+is_concerned_with_prepared_select(char cmdType, char * query_string)
+{
+    if (cmdType == CMD_TYPE_P_PARSE)
+    {
+        int kind = parse_message(query_string);
+        bool is_select = false;
+
+        /* does this parse message concern a SELECT? */
+        if (kind == PGR_MESSAGE_SELECT)
+            is_select = true;
+        else if (kind == PGR_MESSAGE_PREPARE)
+            is_select = (is_prepared_as_select(query_string) == true);
+        else if (kind == PGR_MESSAGE_EXECUTE || kind == PGR_MESSAGE_DEALLOCATE)
+            is_select = (is_statement_as_select(query_string) == true);
+
+        if (is_select)
+            pgr_skip_in_prepared_query = true;
+        /* the flag may also still be raised from an earlier message */
+        if (pgr_skip_in_prepared_query == true)
+            return true;
+    }
+
+    if (pgr_skip_in_prepared_query != true)
+        return false;
+
+    /* the flag stays raised until the sync message has been seen */
+    if (cmdType == CMD_TYPE_P_SYNC)
+        pgr_skip_in_prepared_query = false;
+    return true;
+}
+
+/* Count the leading characters of ptr up to the first blank or parenthesis.
+ * Returns that count, or -1 once more than max characters were scanned. */
+static int
+skip_non_blank(char * ptr, int max)
+{
+    int i = 0;
+    while (!isspace((unsigned char) ptr[i]))
+    {
+        /* test the current character (it was ptr[1], which never advances) */
+        if (ptr[i] == '(' || ptr[i] == ')')
+            return i;
+        if (++i > max)
+            return -1;
+    }
+    return i;
+}
+
+/* Count leading whitespace in ptr; -1 once more than max blanks were seen. */
+static int
+skip_blank(char * ptr, int max)
+{
+    int i = 0;
+
+    /* cast: passing a negative char value to isspace() is undefined */
+    while (isspace((unsigned char) ptr[i]))
+        if (++i > max)
+            return -1;
+    return i;
+}
+
+/*
+ * Classify a query string by the keyword it starts with.
+ *
+ * Leading whitespace is skipped; the keyword test is a case-insensitive
+ * prefix comparison.  Returns PGR_MESSAGE_SELECT, PGR_MESSAGE_PREPARE,
+ * PGR_MESSAGE_EXECUTE or PGR_MESSAGE_DEALLOCATE, and PGR_MESSAGE_OTHER for
+ * anything else (including a NULL or empty string).
+ */
+static int
+parse_message(char * query_string)
+{
+    static const struct
+    {
+        const char *word;
+        int kind;
+    } leading_words[] = {
+        {"SELECT", PGR_MESSAGE_SELECT},
+        {"PREPARE", PGR_MESSAGE_PREPARE},
+        {"EXECUTE", PGR_MESSAGE_EXECUTE},
+        {"DEALLOCATE", PGR_MESSAGE_DEALLOCATE}
+    };
+    int n_words = sizeof(leading_words) / sizeof(leading_words[0]);
+    int blanks, k, len;
+
+    if (query_string == NULL)
+        return PGR_MESSAGE_OTHER;
+    len = strlen (query_string);
+    if (len <= 0)
+        return PGR_MESSAGE_OTHER;
+
+    blanks = skip_blank(query_string, len);
+    if (blanks < 0)
+        return PGR_MESSAGE_OTHER;
+
+    for (k = 0; k < n_words; k++)
+    {
+        const char *word = leading_words[k].word;
+        if (!strncasecmp(query_string + blanks, word, strlen(word)))
+            return leading_words[k].kind;
+    }
+    return PGR_MESSAGE_OTHER;
+}
+
+/*
+ * Tell whether a "PREPARE name [(types)] AS statement" query prepares a
+ * SELECT.  Only a case-insensitive "SELECT" prefix after AS is tested.
+ */
+static bool
+is_prepared_as_select(char * query_string)
+{
+    char * ptr =NULL;
+    int rtn = 0;
+    int i = 0;
+    int len = 0;
+    int args =0;
+    if (query_string == NULL)
+        return false;
+    ptr = (char *)query_string;
+    len = strlen (query_string);
+    /* skip leading space, as parse_message() does before classifying */
+    rtn = skip_blank(ptr+i, len-i);
+    if (rtn < 0)
+        return false;
+    i += rtn;
+    /* skip "PREPARE" word */
+    rtn = skip_non_blank(ptr+i, len-i);
+    if (rtn < 0)
+        return false;
+    i += rtn;
+    /* skip space */
+    rtn = skip_blank(ptr+i, len-i);
+    if (rtn < 0)
+        return false;
+    i += rtn;
+    /* skip plan_name */
+    rtn = skip_non_blank(ptr+i, len-i);
+    if (rtn < 0)
+        return false;
+    i += rtn;
+    /* skip space */
+    rtn = skip_blank(ptr+i, len-i);
+    if (rtn < 0)
+        return false;
+    i += rtn;
+    /* skip args: a parenthesised list, nesting allowed */
+    if (*(ptr+i) == '(')
+    {
+        args ++;
+        i++;
+        while(args > 0)
+        {
+            if (*(ptr+i) == ')')
+                args --;
+            else if (*(ptr+i) == '(')
+                args ++;
+            i++;
+            if (i >= len)
+                return false;
+        }
+        /* skip space */
+        rtn = skip_blank(ptr+i, len-i);
+        if (rtn < 0)
+            return false;
+        i += rtn;
+    }
+    /* skip "AS" word (two characters are stepped over without comparing them) */
+    i += strlen("AS");
+    if (i >= len)
+        return false;
+    /* skip space */
+    rtn = skip_blank(ptr+i, len-i);
+    if (rtn < 0)
+        return false;
+    i += rtn;
+    /* check "SELECT" word */
+    if (len-i < strlen("SELECT"))
+        return false;
+    return (strncasecmp(ptr+i,"SELECT",strlen("SELECT")) == 0);
+}
+
+/* Tell whether the statement named by an "EXECUTE name ..." or "DEALLOCATE
+ * name" query is a SELECT, as judged by PGR_is_select_prepared_statement(). */
+static bool
+is_statement_as_select(char * query_string)
+{
+    char * ptr = NULL;
+    char * name = NULL;
+    int rtn = 0;
+    int i = 0;
+    int j = 0;
+    int len = 0;
+    bool result = false;
+    PrepareStmt stmt;
+
+    if (query_string == NULL)
+        return false;
+    ptr = (char *)query_string;
+    len = strlen (query_string);
+    /* skip leading space, as parse_message() does before classifying */
+    rtn = skip_blank(ptr+i, len-i);
+    if (rtn < 0)
+        return false;
+    i += rtn;
+    /* skip "EXECUTE" or "DEALLOCATE" word */
+    rtn = skip_non_blank(ptr+i, len-i);
+    if (rtn < 0)
+        return false;
+    i += rtn;
+    /* skip space */
+    rtn = skip_blank(ptr+i, len-i);
+    if (rtn < 0)
+        return false;
+    i += rtn;
+    /* len + 1 zeroed bytes: whatever is copied below stays terminated */
+    if ((name = malloc(len + 1)) == NULL)
+        return false;
+    memset(name,0,len + 1);
+    /* copy the name ('_' included); the query's own NUL ends the loop at the latest */
+    while (isalnum((unsigned char) ptr[i]) || ptr[i] == '_')
+        name[j++] = ptr[i++];
+    stmt.name = name;
+    result = PGR_is_select_prepared_statement(&stmt);
+    free(name);
+    return result;
+}
+
+/* True if the query in debug_query_string is judged by
+ * is_prepared_as_select() to prepare a SELECT; false when it is NULL. */
+bool
+PGR_is_select_prepare_query(void)
+{
+    char *current_query = (char *) debug_query_string;
+
+    return (current_query != NULL) ? is_prepared_as_select(current_query) : false;
+}
+
+/*
+ * Decode a string of the form "(n)(n)..." into bytes: the decimal number
+ * inside the k-th pair of parentheses is stored, cast to char, in md5Salt[k].
+ * Returns md5Salt.  NOTE(review): the number of groups is not limited here,
+ * so the caller's buffer must be large enough for the input - confirm.
+ */
+char *
+PGR_get_md5salt(char * md5Salt, char * string)
+{
+    char buf[24];
+    char * ptr = md5Salt;
+    int len = strlen(string);
+    int i, cnt = 0, index = 0;
+    bool set_flag = false;
+
+    for ( i = 0 ; i < len ; i ++)
+    {
+        if (*(string+i) == ')')
+        {
+            buf[index] = '\0';
+            *ptr = (char)atoi(buf);
+            set_flag = false;
+        }
+        /* over-long digit runs are truncated so that buf cannot overflow */
+        if (set_flag && index < (int) sizeof(buf) - 1)
+            buf[index++] = *(string+i);
+        if (*(string+i) == '(')
+        {
+            set_flag = true;
+            index = 0;
+            ptr = md5Salt + cnt++;
+        }
+    }
+    return md5Salt;
+}
+
+#endif /* USE_REPLICATION */
diff -aruN postgresql-8.2.4/src/backend/libpq/replicate_com.c pgcluster-1.7.0rc7/src/backend/libpq/replicate_com.c
--- postgresql-8.2.4/src/backend/libpq/replicate_com.c 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/libpq/replicate_com.c 2007-02-18 22:52:16.000000000 +0100
@@ -0,0 +1,675 @@
+/*--------------------------------------------------------------------
+ * FILE:
+ * replicate_com.c
+ *
+ * NOTE:
+ * This file is composed of the low level functions used by the
+ * replication code at the backend: socket creation/teardown and
+ * reading of the configuration file, as listed under
+ * INTERFACE ROUTINES below.
+ *
+ *--------------------------------------------------------------------
+ */
+
+/*--------------------------------------
+ * INTERFACE ROUTINES
+ *
+ * setup/teardown:
+ * PGR_Close_Sock
+ * PGR_Free_Conf_Data
+ * I/O call:
+ * PGR_Create_Socket_Connect
+ * PGR_Create_Socket_Bind
+ * PGR_Create_Acception
+ * table handling:
+ * PGR_Get_Conf_Data
+ *-------------------------------------
+ */
+#ifdef USE_REPLICATION
+
+#include "postgres.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <netinet/in.h>
+#ifdef HAVE_NETINET_TCP_H
+#include <netinet/tcp.h>
+#endif
+#include <netdb.h>
+#include <arpa/inet.h>
+#include <time.h>
+
+#include "libpq/libpq.h"
+#include "miscadmin.h"
+#include "nodes/print.h"
+#include "utils/guc.h"
+#include "parser/parser.h"
+#include "access/xact.h"
+#include "replicate_com.h"
+
+int PGR_Create_Socket_Connect(int * fdP, char * hostName , unsigned short portNumber);
+void PGR_Close_Sock(int * sock);
+int PGR_Create_Socket_Bind(int * fdP, char * hostName , unsigned short portNumber);
+int PGR_Create_Acception(int fd, int * sockP, char * hostName , unsigned short portNumber);
+int PGR_Free_Conf_Data(void);
+int PGR_Get_Conf_Data(char * dir , char * fname);
+void PGRset_recovery_packet_no(RecoveryPacket * packet, int packet_no);
+unsigned int PGRget_ip_by_name(char * host);
+int PGRget_time_value(char *str);
+
+static char * get_string(char * buf);
+static bool is_start_tag(char * ptr);
+static bool is_end_tag(char * ptr);
+static void init_conf_data(ConfDataType *conf);
+static int get_key(char * key, char * str);
+static int get_conf_key_value(char * key, char * value , char * str);
+static int add_conf_data(char *table,int rec_no, char *key,char * value);
+static int get_table_data(FILE * fp,char * table, int rec_no);
+static int get_single_data(char * str);
+static int get_conf_file(char * fname);
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ * PGR_Create_Socket_Connect()
+ * NOTES
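+ * create a new TCP socket and connect it to the given host and port
+ * ARGS
+ * int * fdP: (OUT) the connected socket, or -1 on failure
+ * char * hostName: (IN) name of the host to connect to
+ * unsigned short portNumber: (IN) port to connect to (1000 or above)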
+ * RETURN
+ * OK: STATUS_OK
+ * NG: STATUS_ERROR
+ *--------------------------------------------------------------------
+ */
+int
+PGR_Create_Socket_Connect(int * fdP, char * hostName , unsigned short portNumber)
+{
+    struct sockaddr_in addr;
+    struct hostent *hp;
+    int one = 1;
+
+    /* test for NULL before dereferencing; ports below 1000 are refused */
+    if ((hostName == NULL) || (*hostName == '\0') || (portNumber < 1000))
+    {
+        * fdP = -1;
+        return STATUS_ERROR;
+    }
+
+    /* create the socket; *fdP ends up as -1 on every failure path */
+    if ((*fdP = socket(AF_INET, SOCK_STREAM, 0)) < 0)
+    {
+        * fdP = -1;
+        return STATUS_ERROR;
+    }
+
+    /* socket options: address reuse and TCP_NODELAY */
+    if ((setsockopt(*fdP, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one))) == -1)
+    {
+        PGR_Close_Sock(fdP);
+        return STATUS_ERROR;
+    }
+    if (setsockopt(*fdP, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) < 0)
+    {
+        PGR_Close_Sock(fdP);
+        return STATUS_ERROR;
+    }
+
+    /* resolve the host name; only an IPv4 result is accepted */
+    /* (the former INADDR_ANY fallback was unreachable: an empty name is rejected above) */
+    hp = gethostbyname(hostName);
+    if ((hp == NULL) || (hp->h_addrtype != AF_INET))
+    {
+        PGR_Close_Sock(fdP);
+        return STATUS_ERROR;
+    }
+
+    /* clear the whole address first so that no field is left undefined */
+    memset(&addr, 0, sizeof(addr));
+    addr.sin_family = AF_INET;
+    addr.sin_port = htons(portNumber);
+    memmove((char *) &(addr.sin_addr), (char *) hp->h_addr, hp->h_length);
+
+    /* connect; the socket is closed again if that fails */
+    if (connect(*fdP, (struct sockaddr *) &addr, sizeof(addr)) < 0)
+    {
+        PGR_Close_Sock(fdP);
+        return STATUS_ERROR;
+    }
+
+    return STATUS_OK;
+}
+
+/*
+ * Create a TCP socket bound to hostName:portNumber (any local address when
+ * hostName is NULL or empty) and put it into listening state.
+ * On success *fdP is the listening socket; returns STATUS_OK / STATUS_ERROR.
+ */
+int
+PGR_Create_Socket_Bind(int * fdP, char * hostName , unsigned short portNumber)
+{
+    struct sockaddr_in addr;
+    int one = 1;
+
+    if ((*fdP = socket(AF_INET, SOCK_STREAM, 0)) < 0)
+        return STATUS_ERROR;
+    if ((setsockopt(*fdP, SOL_SOCKET, SO_REUSEADDR, (char *) &one, sizeof(one))) == -1)
+    {
+        PGR_Close_Sock(fdP);
+        return STATUS_ERROR;
+    }
+
+    /* clear the whole address first so that no field is left undefined */
+    memset(&addr, 0, sizeof(addr));
+    addr.sin_family = AF_INET;
+    addr.sin_port = htons(portNumber);
+    if ((hostName == NULL ) || (hostName[0] == '\0'))
+        addr.sin_addr.s_addr = htonl(INADDR_ANY);
+    else
+    {
+        struct hostent *hp;
+
+        hp = gethostbyname(hostName);
+        if ((hp == NULL) || (hp->h_addrtype != AF_INET))
+        {
+            PGR_Close_Sock(fdP);
+            return STATUS_ERROR;
+        }
+        memmove((char *) &(addr.sin_addr), (char *) hp->h_addr, hp->h_length);
+    }
+
+    if (bind(*fdP, (struct sockaddr *) &addr, sizeof(addr)) < 0)
+    {
+        PGR_Close_Sock(fdP);
+        return STATUS_ERROR;
+    }
+    if (listen(*fdP, MAX_SOCKET_QUEUE ) < 0)
+    {
+        PGR_Close_Sock(fdP);
+        return STATUS_ERROR;
+    }
+
+    return STATUS_OK;
+}
+
+/*
+ * Accept one connection on listening socket fd and enable TCP_NODELAY and
+ * SO_KEEPALIVE on it.  *sockP receives the new socket, or -1 on failure.
+ * hostName and portNumber are not used.
+ */
+int
+PGR_Create_Acception(int fd, int * sockP, char * hostName , unsigned short portNumber)
+{
+    struct sockaddr addr;
+    socklen_t len = sizeof(addr);    /* accept() takes a socklen_t *, not a size_t * */
+    int one = 1;
+    int sock;
+
+    *sockP = -1;
+    if ((sock = accept(fd, &addr, &len)) < 0)
+        return STATUS_ERROR;
+
+    if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *) &one, sizeof(one)) < 0 ||
+        setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *) &one, sizeof(one)) < 0)
+    {
+        close(sock);    /* do not leak the accepted descriptor */
+        return STATUS_ERROR;
+    }
+
+    *sockP = sock;
+    return STATUS_OK;
+}
+
+/* Close the descriptor held in *sock and store -1 in its place. */
+void PGR_Close_Sock(int * sock)
+{
+    int fd = *sock;
+    *sock = -1;
+    close(fd);
+}
+
+static char *
+get_string(char * buf) /* cleans one text line in place and returns buf */
+{
+ int i,len1,len2,start_flag;
+ char *readp, *writep;
+ /* Result: leading blanks dropped, text cut at '#', blanks after the last non-blank dropped. */
+ writep = readp = buf;
+ i = len1 = 0;
+ while (*(readp +i) != '\0') /* pass 1: len1 = number of non-blank characters in the whole line */
+ {
+ if (!isspace(*(readp+ i)))
+ {
+ len1 ++;
+ }
+ i++;
+ }
+ start_flag = len2 = 0; /* start_flag: a non-blank was seen; len2: non-blanks copied so far */
+ while (*readp != '\0') /* pass 2: copy the kept characters towards the front of buf */
+ {
+ if (*readp == '#') /* '#' ends the text; the rest of the line is discarded */
+ {
+ *writep = '\0';
+ break;
+ }
+ if (isspace(*readp))
+ {
+ if ((len2 >= len1) || (!start_flag)) /* blank before the first or after the last non-blank: drop it */
+ {
+ readp++;
+ continue;
+ }
+ *writep = *readp; /* blank inside the text: keep it */
+ }
+ else
+ {
+ start_flag = 1;
+ *writep = *readp;
+ len2 ++;
+ }
+ readp ++;
+ writep ++;
+ }
+ *writep = '\0'; /* note: len1 also counts characters after '#', so blanks just before '#' are kept */
+ return buf;
+}
+
+/* True when ptr begins with '<' that is not followed by '/'. */
+static bool
+is_start_tag(char * ptr)
+{
+    if (ptr[0] != '<')
+        return false;
+    /* "</" begins an end tag instead */
+    return (ptr[1] != '/') ? true : false;
+}
+
+/* True when ptr begins with the two characters "</". */
+static bool
+is_end_tag(char * ptr)
+{
+    if (ptr[0] != '<')
+        return false;
+    /* only "</" counts; a plain '<' begins a start tag */
+    return (ptr[1] == '/') ? true : false;
+}
+
+/* Reset a record: zero the table/key/value buffers, rec_no 0, no links. */
+static void init_conf_data(ConfDataType *conf)
+{
+    conf->rec_no = 0;
+    conf->next = NULL;
+    conf->last = NULL;
+    memset(conf->value, 0, sizeof(conf->value));
+    memset(conf->key, 0, sizeof(conf->key));
+    memset(conf->table, 0, sizeof(conf->table));
+}
+
+/*
+ * Copy the tag name found between the first '<' (or "</") of str and the
+ * next '>' into key.  str itself is left untouched.
+ * Returns STATUS_OK, or STATUS_ERROR when no complete tag is present.
+ */
+static int
+get_key(char * key, char * str)
+{
+    char *name_start, *name_end;
+    size_t name_len;
+
+    name_start = strchr(str, '<');
+    if (name_start == NULL)
+        return STATUS_ERROR;
+    /* step over '<', and over the '/' of an end tag */
+    name_start += (name_start[1] == '/') ? 2 : 1;
+    /* look for '>' behind the '<' only, not from the start of the line */
+    name_end = strchr(name_start, '>');
+    if (name_end == NULL)
+        return STATUS_ERROR;
+    name_len = name_end - name_start;
+    memcpy(key, name_start, name_len);
+    key[name_len] = '\0';
+    return STATUS_OK;
+}
+
+static int
+get_conf_key_value(char * key, char * value , char * str) /* splits a tag line into key and value */
+{
+ int i;
+ int len1,len2,start_flag;
+ char * ptr_s,*ptr_e;
+ /* key: tag name via get_key(); value: text after the first '>' up to the next '<' or end of string */
+ if(get_key(key,str) == STATUS_ERROR)
+ {
+ return STATUS_ERROR;
+ }
+ ptr_e = strchr(str,'>'); /* the value starts right behind the first '>' */
+ if (ptr_e == NULL)
+ {
+ return STATUS_ERROR;
+ }
+ ptr_s = ptr_e + 1;
+ /* pass 1: len1 = number of non-blank characters in the value span */
+ len1 = 0;
+ while ((*ptr_s != '<') && (*ptr_s != '\0'))
+ {
+ if (! isspace(*ptr_s))
+ {
+ len1 ++;
+ }
+ ptr_s ++;
+ }
+ ptr_s = ptr_e + 1; /* rewind for pass 2: copy the span into value */
+ i = len2 = start_flag = 0; /* i: write index; len2: non-blanks copied; start_flag: a non-blank was seen */
+ while ((*ptr_s != '<') && (*ptr_s != '\0'))
+ {
+ if (isspace(*ptr_s))
+ {
+ if ((len2 >= len1) || (!start_flag)) /* blank before the first or after the last non-blank: drop it */
+ {
+ ptr_s ++;
+ continue;
+ }
+ *(value + i) = *ptr_s; /* blank inside the value: keep it */
+ }
+ else
+ {
+ start_flag = 1;
+ *(value + i) = *ptr_s;
+ len2 ++;
+ }
+ i++;
+ ptr_s ++;
+ }
+ *(value + i) = '\0'; /* value may be empty, e.g. when '>' is directly followed by '<' or the end */
+ return STATUS_OK;
+}
+
+/*
+ * Append one (table, rec_no, key, value) record to the list that runs from
+ * ConfData_Top to ConfData_End.  table may be NULL for a record that belongs
+ * to no table.  Returns STATUS_OK, or STATUS_ERROR if allocation fails.
+ */
+static int
+add_conf_data(char *table,int rec_no, char *key,char * value)
+{
+    ConfDataType * conf_data;
+
+    conf_data = (ConfDataType *)malloc(sizeof(ConfDataType));
+    if (conf_data == NULL)
+    {
+        return STATUS_ERROR;
+    }
+    /* zero-fills table, key and value, so the copies below stay terminated */
+    init_conf_data(conf_data);
+    /*
+     * Copy as strings, at most size-1 bytes: a fixed-size memcpy reads past
+     * the end of a shorter source and may leave the field unterminated.
+     */
+    if (table != NULL)
+        strncpy(conf_data->table, table, sizeof(conf_data->table) - 1);
+    strncpy(conf_data->key, key, sizeof(conf_data->key) - 1);
+    strncpy(conf_data->value, value, sizeof(conf_data->value) - 1);
+    conf_data->rec_no = rec_no;
+
+    /* link the record behind the current tail (or make it the only one) */
+    if (ConfData_Top == (ConfDataType *)NULL)
+        ConfData_Top = conf_data;
+    if (ConfData_End != (ConfDataType *)NULL)
+    {
+        conf_data->last = (char *)ConfData_End;
+        ConfData_End->next = (char *)conf_data;
+    }
+    ConfData_End = conf_data;
+    conf_data->next = (char *)NULL;
+
+    return STATUS_OK;
+}
+
+/*
+ * Read the body of a table section from fp up to the end tag that names
+ * `table`, storing every start-tag line found on the way as a key/value
+ * record of (table, rec_no).
+ * Returns STATUS_OK at the matching end tag; STATUS_ERROR if a line cannot
+ * be parsed or the file ends first.
+ */
+static int
+get_table_data(FILE * fp,char * table, int rec_no)
+{
+    char line[1024];
+    char key_buf[1024];
+    char value_buf[1024];
+    char *text;
+
+    for (;;)
+    {
+        if (fgets(line, sizeof(line), fp) == NULL)
+            break;
+
+        /* pick up a data string; lines that come out empty are ignored */
+        text = get_string(line);
+        if (strlen(text) == 0)
+            continue;
+
+        if (is_end_tag(text))
+        {
+            if (get_key(key_buf, text) == STATUS_ERROR)
+                return STATUS_ERROR;
+            if (strcmp(key_buf, table) == 0)
+                return STATUS_OK;
+        }
+        if (is_start_tag(text))
+        {
+            if (get_conf_key_value(key_buf, value_buf, text) == STATUS_ERROR)
+                return STATUS_ERROR;
+            add_conf_data(table, rec_no, key_buf, value_buf);
+        }
+    }
+    /* the input ended before the matching end tag was found */
+    return STATUS_ERROR;
+}
+
+/* Store one key/value line as a record outside any table (record 0). */
+static int
+get_single_data(char * str)
+{
+    char value_buf[1024], key_buf[1024];
+    int parsed = get_conf_key_value(key_buf, value_buf, str);
+
+    if (parsed != STATUS_ERROR)
+        add_conf_data(NULL, 0, key_buf, value_buf);
+    /* anything but a parse error counts as success */
+    return (parsed != STATUS_ERROR) ? STATUS_OK : STATUS_ERROR;
+}
+
+
+/*
+ * Read configuration file fname.  A line with a start tag but no end tag
+ * opens a table section (get_table_data()); a line with both is a single
+ * key/value item (get_single_data()).  STATUS_ERROR if open or a tag parse fails.
+ */
+static int
+get_conf_file(char * fname)
+{
+    FILE * fp = NULL;
+    int len;
+    char buf[1024];
+    char key_buf[1024];
+    char last_key_buf[1024];
+    char *ptr;
+    int rec_no = 0;
+
+    /* configuration file open */
+    if ((fp = fopen(fname,"r")) == NULL)
+    {
+        return STATUS_ERROR;
+    }
+    /* configuration file read */
+    memset(last_key_buf,0,sizeof(last_key_buf));
+    memset(key_buf,0,sizeof(key_buf));
+    while (fgets(buf,sizeof(buf),fp) != NULL)
+    {
+        /* pick up a data string */
+        ptr = get_string(buf);
+        len = strlen(ptr);
+        if (len == 0)
+            continue;
+        if (is_start_tag(ptr))
+        {
+            if(get_key(key_buf,ptr) == STATUS_ERROR)
+            {
+                fclose(fp);
+                return STATUS_ERROR;
+            }
+            /*
+             * Search for the "</" that starts an end tag.  (Searching for
+             * "" always succeeds, which made the table branch unreachable.)
+             */
+            if (strstr(ptr,"</") == NULL)
+            {
+                /* a new table name restarts the record numbering */
+                if (strcmp(last_key_buf,key_buf))
+                {
+                    rec_no = 0;
+                    strcpy(last_key_buf,key_buf);
+                }
+                get_table_data(fp,key_buf,rec_no);
+                rec_no ++;
+            }
+            else
+                get_single_data(ptr);
+        }
+    }
+    fclose(fp);
+    return STATUS_OK;
+}
+
+/*
+ * Free every record of the configuration list and reset ConfData_Top and
+ * ConfData_End to NULL.  Returns STATUS_ERROR when the list is already
+ * empty, STATUS_OK otherwise.
+ */
+int
+PGR_Free_Conf_Data(void)
+{
+    ConfDataType *node = ConfData_Top;
+    if (node == (ConfDataType *) NULL)
+        return STATUS_ERROR;
+    do
+    {
+        ConfDataType *following = (ConfDataType *) node->next;
+        free(node);
+        node = following;
+    } while (node != (ConfDataType *) NULL);
+    ConfData_End = ConfData_Top = (ConfDataType *) NULL;
+    return STATUS_OK;
+}
+
+/*
+ * Load the configuration file "dir/fname" into a fresh configuration list.
+ * ConfData_Top/ConfData_End are reset to NULL first, without freeing any
+ * list they pointed to.  Returns the status of get_conf_file(), or
+ * STATUS_ERROR for a NULL argument or failed allocation.
+ */
+int
+PGR_Get_Conf_Data(char * dir , char * fname)
+{
+    char *path;
+    int result;
+
+    if (dir == NULL || fname == NULL)
+        return STATUS_ERROR;
+    /* room for both parts, the '/' between them and the terminator */
+    path = malloc(strlen(dir) + strlen(fname) + 2);
+    if (path == NULL)
+        return STATUS_ERROR;
+    sprintf(path, "%s/%s", dir, fname);
+
+    ConfData_End = ConfData_Top = (ConfDataType *) NULL;
+    result = get_conf_file(path);
+    free(path);
+    return result;
+}
+
+/* Store htons(packet_no) in packet->packet_no;
+ * a NULL packet is ignored. */
+void
+PGRset_recovery_packet_no(RecoveryPacket * packet, int packet_no)
+{
+    if (packet != NULL)
+    {
+        packet->packet_no = htons(packet_no) ;
+    }
+}
+
+/*
+ * Resolve host with gethostbyname() and pack the first four bytes of its
+ * first address into an unsigned int, byte 0 in the lowest eight bits and
+ * byte 3 in the highest.  Returns 0 for a NULL/empty name or failed lookup.
+ */
+unsigned int
+PGRget_ip_by_name(char * host)
+{
+    struct hostent *entry;
+    unsigned int packed = 0;
+    int pos;
+
+    if (host == NULL || host[0] == '\0')
+        return 0;
+
+    entry = gethostbyname(host);
+    if (entry == NULL)
+        return 0;
+
+    /* shift in bytes 3, 2, 1, 0 so that byte 0 ends up lowest */
+    for (pos = 3; pos >= 0; pos--)
+    {
+        packed = (packed << 8) | (unsigned char) entry->h_addr_list[0][pos];
+    }
+    return packed;
+}
+
+/*
+ * Convert a time setting such as "30", "10m" or "2h" to seconds.
+ *
+ * The number is read with atoi() up to the first character that is neither
+ * a digit nor white space; that character selects the unit ('m'/'M'
+ * minutes, 'h'/'H' hours, anything else seconds) and is overwritten with
+ * '\0' in the caller's string.  Returns -1 for a NULL string.
+ */
+int
+PGRget_time_value(char *str)
+{
+    char *suffix;
+    int unit = 1;
+
+    if (str == NULL)
+        return -1;
+
+    /* find the first character that cannot belong to the number */
+    suffix = str;
+    while (*suffix != '\0' && (isdigit(*suffix) || isspace(*suffix)))
+        suffix++;
+
+    if (*suffix != '\0')
+    {
+        if (*suffix == 'm' || *suffix == 'M')
+            unit = 60;
+        else if (*suffix == 'h' || *suffix == 'H')
+            unit = 60*60;
+        *suffix = '\0';
+    }
+
+    return (atoi(str) * unit);
+}
+
+#endif /* USE_REPLICATION */
diff -aruN postgresql-8.2.4/src/backend/main/main.c pgcluster-1.7.0rc7/src/backend/main/main.c
--- postgresql-8.2.4/src/backend/main/main.c 2007-01-04 01:58:01.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/main/main.c 2007-02-18 22:52:16.000000000 +0100
@@ -316,6 +316,13 @@
printf(_(" -r FILENAME send stdout and stderr to given file\n"));
printf(_(" -x NUM internal use\n"));
+#ifdef USE_REPLICATION
+ printf(_("\nOptions for PGCluster only:\n"));
+ printf(_(" -R recovery startup with rsync\n"));
+ printf(_(" -u recovery startup with rsync (does not create backup files)\n"));
+ printf(_(" -U recovery startup with pg_dump\n"));
+#endif /* USE_REPLICATION */
+
printf(_("\nPlease read the documentation for the complete list of run-time\n"
"configuration settings and how to set them on the command line or in\n"
"the configuration file.\n\n"
diff -aruN postgresql-8.2.4/src/backend/parser/gram.y pgcluster-1.7.0rc7/src/backend/parser/gram.y
--- postgresql-8.2.4/src/backend/parser/gram.y 2006-11-05 23:42:09.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/parser/gram.y 2007-02-18 22:52:16.000000000 +0100
@@ -412,10 +412,10 @@
QUOTE
READ REAL REASSIGN RECHECK REFERENCES REINDEX RELATIVE_P RELEASE RENAME
- REPEATABLE REPLACE RESET RESTART RESTRICT RETURNING RETURNS REVOKE RIGHT
+ REPEATABLE REPLACE REPLICATION RESET RESTART RESTRICT RETURNING RETURNS REVOKE RIGHT
ROLE ROLLBACK ROW ROWS RULE
- SAVEPOINT SCHEMA SCROLL SECOND_P SECURITY SELECT SEQUENCE
+ SAVEPOINT SCHEMA SCROLL SECOND_P SECURITY SELECT SEQUENCE SERVER
SERIALIZABLE SESSION SESSION_USER SET SETOF SHARE
SHOW SIMILAR SIMPLE SMALLINT SOME STABLE START STATEMENT
STATISTICS STDIN STDOUT STORAGE STRICT_P SUBSTRING SUPERUSER_P SYMMETRIC
@@ -1224,6 +1224,12 @@
n->name = $2;
$$ = (Node *) n;
}
+ | SHOW REPLICATION SERVER
+ {
+ VariableShowStmt *n = makeNode(VariableShowStmt);
+ n->name = "replication_server";
+ $$ = (Node *) n;
+ }
| SHOW TIME ZONE
{
VariableShowStmt *n = makeNode(VariableShowStmt);
@@ -8678,6 +8684,7 @@
| RENAME
| REPEATABLE
| REPLACE
+ | REPLICATION
| RESET
| RESTART
| RESTRICT
@@ -8692,6 +8699,7 @@
| SCROLL
| SECOND_P
| SECURITY
+ | SERVER
| SEQUENCE
| SERIALIZABLE
| SESSION
diff -aruN postgresql-8.2.4/src/backend/parser/keywords.c pgcluster-1.7.0rc7/src/backend/parser/keywords.c
--- postgresql-8.2.4/src/backend/parser/keywords.c 2006-10-07 23:51:02.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/parser/keywords.c 2007-02-18 22:52:16.000000000 +0100
@@ -281,6 +281,7 @@
{"relative", RELATIVE_P},
{"release", RELEASE},
{"rename", RENAME},
+ {"replication", REPLICATION},
{"repeatable", REPEATABLE},
{"replace", REPLACE},
{"reset", RESET},
diff -aruN postgresql-8.2.4/src/backend/parser/parse_clause.c pgcluster-1.7.0rc7/src/backend/parser/parse_clause.c
--- postgresql-8.2.4/src/backend/parser/parse_clause.c 2006-11-28 13:54:41.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/parser/parse_clause.c 2007-02-18 22:52:16.000000000 +0100
@@ -34,6 +34,9 @@
#include "rewrite/rewriteManip.h"
#include "utils/guc.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
#define ORDER_CLAUSE 0
#define GROUP_CLAUSE 1
@@ -154,7 +157,18 @@
* analyze.c will eventually do the corresponding heap_close(), but *not*
* release the lock.
*/
+#ifdef USE_REPLICATION
+ if (PGRautoLockTable == true)
+ {
+ pstate->p_target_relation = heap_openrv(relation, ShareRowExclusiveLock);
+ }
+ else
+ {
+ pstate->p_target_relation = heap_openrv(relation, RowExclusiveLock);
+ }
+#else
pstate->p_target_relation = heap_openrv(relation, RowExclusiveLock);
+#endif /* USE_REPLICATION */
/*
* Now build an RTE.
diff -aruN postgresql-8.2.4/src/backend/parser/parse_relation.c pgcluster-1.7.0rc7/src/backend/parser/parse_relation.c
--- postgresql-8.2.4/src/backend/parser/parse_relation.c 2006-10-04 02:29:56.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/parser/parse_relation.c 2007-02-18 22:52:16.000000000 +0100
@@ -30,6 +30,9 @@
#include "utils/lsyscache.h"
#include "utils/syscache.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/* GUC parameter */
bool add_missing_from;
@@ -636,7 +639,14 @@
* to a rel in a statement, be careful to get the right access level
* depending on whether we're doing SELECT FOR UPDATE/SHARE.
*/
+#ifdef USE_REPLICATION
+ if (PGRautoLockTable == true)
+ lockmode = isLockedRel(pstate, refname) ? ShareRowExclusiveLock : AccessShareLock;
+ else
+ lockmode = isLockedRel(pstate, refname) ? RowShareLock : AccessShareLock;
+#else
lockmode = isLockedRel(pstate, refname) ? RowShareLock : AccessShareLock;
+#endif /* USE_REPLICATION */
rel = heap_openrv(relation, lockmode);
rte->relid = RelationGetRelid(rel);
diff -aruN postgresql-8.2.4/src/backend/postmaster/postmaster.c pgcluster-1.7.0rc7/src/backend/postmaster/postmaster.c
--- postgresql-8.2.4/src/backend/postmaster/postmaster.c 2007-01-04 01:58:01.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/postmaster/postmaster.c 2007-02-18 22:52:16.000000000 +0100
@@ -122,6 +122,9 @@
#include "storage/spin.h"
#endif
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/*
* List of active backends (or child processes anyway; we don't actually
@@ -363,6 +366,61 @@
#define EXIT_STATUS_0(st) ((st) == 0)
#define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1)
+#ifdef USE_REPLICATION
+char * Query_String = NULL;
+ReplicateServerInfo * ReplicateServerData = NULL;
+ReplicateServerInfo * CurrentReplicateServer = NULL;
+ReplicateServerInfo * LastReplicateServer = NULL;
+int ReplicateServerShmid = -1;
+int TransactionQuery = 0;
+int TransactionSock = -1;
+int Transaction_Mode = 0; /* updated per command via PGR_Set_Transaction_Mode() */
+bool PGR_Noticed_Abort = false;
+bool Session_Authorization_Mode = false;
+bool Create_Temp_Table_Mode = false;
+ConfDataType * ConfData_Top = (ConfDataType *)NULL;
+ConfDataType * ConfData_End = (ConfDataType *)NULL;
+int RecoveryPortNumber = 0;
+char * RsyncPath = NULL;
+char * RsyncOption = NULL;
+char * PgDumpPath = NULL;
+bool RsyncCompress = true;
+ReplicateNow * ReplicateCurrentTime = NULL;
+CopyData * PGRCopyData = NULL;
+bool PGR_Copy_Data_Need_Replicate = false;
+PGR_Stand_Alone_Type * PGR_Stand_Alone = NULL;
+PGR_Not_Replicate_Type * PGR_Not_Replicate = NULL;
+int PGR_Not_Replicate_Rec_Num = 0;
+bool PGR_Is_Replicated_Query = false;
+PGR_Check_Lock_Type PGR_Check_Lock; /* fields are initialized at postmaster start */
+int PGR_Sock_To_Replication_Server = -1;
+bool PGR_Need_Notice = false;
+bool PGR_Lock_Noticed = false; /* set once PGR_Notice_Conflict() did not return STATUS_ERROR */
+bool PGR_Recovery_Option = false; /* set by the -U, -R and -u postmaster options */
+int PGR_recovery_mode = 0; /* PGR_HOT_RECOVERY (-U), PGR_COLD_RECOVERY (-R) or PGR_WITHOUT_BACKUP (-u) */
+char * PGRSelfHostName = NULL;
+int PGR_Pending_Sem_Num = 0;
+bool PGR_Reliable_Mode_Wait = true;
+PGR_Retry_Query_Type PGR_Retry_Query;
+int ClusterDBShmid = -1;
+ClusterDBInfo * ClusterDBData = NULL;
+PGR_Password_Info * PGR_password = NULL;
+int PGR_Replication_Timeout = 60; /* NOTE(review): unit presumably seconds -- confirm */
+int PGR_Lifecheck_Timeout = 3;
+int PGR_Lifecheck_Interval = 11;
+
+/* initialized in utils/misc/guc.c */
+bool PGRforceLoadBalance = false;
+bool PGRcheckConstraintWithLock = false;
+bool PGRautoLockTable = true; /* when true, target relations are opened with ShareRowExclusiveLock */
+bool PGRnotReplicatePreparedSelect = false;
+
+bool needToUpdateReplicateIdOnNextQueryIsDone=false; /* when set, replicate ids are incremented after the next replicated query */
+bool PGR_Is_Sync_OID = false; /* set true around large-object OID assignment in inv_api.c */
+
+static int Master_Pid = 0; /* result of PGR_Master_Main(); receives forwarded postmaster signals via kill() */
+static int Lifecheck_Pid = 0; /* result of PGR_Lifecheck_Main(); receives forwarded postmaster signals via kill() */
+#endif /* USE_REPLICATION */
/*
* Postmaster main entry point
@@ -375,6 +433,11 @@
char *userDoption = NULL;
int i;
+#ifdef USE_REPLICATION
+ PGR_Check_Lock.check_lock_conflict = false;
+ PGR_Check_Lock.status_lock_conflict = STATUS_OK;
+#endif /* USE_REPLICATION */
+
MyProcPid = PostmasterPid = getpid();
IsPostmasterEnvironment = true;
@@ -420,10 +483,24 @@
* tcop/postgres.c (the option sets should not conflict)
* and with the common help() function in main/main.c.
*/
- while ((opt = getopt(argc, argv, "A:B:c:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:")) != -1)
+ while ((opt = getopt(argc, argv, "A:B:c:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:URu")) != -1)
{
switch (opt)
{
+#ifdef USE_REPLICATION
+ case 'U':
+ PGR_Recovery_Option = true;
+ PGR_recovery_mode = PGR_HOT_RECOVERY;
+ break;
+ case 'R':
+ PGR_Recovery_Option = true;
+ PGR_recovery_mode = PGR_COLD_RECOVERY;
+ break;
+ case 'u':
+ PGR_Recovery_Option = true;
+ PGR_recovery_mode = PGR_WITHOUT_BACKUP;
+ break;
+#endif /* USE_REPLICATION */
case 'A':
SetConfigOption("debug_assertions", optarg, PGC_POSTMASTER, PGC_S_ARGV);
break;
@@ -696,6 +773,30 @@
*/
CreateDataDirLockFile(true);
+#ifdef USE_REPLICATION
+	if (PGR_Get_Conf_Data( DataDir, CLUSTER_CONF_FILE ) == STATUS_OK)	/* replication set-up runs only when the cluster conf file was read */
+	{
+		if (PGR_Init_Replicate_Server_Data() != STATUS_OK)
+		{
+			fprintf(stderr,"PGR_Init_Replicate_Server_Data failed\n");
+			ExitPostmaster(1);	/* start-up failure: exit non-zero so callers do not see success */
+		}
+		PGR_Set_Replicate_Server_Socket();
+		PGR_Free_Conf_Data();
+		if ((PGR_Recovery_Option) &&	/* hot recovery is not run here; it runs later in a forked child */
+			(PGR_recovery_mode != PGR_HOT_RECOVERY))
+		{
+			fprintf(stderr,"Start in recovery mode! \n");
+			fprintf(stderr,"Please wait until a data synchronization finishes from Master DB... \n");
+			if (PGR_Recovery_Main(PGR_recovery_mode) != STATUS_OK)
+			{
+				fprintf(stderr,"PGR_Recovery_Main() failed with cold recovery\n");
+				ExitPostmaster(1);	/* recovery failed: exit non-zero, like the other PGR failure paths */
+			}
+		}
+	}
+#endif /* USE_REPLICATION */
+
/*
* If timezone is not set, determine what the OS uses. (In theory this
* should be done during GUC initialization, but because it can take as
@@ -960,6 +1061,21 @@
*/
StartupPID = StartupDataBase();
+#ifdef USE_REPLICATION
+ Master_Pid = PGR_Master_Main();
+ if (Master_Pid < 0)
+ {
+ elog(DEBUG1,"PGR_Master_Main failed");
+ ExitPostmaster(1);
+ }
+ Lifecheck_Pid = PGR_Lifecheck_Main();
+ if (Lifecheck_Pid < 0)
+ {
+ elog(DEBUG1,"PGR_Lifecheck_Main failed");
+ ExitPostmaster(1);
+ }
+#endif /* USE_REPLICATION */
+
status = ServerLoop();
/*
@@ -1133,6 +1249,60 @@
last_touch_time = time(NULL);
nSockets = initMasks(&readmask);
+#ifdef USE_REPLICATION
+ if (PGR_Recovery_Option)
+ {
+ int pid = 0;
+ pid = fork_process();
+ if (pid == 0) /* child */
+ {
+ fprintf(stderr,"Start in recovery mode! \n");
+ fprintf(stderr,"Please wait until a data synchronization finishes from Master DB... \n");
+ IsUnderPostmaster = true; /* we are a postmaster subprocess now */
+
+ /* Close the postmaster's sockets */
+ ClosePostmasterPorts(false);
+ /* Lose the postmaster's on-exit routines and port connections */
+ on_exit_reset();
+ /* Release postmaster's working memory context */
+ MemoryContextSwitchTo(TopMemoryContext);
+ MemoryContextDelete(PostmasterContext);
+ PostmasterContext = NULL;
+ if (PGR_recovery_mode == PGR_HOT_RECOVERY)
+ {
+ if (PGR_Recovery_Main(PGR_recovery_mode) != STATUS_OK)
+ {
+ elog(DEBUG1,"PGR_Recovery_Main() failed with hot recovery.");
+ ExitPostmaster(1);
+ }
+ }
+ else
+ {
+ if (PGR_recovery_queue_data_req() != STATUS_OK)
+ {
+ elog(DEBUG1,"PGR_recovery_queue_data_req failed");
+ ExitPostmaster(1);
+ }
+ }
+ PGR_recovery_finish_send();
+ PGR_Recovery_Option = false;
+ fprintf(stderr,"OK! The data synchronization with Master DB was finished. \n");
+
+ ExitPostmaster(0);
+ }
+ else if (pid < 0)
+ {
+ ExitPostmaster(1);
+ }
+ }
+ if (PGR_password != NULL)
+ {
+ if(PGR_password->password != NULL)
+ memset(PGR_password->password,0,PASSWORD_MAX_LENGTH);
+ memset(PGR_password->md5Salt,0,sizeof(PGR_password->md5Salt));
+ memset(PGR_password->cryptSalt,0,sizeof(PGR_password->cryptSalt));
+ }
+#endif /* USE_REPLICATION */
for (;;)
{
@@ -1591,6 +1761,9 @@
ereport(FATAL,
(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
errmsg("sorry, too many clients already")));
+#ifdef USE_REPLICATION
+ return STATUS_ERROR;
+#endif
break;
case CAC_OK:
default:
@@ -1858,6 +2031,23 @@
(errmsg_internal("postmaster received signal %d",
postgres_signal_arg)));
+#ifdef USE_REPLICATION
+ if (PGR_Get_Cluster_Status() == STATUS_RECOVERY)
+ {
+ PGR_recovery_error_send();
+ PGR_Recovery_Option = false;
+ }
+ if (Master_Pid > 0)
+ {
+ kill (Master_Pid,postgres_signal_arg);
+ }
+ if (Lifecheck_Pid > 0)
+ {
+ kill (Lifecheck_Pid,postgres_signal_arg);
+ }
+ PGR_delete_shm();
+#endif /* USE_REPLICATION */
+
switch (postgres_signal_arg)
{
case SIGTERM:
@@ -3452,6 +3642,16 @@
* MUST -- vadim 05-10-1999
*/
+#ifdef USE_REPLICATION
+ if (PGR_Get_Cluster_Status() == STATUS_RECOVERY)
+ {
+ write_stderr("sorry, recovery failed.");
+ PGR_recovery_error_send();
+ PGR_Recovery_Option = false;
+ }
+ PGR_delete_shm();
+#endif /* USE_REPLICATION */
+
proc_exit(status);
}
diff -aruN postgresql-8.2.4/src/backend/storage/large_object/inv_api.c pgcluster-1.7.0rc7/src/backend/storage/large_object/inv_api.c
--- postgresql-8.2.4/src/backend/storage/large_object/inv_api.c 2006-09-07 17:37:25.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/storage/large_object/inv_api.c 2007-02-18 22:52:16.000000000 +0100
@@ -36,6 +36,10 @@
#include "utils/fmgroids.h"
#include "utils/resowner.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
+
/*
* All accesses to pg_largeobject and its index make use of a single Relation
@@ -188,6 +192,9 @@
* use. We can use the index on pg_largeobject for checking OID
* uniqueness, even though it has additional columns besides OID.
*/
+#ifdef USE_REPLICATION
+ PGR_Is_Sync_OID = true;
+#endif /* USE_REPLICATION */
if (!OidIsValid(lobjId))
{
open_lo_relation();
@@ -206,6 +213,9 @@
*/
CommandCounterIncrement();
+#ifdef USE_REPLICATION
+ PGR_Is_Sync_OID = false;
+#endif /* USE_REPLICATION */
return lobjId;
}
diff -aruN postgresql-8.2.4/src/backend/storage/lmgr/deadlock.c pgcluster-1.7.0rc7/src/backend/storage/lmgr/deadlock.c
--- postgresql-8.2.4/src/backend/storage/lmgr/deadlock.c 2006-09-23 01:20:13.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/storage/lmgr/deadlock.c 2007-02-18 22:52:16.000000000 +0100
@@ -30,6 +30,9 @@
#include "storage/proc.h"
#include "utils/memutils.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/* One edge in the waits-for graph */
typedef struct
@@ -217,6 +220,13 @@
if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges))
elog(FATAL, "deadlock seems to have disappeared");
+#ifdef USE_REPLICATION
+ if (PGR_Notice_Conflict() == STATUS_ERROR)
+ {
+ return FALSE;
+ }
+ PGR_Lock_Noticed =true;
+#endif
return true; /* cannot find a non-deadlocked state */
}
@@ -426,6 +436,18 @@
int numLockModes,
lm;
+#ifdef USE_REPLICATION
+ /*
+ * In PGCluster mode, conflicts with procs that have a younger (larger)
+ * rep-id do not matter here; they are handled by the younger proc's own
+ * CheckDeadLock(). This is necessary so that all nodes resolve deadlocks
+ * in the same order: only the youngest (by rep-id) process is rolled back.
+ */
+ if ( MyProc->replicationId!=0 &&
+ MyProc -> replicationId < checkProc->replicationId)
+ return false;
+
+#endif
/*
* Have we already seen this proc?
*/
diff -aruN postgresql-8.2.4/src/backend/storage/lmgr/lmgr.c pgcluster-1.7.0rc7/src/backend/storage/lmgr/lmgr.c
--- postgresql-8.2.4/src/backend/storage/lmgr/lmgr.c 2006-10-04 02:29:57.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/storage/lmgr/lmgr.c 2007-02-18 22:52:16.000000000 +0100
@@ -26,6 +26,9 @@
#include "utils/inval.h"
#include "utils/lsyscache.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/*
* RelationInitLockInfo
@@ -476,9 +479,16 @@
SET_LOCKTAG_TRANSACTION(tag, xid);
+#ifdef USE_REPLICATION
+ if (!LockAcquire(&tag, ExclusiveLock, false,false))
+ elog(ERROR, "XactLockTableWait: LockAcquire failed");
+
+ LockRelease(&tag, ExclusiveLock,false);
+#else
(void) LockAcquire(&tag, ShareLock, false, false);
LockRelease(&tag, ShareLock, false);
+#endif /* USE_REPLICATION */
if (!TransactionIdIsInProgress(xid))
break;
@@ -635,3 +645,37 @@
}
return false; /* default case */
}
+
+#ifdef USE_REPLICATION
+/*
+ * XactLockTableWaitForCluster
+ *
+ * Wait for transaction xid to commit or abort; the lock on buffer is released while waiting and re-taken in exclusive mode afterwards.
+ */
+void
+XactLockTableWaitForCluster(TransactionId xid,Buffer buffer)
+{
+	LOCKTAG		tag;
+	TransactionId myxid = GetCurrentTransactionId();
+
+	Assert(!TransactionIdEquals( xid, myxid ));	/* must not wait on our own transaction */
+	/* drop the buffer lock before waiting on the transaction lock */
+	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+
+	SET_LOCKTAG_TRANSACTION(tag, xid);
+	/* take the lock tagged with xid; report under this function's own name on failure */
+	if (!LockAcquire(&tag, ExclusiveLock, false,false))
+		elog(ERROR, "XactLockTableWaitForCluster: LockAcquire failed");
+	/* re-take the buffer lock first, then give the transaction lock back */
+	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+	LockRelease(&tag, ExclusiveLock,false);
+
+	/*
+	 * Transaction was committed/aborted/crashed - we have to update
+	 * pg_clog if transaction is still marked as running.
+	 */
+	if (!TransactionIdDidCommit(xid) && !TransactionIdDidAbort(xid))
+		TransactionIdAbort(xid);
+}
+#endif /*USE_REPLICATION*/
diff -aruN postgresql-8.2.4/src/backend/storage/lmgr/lock.c pgcluster-1.7.0rc7/src/backend/storage/lmgr/lock.c
--- postgresql-8.2.4/src/backend/storage/lmgr/lock.c 2006-10-04 02:29:57.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/storage/lmgr/lock.c 2007-02-18 22:52:16.000000000 +0100
@@ -42,6 +42,10 @@
#include "utils/ps_status.h"
#include "utils/resowner.h"
+#ifdef USE_REPLICATION
+#include "storage/lmgr.h"
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/* This configuration variable is used to set the lock table size */
int max_locks_per_xact; /* set by guc.c */
@@ -737,6 +741,10 @@
status = LockCheckConflicts(lockMethodTable, lockmode,
lock, proclock, MyProc);
+#ifdef USE_REPLICATION
+ PGR_Check_Lock.status_lock_conflict = status;
+ PGR_Check_Lock.deadlock = false;
+#endif /* USE_REPLICATION */
if (status == STATUS_OK)
{
/* No conflict with held or previously requested locks */
@@ -746,6 +754,17 @@
else
{
Assert(status == STATUS_FOUND);
+#ifdef USE_REPLICATION
+ if ((PGR_Need_Notice == true) &&
+ (PGR_Check_Lock.check_lock_conflict == true))
+ {
+ if (!PGR_Lock_Noticed && PGR_Notice_Conflict() == STATUS_ERROR)
+ {
+ return FALSE;
+ }
+ PGR_Lock_Noticed = true;
+ }
+#endif /* USE_REPLICATION */
/*
* We can't acquire the lock immediately. If caller specified no
diff -aruN postgresql-8.2.4/src/backend/storage/lmgr/proc.c pgcluster-1.7.0rc7/src/backend/storage/lmgr/proc.c
--- postgresql-8.2.4/src/backend/storage/lmgr/proc.c 2006-11-21 21:59:52.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/storage/lmgr/proc.c 2007-02-18 22:52:16.000000000 +0100
@@ -43,6 +43,9 @@
#include "storage/procarray.h"
#include "storage/spin.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/* GUC variables */
int DeadlockTimeout = 1000;
@@ -263,6 +266,9 @@
MyProc->lwWaitLink = NULL;
MyProc->waitLock = NULL;
MyProc->waitProcLock = NULL;
+#ifdef USE_REPLICATION
+ MyProc->replicationId = 0;
+#endif
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
SHMQueueInit(&(MyProc->myProcLocks[i]));
@@ -395,6 +401,9 @@
MyProc->lwWaitLink = NULL;
MyProc->waitLock = NULL;
MyProc->waitProcLock = NULL;
+#ifdef USE_REPLICATION
+ MyProc->replicationId = 0;
+#endif
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
SHMQueueInit(&(MyProc->myProcLocks[i]));
@@ -737,6 +746,17 @@
GrantAwaitedLock();
return STATUS_OK;
}
+#ifdef USE_REPLICATION
+ if(proc->replicationId == 0 ||
+ (MyProc->replicationId > proc->replicationId &&
+ proc->heldLocks & aheadRequests) ) {
+ elog(DEBUG1,"origin's RId = %d , MyProc->RId = %d , skip",proc->replicationId,MyProc->replicationId);
+ aheadRequests |= (1 << proc->waitLockMode);
+ proc = (PGPROC *) MAKE_PTR(proc->links.next);
+ continue;
+ }
+
+#endif
/* Break out of loop to put myself before him */
break;
}
@@ -752,8 +772,21 @@
}
else
{
+#ifdef USE_REPLICATION
+ proc = (PGPROC *) &(waitQueue->links);
+ for (i = 0; i < waitQueue->size+1; i++){
+ elog(DEBUG1,"origin's RId = %d , MyProc->RId = %d",proc->replicationId,MyProc->replicationId);
+ if(proc->replicationId == 0 ||
+ MyProc->replicationId > proc->replicationId) {
+ proc= (PGPROC *) MAKE_PTR(proc->links.next);
+ }else {
+ break;
+ }
+ }
+#else
/* I hold no locks, so I can't push in front of anyone. */
proc = (PGPROC *) &(waitQueue->links);
+#endif /* USE_REPLICATION */
}
/*
@@ -776,7 +809,11 @@
* CheckDeadLock's recovery code, except that we shouldn't release the
* semaphore since we haven't tried to lock it yet.
*/
+#ifdef USE_REPLICATION
+ if (early_deadlock && proc->replicationId < MyProc->replicationId)
+#else
if (early_deadlock)
+#endif
{
RemoveFromWaitQueue(MyProc, hashcode);
return STATUS_ERROR;
@@ -976,6 +1013,9 @@
CheckDeadLock(void)
{
int i;
+#ifdef USE_REPLICATION
+ bool pgr_notice = false;
+#endif /* USE_REPLICATION */
/*
* Acquire exclusive lock on the entire shared lock data structures. Must
@@ -1047,6 +1087,10 @@
* such processes.
*/
+#ifdef USE_REPLICATION
+ pgr_notice = true;
+#endif
+
/*
* Release locks acquired at head of routine. Order is not critical, so
* do it back-to-front to avoid waking another CheckDeadLock instance
@@ -1055,6 +1099,12 @@
check_done:
for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
LWLockRelease(FirstLockMgrLock + i);
+#ifdef USE_REPLICATION
+ if (pgr_notice == true)
+ {
+ PGR_Notice_DeadLock();
+ }
+#endif
}
@@ -1110,6 +1160,15 @@
{
TimestampTz fin_time;
struct itimerval timeval;
+#ifdef USE_REPLICATION
+ int useFlag = 0;
+
+ if (ReplicateCurrentTime != NULL)
+ {
+ useFlag = ReplicateCurrentTime->useFlag;
+ ReplicateCurrentTime->useFlag = DATA_INIT;
+ }
+#endif /* USE_REPLICATION */
if (is_statement_timeout)
{
@@ -1154,6 +1213,12 @@
fin_time = GetCurrentTimestamp();
fin_time = TimestampTzPlusMilliseconds(fin_time, delayms);
deadlock_timeout_active = true;
+#ifdef USE_REPLICATION
+ if (ReplicateCurrentTime != NULL)
+ {
+ ReplicateCurrentTime->useFlag = useFlag;
+ }
+#endif /* USE_REPLICATION */
if (fin_time >= statement_fin_time)
return true;
}
@@ -1167,6 +1232,12 @@
MemSet(&timeval, 0, sizeof(struct itimerval));
timeval.it_value.tv_sec = delayms / 1000;
timeval.it_value.tv_usec = (delayms % 1000) * 1000;
+#ifdef USE_REPLICATION
+ if (ReplicateCurrentTime != NULL)
+ {
+ ReplicateCurrentTime->useFlag = useFlag;
+ }
+#endif /* USE_REPLICATION */
if (setitimer(ITIMER_REAL, &timeval, NULL))
return false;
return true;
@@ -1232,12 +1303,30 @@
CheckStatementTimeout(void)
{
TimestampTz now;
+#ifdef USE_REPLICATION
+ int useFlag = 0;
+#endif /* USE_REPLICATION */
if (!statement_timeout_active)
return true; /* do nothing if not active */
+#ifdef USE_REPLICATION
+ if (ReplicateCurrentTime != NULL)
+ {
+ useFlag = ReplicateCurrentTime->useFlag;
+ ReplicateCurrentTime->useFlag = DATA_INIT;
+ }
+#endif /* USE_REPLICATION */
+
now = GetCurrentTimestamp();
+#ifdef USE_REPLICATION
+ if (ReplicateCurrentTime != NULL)
+ {
+ ReplicateCurrentTime->useFlag = useFlag;
+ }
+#endif /* USE_REPLICATION */
+
if (now >= statement_fin_time)
{
/* Time to die */
diff -aruN postgresql-8.2.4/src/backend/tcop/postgres.c pgcluster-1.7.0rc7/src/backend/tcop/postgres.c
--- postgresql-8.2.4/src/backend/tcop/postgres.c 2007-01-04 01:58:01.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/tcop/postgres.c 2007-02-18 22:52:16.000000000 +0100
@@ -68,6 +68,10 @@
#include "pgstat.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
+
extern int optind;
extern char *optarg;
@@ -91,7 +95,9 @@
/* wait N seconds to allow attach from a debugger */
int PostAuthDelay = 0;
-
+#ifdef USE_REPLICATION
+bool PGR_Not_Replication_Query = false;
+#endif /* USE_REPLICATION */
/* ----------------
* private variables
@@ -753,6 +759,24 @@
bool was_logged = false;
char msec_str[32];
+#ifdef USE_REPLICATION
+ char * query_ptr = NULL;
+ char * null_ptr = NULL;
+ int skip_cnt = 0;
+ int status = 0;
+
+ PGR_Reliable_Mode_Wait = false;
+ query_ptr = (char *)query_string;
+ if (PGR_Is_Replicated_Query == false)
+ {
+ PGR_Is_Replicated_Query = PGR_Is_Replicated_Command(query_ptr);
+ }
+ PGR_Retry_Query.query_string = (char *)query_string;
+ PGR_Retry_Query.query_len = strlen(query_string);
+ PGR_Retry_Query.cmdSts = CMD_STS_OTHER;
+ PGR_Retry_Query.cmdType = CMD_TYPE_OTHER;
+#endif /* USE_REPLICATION */
+
/*
* Report query to various monitoring facilities.
*/
@@ -831,6 +855,18 @@
DestReceiver *receiver;
int16 format;
+#ifdef USE_REPLICATION
+ PGR_Not_Replication_Query = false;
+ PGR_Reliable_Mode_Wait = false;
+
+ PGR_Retry_Query.query_string = NULL;
+ PGR_Retry_Query.query_len = 0;
+ PGR_Retry_Query.cmdSts = CMD_STS_OTHER;
+ PGR_Retry_Query.cmdType = CMD_TYPE_OTHER;
+ PGR_Retry_Query.useFlag = DATA_INIT;
+ PGR_Lock_Noticed = false;
+#endif /* USE_REPLICATION */
+
/*
* Get the command name for use in status display (it also becomes the
* default completion tag, down inside PortalRun). Set ps_status and
@@ -853,10 +889,232 @@
*/
if (IsAbortedTransactionBlockState() &&
!IsTransactionExitStmt(parsetree))
+ {
+#ifdef USE_REPLICATION
+ Transaction_Mode = 0;
+#endif
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
"commands ignored until end of transaction block")));
+ }
+
+#ifdef USE_REPLICATION
+ Query_String = NULL;
+ query_ptr = PGR_Remove_Comment(query_ptr);
+ PGR_Check_Lock.dest = TO_FRONTEND;
+ PGR_Need_Notice = false;
+ PGR_Check_Lock.check_lock_conflict = false;
+
+ /* skip replication while recovery mode is running */
+ if (PGR_Get_Cluster_Status() == STATUS_RECOVERY)
+ {
+ /*
+ PGR_Not_Replication_Query = true;
+ */
+ PGR_Is_Replicated_Query = true;
+ if (!strcmp(commandTag,"SELECT"))
+ {
+ if (PGR_Is_System_Command(query_ptr))
+ {
+ status = PGR_Call_System_Command(query_ptr);
+ if (status == STATUS_SKIP_QUERY)
+ {
+ EndCommand(PGR_ALREADY_REPLICATED_NOTICE_CMD,dest);
+ break;
+ }
+ else
+ {
+ EndCommand("SYSTEM_COMMAND",dest);
+ continue;
+ }
+ }
+ }
+ Transaction_Mode = PGR_Set_Transaction_Mode(Transaction_Mode,commandTag);
+ if (Transaction_Mode > 0)
+ {
+ PGR_Need_Notice = true;
+ PGR_Check_Lock.check_lock_conflict = true;
+ }
+ goto Skip_Replication;
+ }
+
+ /*
+ if (!xact_started)
+ {
+ start_xact_command();
+ xact_started = true;
+ }
+ */
+ if (skip_cnt == 0)
+ {
+ skip_cnt = PGR_Is_Skip_Replication(query_ptr);
+ }
+ null_ptr = PGR_scan_terminate (query_ptr);
+ if(null_ptr != NULL)
+ {
+ *null_ptr = '\0';
+ }
+ Transaction_Mode = PGR_Set_Transaction_Mode(Transaction_Mode,commandTag);
+ if ((PGR_Is_Replicated_Query ) ||
+ (skip_cnt != 0))
+ {
+ if (skip_cnt > 0)
+ {
+ skip_cnt --;
+ }
+ else
+ {
+ skip_cnt = 0;
+ }
+ PGR_Copy_Data_Need_Replicate = false;
+ if (!strncmp(commandTag,"SELECT",strlen("SELECT")))
+ {
+ if (PGR_Is_System_Command(query_ptr))
+ {
+ status = PGR_Call_System_Command(query_ptr);
+ if (status == STATUS_SKIP_QUERY)
+ {
+ EndCommand(PGR_ALREADY_REPLICATED_NOTICE_CMD,dest);
+ break;
+ }
+ else
+ {
+ EndCommand("SYSTEM_COMMAND",dest);
+ continue;
+ }
+ }
+ }
+ PGR_Check_Lock.status_lock_conflict = STATUS_OK;
+ PGR_Check_Lock.dest = TO_FRONTEND;
+ }
+ else
+ {
+ PGR_Copy_Data_Need_Replicate = false;
+
+ /* check cluster db status */
+ /*
+ if ((PGR_Get_Cluster_Status() == STATUS_RECOVERY) &&
+ (PGR_Not_Replication_Query == false) &&
+ (Transaction_Mode == 0 ) )
+ {
+ elog(WARNING, "This query is not permitted while recovery db ");
+ if(null_ptr != NULL)
+ {
+ *null_ptr = ';';
+ query_ptr = null_ptr +1;
+ }
+ continue;
+ }
+ */
+ if (PGR_Is_Stand_Alone() == true)
+ {
+ if (PGR_Stand_Alone->permit == PERMIT_READ_ONLY)
+ {
+ if (!strcmp(commandTag, "SHOW")) {
+ VariableShowStmt *stmt = (VariableShowStmt *)parsetree;
+ if (!strcmp(stmt->name, "replication_server")) {
+ PGR_Not_Replication_Query = true;
+ }
+ }
+
+ if (PGR_Not_Replication_Query == false)
+ elog(ERROR, "This query is not permitted when all replication servers fell down ");
+ }
+ }
+ else if ((PGRforceLoadBalance == false) &&
+ ((PGR_Not_Replication_Query == false ) ||
+ (!strcmp(commandTag,"SELECT"))))
+ {
+ status = PGR_replication(query_ptr,dest,parsetree,commandTag);
+ if (status == STATUS_REPLICATED)
+ {
+ if (xact_started)
+ {
+ finish_xact_command();
+ xact_started = false;
+ }
+ CommandCounterIncrement();
+ continue;
+ }
+ else if (status == STATUS_ERROR)
+ {
+ if (!strcmp(commandTag, "SHOW")) {
+ VariableShowStmt *stmt = (VariableShowStmt *)parsetree;
+ if (!strcmp(stmt->name, "replication_server")) {
+ PGR_Not_Replication_Query = true;
+ }
+ }
+ else if (PGR_Stand_Alone->permit == PERMIT_READ_ONLY)
+ {
+ elog(ERROR, "This query is not permitted when all replication servers fell down ");
+ }
+ }
+ else if (status == STATUS_DEADLOCK_DETECT)
+ {
+ PGR_Need_Notice = false;
+ elog(ERROR, "postmaster deadlock detected");
+ continue;
+ }
+ else if (status == STATUS_REPLICATION_ABORT)
+ {
+ PGR_Need_Notice = false;
+ elog(ERROR, "replication server should be down, transaction aborted.");
+ continue;
+ }
+ else if (status != STATUS_CONTINUE)
+ {
+ PGR_Check_Lock.dest = TO_FRONTEND;
+ }
+ else
+ {
+ PGR_Check_Lock.dest = TO_REPLICATION_SERVER;
+ PGR_Reliable_Mode_Wait = true;
+ }
+ }
+ }
+ if(null_ptr != NULL)
+ {
+ *null_ptr = ';';
+ query_ptr = null_ptr +1;
+ }
+ if (!PGR_Is_Replicated_Query )
+ {
+ if ((!strcmp(commandTag,"BEGIN")) ||
+ (!strcmp(commandTag, "START TRANSACTION")) ||
+ (Transaction_Mode == 0 ) )
+ {
+ PGR_Reload_Start_Time();
+ }
+ }
+ if (((IsA(parsetree, TransactionStmt)) ||
+ (Transaction_Mode > 0) ||
+ (Create_Temp_Table_Mode == true) ||
+ (Session_Authorization_Mode == true)) ||
+ (!strcmp(commandTag,"COPY")))
+ {
+ PGR_Need_Notice = true;
+ PGR_Check_Lock.check_lock_conflict = true;
+ }
+ else
+ {
+ if (PGR_Not_Replication_Query == false)
+ {
+ PGR_Need_Notice = true;
+ PGR_Check_Lock.check_lock_conflict = true;
+ }
+ else
+ {
+ if ((PGR_Is_Replicated_Query ) &&
+ (!strncmp(commandTag, "SELECT",strlen("SELECT"))))
+ {
+ PGR_Need_Notice = true;
+ PGR_Check_Lock.check_lock_conflict = true;
+ }
+ }
+ }
+Skip_Replication:
+#endif /* USE_REPLICATION */
/* Make sure we are in a transaction command */
start_xact_command();
@@ -983,7 +1241,44 @@
* command the client sent, regardless of rewriting. (But a command
* aborted by error will not send an EndCommand report at all.)
*/
+#ifdef USE_REPLICATION
+ /*
+ * In Non-CONTROL LOCK CONFLICT mode, we *MUST NOT* send the command tag
+ * twice. So, if it was already sent for lock notification, we do not send
+ * the tag here. The same applies to ReadyForQuery.
+ */
+ if(!(PGR_Is_Replicated_Query && PGR_Lock_Noticed))
+#endif
EndCommand(completionTag, dest);
+
+#ifdef USE_REPLICATION
+	if(PGR_Is_Replicated_Query &&
+		needToUpdateReplicateIdOnNextQueryIsDone) {
+		++(ReplicationLog_Info.PGR_Replicate_ID);
+
+		if (CurrentReplicateServer != NULL)
+		{
+			/* set replicate id in this system; log it only here, where the pointer is known to be non-NULL */
+			++(CurrentReplicateServer->replicate_id);
+			elog(DEBUG1,"increased replicate_id to %d",CurrentReplicateServer->replicate_id);
+		}
+		needToUpdateReplicateIdOnNextQueryIsDone=false;
+	}
+
+ if (PGR_Get_Cluster_Status() != STATUS_RECOVERY)
+ {
+ if ((PGR_Need_Notice == true) &&
+ (PGRforceLoadBalance == false))
+ {
+ PGR_Notice_Transaction_Query_Done();
+ }
+ if ((Transaction_Mode == 0) &&
+ (ReplicateCurrentTime != NULL))
+ {
+ ReplicateCurrentTime->use_seed = 1;
+ }
+ }
+#endif
} /* end loop over parsetrees */
/*
@@ -1144,11 +1439,15 @@
*/
if (IsAbortedTransactionBlockState() &&
!IsTransactionExitStmt(parsetree))
+ {
+#ifdef USE_REPLICATION
+ Transaction_Mode = 0;
+#endif
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
"commands ignored until end of transaction block")));
-
+ }
/*
* OK to analyze, rewrite, and plan this query. Note that the
* originally specified parameter set is not required to be complete,
@@ -1382,11 +1681,15 @@
if (IsAbortedTransactionBlockState() &&
(!IsTransactionExitStmtList(pstmt->query_list) ||
numParams != 0))
+ {
+#ifdef USE_REPLICATION
+ Transaction_Mode = 0;
+#endif
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
"commands ignored until end of transaction block")));
-
+ }
/*
* Create the portal. Allow silent replacement of an existing portal only
* if the unnamed portal is specified.
@@ -1769,11 +2072,15 @@
*/
if (IsAbortedTransactionBlockState() &&
!IsTransactionExitStmtList(portal->parseTrees))
+ {
+#ifdef USE_REPLICATION
+ Transaction_Mode = 0;
+#endif
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
"commands ignored until end of transaction block")));
-
+ }
/* Check for cancel signal before we start execution */
CHECK_FOR_INTERRUPTS();
@@ -2101,11 +2408,15 @@
*/
if (IsAbortedTransactionBlockState() &&
PreparedStatementReturnsTuples(pstmt))
+ {
+#ifdef USE_REPLICATION
+ Transaction_Mode = 0;
+#endif
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
"commands ignored until end of transaction block")));
-
+ }
if (whereToSendOutput != DestRemote)
return; /* can't actually do anything... */
@@ -2171,11 +2482,15 @@
*/
if (IsAbortedTransactionBlockState() &&
portal->tupDesc)
+ {
+#ifdef USE_REPLICATION
+ Transaction_Mode = 0;
+#endif
ereport(ERROR,
(errcode(ERRCODE_IN_FAILED_SQL_TRANSACTION),
errmsg("current transaction is aborted, "
"commands ignored until end of transaction block")));
-
+ }
if (whereToSendOutput != DestRemote)
return; /* can't actually do anything... */
@@ -2332,6 +2647,9 @@
* backend. This is necessary precisely because we don't clean up our
* shared memory state.
*/
+#ifdef USE_REPLICATION
+ PGR_delete_shm();
+#endif /* USE_REPLICATION */
exit(2);
}
@@ -2369,6 +2687,9 @@
}
}
+#ifdef USE_REPLICATION
+ PGR_delete_shm();
+#endif /* USE_REPLICATION */
errno = save_errno;
}
@@ -2383,6 +2704,9 @@
void
authdie(SIGNAL_ARGS)
{
+#ifdef USE_REPLICATION
+ PGR_delete_shm();
+#endif /* USE_REPLICATION */
exit(1);
}
@@ -3369,6 +3693,14 @@
pgstat_report_activity("");
}
+#ifdef USE_REPLICATION
+ /*
+ * In Non-CONTROL LOCK CONFLICT mode, we *MUST NOT* send the command tag
+ * twice. So, if it was already sent for lock notification, we do not send
+ * the tag here. The same applies to ReadyForQuery.
+ */
+ if(!(PGR_Is_Replicated_Query && PGR_Lock_Noticed))
+#endif
ReadyForQuery(whereToSendOutput);
send_ready_for_query = false;
}
@@ -3409,6 +3741,26 @@
if (ignore_till_sync && firstchar != EOF)
continue;
+#ifdef USE_REPLICATION
+ if ((firstchar == CMD_TYPE_P_PARSE) ||
+ (firstchar == CMD_TYPE_P_BIND) ||
+ (firstchar == CMD_TYPE_P_DESCRIBE) ||
+ (firstchar == CMD_TYPE_P_EXECUTE) ||
+ (firstchar == CMD_TYPE_P_SYNC) ||
+ (firstchar == CMD_TYPE_P_CLOSE))
+ {
+ if (PGR_Send_Input_Message(firstchar, &input_message) != STATUS_OK)
+ {
+ if ((PGR_Is_Stand_Alone() == true) &&
+ (PGR_Stand_Alone->permit == PERMIT_READ_ONLY))
+ {
+ elog(WARNING, "This query is not permitted when all replication servers fell down ");
+ break;
+ }
+ }
+ }
+#endif /* USE_REPLICATION */
+
switch (firstchar)
{
case 'Q': /* simple query */
@@ -3622,6 +3974,27 @@
case 'X':
case EOF:
+#ifdef USE_REPLICATION
+ if (PGRforceLoadBalance == false)
+ {
+ if (PGR_Is_Replicated_Query == false)
+ {
+ PGR_Noticed_Abort = true;
+ PGRsend_system_command(CMD_STS_TRANSACTION_ABORT, CMD_TYPE_FRONTEND_CLOSED);
+ }
+ else if ((Transaction_Mode >= 1) && (PGR_Noticed_Abort == false))
+ {
+ if (PGR_Did_Commit_Transaction() == true)
+ {
+ pgstat_report_activity("commit");
+ exec_simple_query("commit");
+ }
+ }
+ }
+ /*
+ PGR_Notice_Transaction_Query_Aborted();
+ */
+#endif /* USE_REPLICATION */
/*
* Reset whereToSendOutput to prevent ereport from attempting
* to send any more messages to client.
diff -aruN postgresql-8.2.4/src/backend/tcop/pquery.c pgcluster-1.7.0rc7/src/backend/tcop/pquery.c
--- postgresql-8.2.4/src/backend/tcop/pquery.c 2006-10-04 02:29:58.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/tcop/pquery.c 2007-02-18 22:52:16.000000000 +0100
@@ -24,6 +24,9 @@
#include "tcop/utility.h"
#include "utils/memutils.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/*
* ActivePortal is the currently executing Portal (the most closely nested,
@@ -188,6 +191,19 @@
strcpy(completionTag, "???");
break;
}
+#ifdef USE_REPLICATION
+ if ((PGR_Is_Replicated_Query == true ) &&
+ (PGR_Get_Cluster_Status() != STATUS_RECOVERY))
+ {
+ /*
+ * A replicated *SELECT* query is used ONLY to replicate
+ * locking and function execution. All of its results
+ * will be discarded by the pgrp processes,
+ * so we don't need to send them.
+ */
+ dest = None_Receiver;
+ }
+#endif /*USE_REPLICATION */
}
/* Now take care of any queued AFTER triggers */
diff -aruN postgresql-8.2.4/src/backend/tcop/utility.c pgcluster-1.7.0rc7/src/backend/tcop/utility.c
--- postgresql-8.2.4/src/backend/tcop/utility.c 2006-10-04 02:29:58.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/tcop/utility.c 2007-02-18 22:52:16.000000000 +0100
@@ -54,6 +54,9 @@
#include "utils/guc.h"
#include "utils/syscache.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/*
* Error-checking support for DROP commands
@@ -1289,29 +1292,48 @@
case T_SelectStmt:
tag = "SELECT";
+#ifdef USE_REPLICATION
+ PGR_Not_Replication_Query = true;
+#endif /* USE_REPLICATION */
break;
case T_TransactionStmt:
{
TransactionStmt *stmt = (TransactionStmt *) parsetree;
+#ifdef USE_REPLICATION
+ bool isInTransaction=IsTransactionBlock();
+#endif /* USE_REPLICATION */
+
switch (stmt->kind)
{
case TRANS_STMT_BEGIN:
tag = "BEGIN";
+#ifdef USE_REPLICATION
+ PGR_Not_Replication_Query=isInTransaction;
+#endif /* USE_REPLICATION */
break;
case TRANS_STMT_START:
tag = "START TRANSACTION";
+#ifdef USE_REPLICATION
+ PGR_Not_Replication_Query=isInTransaction;
+#endif /* USE_REPLICATION */
break;
case TRANS_STMT_COMMIT:
tag = "COMMIT";
+#ifdef USE_REPLICATION
+ PGR_Not_Replication_Query=!isInTransaction;
+#endif /* USE_REPLICATION */
break;
case TRANS_STMT_ROLLBACK:
case TRANS_STMT_ROLLBACK_TO:
tag = "ROLLBACK";
+#ifdef USE_REPLICATION
+ PGR_Not_Replication_Query=!isInTransaction;
+#endif /* USE_REPLICATION */
break;
case TRANS_STMT_SAVEPOINT:
@@ -1343,10 +1365,16 @@
case T_DeclareCursorStmt:
tag = "DECLARE CURSOR";
+#ifdef USE_REPLICATION
+ PGR_Not_Replication_Query = true;
+#endif /* USE_REPLICATION */
break;
case T_ClosePortalStmt:
tag = "CLOSE CURSOR";
+#ifdef USE_REPLICATION
+ PGR_Not_Replication_Query = true;
+#endif /* USE_REPLICATION */
break;
case T_FetchStmt:
@@ -1355,6 +1383,9 @@
tag = (stmt->ismove) ? "MOVE" : "FETCH";
}
+#ifdef USE_REPLICATION
+ PGR_Not_Replication_Query = true;
+#endif /* USE_REPLICATION */
break;
case T_CreateDomainStmt:
@@ -1677,10 +1708,16 @@
tag = "VACUUM";
else
tag = "ANALYZE";
+#ifdef USE_REPLICATION
+ PGR_Not_Replication_Query = true;
+#endif /* USE_REPLICATION */
break;
case T_ExplainStmt:
tag = "EXPLAIN";
+#ifdef USE_REPLICATION
+ PGR_Not_Replication_Query = true;
+#endif /* USE_REPLICATION */
break;
case T_VariableSetStmt:
@@ -1689,6 +1726,14 @@
case T_VariableShowStmt:
tag = "SHOW";
+#ifdef USE_REPLICATION
+	{	/* every SHOW except "replication_server" is flagged as a non-replication query */
+		VariableShowStmt *stmt = (VariableShowStmt *) parsetree;
+		if (pg_strcasecmp(stmt->name, "replication_server") != 0) {	/* locale-independent compare */
+			PGR_Not_Replication_Query = true;
+		}
+	}
+#endif /* USE_REPLICATION */
break;
case T_VariableResetStmt:
@@ -1755,10 +1800,16 @@
case T_CheckPointStmt:
tag = "CHECKPOINT";
+#ifdef USE_REPLICATION
+ PGR_Not_Replication_Query = true;
+#endif /* USE_REPLICATION */
break;
case T_ReindexStmt:
tag = "REINDEX";
+#ifdef USE_REPLICATION
+ PGR_Not_Replication_Query = true;
+#endif /* USE_REPLICATION */
break;
case T_CreateConversionStmt:
@@ -1783,14 +1834,35 @@
case T_PrepareStmt:
tag = "PREPARE";
+#ifdef USE_REPLICATION
+ if ((PGRnotReplicatePreparedSelect == true) &&
+ (PGR_is_select_prepare_query() == true))
+ {
+ PGR_Not_Replication_Query = true;
+ }
+#endif /* USE_REPLICATION */
break;
case T_ExecuteStmt:
tag = "EXECUTE";
+#ifdef USE_REPLICATION
+ if ((PGRnotReplicatePreparedSelect == true) &&
+ (PGR_is_select_prepared_statement((PrepareStmt *)parsetree) == true))
+ {
+ PGR_Not_Replication_Query = true;
+ }
+#endif /* USE_REPLICATION */
break;
case T_DeallocateStmt:
tag = "DEALLOCATE";
+#ifdef USE_REPLICATION
+ if ((PGRnotReplicatePreparedSelect == true) &&
+ (PGR_is_select_prepared_statement((PrepareStmt *)parsetree) == true))
+ {
+ PGR_Not_Replication_Query = true;
+ }
+#endif /* USE_REPLICATION */
break;
default:
@@ -1800,6 +1872,13 @@
break;
}
+#ifdef USE_REPLICATION
+ if(PGRforceLoadBalance == true)
+ {
+ PGR_Not_Replication_Query = true;
+ }
+#endif /* USE_REPLICATION */
+
return tag;
}
@@ -1835,7 +1914,12 @@
tag = "SELECT FOR SHARE";
}
else
+ {
tag = "SELECT";
+#ifdef USE_REPLICATION
+ PGR_Not_Replication_Query = true;
+#endif /* USE_REPLICATION */
+ }
break;
case CMD_UPDATE:
tag = "UPDATE";
@@ -1853,6 +1937,9 @@
elog(WARNING, "unrecognized commandType: %d",
(int) parsetree->commandType);
tag = "???";
+#ifdef USE_REPLICATION
+ PGR_Not_Replication_Query = true;
+#endif /* USE_REPLICATION */
break;
}
diff -aruN postgresql-8.2.4/src/backend/utils/adt/float.c pgcluster-1.7.0rc7/src/backend/utils/adt/float.c
--- postgresql-8.2.4/src/backend/utils/adt/float.c 2006-10-05 03:40:45.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/utils/adt/float.c 2007-02-18 22:52:16.000000000 +0100
@@ -66,6 +66,9 @@
#include "utils/array.h"
#include "utils/builtins.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
#ifndef M_PI
/* from my RH5.2 gcc math.h file - thomas 2000-04-03 */
@@ -1886,7 +1889,11 @@
float8 result;
/* result [0.0 - 1.0) */
+#ifdef USE_REPLICATION
+ result = ((double) PGR_Random()) / ((double) MAX_RANDOM_VALUE + 1);
+#else
result = (double) random() / ((double) MAX_RANDOM_VALUE + 1);
+#endif /* USE_REPLICATION */
PG_RETURN_FLOAT8(result);
}
diff -aruN postgresql-8.2.4/src/backend/utils/adt/nabstime.c pgcluster-1.7.0rc7/src/backend/utils/adt/nabstime.c
--- postgresql-8.2.4/src/backend/utils/adt/nabstime.c 2006-07-14 16:52:24.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/utils/adt/nabstime.c 2007-02-18 22:52:16.000000000 +0100
@@ -27,6 +27,10 @@
#include "utils/builtins.h"
#include "utils/nabstime.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
+
#define MIN_DAYNUM (-24856) /* December 13, 1901 */
#define MAX_DAYNUM 24854 /* January 18, 2038 */
@@ -92,7 +96,13 @@
{
time_t now;
+#ifdef USE_REPLICATION
+ struct timeval tp;
+ PGR_GetTimeOfDay(&tp,NULL);
+ now = tp.tv_sec;
+#else
now = time(NULL);
+#endif /* USE_REPLICATION */
return (AbsoluteTime) now;
}
@@ -1031,9 +1041,14 @@
{
time_t sec;
+#ifdef USE_REPLICATION
+ struct timeval tp;
+ PGR_GetTimeOfDay(&tp,NULL);
+ sec = tp.tv_sec;
+#else
if (time(&sec) < 0)
PG_RETURN_ABSOLUTETIME(INVALID_ABSTIME);
-
+#endif
PG_RETURN_ABSOLUTETIME((AbsoluteTime) sec);
}
@@ -1588,7 +1603,11 @@
int len;
pg_time_t tt;
+#ifdef USE_REPLICATION
+ PGR_GetTimeOfDay(&tp,NULL);
+#else
gettimeofday(&tp, NULL);
+#endif /* USE_REPLICATION */
tt = (pg_time_t) tp.tv_sec;
pg_strftime(templ, sizeof(templ), "%a %b %d %H:%M:%S.%%06d %Y %Z",
pg_localtime(&tt, global_timezone));
diff -aruN postgresql-8.2.4/src/backend/utils/adt/ri_triggers.c pgcluster-1.7.0rc7/src/backend/utils/adt/ri_triggers.c
--- postgresql-8.2.4/src/backend/utils/adt/ri_triggers.c 2006-10-04 02:29:59.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/utils/adt/ri_triggers.c 2007-02-18 22:52:16.000000000 +0100
@@ -40,6 +40,9 @@
#include "utils/typcache.h"
#include "miscadmin.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/* ----------
* Local definitions
@@ -271,8 +274,18 @@
* ----------
*/
quoteRelationName(pkrelname, pk_rel);
+#ifdef USE_REPLICATION
+ if (PGRcheckConstraintWithLock)
+ snprintf(querystr, sizeof(querystr), "SELECT 1 FROM ONLY %s x FOR UPDATE OF x",
+ pkrelname);
+ else
+ snprintf(querystr, sizeof(querystr), "SELECT 1 FROM ONLY %s x ",
+ pkrelname);
+
+#else
snprintf(querystr, sizeof(querystr), "SELECT 1 FROM ONLY %s x FOR SHARE OF x",
pkrelname);
+#endif /* USE_REPLICATION */
/* Prepare and save the plan */
qplan = ri_PlanCheck(querystr, 0, NULL,
@@ -416,6 +429,9 @@
queryoids[i] = SPI_gettypeid(fk_rel->rd_att,
qkey.keypair[i][RI_KEYPAIR_FK_IDX]);
}
+#ifdef USE_REPLICATION
+ if (PGRcheckConstraintWithLock)
+#endif /* USE_REPLICATION */
strcat(querystr, " FOR SHARE OF x");
/* Prepare and save the plan */
@@ -577,6 +593,9 @@
queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
}
+#ifdef USE_REPLICATION
+ if (PGRcheckConstraintWithLock)
+#endif /* USE_REPLICATION */
strcat(querystr, " FOR SHARE OF x");
/* Prepare and save the plan */
@@ -733,6 +752,9 @@
queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
}
+#ifdef USE_REPLICATION
+ if (PGRcheckConstraintWithLock)
+#endif /* USE_REPLICATION */
strcat(querystr, " FOR SHARE OF x");
/* Prepare and save the plan */
@@ -922,6 +944,9 @@
queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
}
+#ifdef USE_REPLICATION
+ if (PGRcheckConstraintWithLock)
+#endif /* USE_REPLICATION */
strcat(querystr, " FOR SHARE OF x");
/* Prepare and save the plan */
@@ -1428,6 +1453,9 @@
queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
}
+#ifdef USE_REPLICATION
+ if (PGRcheckConstraintWithLock)
+#endif /* USE_REPLICATION */
strcat(querystr, " FOR SHARE OF x");
/* Prepare and save the plan */
@@ -1607,6 +1635,9 @@
queryoids[i] = SPI_gettypeid(pk_rel->rd_att,
qkey.keypair[i][RI_KEYPAIR_PK_IDX]);
}
+#ifdef USE_REPLICATION
+ if (PGRcheckConstraintWithLock)
+#endif /* USE_REPLICATION */
strcat(querystr, " FOR SHARE OF x");
/* Prepare and save the plan */
diff -aruN postgresql-8.2.4/src/backend/utils/adt/timestamp.c pgcluster-1.7.0rc7/src/backend/utils/adt/timestamp.c
--- postgresql-8.2.4/src/backend/utils/adt/timestamp.c 2006-11-11 02:14:19.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/utils/adt/timestamp.c 2007-02-18 22:52:16.000000000 +0100
@@ -39,6 +39,9 @@
#error -ffast-math is known to break this code
#endif
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/* Set at postmaster start */
TimestampTz PgStartTime;
@@ -948,7 +951,11 @@
TimestampTz result;
struct timeval tp;
+#ifdef USE_REPLICATION
+ PGR_GetTimeOfDay(&tp,NULL);
+#else
gettimeofday(&tp, NULL);
+#endif
result = (TimestampTz) tp.tv_sec -
((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY);
diff -aruN postgresql-8.2.4/src/backend/utils/error/assert.c pgcluster-1.7.0rc7/src/backend/utils/error/assert.c
--- postgresql-8.2.4/src/backend/utils/error/assert.c 2006-03-05 16:58:46.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/utils/error/assert.c 2007-02-18 22:52:16.000000000 +0100
@@ -19,6 +19,10 @@
#include <unistd.h>
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
+
/*
* ExceptionalCondition - Handles the failure of an Assert()
*/
@@ -39,6 +43,18 @@
fileName, lineNumber);
}
+#ifdef USE_REPLICATION
+ if ((PGR_Check_Lock.dest == TO_REPLICATION_SERVER ) &&
+ (PGR_Need_Notice == true))
+ {
+ PGR_Notice_Transaction_Query_Aborted();
+ }
+ if (PGR_Copy_Data_Need_Replicate)
+ {
+ PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
+ }
+#endif /* USE_REPLICATION */
+
#ifdef SLEEP_ON_ASSERT
/*
diff -aruN postgresql-8.2.4/src/backend/utils/error/elog.c pgcluster-1.7.0rc7/src/backend/utils/error/elog.c
--- postgresql-8.2.4/src/backend/utils/error/elog.c 2006-11-28 13:54:42.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/utils/error/elog.c 2007-02-18 22:52:16.000000000 +0100
@@ -70,6 +70,9 @@
#include "utils/memutils.h"
#include "utils/ps_status.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/* Global variables */
ErrorContextCallback *error_context_stack = NULL;
@@ -314,6 +317,16 @@
MemoryContext oldcontext;
ErrorContextCallback *econtext;
+#ifdef USE_REPLICATION
+ int status = 0;
+ bool parse_error_flag = false;
+
+ if ((edata->message) && (strstr(edata->message,"parse error") != NULL))
+ {
+ parse_error_flag = true;
+ }
+#endif /* USE_REPLICATION */
+
recursion_depth++;
CHECK_STACK_DEPTH();
@@ -363,6 +376,24 @@
* handler should reset it to something else soon.
*/
+#ifdef USE_REPLICATION
+ if (parse_error_flag)
+ {
+ if ((PGR_Check_Lock.dest != TO_FRONTEND) &&
+ (Transaction_Mode > 0))
+ {
+ PGR_Force_Replicate_Query();
+ }
+ }
+ if (PGR_Copy_Data_Need_Replicate)
+ {
+ PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
+ }
+ else if (PGR_Need_Notice == true)
+ {
+ PGR_Notice_Transaction_Query_Done();
+ }
+#endif /* USE_REPLICATION */
recursion_depth--;
PG_RE_THROW();
}
@@ -377,7 +408,16 @@
* client_min_messages above FATAL, so don't look at output_to_client.
*/
if (elevel >= FATAL && whereToSendOutput == DestRemote)
+ {
+#ifdef USE_REPLICATION
+ if (PGR_Copy_Data_Need_Replicate)
+ {
+ PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
+ }
+#endif /* USE_REPLICATION */
pq_endcopyout(true);
+ }
+
/* Emit the message to the right places */
EmitErrorReport();
@@ -417,6 +457,34 @@
if (PG_exception_stack == NULL && whereToSendOutput == DestRemote)
whereToSendOutput = DestNone;
+#ifdef USE_REPLICATION
+ if (CurrentReplicateServer != NULL)
+ {
+ if (PGR_Need_Notice == true)
+ {
+ PGR_Notice_Transaction_Query_Aborted();
+ }
+ if (PGR_Copy_Data_Need_Replicate)
+ {
+ PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
+ }
+ else
+ {
+ if ((!PGR_Is_Replicated_Query ) &&
+ (PGR_Check_Lock.dest != TO_FRONTEND) &&
+ (PGR_Reliable_Mode_Wait == true) &&
+ (CurrentReplicateServer->response_mode == PGR_RELIABLE_MODE))
+ {
+ status = PGR_Recv_Trigger(0);
+ }
+ }
+ }
+ if (TransactionSock != -1)
+ {
+ close (TransactionSock);
+ TransactionSock = -1;
+ }
+#endif /* USE_REPLICATION */
/*
* fflush here is just to improve the odds that we get to see the
* error message, in case things are so hosed that proc_exit crashes.
@@ -436,6 +504,34 @@
if (elevel >= PANIC)
{
+#ifdef USE_REPLICATION
+ if (CurrentReplicateServer != NULL)
+ {
+ if (PGR_Need_Notice == true)
+ {
+ PGR_Notice_Transaction_Query_Aborted();
+ }
+ if (PGR_Copy_Data_Need_Replicate)
+ {
+ PGR_Set_Copy_Data(PGRCopyData,NULL,0,1);
+ }
+ else
+ {
+ if ((!PGR_Is_Replicated_Query ) &&
+ (PGR_Check_Lock.dest != TO_FRONTEND) &&
+ (PGR_Reliable_Mode_Wait == true) &&
+ (CurrentReplicateServer->response_mode == PGR_RELIABLE_MODE))
+ {
+ status = PGR_Recv_Trigger(PGR_Replication_Timeout);
+ }
+ }
+ }
+ if (TransactionSock != -1)
+ {
+ close (TransactionSock);
+ TransactionSock = -1;
+ }
+#endif /* USE_REPLICATION */
/*
* Serious crash time. Postmaster will observe SIGABRT process exit
* status and kill the other backends too.
diff -aruN postgresql-8.2.4/src/backend/utils/fmgr/fmgr.c pgcluster-1.7.0rc7/src/backend/utils/fmgr/fmgr.c
--- postgresql-8.2.4/src/backend/utils/fmgr/fmgr.c 2006-10-04 02:30:01.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/utils/fmgr/fmgr.c 2007-02-18 22:52:16.000000000 +0100
@@ -25,6 +25,9 @@
#include "utils/fmgrtab.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/*
* Declaration for old-style function pointer type. This is now used only
@@ -218,7 +221,14 @@
ReleaseSysCache(procedureTuple);
return;
}
-
+#ifdef USE_REPLICATION
+	if (PGR_Replicate_Function_Call() != STATUS_OK)
+	{
+		/* drop the pg_proc tuple on this early exit, as the return above does */
+		ReleaseSysCache(procedureTuple);
+		return;
+	}
+#endif /* USE_REPLICATION */
switch (procedureStruct->prolang)
{
case INTERNALlanguageId:
diff -aruN postgresql-8.2.4/src/backend/utils/mb/mbutils.c pgcluster-1.7.0rc7/src/backend/utils/mb/mbutils.c
--- postgresql-8.2.4/src/backend/utils/mb/mbutils.c 2006-10-04 02:30:02.000000000 +0200
+++ pgcluster-1.7.0rc7/src/backend/utils/mb/mbutils.c 2007-02-18 22:52:16.000000000 +0100
@@ -15,6 +15,9 @@
#include "utils/memutils.h"
#include "utils/syscache.h"
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
/*
* We handle for actual FE and BE encoding setting encoding-identificator
* and encoding-name too. It prevent searching and conversion from encoding
@@ -442,6 +445,11 @@
dest_encoding;
FmgrInfo *flinfo;
+#ifdef USE_REPLICATION
+ if (PGR_Is_Replicated_Query)
+ return (char *)src;
+#endif /* USE_REPLICATION */
+
if (is_client_to_server)
{
src_encoding = ClientEncoding->encoding;
diff -aruN postgresql-8.2.4/src/backend/utils/misc/guc.c pgcluster-1.7.0rc7/src/backend/utils/misc/guc.c
--- postgresql-8.2.4/src/backend/utils/misc/guc.c 2006-11-29 15:50:07.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/utils/misc/guc.c 2007-02-18 22:52:16.000000000 +0100
@@ -25,6 +25,9 @@
#include <syslog.h>
#endif
+#ifdef USE_REPLICATION
+#include "replicate.h"
+#endif /* USE_REPLICATION */
#include "access/gin.h"
#include "access/twophase.h"
@@ -236,6 +239,9 @@
char *role_string;
char *session_authorization_string;
+#ifdef USE_REPLICATION
+static void ShowReplicationServerConfig(DestReceiver *dest);
+#endif /* USE_REPLICATION */
/*
* Displayable names for context types (enum GucContext)
@@ -970,6 +976,40 @@
&pg_krb_caseins_users,
false, NULL, NULL
},
+#ifdef USE_REPLICATION
+ {
+ {"pgr_force_loadbalance", PGC_USERSET, CLIENT_CONN_STATEMENT,
+ gettext_noop("force loadbalance mode"),
+ NULL
+ },
+ &PGRforceLoadBalance,
+ false, NULL, NULL
+ },
+ {
+ {"check_constraint_with_lock", PGC_USERSET, CLIENT_CONN_STATEMENT,
+ gettext_noop("check constrain with lock"),
+ NULL
+ },
+ &PGRcheckConstraintWithLock,
+ false, NULL, NULL
+ },
+ {
+ {"auto_lock_table", PGC_USERSET, CLIENT_CONN_STATEMENT,
+ gettext_noop("auto lock table"),
+ NULL
+ },
+ &PGRautoLockTable,
+ true, NULL, NULL
+ },
+ {
+ {"not_replicate_prepared_select", PGC_USERSET, CLIENT_CONN_STATEMENT,
+ gettext_noop("not replicate the prepared as select"),
+ NULL
+ },
+ &PGRnotReplicatePreparedSelect,
+ false, NULL, NULL
+ },
+#endif
{
{"escape_string_warning", PGC_USERSET, COMPAT_OPTIONS_PREVIOUS,
@@ -4830,6 +4870,10 @@
{
if (pg_strcasecmp(name, "all") == 0)
ShowAllGUCConfig(dest);
+#ifdef USE_REPLICATION
+	else if (pg_strcasecmp(name, "replication_server") == 0)	/* same locale-independent compare as "all" */
+		ShowReplicationServerConfig(dest);
+#endif
else
ShowGUCConfigOption(name, dest);
}
@@ -6512,5 +6556,72 @@
return nbuf;
}
+#ifdef USE_REPLICATION
+/*
+ * SHOW replication_server
+ *
+ * Sends one row per ReplicateServerData entry to "dest", with four TEXT
+ * columns: status, host_name, port_num and recovery_port_num.  Each entry
+ * is first probed with PGR_Check_Replicate_Server_Status(); when the probe
+ * reports STATUS_ERROR the entry is passed to
+ * PGR_Set_Replication_Server_Status() with DATA_ERR.
+ *
+ * No rows are sent when ReplicateServerData is NULL.
+ */
+static void
+ShowReplicationServerConfig(DestReceiver *dest)
+{
+	TupOutputState *tstate;
+	TupleDesc	tupdesc;
+	char	   *values[4];
+	char		port_buf[32];
+	char		recovery_port_buf[32];
+	ReplicateServerInfo *sp;
+
+	/* need a tuple descriptor representing four TEXT columns */
+	tupdesc = CreateTemplateTupleDesc(4, false);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "status", TEXTOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "host_name", TEXTOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "port_num", TEXTOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 4, "recovery_port_num",
+					   TEXTOID, -1, 0);
+
+	/* prepare for projection of tuples */
+	tstate = begin_tup_output_tupdesc(dest, tupdesc);
+
+	/* first pass: refresh the status of every server before reporting */
+	for (sp = ReplicateServerData; sp != NULL && sp->useFlag != DATA_END; sp++)
+	{
+		if (PGR_Check_Replicate_Server_Status(sp) == STATUS_ERROR)
+			PGR_Set_Replication_Server_Status(sp, DATA_ERR);
+	}
+
+	/* second pass: emit one row per server */
+	for (sp = ReplicateServerData; sp != NULL && sp->useFlag != DATA_END; sp++)
+	{
+		if (sp->useFlag == DATA_USE)
+			values[0] = "ALIVE";
+		else if (sp->useFlag == DATA_ERR)
+			values[0] = "DEAD";
+		else if (sp->useFlag == DATA_INIT)
+			values[0] = "STANDBY";
+		else
+			values[0] = "UNKNOWN";
+
+		values[1] = (char *) sp->hostName;
+
+		/* separate buffers, so both strings stay valid until the row is sent */
+		snprintf(port_buf, sizeof(port_buf), "%d", sp->portNumber);
+		values[2] = port_buf;
+		snprintf(recovery_port_buf, sizeof(recovery_port_buf), "%d",
+				 sp->recoveryPortNumber);
+		values[3] = recovery_port_buf;
+
+		do_tup_output(tstate, values);
+	}
+
+	end_tup_output(tstate);
+}
+#endif /* USE_REPLICATION */
#include "guc-file.c"
diff -aruN postgresql-8.2.4/src/backend/utils/misc/postgresql.conf.sample pgcluster-1.7.0rc7/src/backend/utils/misc/postgresql.conf.sample
--- postgresql-8.2.4/src/backend/utils/misc/postgresql.conf.sample 2007-01-20 22:42:06.000000000 +0100
+++ pgcluster-1.7.0rc7/src/backend/utils/misc/postgresql.conf.sample 2007-02-18 22:52:16.000000000 +0100
@@ -469,3 +469,12 @@
#---------------------------------------------------------------------------
#custom_variable_classes = '' # list of custom variable class names
+
+
+#---------------------------------------------------------------------------
+# PGCluster
+#---------------------------------------------------------------------------
+
+# auto_lock_table = true
+# check_constraint_with_lock = false
+# not_replicate_prepared_select = false
diff -aruN postgresql-8.2.4/src/bin/initdb/initdb.c pgcluster-1.7.0rc7/src/bin/initdb/initdb.c
--- postgresql-8.2.4/src/bin/initdb/initdb.c 2006-10-04 20:58:08.000000000 +0200
+++ pgcluster-1.7.0rc7/src/bin/initdb/initdb.c 2007-02-18 22:52:16.000000000 +0100
@@ -122,6 +122,11 @@
static int n_buffers = 50;
static int n_fsm_pages = 20000;
+#ifdef USE_REPLICATION
+static char *cluster_conf_file;
+static char *pgreplicate_conf_file;
+static char *pglb_conf_file;
+#endif /* USE_REPLICATION */
/*
* Warning messages for authentication methods
*/
@@ -1352,6 +1357,14 @@
free(conflines);
+#ifdef USE_REPLICATION
+ /* cluster.conf */
+ conflines = readfile(cluster_conf_file);
+ snprintf(path, sizeof(path), "%s/cluster.conf", pg_data);
+ writefile(path, conflines);
+ chmod(path, 0600);
+ free(conflines);
+#endif /* USE_REPLICATION */
check_ok();
}
@@ -2712,6 +2725,11 @@
set_input(&info_schema_file, "information_schema.sql");
set_input(&features_file, "sql_features.txt");
set_input(&system_views_file, "system_views.sql");
+#ifdef USE_REPLICATION
+ set_input(&cluster_conf_file, "cluster.conf.sample");
+ set_input(&pgreplicate_conf_file, "pgreplicate.conf.sample");
+ set_input(&pglb_conf_file, "pglb.conf.sample");
+#endif /* USE_REPLICATION */
set_info_version();
@@ -2730,6 +2748,16 @@
desc_file, shdesc_file,
conf_file,
hba_file, ident_file);
+#ifdef USE_REPLICATION
+ fprintf(stderr,
+ "PGCLUSTER_VERSION=%s\n"
+ "CLUSTER_CONF_SAMPLE=%s\nPGREPLICATE_CONF_SAMPLE=%s\n"
+ "PGLB_CONF_SAMPLE=%s\n",
+ PGCLUSTER_VERSION,
+ cluster_conf_file,
+ pgreplicate_conf_file,
+ pglb_conf_file);
+#endif /* USE_REPLICATION */
if (show_setting)
exit(0);
}
@@ -2744,6 +2772,11 @@
check_input(info_schema_file);
check_input(features_file);
check_input(system_views_file);
+#ifdef USE_REPLICATION
+ check_input(cluster_conf_file);
+ check_input(pgreplicate_conf_file);
+ check_input(pglb_conf_file);
+#endif /* USE_REPLICATION */
setlocales();
diff -aruN postgresql-8.2.4/src/bin/pg_dump/pg_dump.c pgcluster-1.7.0rc7/src/bin/pg_dump/pg_dump.c
--- postgresql-8.2.4/src/bin/pg_dump/pg_dump.c 2006-10-10 01:36:59.000000000 +0200
+++ pgcluster-1.7.0rc7/src/bin/pg_dump/pg_dump.c 2007-02-18 22:52:16.000000000 +0100
@@ -119,6 +119,9 @@
/* flag to turn on/off dollar quoting */
static int disable_dollar_quoting = 0;
+#ifdef USE_REPLICATION
+ bool nonReplicate=true;
+#endif
static void help(const char *progname);
static void expand_schema_name_patterns(SimpleStringList *patterns,
@@ -235,6 +238,9 @@
{"column-inserts", no_argument, NULL, 'D'},
{"host", required_argument, NULL, 'h'},
{"ignore-version", no_argument, NULL, 'i'},
+#ifdef USE_REPLICATION
+ {"non-replicate", no_argument ,NULL, 'r'},
+#endif
{"no-reconnect", no_argument, NULL, 'R'},
{"oids", no_argument, NULL, 'o'},
{"no-owner", no_argument, NULL, 'O'},
@@ -368,6 +374,11 @@
pgport = optarg;
break;
+#ifdef USE_REPLICATION
+ case 'r':
+ nonReplicate = true;
+ break;
+#endif
case 'R':
/* no-op, still accepted for backwards compatibility */
break;
@@ -553,6 +564,11 @@
/*
* Start serializable transaction to dump consistent data.
*/
+#ifdef USE_REPLICATION
+ if(nonReplicate) {
+ do_sql_command(g_conn, "set pgr_force_loadbalance to on");
+ }
+#endif /* USE_REPLICATION */
do_sql_command(g_conn, "BEGIN");
do_sql_command(g_conn, "SET TRANSACTION ISOLATION LEVEL SERIALIZABLE");
@@ -751,6 +767,9 @@
printf(_(" -o, --oids include OIDs in dump\n"));
printf(_(" -O, --no-owner skip restoration of object ownership\n"
" in plain text format\n"));
+#ifdef USE_REPLICATION
+ printf(_(" -r, --non-replicate No queries replicate. Available only in pgcluster.\n"));
+#endif
printf(_(" -s, --schema-only dump only the schema, no data\n"));
printf(_(" -S, --superuser=NAME specify the superuser user name to use in\n"
" plain text format\n"));
diff -aruN postgresql-8.2.4/src/bin/pg_dump/pg_dumpall.c pgcluster-1.7.0rc7/src/bin/pg_dump/pg_dumpall.c
--- postgresql-8.2.4/src/bin/pg_dump/pg_dumpall.c 2006-11-21 23:19:46.000000000 +0100
+++ pgcluster-1.7.0rc7/src/bin/pg_dump/pg_dumpall.c 2007-02-18 22:52:16.000000000 +0100
@@ -97,6 +97,9 @@
{"oids", no_argument, NULL, 'o'},
{"no-owner", no_argument, NULL, 'O'},
{"port", required_argument, NULL, 'p'},
+#ifdef USE_REPLICATION
+ {"non-replicate", no_argument ,NULL, 'r'},
+#endif
{"password", no_argument, NULL, 'W'},
{"schema-only", no_argument, NULL, 's'},
{"superuser", required_argument, NULL, 'S'},
@@ -161,7 +164,7 @@
pgdumpopts = createPQExpBuffer();
- while ((c = getopt_long(argc, argv, "acdDgh:ioOp:sS:U:vWxX:", long_options, &optindex)) != -1)
+ while ((c = getopt_long(argc, argv, "acdDgh:ioOp:rsS:U:vWxX:", long_options, &optindex)) != -1)
{
switch (c)
{
@@ -215,6 +218,11 @@
#endif
break;
+#ifdef USE_REPLICATION
+ case 'r':
+ appendPQExpBuffer(pgdumpopts, " -r");
+ break;
+#endif /* USE_REPLICATION */
case 's':
schema_only = true;
appendPQExpBuffer(pgdumpopts, " -s");
@@ -397,6 +405,9 @@
printf(_("\nConnection options:\n"));
printf(_(" -h, --host=HOSTNAME database server host or socket directory\n"));
printf(_(" -p, --port=PORT database server port number\n"));
+#ifdef USE_REPLICATION
+ printf(_(" -r, --non-replicate No queries replicate. Available only in pgcluster.\n"));
+#endif /* USE_REPLICATION */
printf(_(" -U, --username=NAME connect as specified database user\n"));
printf(_(" -W, --password force password prompt (should happen automatically)\n"));
diff -aruN postgresql-8.2.4/src/include/commands/prepare.h pgcluster-1.7.0rc7/src/include/commands/prepare.h
--- postgresql-8.2.4/src/include/commands/prepare.h 2006-10-04 02:30:08.000000000 +0200
+++ pgcluster-1.7.0rc7/src/include/commands/prepare.h 2007-02-18 22:52:16.000000000 +0100
@@ -64,4 +64,8 @@
extern bool PreparedStatementReturnsTuples(PreparedStatement *stmt);
extern List *FetchPreparedStatementTargetList(PreparedStatement *stmt);
+#ifdef USE_REPLICATION
+extern bool PGR_is_select_prepared_statement(PrepareStmt *stmt);
+#endif /* USE_REPLICATION */
+
#endif /* PREPARE_H */
diff -aruN postgresql-8.2.4/src/include/pg_config.h.in pgcluster-1.7.0rc7/src/include/pg_config.h.in
--- postgresql-8.2.4/src/include/pg_config.h.in 2006-11-06 04:44:38.000000000 +0100
+++ pgcluster-1.7.0rc7/src/include/pg_config.h.in 2007-02-18 22:52:17.000000000 +0100
@@ -673,3 +673,7 @@
/* Define to empty if the keyword `volatile' does not work. Warning: valid
code using `volatile' can become incorrect without. Disable with care. */
#undef volatile
+
+/* PGCluster version */
+#undef PGCLUSTER_VERSION
+
diff -aruN postgresql-8.2.4/src/include/replicate.h pgcluster-1.7.0rc7/src/include/replicate.h
--- postgresql-8.2.4/src/include/replicate.h 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/include/replicate.h 2007-02-18 22:52:17.000000000 +0100
@@ -0,0 +1,223 @@
+/*-------------------------------------------------------------------------
+ *
+ * replicate.h
+ * Primary include file for replicate server .c files
+ *
+ * This should be the first file included by replicate modules.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef REPLICATE_H
+#define REPLICATE_H
+
+#ifndef _SYS_TIME_H
+#include <sys/time.h>	/* struct timeval, used by the PGR_GetTimeOfDay() prototype */
+#endif
+#include "tcop/dest.h"
+#include "storage/proc.h"
+#include "lib/stringinfo.h"
+#include "replicate_com.h"
+
+#define STAND_ALONE_TAG "When_Stand_Alone"
+#define NOT_REPLICATE_INFO_TAG "Not_Replicate_Info"
+#define DB_NAME_TAG "DB_Name"
+#define TABLE_NAME_TAG "Table_Name"
+#define RSYNC_PATH_TAG "Rsync_Path"
+#define RSYNC_OPTION_TAG "Rsync_Option"
+#define RSYNC_COMPRESS_TAG "Rsync_Compress"
+#define PG_DUMP_PATH_TAG "Pg_Dump_Path"
+
+#define CLUSTER_CONF_FILE "cluster.conf"
+#define DEFAULT_RSYNC "/usr/bin/rsync"
+#define DEFAULT_PG_DUMP "/usr/local/pgsql/bin/pg_dump"
+#define NOT_SESSION_AUTHORIZATION (0)
+#define SESSION_AUTHORIZATION_BEGIN (1)
+#define SESSION_AUTHORIZATION_END (2)
+
+#define READ_ONLY_IF_STAND_ALONE "read_only"
+#define READ_WRITE_IF_STAND_ALONE "read_write"
+#define PERMIT_READ_ONLY (1)
+#define PERMIT_READ_WRITE (2)
+#define STATUS_REPLICATED (3)
+#define STATUS_CONTINUE (4)
+#define STATUS_CONTINUE_SELECT (5)
+#define STATUS_NOT_REPLICATE (6)
+#define STATUS_SKIP_QUERY (7)
+#define STATUS_RECOVERY (11)
+#define STATUS_REPLICATION_ABORT (98)
+#define STATUS_DEADLOCK_DETECT (99)
+
+#define TO_REPLICATION_SERVER (0)
+#define TO_FRONTEND (1)
+
+#define PGR_DEADLOCK_DETECTION_MSG "deadlock detected!"
+#define PGR_REPLICATION_ABORT_MSG "replication aborted!"
+#define SKIP_QUERY_1 "begin; select getdatabaseencoding(); commit"
+#define SKIP_QUERY_2 "BEGIN; SELECT usesuper FROM pg_catalog.pg_user WHERE usename = '%s'; COMMIT"
+#define SKIP_QUERY_3 "SET autocommit TO 'on'"
+#define SKIP_QUERY_4 "SET search_path = public"
+#define SYS_QUERY_1 "set pgr_force_loadbalance to on"
+
+#define PGR_1ST_RECOVERY (1)
+#define PGR_2ND_RECOVERY (2)
+#define PGR_COLD_RECOVERY (1)
+#define PGR_HOT_RECOVERY (2)
+#define PGR_WITHOUT_BACKUP (3)
+
+#define PGR_MESSAGE_OTHER (0)
+#define PGR_MESSAGE_SELECT (1)
+#define PGR_MESSAGE_PREPARE (2)
+#define PGR_MESSAGE_EXECUTE (3)
+#define PGR_MESSAGE_DEALLOCATE (4)
+
+typedef struct
+{
+ bool is_stand_alone;
+ int permit;
+} PGR_Stand_Alone_Type;
+
+typedef struct
+{
+ char db_name[DBNAME_MAX_LENGTH];
+ char table_name[TABLENAME_MAX_LENGTH];
+} PGR_Not_Replicate_Type;
+
+typedef struct	/* type of the global PGR_Check_Lock declared below */
+{
+ bool check_lock_conflict;
+ bool deadlock;
+ int status_lock_conflict;
+ int dest;	/* callers compare this with TO_REPLICATION_SERVER / TO_FRONTEND */
+} PGR_Check_Lock_Type;
+
+typedef struct
+{
+ char * query_string;
+ int query_len;
+ char cmdSts;
+ char cmdType;
+ char useFlag;
+} PGR_Retry_Query_Type;
+
+
+/* replication log */
+typedef struct {
+ uint32_t PGR_Replicate_ID;
+ uint32_t PGR_Request_ID;
+} PGR_ReplicationLog_Info;
+
+typedef struct {
+ char * password;
+ char md5Salt[4];
+ char cryptSalt[2];
+} PGR_Password_Info;
+
+extern char * Query_String;
+extern int TransactionQuery;
+extern int Transaction_Mode;
+extern bool PGR_Noticed_Abort;
+extern bool Session_Authorization_Mode;
+extern bool Create_Temp_Table_Mode;
+extern int RecoveryPortNumber;
+extern char * RsyncPath;
+extern char * RsyncOption;
+extern bool RsyncCompress;
+extern char * PgDumpPath;
+extern int TransactionSock;
+extern ReplicateNow * ReplicateCurrentTime;
+extern CopyData * PGRCopyData;
+extern bool PGR_Copy_Data_Need_Replicate;
+extern PGR_Stand_Alone_Type * PGR_Stand_Alone;
+extern PGR_Not_Replicate_Type * PGR_Not_Replicate;
+extern int PGR_Not_Replicate_Rec_Num;
+extern bool autocommit;
+extern bool PGR_Is_Replicated_Query;
+extern PGR_Check_Lock_Type PGR_Check_Lock;
+extern int PGR_Sock_To_Replication_Server;
+extern bool PGR_Need_Notice;
+extern bool PGR_Lock_Noticed;
+extern bool PGR_Recovery_Option;
+extern int PGR_recovery_mode;
+extern ReplicateServerInfo * CurrentReplicateServer;
+extern ReplicateServerInfo * LastReplicateServer;
+extern char * PGRSelfHostName;
+extern int PGR_Pending_Sem_Num;
+extern int PGR_Response_Mode;
+extern bool PGR_Reliable_Mode_Wait;
+extern PGR_Retry_Query_Type PGR_Retry_Query;
+extern bool needToUpdateReplicateIdOnNextQueryIsDone;
+extern PGR_ReplicationLog_Info ReplicationLog_Info;
+extern bool PGR_Not_Replication_Query;
+extern bool PGR_Is_Sync_OID;
+extern PGR_Password_Info * PGR_password;
+
+/* backend/utils/misc/guc.c */
+extern bool PGRforceLoadBalance;
+extern bool PGRcheckConstraintWithLock;
+extern bool PGRautoLockTable;
+extern bool PGRnotReplicatePreparedSelect;
+
+/* in backend/libpq/replicate.c */
+extern int PGR_Init_Replicate_Server_Data(void);
+extern int PGR_Set_Replicate_Server_Socket(void);
+extern int PGR_get_replicate_server_socket ( ReplicateServerInfo * sp , int socket_type );
+extern ReplicateServerInfo * PGR_get_replicate_server_info(void);
+extern ReplicateServerInfo * PGR_check_replicate_server_info(void);
+extern char * PGR_Send_Replicate_Command(char * query_string, int query_len, char cmdSts ,char cmdType);
+extern bool PGR_Is_Replicated_Command(char * query);
+extern int Xlog_Check_Replicate(int operation);
+extern int PGR_Replicate_Function_Call(void);
+extern void PGR_delete_shm(void);
+extern int PGR_replication(char * query_string, CommandDest dest, Node *parsetree, const char * commandTag);
+extern bool PGR_Is_System_Command(char * query);
+extern int PGR_Call_System_Command(char * command);
+extern int PGR_GetTimeOfDay(struct timeval *tp,struct timezone *tpz);
+extern long PGR_Random(void);
+extern int PGR_Set_Current_Time(char * sec, char * usec);
+extern int PGR_Send_Copy(CopyData * copy, int end);
+extern CopyData * PGR_Set_Copy_Data(CopyData * copy, char *str, int len, int end);
+extern char * PGR_scan_terminate( char * str);
+extern bool PGR_Is_Stand_Alone(void);
+extern void PGR_Send_Message_To_Frontend(char * msg);
+extern void PGR_Notice_Transaction_Query_Done(void);
+extern void PGR_Notice_Transaction_Query_Aborted(void);
+extern int PGRsend_system_command(char cmdSts, char cmdType);
+extern int PGR_Notice_Conflict(void);
+extern int PGR_Recv_Trigger (int user_timeout);
+extern void PGR_Set_Replication_Server_Status( ReplicateServerInfo * sp, int status);
+extern int PGR_Is_Skip_Replication(char * query);
+extern bool PGR_Did_Commit_Transaction(void);
+extern int PGR_Set_Transaction_Mode(int mode,const char * commandTag);
+extern char * PGR_Remove_Comment(char * str);
+extern void PGR_Force_Replicate_Query(void);
+extern void PGR_Notice_DeadLock(void);
+extern void PGR_Set_Cluster_Status(int status);
+extern int PGR_Get_Cluster_Status(void);
+extern int PGR_Check_Replicate_Server_Status(ReplicateServerInfo * sp);
+extern int PGR_lo_import(char * filename);
+extern int PGR_lo_create(int flags);
+extern int PGR_lo_open(Oid lobjId,int32 mode);
+extern int PGR_lo_close(int32 fd);
+extern int PGR_lo_write(int fd, char *buf, int len);
+extern int PGR_lo_lseek(int32 fd, int32 offset, int32 whence);
+extern int PGR_lo_unlink(Oid lobjId);
+extern uint32_t PGRget_replication_id(void);
+extern Oid PGRGetNewObjectId(Oid last_id);
+extern int PGR_Send_Input_Message(char cmdType,StringInfo input_message);
+extern bool PGR_is_select_prepare_query(void);
+extern char * PGR_get_md5salt(char * md5Salt, char * string);
+extern int PGR_recv_replicate_result(int sock,char * result,int user_timeout);
+
+/* in backend/libpq/recovery.c */
+extern int PGR_Master_Main(void);
+extern int PGR_Recovery_Main(int mode);
+extern int PGR_recovery_error_send(void);
+extern int PGR_recovery_finish_send(void);
+extern int PGR_recovery_queue_data_req(void);
+
+/* in backend/libpq/lifecheck.c */
+extern int PGR_Lifecheck_Main(void);
+
+/* in backend/access/transam/xact.c */
+extern void PGR_Reload_Start_Time(void);
+#endif /* REPLICATE_H */
diff -aruN postgresql-8.2.4/src/include/replicate_com.h pgcluster-1.7.0rc7/src/include/replicate_com.h
--- postgresql-8.2.4/src/include/replicate_com.h 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/include/replicate_com.h 2007-03-01 16:27:15.000000000 +0100
@@ -0,0 +1,432 @@
+/*-------------------------------------------------------------------------
+ *
+ * replicate_com.h
+ * Primary include file for replicate server .c files
+ *
+ * This should be the first file included by replicate modules.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef REPLICATE_COM_H
+#define REPLICATE_COM_H 1
+
+#ifndef _SYS_TYPES_H	/* each guard skips a header the libc has already marked as included */
+#include <sys/types.h>
+#endif
+#ifndef _INTTYPES_H	/* uint16_t / uint32_t used by the packet structs below */
+#include <inttypes.h>
+#endif
+#ifndef _NETINET_IN_H
+#include <netinet/in.h>
+#endif
+
+#include "c.h"
+#include "pg_config.h"
+
+/* default values */
+#define DEFAULT_PGLB_PORT (6001)
+#define DEFAULT_PGLB_RECOVERY_PORT (6101)
+#define DEFAULT_PGLB_LIFECHECK_PORT (6201)
+#define DEFAULT_CLUSTER_PORT (5432)
+#define DEFAULT_CLUSTER_RECOVERY_PORT (7101)
+#define DEFAULT_CLUSTER_LIFECHECK_PORT (7201)
+#define DEFAULT_PGRP_PORT (8001)
+#define DEFAULT_PGRP_RECOVERY_PORT (8101)
+#define DEFAULT_PGRP_LIFECHECK_PORT (8201)
+#define DEFAULT_PGRP_RLOG_PORT (8301)
+#define MAX_DB_SERVER (32)
+
+/**************************
+* *
+* Packet ID definition *
+* *
+***************************/
+/*=========================
+ Replication packet id
+===========================*/
+#define CMD_SYS_REPLICATE 'R'
+/*-------------------------
+ Simple Query
+--------------------------*/
+#define CMD_STS_SET_SESSION_AUTHORIZATION 'S'
+#define CMD_STS_TRANSACTION 'T'
+#define CMD_STS_TEMP_TABLE 'E'
+#define CMD_STS_QUERY 'Q'
+#define CMD_STS_OTHER 'O'
+
+#define CMD_TYPE_VACUUM 'V' /* one-character command codes for simple queries */
+#define CMD_TYPE_ANALYZE 'A'
+#define CMD_TYPE_REINDEX 'N'
+#define CMD_TYPE_SELECT 'S'
+#define CMD_TYPE_EXPLAIN 'X'
+#define CMD_TYPE_SET 'T'
+#define CMD_TYPE_RESET 't'
+#define CMD_TYPE_INSERT 'I'
+#define CMD_TYPE_DELETE 'D'
+#define CMD_TYPE_EXECUTE 'U' /* NOTE(review): same code as CMD_TYPE_UPDATE - confirm no receiver must tell them apart */
+#define CMD_TYPE_UPDATE 'U'
+#define CMD_TYPE_BEGIN 'B'
+#define CMD_TYPE_COMMIT 'E'
+#define CMD_TYPE_ROLLBACK 'R'
+#define CMD_TYPE_CONNECTION_CLOSE 'x'
+#define CMD_TYPE_SESSION_AUTHORIZATION_BEGIN 'a'
+#define CMD_TYPE_SESSION_AUTHORIZATION_END 'b'
+#define CMD_TYPE_SAVEPOINT 's'
+#define CMD_TYPE_ROLLBACK_TO_SAVEPOINT 'r'
+#define CMD_TYPE_RELEASE_SAVEPOINT 'l'
+#define CMD_TYPE_OTHER 'O'
+
+/*=========================
+ System call packet id
+===========================*/
+#define CMD_SYS_CALL 'S'
+#define CMD_SYS_PREREPLICATE 'Z'
+
+#define CMD_STS_NOTICE 'N'
+#define CMD_STS_RESPONSE 'R'
+#define CMD_STS_TRANSACTION_ABORT 'A'
+#define CMD_STS_QUERY_SUSPEND 'P'
+#define CMD_STS_QUERY_DONE 'D'
+
+#define CMD_TYPE_COMMIT_CONFIRM 'c'
+#define CMD_TYPE_QUERY_CONFIRM 'q'
+#define CMD_TYPE_DEADLOCK_DETECT 'd'
+#define CMD_TYPE_FRONTEND_CLOSED 'x'
+
+/*----------------------------
+ Copy Command
+------------------------------*/
+#define CMD_STS_COPY 'C'
+
+#define CMD_TYPE_COPY 'C'
+#define CMD_TYPE_COPY_DATA 'd'
+#define CMD_TYPE_COPY_DATA_END 'e'
+
+/*----------------------------
+ Large Object
+------------------------------*/
+#define CMD_STS_LARGE_OBJECT 'L'
+
+#define CMD_TYPE_LO_IMPORT 'I'
+#define CMD_TYPE_LO_CREATE 'C'
+#define CMD_TYPE_LO_OPEN 'O'
+#define CMD_TYPE_LO_WRITE 'W'
+#define CMD_TYPE_LO_LSEEK 'S'
+#define CMD_TYPE_LO_CLOSE 'X'
+#define CMD_TYPE_LO_UNLINK 'U'
+
+/*-------------------------
+ Prepare/Params Query
+--------------------------*/
+#define CMD_STS_PREPARE 'P'
+
+#define CMD_TYPE_P_PARSE 'P'
+#define CMD_TYPE_P_BIND 'B'
+#define CMD_TYPE_P_EXECUTE 'E'
+#define CMD_TYPE_P_FASTPATH 'F'
+#define CMD_TYPE_P_CLOSE 'C'
+#define CMD_TYPE_P_DESCRIBE 'D'
+#define CMD_TYPE_P_FLUSH 'H'
+#define CMD_TYPE_P_SYNC 'S'
+
+/*=========================
+ Lifecheck packet id
+===========================*/
+#define CMD_SYS_LIFECHECK 'W'
+#define CMD_STS_LOADBALANCER 'A'
+#define CMD_STS_CLUSTER 'B'
+#define CMD_STS_REPLICATOR 'C'
+
+#define PGR_TRANSACTION_SOCKET (0)
+#define PGR_QUERY_SOCKET (1)
+
+#define DATA_FREE (0)
+#define DATA_INIT (1)
+#define DATA_USE (2)
+#define DATA_ERR (90)
+#define DATA_END (-1)
+#define HOSTNAME_MAX_LENGTH (128)
+#define DBNAME_MAX_LENGTH (128)
+#define USERNAME_MAX_LENGTH (128)
+#define PASSWORD_MAX_LENGTH (128)
+#define TABLENAME_MAX_LENGTH (128)
+#define PATH_MAX_LENGTH (256)
+#define MAX_SERVER_NUM (128)
+#define MAX_RETRY_TIMES (3)
+#define MAX_SOCKET_QUEUE (100000)
+#define TRANSACTION_ERROR_RESULT "TRANSACTION_ERROR"
+#define REPLICATE_SERVER_SHM_KEY (1020)
+/* target -> replicate */
+#define RECOVERY_PREPARE_REQ (1)
+/* replicate -> master */
+#define RECOVERY_PGDATA_REQ (2)
+/* master -> replicate */
+#define RECOVERY_PGDATA_ANS (3)
+/* replicate -> target */
+#define RECOVERY_PREPARE_ANS (4)
+/* target -> replicate */
+#define RECOVERY_START_REQ (5)
+/* replicate -> master */
+#define RECOVERY_FSYNC_REQ (6)
+/* master -> replicate */
+#define RECOVERY_FSYNC_ANS (7)
+/* replicate -> target */
+#define RECOVERY_START_ANS (8)
+/* target -> replicate */
+#define RECOVERY_QUEUE_DATA_REQ (9)
+/* replicate -> target */
+#define RECOVERY_QUEUE_DATA_ANS (10)
+/* target -> replicate */
+#define RECOVERY_FINISH (11)
+
+#define RECOVERY_ERROR_OCCUPIED (100)
+#define RECOVERY_ERROR_CONNECTION (101)
+#define RECOVERY_ERROR_TARGET_ONLY (102)
+#define RECOVERY_ERROR_ANS (200)
+
+/* lifecheck ask from cluster db */
+#define LIFECHECK_ASK_FROM_CLUSTER (1)
+/* lifecheck response from replication server */
+#define LIFECHECK_RES_FROM_REPLICATOR (2)
+/* lifecheck ask from replication server */
+#define LIFECHECK_ASK_FROM_REPLICATOR (3)
+/* lifecheck response from cluster db */
+#define LIFECHECK_RES_FROM_CLUSTER (4)
+
+#define REPLICATION_SERVER_INFO_TAG "Replicate_Server_Info"
+#define HOST_NAME_TAG "Host_Name"
+#define PORT_TAG "Port"
+#define RECOVERY_PORT_TAG "Recovery_Port"
+#define LIFECHECK_PORT_TAG "LifeCheck_Port"
+#define TIMEOUT_TAG "Replication_Timeout"
+#define LIFECHECK_TIMEOUT_TAG "LifeCheck_Timeout"
+#define LIFECHECK_INTERVAL_TAG "LifeCheck_Interval"
+
+#define RECOVERY_INIT (0)
+#define RECOVERY_PREPARE_START (1)
+#define RECOVERY_START_1 (2)
+#define RECOVERY_CLEARED (3)
+#define RECOVERY_WAIT_CLEAN (10)
+#define RECOVERY_ERROR (99)
+
+/* response mode */
+#define PGR_FAST_MODE (0)
+#define PGR_NORMAL_MODE (1)
+#define PGR_RELIABLE_MODE (2)
+
+#define RECOVERY_TIMEOUT (600)
+#ifndef COMPLETION_TAG_BUFSIZE
+#define COMPLETION_TAG_BUFSIZE (128)
+#endif
+
+/* replicate log type */
+#define FROM_R_LOG_TYPE (1)
+#define FROM_C_DB_TYPE (2)
+#define CONNECTION_SUSPENDED_TYPE (3)
+
+#define PGR_SYSTEM_COMMAND_FUNC "PGR_SYSTEM_COMMAND_FUNCTION"
+#define PGR_STARTUP_REPLICATION_SERVER_FUNC_NO (1)
+#define PGR_CHANGE_REPLICATION_SERVER_FUNC_NO (2)
+#define PGR_SET_CURRENT_TIME_FUNC_NO (3)
+#define PGR_NOTICE_DEADLOCK_DETECTION_FUNC_NO (4)
+#define PGR_TRANSACTION_CONFIRM_ANSWER_FUNC_NO (5)
+#define PGR_RELIABLE_MODE_DONE_FUNC_NO (6)
+#define PGR_NOTICE_ABORT_FUNC_NO (7)
+#define PGR_SET_CURRENT_REPLICATION_QUERY_ID_NO (8)
+#define PGR_QUERY_CONFIRM_ANSWER_FUNC_NO (9)
+#define PGR_GET_OID_FUNC_NO (10)
+#define PGR_SET_OID_FUNC_NO (11)
+
+#define PGR_CMD_ARG_NUM (10)
+#define PGR_LOCK_CONFLICT_NOTICE_CMD "PGR_LOCK_CONFLICT_NOTICE_CMD"
+#define PGR_DEADLOCK_DETECT_NOTICE_CMD "PGR_DEADLOCK_DETECT_NOTICE_CMD"
+#define PGR_QUERY_DONE_NOTICE_CMD "PGR_QUERY_DONE_NOTICE_CMD"
+#define PGR_QUERY_ABORTED_NOTICE_CMD "PGR_QUERY_ABORTED_NOTICE_CMD"
+#define PGR_RETRY_LOCK_QUERY_CMD "PGR_RETRY_LOCK_QUERY_CMD"
+#define PGR_NOT_YET_REPLICATE_NOTICE_CMD "PGR_NOT_YET_REPLICATE_NOTICE_CMD"
+#define PGR_ALREADY_REPLICATED_NOTICE_CMD "PGR_ALREADY_REPLICATED_NOTICE_CMD"
+#define PGR_NOT_YET_COMMIT (0)
+#define PGR_ALREADY_COMMITTED (1)
+
+#define COPYBUFSIZ (8192)
+#define MAX_WORDS (24)
+#define MAX_WORD_LETTERS (48)
+#define PGR_MESSAGE_BUFSIZE (128)
+#define INT_LENGTH (12)
+#define PGR_MAX_COUNTER (0x0FFFFFFF)
+#define PGR_GET_OVER_FLOW_FILTER (0xF0000000)
+#define PGR_GET_DATA_FILTER (0x0FFFFFFF)
+#define PGR_SET_OVER_FLOW (0x10000000)
+#define PGR_MIN_COUNTER (0x0000000F)
+
+#define STRCMP(x,y) (strncmp(x,y,strlen(y))) /* 0 when x begins with y: a prefix match, not full equality */
+
+/* life check target */
+#define SYN_TO_LOAD_BALANCER (0)
+#define SYN_TO_CLUSTER_DB (1)
+#define SYN_TO_REPLICATION_SERVER (2)
+#define LIFE_CHECK_TRY_COUNT (2)
+#define LIFE_CHECK_STOP (0)
+#define LIFE_CHECK_START (1)
+
+#ifndef HAVE_UNION_SEMUN
+union semun {
+ int val;
+ struct semid_ds *buf;
+ unsigned short int *array;
+ struct seminfo *__buf;
+};
+#endif
+
+typedef struct ReplicateHeaderType
+{
+ char cmdSys;
+ char cmdSts; /*
+ Q:query
+ T:transaction
+ */
+ char cmdType; /*
+ S:select
+ I:insert
+ D:delete
+ U:update
+ B:begin
+ E:commit/rollback/end
+ O:others
+ */
+ char rlog; /*
+ -- kind of replication log --
+ 1: send from replication log
+ 2: send from cluster db (should be retry)
+ 3: connection suspended
+ */
+ uint16_t port;
+ uint16_t pid;
+ uint32_t query_size;
+ char from_host[HOSTNAME_MAX_LENGTH];
+ char dbName[DBNAME_MAX_LENGTH];
+ char userName[USERNAME_MAX_LENGTH];
+ struct timeval tv;
+ uint32_t query_id;
+ int isAutoCommit; /* 0 if autocommit is off. 1 if autocommit is on */
+ uint32_t request_id;
+ uint32_t replicate_id;
+ char password[PASSWORD_MAX_LENGTH];
+ char md5Salt[4];
+ char cryptSalt[2];
+ char dummySalt[2];
+} ReplicateHeader;
+
+typedef struct RecoveryPacketType
+{
+ uint16_t packet_no; /* presumably a RECOVERY_* value (those run to 11; this list stops at 9)
+ 1:start recovery prepare
+ 2:ask pgdata
+ 3:ans pgdata
+ 4:send master info
+ 5:start queueing query
+ 6:request fsync
+ 7:ready to fsync
+ 8:prepared master
+ 9:finished rsync
+ */
+ uint16_t max_connect;
+ uint16_t port;
+ uint16_t recoveryPort;
+ char hostName[HOSTNAME_MAX_LENGTH];
+ char pg_data[PATH_MAX_LENGTH];
+ char userName[USERNAME_MAX_LENGTH];
+} RecoveryPacket;
+
+typedef struct
+{
+ char table[128];
+ int rec_no;
+ char key[128];
+ char value[128];
+ char * last;
+ char * next;
+} ConfDataType;
+
+
+typedef struct ReplicateServerInfoType
+{
+ uint32_t useFlag;
+ char hostName[HOSTNAME_MAX_LENGTH];
+ uint16_t portNumber;
+ uint16_t recoveryPortNumber;
+ uint16_t lifecheckPortNumber;
+ uint16_t RLogPortNumber;
+ uint32_t sock;
+ uint32_t rlog_sock;
+ uint32_t replicate_id;
+ uint16_t response_mode;
+ uint16_t retry_count;
+} ReplicateServerInfo;
+
+
+typedef struct ReplicateNowType
+{
+ uint32_t replicate_id;
+ int useFlag;
+ int use_seed;
+ int use_time;
+ int offset_sec;
+ int offset_usec;
+ struct timeval tp;
+} ReplicateNow;
+
+typedef struct CopyDataType
+{
+ int cnt;
+ char copy_data[COPYBUFSIZ];
+} CopyData;
+
+typedef struct ClusterDBInfoType
+{
+ int status;
+} ClusterDBInfo;
+
+typedef struct
+{
+ uint32_t arg1;
+ uint32_t arg2;
+ uint32_t arg3;
+ char buf[1];
+} LOArgs;
+
+typedef struct
+{
+ int length;
+ char data[1];
+} ArrayData;
+
+extern ConfDataType * ConfData_Top;
+extern ConfDataType * ConfData_End;
+extern ReplicateServerInfo * ReplicateServerData;
+extern ClusterDBInfo * ClusterDBData;
+extern int ReplicateServerShmid;
+extern int ClusterDBShmid;
+extern bool PGR_Under_Replication_Server;
+extern int PGR_Replication_Timeout;
+extern int PGR_Lifecheck_Timeout;
+extern int PGR_Lifecheck_Interval;
+
+/* in backend/libpq/replicate_com.c */
+extern int PGR_Create_Socket_Connect(int * fdP, char * hostName , unsigned short portNumber);
+extern void PGR_Close_Sock(int * sock);
+extern int PGR_Create_Socket_Bind(int * fdP, char * hostName , unsigned short portNumber);
+extern int PGR_Create_Acception(int fd, int * sockP, char * hostName , unsigned short portNumber);
+extern int PGR_Free_Conf_Data(void);
+extern int PGR_Get_Conf_Data(char * dir , char * fname);
+extern void PGRset_recovery_packet_no(RecoveryPacket * packet, int packet_no);
+extern unsigned int PGRget_ip_by_name(char * host);
+extern int PGRget_time_value(char *str);
+
+extern void PGRwrite_log_file(FILE * fp, const char * fmt,...);
+extern void show_debug(const char * fmt,...);
+extern void show_error(const char * fmt,...);
+
+
+
+#endif /* REPLICATE_COM_H */
diff -aruN postgresql-8.2.4/src/include/storage/lmgr.h pgcluster-1.7.0rc7/src/include/storage/lmgr.h
--- postgresql-8.2.4/src/include/storage/lmgr.h 2006-08-18 18:09:13.000000000 +0200
+++ pgcluster-1.7.0rc7/src/include/storage/lmgr.h 2007-02-18 22:52:17.000000000 +0100
@@ -15,6 +15,7 @@
#define LMGR_H
#include "storage/lock.h"
+#include "storage/bufmgr.h"
#include "utils/rel.h"
@@ -69,4 +70,5 @@
/* Knowledge about which locktags describe temp objects */
extern bool LockTagIsTemp(const LOCKTAG *tag);
+extern void XactLockTableWaitForCluster(TransactionId xid,Buffer buffer);
#endif /* LMGR_H */
diff -aruN postgresql-8.2.4/src/include/storage/proc.h pgcluster-1.7.0rc7/src/include/storage/proc.h
--- postgresql-8.2.4/src/include/storage/proc.h 2006-10-04 02:30:10.000000000 +0200
+++ pgcluster-1.7.0rc7/src/include/storage/proc.h 2007-02-18 22:52:17.000000000 +0100
@@ -97,6 +97,9 @@
SHM_QUEUE myProcLocks[NUM_LOCK_PARTITIONS];
struct XidCache subxids; /* cache for subtransaction XIDs */
+#ifdef USE_REPLICATION
+ unsigned int replicationId; /* id for replication. */
+#endif
};
/* NOTE: "typedef struct PGPROC PGPROC" appears in storage/lock.h. */
diff -aruN postgresql-8.2.4/src/interfaces/libpq/Makefile pgcluster-1.7.0rc7/src/interfaces/libpq/Makefile
--- postgresql-8.2.4/src/interfaces/libpq/Makefile 2006-12-28 01:01:12.000000000 +0100
+++ pgcluster-1.7.0rc7/src/interfaces/libpq/Makefile 2007-02-18 22:52:17.000000000 +0100
@@ -33,7 +33,7 @@
OBJS= fe-auth.o fe-connect.o fe-exec.o fe-misc.o fe-print.o fe-lobj.o \
fe-protocol2.o fe-protocol3.o pqexpbuffer.o pqsignal.o fe-secure.o \
- md5.o ip.o wchar.o encnames.o noblock.o pgstrcasecmp.o thread.o \
+ dllist.o md5.o ip.o wchar.o encnames.o noblock.o pgstrcasecmp.o thread.o \
$(filter crypt.o getaddrinfo.o inet_aton.o open.o snprintf.o strerror.o strlcpy.o, $(LIBOBJS))
ifeq ($(PORTNAME), cygwin)
@@ -89,6 +89,9 @@
encnames.c wchar.c : % : $(backend_src)/utils/mb/%
rm -f $@ && $(LN_S) $< .
+dllist.c : % : $(backend_src)/lib/dllist.c
+ rm -f $@ && $(LN_S) $< .
+
# We need several not-quite-identical variants of .DEF files to build libpq
# DLLs for Windows. These are made from the single source file exports.txt.
@@ -169,7 +172,7 @@
rm -f '$(DESTDIR)$(includedir)/libpq-fe.h' '$(DESTDIR)$(includedir_internal)/libpq-int.h' '$(DESTDIR)$(includedir_internal)/pqexpbuffer.h' '$(DESTDIR)$(datadir)/pg_service.conf.sample'
clean distclean: clean-lib
- rm -f $(OBJS) pg_config_paths.h crypt.c getaddrinfo.c inet_aton.c noblock.c open.c pgstrcasecmp.c snprintf.c strerror.c strlcpy.c thread.c md5.c ip.c encnames.c wchar.c pthread.h exports.list
+ rm -f $(OBJS) pg_config_paths.h crypt.c getaddrinfo.c inet_aton.c noblock.c open.c pgstrcasecmp.c snprintf.c strerror.c strlcpy.c thread.c md5.c ip.c encnames.c wchar.c pthread.h exports.list dllist.c
rm -f pg_config_paths.h # Might be left over from a Win32 client-only build
maintainer-clean: distclean
diff -aruN postgresql-8.2.4/src/interfaces/libpq/fe-auth.c pgcluster-1.7.0rc7/src/interfaces/libpq/fe-auth.c
--- postgresql-8.2.4/src/interfaces/libpq/fe-auth.c 2006-10-04 02:30:12.000000000 +0200
+++ pgcluster-1.7.0rc7/src/interfaces/libpq/fe-auth.c 2007-02-18 22:52:17.000000000 +0100
@@ -51,6 +51,10 @@
#include "fe-auth.h"
#include "libpq/md5.h"
+#ifdef USE_REPLICATION
+#include "replicate_com.h"
+bool PGR_Under_Replication_Server = false;
+#endif /* USE_REPLICATION */
#ifdef KRB5
/*
@@ -412,6 +416,19 @@
free(crypt_pwd);
return STATUS_ERROR;
}
+#ifdef USE_REPLICATION
+				if (PGR_Under_Replication_Server)
+				{
+					/*
+					 * Called from the replication server: the password was already
+					 * encrypted at the Cluster DB, so copy it through unchanged.
+					 * Copying size - 1 bytes keeps the zeroed buffer NUL-terminated.
+					 */
+					int size = 2 * (MD5_PASSWD_LEN + 1);
+					memset(crypt_pwd,0, size);
+					strncpy(crypt_pwd,password, size - 1);
+				}
+#endif /* USE_REPLICATION */
break;
}
case AUTH_REQ_CRYPT:
diff -aruN postgresql-8.2.4/src/makefiles/Makefile.aix pgcluster-1.7.0rc7/src/makefiles/Makefile.aix
--- postgresql-8.2.4/src/makefiles/Makefile.aix 2006-09-19 17:36:08.000000000 +0200
+++ pgcluster-1.7.0rc7/src/makefiles/Makefile.aix 2007-02-18 22:52:17.000000000 +0100
@@ -44,3 +44,5 @@
$(CC) $(LDFLAGS) $(LDFLAGS_SL) -o $@ $*.o -Wl,-bE:$*$(EXPSUFF) $(SHLIB_LINK)
sqlmansect = 7
+CFLAGS += -pthread
+LDFLAGS += -L/usr/lib/threads
diff -aruN postgresql-8.2.4/src/makefiles/Makefile.freebsd pgcluster-1.7.0rc7/src/makefiles/Makefile.freebsd
--- postgresql-8.2.4/src/makefiles/Makefile.freebsd 2006-04-19 18:32:08.000000000 +0200
+++ pgcluster-1.7.0rc7/src/makefiles/Makefile.freebsd 2007-02-18 22:52:17.000000000 +0100
@@ -28,3 +28,5 @@
endif
sqlmansect = 7
+
+LIBS += -lc_r
diff -aruN postgresql-8.2.4/src/makefiles/Makefile.hpux pgcluster-1.7.0rc7/src/makefiles/Makefile.hpux
--- postgresql-8.2.4/src/makefiles/Makefile.hpux 2006-02-07 18:36:13.000000000 +0100
+++ pgcluster-1.7.0rc7/src/makefiles/Makefile.hpux 2007-02-18 22:52:17.000000000 +0100
@@ -10,6 +10,9 @@
# correctly in the LP64 data model.
LIBS := -lxnet $(LIBS)
+# add thread lib for PGCluster
+LIBS := -lpthread $(LIBS)
+
# Set up rpath so that the executables don't need SHLIB_PATH to be set.
# (Note: --disable-rpath is a really bad idea on this platform...)
ifeq ($(with_gnu_ld), yes)
diff -aruN postgresql-8.2.4/src/makefiles/Makefile.linux pgcluster-1.7.0rc7/src/makefiles/Makefile.linux
--- postgresql-8.2.4/src/makefiles/Makefile.linux 2005-12-09 22:19:36.000000000 +0100
+++ pgcluster-1.7.0rc7/src/makefiles/Makefile.linux 2007-02-18 22:52:17.000000000 +0100
@@ -14,3 +14,4 @@
$(CC) -shared -o $@ $<
sqlmansect = 7
+LIBS += -lpthread
diff -aruN postgresql-8.2.4/src/makefiles/Makefile.netbsd pgcluster-1.7.0rc7/src/makefiles/Makefile.netbsd
--- postgresql-8.2.4/src/makefiles/Makefile.netbsd 2006-04-19 18:32:08.000000000 +0200
+++ pgcluster-1.7.0rc7/src/makefiles/Makefile.netbsd 2007-02-18 22:52:17.000000000 +0100
@@ -30,3 +30,4 @@
endif
sqlmansect = 7
+LIBS += -lpthread
diff -aruN postgresql-8.2.4/src/makefiles/Makefile.openbsd pgcluster-1.7.0rc7/src/makefiles/Makefile.openbsd
--- postgresql-8.2.4/src/makefiles/Makefile.openbsd 2006-04-19 18:32:08.000000000 +0200
+++ pgcluster-1.7.0rc7/src/makefiles/Makefile.openbsd 2007-02-18 22:52:17.000000000 +0100
@@ -28,3 +28,4 @@
endif
sqlmansect = 7
+LIBS += -lc_r
diff -aruN postgresql-8.2.4/src/makefiles/Makefile.solaris pgcluster-1.7.0rc7/src/makefiles/Makefile.solaris
--- postgresql-8.2.4/src/makefiles/Makefile.solaris 2005-12-09 22:19:36.000000000 +0100
+++ pgcluster-1.7.0rc7/src/makefiles/Makefile.solaris 2007-02-18 22:52:17.000000000 +0100
@@ -20,3 +20,4 @@
$(LD) -G -Bdynamic -o $@ $<
sqlmansect = 5sql
+LIBS += -lpthread
diff -aruN postgresql-8.2.4/src/makefiles/Makefile.sunos4 pgcluster-1.7.0rc7/src/makefiles/Makefile.sunos4
--- postgresql-8.2.4/src/makefiles/Makefile.sunos4 2002-09-05 00:54:18.000000000 +0200
+++ pgcluster-1.7.0rc7/src/makefiles/Makefile.sunos4 2007-02-18 22:52:17.000000000 +0100
@@ -11,3 +11,4 @@
$(LD) -assert pure-text -Bdynamic -o $@ $<
sqlmansect = 7
+LIBS += -lpthread
diff -aruN postgresql-8.2.4/src/pgcluster/Makefile pgcluster-1.7.0rc7/src/pgcluster/Makefile
--- postgresql-8.2.4/src/pgcluster/Makefile 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/pgcluster/Makefile 2007-02-18 22:52:17.000000000 +0100
@@ -0,0 +1,17 @@
+#-------------------------------------------------------------------------
+#
+# Makefile for src/pgcluster (server programs)
+#
+#-------------------------------------------------------------------------
+
+subdir = src/pgcluster
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+DIRS := libpgc pgrp pglb tool
+
+all install installdirs uninstall depend distprep:
+	@for dir in $(DIRS); do $(MAKE) -C $$dir $@ || exit; done
+# Cleaning is best-effort: errors are ignored and every subdirectory is visited.
+clean distclean maintainer-clean:
+	-@for dir in $(DIRS); do $(MAKE) -C $$dir $@; done
diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/Makefile pgcluster-1.7.0rc7/src/pgcluster/libpgc/Makefile
--- postgresql-8.2.4/src/pgcluster/libpgc/Makefile 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/Makefile 2007-02-18 22:52:17.000000000 +0100
@@ -0,0 +1,29 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+# Makefile for libpgc subsystem (common library for replication server)
+#
+#-------------------------------------------------------------------------
+
+subdir = src/pgcluster/libpgc
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = sem.o show.o signal.o
+
+all: SUBSYS.o
+
+# Combine the library objects into the single SUBSYS.o that pglb links against.
+SUBSYS.o: $(OBJS)
+	$(LD) $(LDREL) $(LDOUT) $@ $(OBJS)
+
+depend dep:
+	$(CC) -MM $(CFLAGS) *.c >depend
+
+distclean: clean
+clean:
+	rm -f SUBSYS.o $(OBJS)
+
+ifeq (depend,$(wildcard depend))
+include depend
+endif
diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/libpgc.h pgcluster-1.7.0rc7/src/pgcluster/libpgc/libpgc.h
--- postgresql-8.2.4/src/pgcluster/libpgc/libpgc.h 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/libpgc.h 2007-02-18 22:52:17.000000000 +0100
@@ -0,0 +1,47 @@
+/*-------------------------------------------------------------------------
+ *
+ * libpgc.h
+ * external definition of the function for pgreplicate and pglb
+ *
+ * This should be the first file included by replicate modules.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef LIBPGC_H
+#define LIBPGC_H
+
+#include <stdio.h>	/* FILE, used by LogFileInf and the log-file prototypes below */
+
+/* character length of IP address */
+#define ADDRESS_LENGTH (24)
+
+/* logging file data tag in configuration file */
+#define LOG_INFO_TAG "Log_File_Info"
+#define FILE_NAME_TAG "File_Name"
+#define FILE_SIZE_TAG "File_Size"
+#define LOG_ROTATION_TAG "Rotate"
+
+typedef struct {
+ char file_name[256];
+ FILE * fp;
+ int max_size;
+ int rotation;
+} LogFileInf;
+
+extern LogFileInf * LogFileData;
+/* external definition of the function in sem.c */
+extern void PGRsem_unlock( int semid, short sem_num );
+extern void PGRsem_lock( int semid, short sem_num );
+
+/* external definition of the function in show.c */
+extern FILE * PGRopen_log_file(char * fname, int max_size, int rotation);
+extern void PGRclose_log_file(FILE * fp);
+extern void show_debug(const char * fmt,...);
+extern void show_error(const char * fmt,...);
+extern void PGRwrite_log_file(FILE * fp, const char * fmt,...);
+
+/* external definition of the function in signal.c */
+typedef void (*PGRsighandler)(int);
+extern PGRsighandler PGRsignal(int signo, PGRsighandler sighandler);
+
+#endif /* LIBPGC_H */
diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/sem.c pgcluster-1.7.0rc7/src/pgcluster/libpgc/sem.c
--- postgresql-8.2.4/src/pgcluster/libpgc/sem.c 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/sem.c 2007-02-18 22:52:17.000000000 +0100
@@ -0,0 +1,67 @@
+/*--------------------------------------------------------------------
+ * FILE:
+ * sem.c
+ *
+ * NOTE:
+ * This file contains the semaphore lock/unlock functions that are
+ * called from the pgreplicate sources.
+ *
+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
+ *--------------------------------------------------------------------
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+extern void show_debug(const char * fmt,...);
+
+void PGRsem_unlock( int semid, short sem_num );
+void PGRsem_lock( int semid, short sem_num );
+
+#define PGR_SEM_UNLOCK_WAIT_MSEC (100)
+#define PGR_SEM_LOCK_WAIT_MSEC (500)
+
+/*
+ * PGRsem_unlock: add 1 to semaphore sem_num of set semid (release).
+ * Retries until semop() succeeds; waits between tries unless errno is EINTR.
+ */
+void
+PGRsem_unlock( int semid, short sem_num )
+{
+	struct sembuf sops;
+
+	sops.sem_num = sem_num;
+	sops.sem_op = 1;
+	sops.sem_flg = 0;	/* IPC_NOWAIT is intentionally not set */
+	while (semop(semid, &sops, 1) == -1)
+	{
+		/* usleep() counts microseconds; the wait constant is in msec */
+		if (errno != EINTR)
+			usleep(PGR_SEM_UNLOCK_WAIT_MSEC * 1000);
+	}
+}
+
+/*
+ * PGRsem_lock: subtract 1 from semaphore sem_num of set semid (acquire).
+ * Retries until semop() succeeds; waits between tries unless errno is EINTR.
+ */
+void
+PGRsem_lock( int semid, short sem_num )
+{
+	struct sembuf sops;
+
+	sops.sem_num = sem_num;
+	sops.sem_op = -1;
+	sops.sem_flg = 0;	/* IPC_NOWAIT is intentionally not set */
+	while (semop(semid, &sops, 1) == -1)
+	{
+		/* usleep() counts microseconds; the wait constant is in msec */
+		if (errno != EINTR)
+			usleep(PGR_SEM_LOCK_WAIT_MSEC * 1000);
+	}
+}
+
diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/show.c pgcluster-1.7.0rc7/src/pgcluster/libpgc/show.c
--- postgresql-8.2.4/src/pgcluster/libpgc/show.c 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/show.c 2007-02-18 22:52:17.000000000 +0100
@@ -0,0 +1,226 @@
+/*--------------------------------------------------------------------
+ * FILE:
+ * show.c
+ *
+ * NOTE:
+ * This file is composed of the logging and debug functions
+ *
+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
+ *--------------------------------------------------------------------
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "libpgc.h"
+
+#define TIMESTAMP_SIZE 19 /* format `YYYY-MM-DD HH:MM:SS' */
+
+/*--------------------------------------
+ * PROTOTYPE DECLARATION
+ *--------------------------------------
+ */
+static char* get_current_timestamp(void);
+static int file_rotation(char * fname, int max_rotation);
+
+FILE * PGRopen_log_file(char * fname, int max_size, int rotation);
+void PGRclose_log_file(FILE * fp);
+void show_debug(const char * fmt,...);
+void show_error(const char * fmt,...);
+void PGRwrite_log_file(FILE * fp, const char * fmt,...);
+
+extern int Debug_Print;
+extern int Log_Print;
+
+LogFileInf * LogFileData = NULL;
+
+/* Format the current local time as `YYYY-MM-DD HH:MM:SS' in a static buffer. */
+static char*
+get_current_timestamp(void)
+{
+	static char stamp[TIMESTAMP_SIZE + 1];
+	time_t current = time(NULL);
+	struct tm * local = localtime(&current);
+
+	strftime(stamp, sizeof(stamp), "%Y-%m-%d %H:%M:%S", local);
+	return stamp;
+}
+
+/* show_debug: if Debug_Print is set, write a timestamped DEBUG line to stdout
+ * and, when Log_Print is set and LogFileData exists, append it to the log. */
+void
+show_debug(const char * fmt,...)
+{
+	va_list ap;
+	char buf[256];
+
+	if (!Debug_Print)
+		return;
+
+	fprintf(stdout,"%s [%d] DEBUG:",get_current_timestamp(), getpid());
+	va_start(ap,fmt);
+	vfprintf(stdout,fmt,ap);
+	va_end(ap);
+	fprintf(stdout,"\n");
+	fflush(stdout);
+	if ((Log_Print) && (LogFileData != NULL))
+	{
+		FILE * fp = PGRopen_log_file(LogFileData->file_name, LogFileData->max_size, LogFileData->rotation);
+		va_start(ap,fmt);
+		vsnprintf(buf,sizeof(buf),fmt,ap);
+		va_end(ap);
+		/* buf is data, not a format string: a '%' in it must not be re-parsed */
+		PGRwrite_log_file(fp, "%s", buf);
+		PGRclose_log_file(fp);
+	}
+}
+
+void
+show_error(const char * fmt,...)
+{
+ va_list ap;
+ char buf[256], *timestamp;
+
+ if (Debug_Print)
+ {
+ timestamp = get_current_timestamp();
+ fprintf(stderr,"%s [%d] ERROR:",timestamp, getpid());
+ va_start(ap,fmt);
+ vfprintf(stderr,fmt,ap);
+ va_end(ap);
+ fprintf(stderr,"\n");
+ fflush(stderr);
+ }
+ if ((Log_Print) && (LogFileData != NULL))
+ {
+ FILE * fp = NULL;
+ fp = PGRopen_log_file(LogFileData->file_name, LogFileData->max_size, LogFileData->rotation);
+ va_start(ap,fmt);
+ vsnprintf(buf,sizeof(buf),fmt,ap);
+ va_end(ap);
+ PGRwrite_log_file(fp, buf);
+ PGRclose_log_file(fp);
+ }
+}
+
+/*
+ * PGRwrite_log_file: append one line to fp made of the ctime() stamp, the
+ * printf-style message (cut to fit a 256-byte buffer) and a newline.
+ * Does nothing when fp is NULL or the clock cannot be read.
+ */
+void
+PGRwrite_log_file(FILE * fp, const char * fmt,...)
+{
+	char message[256];
+	char line[288];
+	char * newline;
+	va_list args;
+	time_t now;
+
+	if ((fp == NULL) || (time(&now) < 0))
+	{
+		return;
+	}
+
+	/* ctime() ends its text with '\n'; turn that into a separator blank */
+	snprintf(line,sizeof(line),"%s ",ctime(&now));
+	newline = strchr(line,'\n');
+	if (newline != NULL)
+		*newline = ' ';
+
+	va_start(args,fmt);
+	vsnprintf(message,sizeof(message),fmt,args);
+	va_end(args);
+
+	strcat(strcat(line,message),"\n");
+	if (fputs(line,fp) >= 0)
+		fflush(fp);
+}
+
+/*
+ * PGRopen_log_file: open fname for appending and return the stream.
+ *
+ * When max_size is positive and the existing file is larger than max_size
+ * bytes, file_rotation() is run first; if that fails, NULL is returned.
+ * A NULL fname also yields NULL.  A failed stat() just skips the size check.
+ */
+FILE *
+PGRopen_log_file(char * fname, int max_size, int rotation)
+{
+	struct stat info;
+	int too_big;
+
+	if (fname == NULL)
+	{
+		return (FILE *)NULL;
+	}
+
+	too_big = (max_size > 0)
+		&& (stat(fname,&info) == 0)
+		&& (info.st_size > max_size);
+	if (too_big && (file_rotation(fname, rotation) < 0))
+	{
+		return (FILE *)NULL;
+	}
+	return (fopen(fname,"a"));
+}
+
+/* Flush and close a log stream; a NULL fp is ignored. */
+void
+PGRclose_log_file(FILE * fp)
+{
+	if (fp == NULL)
+		return;
+	fflush(fp);
+	fclose(fp);
+}
+
+/*
+ * file_rotation: shift fname.(i-1) to fname.i for i = max_rotation .. 2,
+ * then rename fname to fname.1 (or unlink fname when max_rotation is 0).
+ * Returns the last rename()/unlink() result, or -1 for bad arguments.
+ * NOTE(review): show_error() may itself reach this function through
+ * PGRopen_log_file(); confirm a persistent rename failure cannot recurse.
+ */
+static int
+file_rotation(char * fname, int max_rotation)
+{
+	char * func = "file_rotation()";
+	int i;
+	int rtn;
+	struct stat st;
+	char old_fname[256];
+	char new_fname[256];
+
+	if ((fname == NULL) || (max_rotation < 0))
+		return -1;
+	/* need room for '.', up to 10 digits and the NUL; sprintf() used to overflow here */
+	if (strlen(fname) + 12 > sizeof(new_fname))
+		return -1;
+
+	for ( i = max_rotation ; i > 1 ; i -- )
+	{
+		snprintf(old_fname,sizeof(old_fname),"%s.%d",fname,i-1);
+		if (stat(old_fname,&st) != 0)
+			continue;	/* that generation does not exist */
+		snprintf(new_fname,sizeof(new_fname),"%s.%d",fname,i);
+		rtn = rename(old_fname, new_fname);
+		if (rtn < 0)
+		{
+			show_error("%s:rotate failed: (%s)",func,strerror(errno));
+			return rtn;
+		}
+	}
+
+	if (max_rotation == 0)
+		return unlink(fname);
+	snprintf(new_fname,sizeof(new_fname),"%s.1",fname);
+	return rename(fname, new_fname);
+}
+
diff -aruN postgresql-8.2.4/src/pgcluster/libpgc/signal.c pgcluster-1.7.0rc7/src/pgcluster/libpgc/signal.c
--- postgresql-8.2.4/src/pgcluster/libpgc/signal.c 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/pgcluster/libpgc/signal.c 2007-02-18 22:52:17.000000000 +0100
@@ -0,0 +1,35 @@
+/*--------------------------------------------------------------------
+ * FILE:
+ * signal.c
+ *
+ * NOTE:
+ * This file is composed of the functions to set signal handler
+ *
+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
+ *--------------------------------------------------------------------
+ */
+
+#include
+#include "pg_config.h"
+#include "libpgc.h"
+
+/*
+ * PGRsignal: install sighandler for signo and return the previous handler,
+ * or SIG_ERR on failure.  Uses sigaction() when HAVE_POSIX_SIGNALS is set.
+ */
+PGRsighandler
+PGRsignal(int signo, PGRsighandler sighandler)
+{
+#if !defined(HAVE_POSIX_SIGNALS)
+	return signal(signo, sighandler);	/* was 'func', an undeclared name */
+#else
+	struct sigaction act, oact;
+
+	act.sa_handler = sighandler;
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = 0;	/* SA_RESTART is not requested */
+	if (sigaction(signo, &act, &oact) < 0)
+		return SIG_ERR;
+	return oact.sa_handler;
+#endif /* !HAVE_POSIX_SIGNALS */
+}
diff -aruN postgresql-8.2.4/src/pgcluster/pglb/AUTHORS pgcluster-1.7.0rc7/src/pgcluster/pglb/AUTHORS
--- postgresql-8.2.4/src/pgcluster/pglb/AUTHORS 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/AUTHORS 2007-02-18 22:52:17.000000000 +0100
@@ -0,0 +1,4 @@
+Authors of pglb
+
+pglb was written by Atsushi Mitani.
+pglb is based on pg_pool which is written by Tatsuo Ishii.
diff -aruN postgresql-8.2.4/src/pgcluster/pglb/COPYING pgcluster-1.7.0rc7/src/pgcluster/pglb/COPYING
--- postgresql-8.2.4/src/pgcluster/pglb/COPYING 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/COPYING 2007-02-18 22:52:17.000000000 +0100
@@ -0,0 +1,14 @@
+Copyright (c) 2003-2006 Atsushi Mitani
+
+Permission to use, copy, modify, and distribute this software and
+its documentation for any purpose and without fee is hereby
+granted, provided that the above copyright notice appear in all
+copies and that both that copyright notice and this permission
+notice appear in supporting documentation, and that the name of the
+author not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior
+permission. The author makes no representations about the
+suitability of this software for any purpose. It is provided "as
+is" without express or implied warranty.
+
+Portions copyright (c) 2003-2006, Tatsuo Ishii
diff -aruN postgresql-8.2.4/src/pgcluster/pglb/Makefile pgcluster-1.7.0rc7/src/pgcluster/pglb/Makefile
--- postgresql-8.2.4/src/pgcluster/pglb/Makefile 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/Makefile 2007-02-18 22:52:17.000000000 +0100
@@ -0,0 +1,38 @@
+#-------------------------------------------------------------------------
+#
+# Makefile for src/pgcluster/pglb
+#
+#-------------------------------------------------------------------------
+
+subdir = src/pgcluster/pglb
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS= child.o cluster_table.o load_balance.o main.o pool_auth.o \
+ pool_connection_pool.o pool_process_query.o pool_stream.o \
+ pool_params.o recovery.o socket.o lifecheck.o
+
+EXTRA_OBJS = $(top_builddir)/src/backend/libpq/replicate_com.o ../libpgc/SUBSYS.o
+
+CFLAGS += -DPRINT_DEBUG
+override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) -DBINDIR=\"$(bindir)\"
+
+all: pglb
+# EXTRA_OBJS are linked in below, so they are prerequisites too: pglb is relinked when they change.
+pglb: $(OBJS) $(EXTRA_OBJS) $(libpq_builddir)/libpq.a
+	$(CC) $(CFLAGS) $(OBJS) $(EXTRA_OBJS) $(libpq) $(LDFLAGS) $(LIBS) -o $@
+
+# Install/uninstall the pglb binary and its sample configuration file.
+install: all installdirs
+	$(INSTALL_PROGRAM) pglb$(X) '$(DESTDIR)$(bindir)/pglb$(X)'
+	$(INSTALL_DATA) pglb.conf.sample '$(DESTDIR)$(datadir)/pglb.conf.sample'
+
+installdirs:
+	$(mkinstalldirs) '$(DESTDIR)$(bindir)'
+	$(mkinstalldirs) '$(DESTDIR)$(datadir)'
+
+uninstall:
+	rm -f '$(DESTDIR)$(bindir)/pglb$(X)' '$(DESTDIR)$(datadir)/pglb.conf.sample'
+
+clean distclean maintainer-clean:
+	rm -f pglb$(X) $(OBJS)
diff -aruN postgresql-8.2.4/src/pgcluster/pglb/child.c pgcluster-1.7.0rc7/src/pgcluster/pglb/child.c
--- postgresql-8.2.4/src/pgcluster/pglb/child.c 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/child.c 2007-02-18 22:52:17.000000000 +0100
@@ -0,0 +1,1194 @@
+/*--------------------------------------------------------------------
+ * FILE:
+ * child.c
+ *
+ * NOTE:
+ * This file is composed of the functions to call with the source
+ * at child process of pglb.
+ *
+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
+ * Portions Copyright (c) 2003-2006, Tatsuo Ishii
+ *--------------------------------------------------------------------
+ */
+/*
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that copyright notice and this permission
+ * notice appear in supporting documentation, and that the name of the
+ * author not be used in advertising or publicity pertaining to
+ * distribution of the software without specific, written prior
+ * permission. The author makes no representations about the
+ * suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+*/
+#include "postgres.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef HAVE_NETINET_TCP_H
+#include
+#endif
+
+#ifdef HAVE_CRYPT_H
+#include
+#endif
+
+#include "postgres_fe.h"
+#include "libpq/pqcomm.h"
+
+#include "replicate_com.h"
+#include "pglb.h"
+
+/*--------------------------------------
+ * GLOBAL VARIABLE DECLARATION
+ *--------------------------------------
+ */
+POOL_CONNECTION * Frontend = NULL;
+
+/*--------------------------------------
+ * PROTOTYPE DECLARATION
+ *--------------------------------------
+ */
+int PGRpre_fork_children(ClusterTbl * ptr);
+int PGRpre_fork_child(ClusterTbl * ptr);
+int PGRdo_child( int use_pool);
+int PGRcreate_child(ClusterTbl * cluster_p);
+pid_t PGRscan_child_tbl(ClusterTbl * cluster_p);
+void notice_backend_error(void);
+void do_pooling_child(int sig);
+int PGRset_status_to_child_tbl(pid_t pid, int status);
+int PGRadd_child_tbl(ClusterTbl * cluster_p, pid_t pid, int status);
+int PGRget_child_status(pid_t pid);
+void PGRreturn_connection_full_error(void);
+void PGRreturn_no_connection_error(void);
+void PGRquit_children_on_cluster(int rec_no);
+
+#ifdef NONE_BLOCK
+static void set_nonblock(int fd);
+#endif
+static void unset_nonblock(int fd);
+static POOL_CONNECTION *do_accept(int unix_fd, int inet_fd);
+static PGR_StartupPacket *read_startup_packet(POOL_CONNECTION *cp);
+static int send_startup_packet(POOL_CONNECTION_POOL_SLOT *cp);
+static void cancel_request(CancelPacket *sp, int secondary_backend);
+static POOL_CONNECTION_POOL *connect_backend(PGR_StartupPacket *sp, POOL_CONNECTION *frontend);
+static int send_params(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
+static void child_end(int sig);
+static void PGRreturn_with_error(char *msg);
+
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *     PGRpre_fork_children()
+ * NOTES
+ *     pre-fork the child processes of every cluster server in the table
+ * ARGS
+ *     ClusterTbl * ptr: pointer of cluster server table (I)
+ * RETURN
+ *     OK: STATUS_OK (results of the individual forks are not checked)
+ *     NG: STATUS_ERROR (ptr is NULL)
+ *--------------------------------------------------------------------
+ */
+int
+PGRpre_fork_children(ClusterTbl * ptr)
+{
+	ClusterTbl *cluster;
+	int			visited;
+
+	if (ptr == NULL)
+		return STATUS_ERROR;
+
+	/* walk the rows until the TBL_END sentinel or until ClusterNum rows were seen */
+	for (cluster = ptr, visited = 0;
+		 (cluster->useFlag != TBL_END) && (visited < ClusterNum);
+		 cluster++, visited++)
+	{
+		PGRpre_fork_child(cluster);
+	}
+	return STATUS_OK;
+}
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *     PGRpre_fork_child()
+ * NOTES
+ *     pre-fork the child processes of one cluster server
+ * ARGS
+ *     ClusterTbl * ptr: pointer of cluster server table (I)
+ * RETURN
+ *     OK: STATUS_OK
+ *     NG: STATUS_ERROR (ptr is NULL or points at the TBL_END entry)
+ *--------------------------------------------------------------------
+ */
+int
+PGRpre_fork_child(ClusterTbl * ptr)
+{
+	int			forked = 0;
+
+	/* a missing row and the end-of-table sentinel have nothing to fork */
+	if ((ptr == NULL) || (ptr->useFlag == TBL_END))
+		return STATUS_ERROR;
+
+	/*
+	 * max_connect * Max_Pool children per cluster; the bound is re-read on
+	 * every pass and the value returned by PGRcreate_child() is not used.
+	 */
+	while (forked < ptr->max_connect * Max_Pool)
+	{
+		(void) PGRcreate_child(ptr);
+		forked++;
+	}
+	return STATUS_OK;
+}
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *     PGRdo_child()
+ * NOTES
+ *     serve one frontend session: accept, startup negotiation, query relay
+ * ARGS
+ *     int use_pool: usage flag of connection pooling (I)
+ * RETURN
+ *     OK: STATUS_OK (the session ended without POOL_ERROR / POOL_FATAL)
+ *     NG: STATUS_ERROR (accept or startup failed, or the query loop ended in POOL_ERROR / POOL_FATAL)
+ *--------------------------------------------------------------------
+ */
+int
+PGRdo_child( int use_pool)
+{
+	char * func = "PGRdo_child()";
+	pid_t pid = 0;
+	PGR_StartupPacket *sp = NULL;
+	POOL_CONNECTION *frontend = NULL;
+	POOL_CONNECTION_POOL *backend = NULL;
+	int status = 0;
+	int connection_reuse = 1;
+	int ssl_request = 0;
+	int count = 0;
+
+	pid = getpid();
+#ifdef PRINT_DEBUG
+	show_debug("%s:I am %d",func, pid);
+#endif
+
+	/* set up signal handlers */
+	PGRsignal(SIGALRM, SIG_DFL);
+	PGRsignal(SIGTERM, child_end);
+	PGRsignal(SIGHUP, child_end);
+	PGRsignal(SIGINT, child_end);
+	PGRsignal(SIGUSR1, SIG_IGN);
+	PGRsignal(SIGUSR2, SIG_IGN);
+
+#ifdef NONE_BLOCK
+	/* set listen fds to non-blocking */
+	set_nonblock(Frontend_FD.unix_fd);
+	set_nonblock(Frontend_FD.inet_fd);
+#endif
+
+retry_accept:
+	/* perform accept() */
+	frontend = do_accept(Frontend_FD.unix_fd,Frontend_FD.inet_fd);
+	if (frontend == NULL)
+	{
+		/* accept() failed. return to the accept() loop */
+		PGRset_status_to_child_tbl(pid,TBL_FREE);
+		return STATUS_ERROR;
+	}
+
+	/* put the accepted frontend fd back into blocking mode */
+	unset_nonblock(frontend->fd);
+
+	/* read the startup packet */
+	sp = 0;
+retry_startup:
+	if (sp)
+	{
+		free(sp->startup_packet);
+		free(sp->database);
+		free(sp->user);
+		free(sp);
+	}
+
+	sp = read_startup_packet(frontend);
+	if (sp == NULL)
+	{
+		/* failed to read the startup packet. return to the
+		   accept() loop */
+		pool_close(frontend);
+		PGRset_status_to_child_tbl(pid,TBL_FREE);
+		return STATUS_ERROR;
+	}
+	PGRset_status_to_child_tbl(pid,TBL_ACCEPT);
+
+	/* cancel request? */
+	if (sp->major == 1234 && sp->minor == 5678)
+	{
+		cancel_request((CancelPacket *)sp->startup_packet, 0);
+		pool_close(frontend);
+		return STATUS_ERROR;
+	}
+
+	/* SSL? */
+	if (sp->major == 1234 && sp->minor == 5679)
+	{
+		/* SSL not supported */
+#ifdef PRINT_DEBUG
+		show_debug("%s:SSLRequest: sent N; retry startup",func);
+#endif
+		if (ssl_request && use_pool)
+		{
+			pool_close(frontend);
+			return STATUS_ERROR;
+		}
+
+		/*
+		 * say to the frontend "we do not support SSL"
+		 * note that this is not a NOTICE response despite it's an 'N'!
+		 */
+		pool_write_and_flush(frontend, "N", 1);
+		ssl_request = 1;
+		goto retry_startup;
+	}
+
+	/*
+	 * Ok, negotiation with frontend has been done. Let's go to the next step.
+	 */
+	/*
+	 * if there's no connection associated with user and database,
+	 * we need to connect to the backend and send the startup packet.
+	 */
+	count = 0;
+	if ((backend = pool_get_cp(sp->user, sp->database, sp->major)) == NULL)
+	{
+		connection_reuse = 0;
+
+		if ((backend = connect_backend(sp, frontend)) == NULL)
+		{
+			/*
+			 * connect_backend() has already closed the frontend; wait for the next client.
+			 * NOTE(review): sp is reset to 0 at retry_accept without being freed -- confirm
+			 * whether the pool entry owns it at this point.
+			 */
+			goto retry_accept;
+		}
+	}
+	else
+	{
+		/* reuse existing connection to backend */
+
+		if (pool_do_reauth(frontend, backend))
+		{
+			pool_close(frontend);
+			return STATUS_ERROR;
+		}
+
+		if (MAJOR(backend) == 3)
+		{
+			if (send_params(frontend, backend))
+			{
+				pool_close(frontend);
+				return STATUS_ERROR;
+			}
+		}
+
+		/* send ReadyForQuery to frontend */
+		pool_write(frontend, "Z", 1);
+
+		if (MAJOR(backend) == 3)
+		{
+			int len;
+			char tstate;
+
+			len = htonl(5);
+			pool_write(frontend, &len, sizeof(len));
+			tstate = TSTATE(backend);
+			pool_write(frontend, &tstate, 1);
+		}
+
+		if (pool_flush(frontend) < 0)
+		{
+			pool_close(frontend);
+			return STATUS_ERROR;
+		}
+
+	}
+
+	/* query process loop */
+	for (;;)
+	{
+		/* the function-level "status" is assigned here (not a loop-local copy), so the check after the loop sees the result */
+
+		status = pool_process_query(frontend, backend, 0);
+
+		switch (status)
+		{
+			/* client exits */
+			case POOL_END:
+				/* do not cache connection to template0, template1, regression */
+				if (!strcmp(sp->database, "template0") || !strcmp(sp->database, "template1") ||
+					!strcmp(sp->database, "regression") || use_pool == NOT_USE_CONNECTION_POOL)
+				{
+					pool_close(frontend);
+					pool_send_frontend_exits(backend);
+					pool_discard_cp(sp->user, sp->database, sp->major);
+				}
+				else
+				{
+					POOL_STATUS status1;
+
+					/* send reset request to backend */
+					status1 = pool_process_query(frontend, backend, 1);
+					pool_close(frontend);
+
+					/* if we detect errors on resetting connection, we need to discard
+					 * this connection since it might be in unknown status
+					 */
+					if (status1 != POOL_CONTINUE)
+						pool_discard_cp(sp->user, sp->database, sp->major);
+					else
+						pool_connection_pool_timer(backend);
+				}
+				break;
+
+			/* error occurred: only logged here; the caller ends this process
+			   when STATUS_ERROR is returned below */
+			case POOL_ERROR:
+				show_error("%s:do_child: exits with status 1 due to error",func);
+				break;
+
+			/* fatal error occurred: flag the child and cluster tables */
+			case POOL_FATAL:
+				show_error("%s:do_child: fatal error occured",func);
+				notice_backend_error();
+				break;
+
+			/* not implemented yet (the connection returned by do_accept() is not kept) */
+			case POOL_IDLE:
+				do_accept(Frontend_FD.unix_fd,Frontend_FD.inet_fd);
+#ifdef PRINT_DEBUG
+				show_debug("%s:accept while idle",func);
+#endif
+				break;
+
+			default:
+				break;
+		}
+
+		if (status != POOL_CONTINUE)
+			break;
+	}
+	if ((status == POOL_ERROR) ||
+		(status == POOL_FATAL))
+	{
+		PGRset_status_to_child_tbl(pid,TBL_FREE);
+		return STATUS_ERROR;
+	}
+	PGRset_status_to_child_tbl(pid,TBL_INIT);
+	return STATUS_OK;
+}
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ * PGRcreate_child()
+ * NOTES
+ * fork one child process for a cluster server; the child waits in pause() with do_pooling_child() registered for SIGUSR1
+ * ARGS
+ * ClusterTbl * cluster_p: pointer of cluster server table (I)
+ * RETURN
+ * OK: pid of the new child (returned in the parent; the child loops forever)
+ * NG: STATUS_ERROR
+ *--------------------------------------------------------------------
+ */
+int
+PGRcreate_child(ClusterTbl * cluster_p)
+{
+ char * func = "PGRcreate_child()";
+ pid_t pid,pgid;
+
+ if (cluster_p == NULL)
+ return STATUS_ERROR;
+
+#ifdef PRINT_DEBUG
+ show_debug("%s:create child [%d@%s]",func,cluster_p->port,cluster_p->hostName);
+#endif
+ PGRsignal(SIGCHLD,PGRrecreate_child);
+ pgid = getpgid((pid_t)0); /* remembered so the child can be put into the same process group */
+ pid = fork();
+ if (pid < 0)
+ {
+ show_error("%s:fork() failed. (%s)",func,strerror(errno));
+ return STATUS_ERROR;
+ }
+ if (pid == 0)
+ {
+ /* child: bind to the cluster and set up its own connection pool */
+ CurrentCluster = cluster_p;
+ if (pool_init_cp())
+ {
+ show_error("%s:pool_init_cp failed",func);
+ exit(1);
+ }
+ PGRsignal(SIGCHLD,PGRchild_wait);
+ PGRsignal(SIGTERM, child_end);
+ PGRsignal(SIGHUP, child_end);
+ PGRsignal(SIGINT, child_end);
+ PGRsignal(SIGUSR1,do_pooling_child);
+ setpgid((pid_t)0,pgid);
+ for (;;)
+ {
+ pause(); /* sleep until a signal arrives */
+ PGRsignal(SIGUSR1,do_pooling_child); /* register the handler again after every wake-up */
+ }
+#ifdef PRINT_DEBUG
+ show_debug("%s:create child end [%d@%s]",func,cluster_p->port,cluster_p->hostName);
+#endif
+ child_end(SIGTERM); /* not reached: the loop above has no exit */
+ }
+ else
+ {
+ PGRadd_child_tbl(cluster_p,pid,TBL_INIT); /* parent: record the new child as TBL_INIT */
+ }
+ return pid;
+}
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *     PGRscan_child_tbl()
+ * NOTES
+ *     get a child process id that is waiting for connection
+ *     with the cluster server, and mark that child TBL_USE
+ * ARGS
+ *     ClusterTbl * cluster_p: pointer of cluster server table (I)
+ * RETURN
+ *     OK: child process id
+ *     NG: 0 (no waiting child, or a table is not initialized)
+ *--------------------------------------------------------------------
+ */
+pid_t
+PGRscan_child_tbl(ClusterTbl * cluster_p)
+{
+	char * func = "PGRscan_child_tbl()";
+	ChildTbl * p;
+
+	/* failures return 0 as documented, not STATUS_ERROR: the result is used as a pid */
+	if ( cluster_p == NULL)
+	{
+		show_error("%s:Cluster_Tbl is not initialize",func);
+		return 0;
+	}
+	p = Child_Tbl;
+	if ( p == NULL)
+	{
+		show_error("%s:Child_Tbl is not initialize",func);
+		return 0;
+	}
+
+	for (; p->useFlag != TBL_END; p++)
+	{
+		/* entries without a live pid are never candidates */
+		if (p->pid <= 0)
+			continue;
+		if ((p->useFlag == TBL_INIT) &&
+			(p->rec_no == cluster_p->rec_no))
+		{
+			/* claim the child so it is not handed out twice */
+			p->useFlag = TBL_USE;
+			return (p->pid);
+		}
+	}
+	return 0;
+}
+
+/*
+ * Record a backend connection error: mark this process TBL_ERROR in the
+ * child table and set TBL_ERROR_NOTICE on the current cluster.  No signal
+ * is sent to the parent process from here.
+ */
+void
+notice_backend_error(void)
+{
+	/* the child table is searched by pid, so identify ourselves first */
+	pid_t		self = getpid();
+
+	PGRset_status_to_child_tbl(self, TBL_ERROR);
+	PGRset_status_on_cluster_tbl(TBL_ERROR_NOTICE, CurrentCluster);
+}
+
+
+/*
+ * SIGUSR1 handler of a pooling child (registered by PGRcreate_child()):
+ * run one PGRdo_child() session with connection pooling, then call
+ * PGRrelease_connection() for the current cluster.  A failed session ends
+ * this process through child_end().
+ */
+void
+do_pooling_child(int sig)
+{
+	char	   *func = "do_pooling_child()";
+	int			result;
+
+	result = PGRdo_child(USE_CONNECTION_POOL);
+	/* released on success and on failure alike */
+	PGRrelease_connection(CurrentCluster);
+	if (result == STATUS_OK)
+		return;
+	show_error("%s:PGRdo_child failed",func);
+	child_end(SIGTERM);
+}
+
+/*
+ * Set the useFlag of the child-table entry that carries pid.
+ * Returns STATUS_OK when an entry was updated, STATUS_ERROR when the table
+ * is missing or no entry before the TBL_END sentinel carries that pid.
+ */
+int
+PGRset_status_to_child_tbl(pid_t pid, int status)
+{
+	char * func = "PGRset_status_to_child_tbl()";
+	ChildTbl   *entry = Child_Tbl;
+
+	if (entry == NULL)
+	{
+		show_error("%s:Child_Tbl is not initialize",func);
+		return STATUS_ERROR;
+	}
+	for (; entry->useFlag != TBL_END; entry++)
+	{
+		if (entry->pid != pid)
+			continue;
+		/* first match wins */
+		entry->useFlag = status;
+		return STATUS_OK;
+	}
+	return STATUS_ERROR;
+}
+
+/*
+ * Record a forked child in the first reusable entry of the child table (an
+ * entry that is TBL_FREE or TBL_ERROR): status, the cluster's rec_no and pid.
+ * Returns STATUS_OK, or STATUS_ERROR when a table is missing or no reusable
+ * entry precedes the TBL_END sentinel.
+ */
+int
+PGRadd_child_tbl(ClusterTbl * cluster_p, pid_t pid, int status)
+{
+	char * func = "PGRadd_child_tbl()";
+	ChildTbl   *slot = Child_Tbl;
+
+	if (cluster_p == NULL)
+	{
+		show_error("%s:Cluster_Tbl is not initialize",func);
+		return STATUS_ERROR;
+	}
+	if (slot == NULL)
+	{
+		show_error("%s:Child_Tbl is not initialize",func);
+		return STATUS_ERROR;
+	}
+	for (; slot->useFlag != TBL_END; slot++)
+	{
+		if ((slot->useFlag != TBL_FREE) && (slot->useFlag != TBL_ERROR))
+			continue;
+		/* reusable entry found: take it */
+		slot->useFlag = status;
+		slot->rec_no = cluster_p->rec_no;
+		slot->pid = pid;
+		return STATUS_OK;
+	}
+	return STATUS_ERROR;
+}
+
+/*
+ * Return the useFlag recorded for pid in the child table, or STATUS_ERROR
+ * when the table is missing or no entry carries that pid.
+ */
+int
+PGRget_child_status(pid_t pid)
+{
+	char * func = "PGRget_child_status()";
+	ChildTbl   *entry = Child_Tbl;
+
+	if (entry == NULL)
+	{
+		show_error("%s:Child_Tbl is not initialize",func);
+		return STATUS_ERROR;
+	}
+	/* linear scan up to the TBL_END sentinel */
+	for (; entry->useFlag != TBL_END; entry++)
+	{
+		if (entry->pid == pid)
+			return entry->useFlag;
+	}
+	return STATUS_ERROR;
+}
+/* Answer the next pending client with the "backend connection is full" error text. */
+void
+PGRreturn_connection_full_error(void)
+{
+ PGRreturn_with_error( "Sorry, backend connection is full\n");
+}
+/* Answer the next pending client with the "no cluster available" error text. */
+void
+PGRreturn_no_connection_error(void) {
+ PGRreturn_with_error("pglb could not connect to server: no cluster available.\n");
+}
+
+/* Accept the next pending client, read its startup packet and reply with 'E' followed by msg. */
+static void
+PGRreturn_with_error (char *msg)
+{
+	PGR_StartupPacket *sp = NULL;
+	POOL_CONNECTION *frontend = NULL;
+
+	/* perform accept(); on failure there is nobody to report to */
+	frontend = do_accept(Frontend_FD.unix_fd,Frontend_FD.inet_fd);
+	if (frontend == NULL)
+		return ;
+	sp = read_startup_packet(frontend);
+	if (sp == NULL)
+	{
+		/* failed to read the startup packet */
+		pool_close(frontend);
+		return ;
+	}
+	pool_write_and_flush(frontend, "E", 1);
+	pool_write_and_flush(frontend, msg, strlen(msg)+1);
+	pool_close(frontend);
+	/* the packet is not kept anywhere: free it the way PGRdo_child() does */
+	free(sp->startup_packet);
+	free(sp->database);
+	free(sp->user);
+	free(sp);
+}
+
+/* Send SIGTERM to every child recorded for cluster rec_no and mark its table entry DATA_FREE. */
+void
+PGRquit_children_on_cluster(int rec_no)
+{
+	char * func = "PGRquit_children_on_cluster()";
+	ChildTbl * p;
+
+	if (Child_Tbl == NULL)
+		return;
+	/* SIGCHLD is ignored while the children are stopped on purpose */
+	PGRsignal(SIGCHLD,SIG_IGN);
+	for (p = Child_Tbl; p->useFlag != TBL_END; p++)
+	{
+		/* pid <= 0 is not a child: kill() would signal process groups instead */
+		if ((p->rec_no != rec_no) || (p->pid <= 0))
+			continue;
+		if (kill (p->pid,SIGTERM) == -1)
+		{
+			show_error("%s:could not stop pid: %d (%s)",func,p->pid,strerror(errno));
+			break;		/* give up, but still restore the SIGCHLD handler below */
+		}
+		PGRchild_wait(SIGTERM);
+		/* NOTE(review): the rest of this file uses TBL_FREE; confirm DATA_FREE is equivalent */
+		p->useFlag = DATA_FREE;
+	}
+	/* put back the handler that matches the operating mode */
+	if (Use_Connection_Pool)
+	{
+		PGRsignal(SIGCHLD,PGRrecreate_child);
+	}
+	else
+	{
+		PGRsignal(SIGCHLD,PGRchild_wait);
+	}
+}
+
+/* -------------------------------------------------------------------
+ * private functions
+ * -------------------------------------------------------------------
+ */
+
+#ifdef NONE_BLOCK
+/*
+ * Switch fd to non-blocking mode by adding O_NONBLOCK to its file status
+ * flags.  Any fcntl() failure is logged and ends the process through
+ * child_end().
+ */
+static void set_nonblock(int fd)
+{
+	char* func = "set_nonblock()";
+	int			flags = fcntl(fd, F_GETFL, 0);
+
+	/* keep every flag that is already set */
+	if (flags != -1)
+	{
+		if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) != -1)
+			return;
+	}
+
+	/* one of the two calls failed; errno still describes it */
+	show_error("%s:fcntl failed. %s", func,strerror(errno));
+	child_end(SIGTERM);
+}
+#endif
+
+/*
+ * Put fd back into blocking mode by clearing O_NONBLOCK in its file status
+ * flags.  Any fcntl() failure is logged and ends the process through
+ * child_end().
+ */
+static void unset_nonblock(int fd)
+{
+	char * func = "unset_nonblock()";
+	int			flags = fcntl(fd, F_GETFL, 0);
+
+	/* keep every other flag as it is */
+	if (flags != -1)
+	{
+		if (fcntl(fd, F_SETFL, flags & ~O_NONBLOCK) != -1)
+			return;
+	}
+
+	/* one of the two calls failed; errno still describes it */
+	show_error("%s,fcntl failed. %s", func,strerror(errno));
+	child_end(SIGTERM);
+}
+
+
+/*
+* wait in select() for a client on the unix or inet listen socket, accept() it and return it wrapped by pool_open() (NULL on any failure)
+*/
+static POOL_CONNECTION *do_accept(int unix_fd, int inet_fd)
+{
+ char * func = "do_accept()";
+ fd_set readmask;
+ int fds;
+ struct sockaddr addr; /* filled in by accept() but not read afterwards */
+ socklen_t addrlen;
+ int fd = 0;
+ int afd;
+ int inet = 0;
+ POOL_CONNECTION *cp;
+#ifdef ACCEPT_PERFORMANCE
+ struct timeval now1, now2;
+ static long atime;
+ static int cnt;
+#endif
+
+ FD_ZERO(&readmask);
+ FD_SET(unix_fd, &readmask);
+ if (inet_fd) /* presumably 0 means "no inet listen socket" -- TODO confirm against the caller */
+ FD_SET(inet_fd, &readmask);
+
+ fds = select(Max(unix_fd, inet_fd)+1, &readmask, NULL, NULL, NULL);
+ if (fds == -1)
+ {
+ if (errno == EAGAIN || errno == EINTR)
+ return NULL;
+
+ show_error("%s:select() failed. reason %s",func, strerror(errno));
+ return NULL;
+ }
+
+ if (fds == 0)
+ return NULL;
+
+ if (FD_ISSET(unix_fd, &readmask))
+ {
+ fd = unix_fd;
+ }
+ /* when both sockets are readable the inet socket is the one that gets accepted */
+ if (FD_ISSET(inet_fd, &readmask))
+ {
+ fd = inet_fd;
+ inet++;
+ }
+
+ /*
+ * Note that some SysV systems do not work here. For those
+ * systems, we need some locking mechanism for the fd.
+ */
+ addrlen = sizeof(addr);
+
+#ifdef ACCEPT_PERFORMANCE
+ gettimeofday(&now1,0);
+#endif
+ afd = accept(fd, &addr, &addrlen);
+ if (afd < 0)
+ {
+ /*
+ * "Resource temporarily unavailable" (EAGAIN or EWOULDBLOCK)
+ * can be silently ignored.
+ */
+ if (errno != EAGAIN && errno != EWOULDBLOCK)
+ show_error("%s:accept() failed. reason: %s",func, strerror(errno));
+ return NULL;
+ }
+#ifdef ACCEPT_PERFORMANCE
+ gettimeofday(&now2,0);
+ atime += (now2.tv_sec - now1.tv_sec)*1000000 + (now2.tv_usec - now1.tv_usec);
+ cnt++;
+ if (cnt % 100 == 0)
+ {
+ show_error("%s:cnt: %d atime: %ld",func, cnt, atime);
+ }
+#endif
+#ifdef PRINT_DEBUG
+ show_debug("%s:I am %d accept fd %d",func, getpid(), afd);
+#endif
+
+ /* set NODELAY and KEEPALIVE options if INET connection */
+ if (inet)
+ {
+ int on = 1;
+
+ if (setsockopt(afd, IPPROTO_TCP, TCP_NODELAY,
+ (char *) &on,
+ sizeof(on)) < 0)
+ {
+ show_error("%s:do_accept: setsockopt() failed: %s",func, strerror(errno));
+ close(afd);
+ return NULL;
+ }
+ if (setsockopt(afd, SOL_SOCKET, SO_KEEPALIVE,
+ (char *) &on,
+ sizeof(on)) < 0)
+ {
+ show_error("%s:do_accept: setsockopt() failed: %s", func,strerror(errno));
+ close(afd);
+ return NULL;
+ }
+ }
+ /* wrap the socket; it is closed here when pool_open() fails */
+ if ((cp = pool_open(afd)) == NULL)
+ {
+ close(afd);
+ return NULL;
+ }
+ return cp;
+}
+
+/*
+ * Read and decode one startup packet from the frontend connection cp.
+ * Returns a malloc'ed PGR_StartupPacket whose startup_packet, database and
+ * user members are separately allocated, or NULL on read error, malformed
+ * input or out of memory (every failure path frees what was allocated).
+ */
+static PGR_StartupPacket *read_startup_packet(POOL_CONNECTION *cp)
+{
+	char * func = "read_startup_packet()";
+	PGR_StartupPacket *sp;
+	PGR_StartupPacket_v2 *sp2;
+	int protov;
+	int len;
+	size_t alloc_len;
+	char *p;
+	char *end;
+
+	sp = (PGR_StartupPacket *)malloc(sizeof(PGR_StartupPacket));
+	if (!sp)
+	{
+		show_error("%s:read_startup_packet: out of memory",func);
+		return NULL;
+	}
+	/* start with NULL members so the cleanup code can free() them unconditionally */
+	sp->startup_packet = NULL;
+	sp->database = NULL;
+	sp->user = NULL;
+
+	/* read startup packet length */
+	if (pool_read(cp, &len, sizeof(len)))
+		goto fail;
+	len = ntohl(len);
+	len -= sizeof(len);
+
+	/* client supplied: must cover the protocol version word and stay within the protocol limit */
+	if (len < (int) sizeof(protov) || len > MAX_STARTUP_PACKET_LENGTH)
+	{
+		show_error("%s:read_startup_packet: incorrect packet length (%d)", func,len);
+		goto fail;
+	}
+
+	/*
+	 * Allocate at least a complete V2 packet plus one byte, zero filled: the
+	 * fixed V2 fields are then always inside the buffer and every V3 string
+	 * is NUL terminated, whatever the client sent.
+	 */
+	alloc_len = (size_t) len;
+	if (alloc_len < sizeof(PGR_StartupPacket_v2))
+		alloc_len = sizeof(PGR_StartupPacket_v2);
+	sp->startup_packet = calloc(alloc_len + 1, 1);
+	if (!sp->startup_packet)
+		goto oom;
+
+	/* read startup packet */
+	if (pool_read(cp, sp->startup_packet, len))
+		goto fail;
+
+	sp->len = len;
+	memcpy(&protov, sp->startup_packet, sizeof(protov));
+	sp->major = ntohl(protov)>>16;
+	sp->minor = ntohl(protov) & 0x0000ffff;
+	p = sp->startup_packet;
+	end = p + len;
+
+	switch(sp->major)
+	{
+		case PROTO_MAJOR_V2: /* V2 */
+			sp2 = (PGR_StartupPacket_v2 *)(sp->startup_packet);
+
+			sp->database = calloc(SM_DATABASE+1, 1);
+			if (!sp->database)
+				goto oom;
+			strncpy(sp->database, sp2->database, SM_DATABASE);
+
+			sp->user = calloc(SM_USER+1, 1);
+			if (!sp->user)
+				goto oom;
+			strncpy(sp->user, sp2->user, SM_USER);
+			break;
+
+		case PROTO_MAJOR_V3: /* V3 */
+			p += sizeof(int); /* skip protocol version info */
+
+			/* name/value string pairs, closed by an empty name; never step past the received bytes */
+			while (p < end && *p)
+			{
+				char **slot = NULL;
+				if (!strcmp("user", p))
+					slot = &sp->user;
+				else if (!strcmp("database", p))
+					slot = &sp->database;
+				p += (strlen(p) + 1);	/* step over the name */
+				if (p >= end)
+					break;				/* truncated: name without a value */
+				if (slot)
+				{
+					free(*slot);		/* a repeated option replaces the earlier value */
+					*slot = strdup(p);
+					if (!*slot)
+						goto oom;
+				}
+				p += (strlen(p) + 1);	/* step over the value */
+			}
+
+			if (!sp->user)
+			{
+				show_error("%s:read_startup_packet: no user name in startup packet",func);
+				goto fail;
+			}
+			/* the protocol defaults the database name to the user name */
+			if (!sp->database && !(sp->database = strdup(sp->user)))
+				goto oom;
+			break;
+
+		case 1234: /* cancel or SSL request */
+			/* set dummy database, user info */
+			sp->database = calloc(1, 1);
+			sp->user = calloc(1, 1);
+			if (!sp->database || !sp->user)
+				goto oom;
+			break;
+
+		default:
+			show_error("%s:read_startup_packet: invalid major no: %d",func, sp->major);
+			goto fail;
+	}
+
+#ifdef PRINT_DEBUG
+	show_debug("%s:Protocol Major: %d Minor: %d database: %s user: %s",
+		func,sp->major, sp->minor, sp->database, sp->user);
+#endif
+
+	return sp;
+
+oom:
+	show_error("%s:read_startup_packet: out of memory",func);
+fail:
+	free(sp->startup_packet);
+	free(sp->database);
+	free(sp->user);
+	free(sp);
+	return NULL;
+}
+
+/*
+ * Forward the saved startup packet of a pool slot: length word first, then the packet body.
+ */
+static int send_startup_packet(POOL_CONNECTION_POOL_SLOT *cp)
+{
+	int			wire_len = htonl(cp->sp->len + sizeof(wire_len));
+
+	/* the length word counts itself; the return value of this first write is not checked */
+	pool_write(cp->con, &wire_len, sizeof(wire_len));
+	return pool_write_and_flush(cp->con, cp->sp->startup_packet, cp->sp->len);
+}
+
+/*
+ * Relay a frontend cancel request to the backend of the current cluster.
+ *
+ * sp is the CancelPacket as received.  secondary_backend is handed to the
+ * connect helpers as 1 when non-zero and as 0 otherwise.  Failures are
+ * logged and the request is dropped; nothing is reported to the caller.
+ */
+static void cancel_request(CancelPacket *sp, int secondary_backend)
+{
+	char * func = "cancel_request()";
+	int len;
+	int fd;
+	int target = secondary_backend ? 1 : 0;
+	POOL_CONNECTION *con;
+	char hostName[128];
+
+#ifdef PRINT_DEBUG
+	show_debug("%s:Cancel request received",func);
+#endif
+
+	if (CurrentCluster == NULL)
+	{
+		return;
+	}
+	if (gethostname(hostName,sizeof(hostName)) < 0)
+	{
+		show_error("%s:gethostname() failed. (%s)",func,strerror(errno));
+		return ;
+	}
+	/* a backend on this host is reached through the unix socket, any other one through the inet socket */
+	if (PGRis_same_host(hostName,CurrentCluster->hostName))
+		fd = connect_unix_domain_socket(target);
+	else
+		fd = connect_inet_domain_socket(target);
+	if (fd < 0)
+	{
+		show_error("%s:Could not create socket for sending cancel request",func);
+		return;
+	}
+
+	con = pool_open(fd);
+	if (con == NULL)
+	{
+		/* no wrapper was created, so the socket is still ours to close (do_accept() does the same) */
+		close(fd);
+		return;
+	}
+
+	/* the length word counts itself plus the cancel packet */
+	len = htonl(sizeof(len) + sizeof(CancelPacket));
+	pool_write(con, &len, sizeof(len));
+
+	if (pool_write_and_flush(con, sp, sizeof(CancelPacket)) < 0)
+		show_error("%s:Could not send cancel request packet",func);
+	pool_close(con);
+}
+/* Create a pool entry for this session, send the startup packet to the backend(s) and authenticate; returns NULL (frontend closed) on failure. */
+static POOL_CONNECTION_POOL *connect_backend(PGR_StartupPacket *sp, POOL_CONNECTION *frontend)
+{
+ char * func ="connect_backend()";
+ POOL_CONNECTION_POOL *backend;
+
+ /* connect to the backend */
+ backend = pool_create_cp();
+ if (backend == NULL)
+ {
+ pool_send_error_message(frontend, sp->major, "XX000", "connection cache is full", "",
+ "increace max_pool", __FILE__, __LINE__);
+ pool_close(frontend);
+ return NULL;
+ }
+
+ /* mark this is a backend connection */
+ backend->slots[0]->con->isbackend = 1;
+ /*
+ * save startup packet info (the slot keeps the caller's sp pointer, not a copy)
+ */
+ backend->slots[0]->sp = sp;
+
+ if (pool_config_replication_enabled)
+ {
+ backend->slots[1]->con->isbackend = 1;
+ backend->slots[1]->con->issecondary_backend = 1;
+ /*
+ * save startup packet info (same sp pointer as slot 0)
+ */
+ backend->slots[1]->sp = sp;
+ }
+
+ /* send startup packet */
+ if (send_startup_packet(backend->slots[0]) < 0)
+ {
+ show_error("%s:do_child: fails to send startup packet to the backend",func);
+ pool_close(frontend);
+ return NULL;
+ }
+ /* NOTE(review): the two send failure paths do not call pool_discard_cp(), unlike the auth failure path below -- confirm */
+ /* send startup packet to the secondary backend */
+ if (pool_config_replication_enabled)
+ {
+ if (send_startup_packet(backend->slots[1]) < 0)
+ {
+ show_error("%s:do_child: fails to send startup packet to the secondary backend",func);
+ pool_close(frontend);
+ return NULL;
+ }
+ }
+
+ /*
+ * do authentication stuff
+ */
+ if (pool_do_auth(frontend, backend))
+ {
+ pool_close(frontend);
+ pool_discard_cp(sp->user, sp->database, sp->major);
+ return NULL;
+ }
+ return backend;
+}
+
+/* Send every parameter stored for the master backend to the frontend as 'S' messages, then flush (0 on success, -1 if the flush fails). */
+static int send_params(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend)
+{
+	char * func = "send_params()";
+	int index;
+	char *name, *value;
+	int len, sendlen;
+
+	for (index = 0;
+		 pool_get_param(&MASTER(backend)->params, index, &name, &value) == 0;
+		 index++)
+	{
+		pool_write(frontend, "S", 1);
+		/* the length word counts itself and both NUL-terminated strings */
+		len = sizeof(sendlen) + strlen(name) + 1 + strlen(value) + 1;
+		sendlen = htonl(len);
+		pool_write(frontend, &sendlen, sizeof(sendlen));
+		pool_write(frontend, name, strlen(name) + 1);
+		pool_write(frontend, value, strlen(value) + 1);
+	}
+	if (pool_flush(frontend) == 0)
+		return 0;
+	show_error("%s:pool_send_params: pool_flush() failed",func);
+	return -1;
+}
+
+/*
+ * ending function of child process, also registered as its SIGTERM/SIGHUP/SIGINT handler: ignore sig from now on, call pool_finish(), exit(0)
+ */
+static void
+child_end(int sig)
+{
+ PGRsignal(sig,SIG_IGN);
+ /* does not return: the process exits with status 0 */
+ pool_finish();
+ exit(0);
+}
diff -aruN postgresql-8.2.4/src/pgcluster/pglb/cluster_table.c pgcluster-1.7.0rc7/src/pgcluster/pglb/cluster_table.c
--- postgresql-8.2.4/src/pgcluster/pglb/cluster_table.c 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/cluster_table.c 2007-02-18 22:52:17.000000000 +0100
@@ -0,0 +1,343 @@
+/*--------------------------------------------------------------------
+ * FILE:
+ * cluster_table.c
+ *
+ * NOTE:
+ * This file is composed of the functions to use a cluster table.
+ *
+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
+ *--------------------------------------------------------------------
+ */
+/*
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that copyright notice and this permission
+ * notice appear in supporting documentation, and that the name of the
+ * author not be used in advertising or publicity pertaining to
+ * distribution of the software without specific, written prior
+ * permission. The author makes no representations about the
+ * suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+*/
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef HAVE_NETINET_TCP_H
+#include
+#endif
+
+#include "replicate_com.h"
+#include "pglb.h"
+
+
+/*--------------------------------------
+ * PROTOTYPE DECLARATION
+ *--------------------------------------
+ */
+int PGRis_cluster_alive(void) ;
+ClusterTbl * PGRscan_cluster(void);
+void PGRset_key_of_cluster(ClusterTbl * ptr, RecoveryPacket * packet);
+ClusterTbl * PGRadd_cluster_tbl (ClusterTbl * conf_data);
+ClusterTbl * PGRset_status_on_cluster_tbl (int status, ClusterTbl * ptr);
+ClusterTbl * PGRsearch_cluster_tbl(ClusterTbl * conf_data);
+
+static int set_cluster_tbl(ClusterTbl * ptr , ClusterTbl * conf_data);
+static ClusterTbl * search_free_cluster_tbl(void );
+static void write_cluster_status_file(ClusterTbl * ptr);
+
+/* STATUS_OK if at least one cluster entry is TBL_USE or TBL_INIT, otherwise STATUS_ERROR. */
+int PGRis_cluster_alive(void)
+{
+	ClusterTbl * ptr = Cluster_Tbl;
+	int use=0;
+	/* a missing table has no live cluster; PGRscan_cluster() guards the same way */
+	if (ptr == NULL)
+		return STATUS_ERROR;
+	PGRsem_lock(ClusterSemid,MAX_DB_SERVER);
+	while (ptr->useFlag != TBL_END)
+	{
+		if ((ptr->useFlag == TBL_USE) || (ptr->useFlag == TBL_INIT))
+			use++;
+		ptr++;
+	}
+	PGRsem_unlock(ClusterSemid,MAX_DB_SERVER);
+	return use==0 ? STATUS_ERROR : STATUS_OK;
+}
+/* Pick the usable cluster with the lowest connection use rate (an idle one wins at once), count the new connection in use_num and return it; NULL if none. */
+ClusterTbl *
+PGRscan_cluster(void)
+{
+ char * func = "PGRscan_cluster";
+ ClusterTbl * ptr = NULL;
+ ClusterTbl * rtn = NULL;
+ int min_use_rate = 100;
+ int use_rate = 0;
+ int cnt = 0;
+ /* NOTE(review): cnt counts every row visited, usable or not, and is compared with ClusterNum; */
+ /* rows located after unusable ones may therefore never be examined -- confirm this is intended */
+ ptr = Cluster_Tbl;
+ if (ptr == NULL)
+ {
+ show_error("%s:Cluster Table is not initialize",func);
+ return (ClusterTbl *)NULL;
+ }
+#ifdef PRINT_DEBUG
+ show_debug("%s:%d ClusterDB can be used",func,ClusterNum);
+#endif
+ PGRsem_lock(ClusterSemid,MAX_DB_SERVER);
+ while ((cnt <= ClusterNum) && (ptr->useFlag != TBL_END))
+ {
+#ifdef PRINT_DEBUG
+ show_debug("%s:%s [%d],useFlag->%d max->%d use_num->%d\n",
+ func, ptr->hostName,ptr->port,ptr->useFlag,ptr->max_connect,ptr->use_num);
+#endif
+ cnt ++;
+ if ((ptr->useFlag != TBL_USE) && (ptr->useFlag != TBL_INIT)) /* only TBL_USE / TBL_INIT rows are candidates */
+ {
+ ptr ++;
+ continue;
+ }
+ if (ptr->max_connect <= ptr->use_num) /* already at its connection limit */
+ {
+ ptr ++;
+ continue;
+ }
+ if (ptr->use_num > 0)
+ {
+ use_rate = ptr->use_num * 100 / ptr->max_connect ; /* integer percentage of the limit in use */
+ }
+ else
+ {
+ use_rate = 0; /* an idle cluster is taken immediately */
+ rtn = ptr;
+ break;
+ }
+ if (min_use_rate > use_rate)
+ {
+ min_use_rate = use_rate;
+ rtn = ptr;
+ }
+ ptr ++;
+ }
+ if (rtn != NULL)
+ {
+ rtn->use_num ++;
+ if (rtn->useFlag == TBL_INIT)
+ {
+ PGRset_status_on_cluster_tbl (TBL_USE,rtn); /* first use: TBL_INIT becomes TBL_USE */
+ }
+ }
+ PGRsem_unlock(ClusterSemid,MAX_DB_SERVER);
+ return rtn;
+}
+
+/* Reset *ptr and fill its host name, max_connect and port from a recovery packet. */
+void
+PGRset_key_of_cluster(ClusterTbl * ptr, RecoveryPacket * packet)
+{
+	int max_connect = 0;
+	int port = 0;
+
+	memset(ptr,0,sizeof(ClusterTbl));
+	memcpy(ptr->hostName,packet->hostName,sizeof(ptr->hostName));
+
+	/*
+	 * ntohs() yields an unsigned 16-bit value, so this is never negative and
+	 * the received count is always stored (0 included), exactly as before.
+	 * NOTE(review): confirm whether 0 should select DEFAULT_CONNECT_NUM.
+	 */
+	max_connect = ntohs(packet->max_connect);
+	ptr->max_connect = max_connect;
+
+	/*
+	 * Port 0 is not a usable port number: fall back to the default instead
+	 * of storing it (the former ">= 0" test could never reach the default).
+	 */
+	port = ntohs(packet->port);
+	if ( port > 0)
+		ptr->port = port;
+	else
+		ptr->port = DEFAULT_PORT;
+}
+
+/*
+ * Register conf_data: refresh a matching TBL_USE/TBL_INIT entry in place, else initialise a free entry; NULL if neither works.
+ */
+ClusterTbl *
+PGRadd_cluster_tbl (ClusterTbl * conf_data)
+{
+	char * func = "PGRadd_cluster_tbl()";
+	ClusterTbl *known = PGRsearch_cluster_tbl(conf_data);
+	ClusterTbl *vacant;
+
+	if ((known != NULL) && ((known->useFlag == TBL_USE) || (known->useFlag == TBL_INIT)))
+	{
+		known->max_connect = conf_data->max_connect;
+		known->use_num = 0;
+		known->rate = 0;
+		return known;
+	}
+	vacant = search_free_cluster_tbl();
+	if (vacant == NULL)
+	{
+		show_error("%s:no more free space in cluster table",func);
+		return (ClusterTbl *)NULL;
+	}
+	if (ClusterNum >= Max_DB_Server)
+		return (ClusterTbl *)NULL;
+	set_cluster_tbl(vacant, conf_data);
+	return vacant;
+}
+
+/*
+ * Change the status of a cluster entry, log the change to the status file
+ * and adjust ClusterNum.  Nothing happens for a NULL entry or an unchanged
+ * status.  Always returns ptr.
+ * NOTE(review): every new status except TBL_INIT and TBL_STOP decrements
+ * ClusterNum, TBL_USE included -- confirm that this is intended.
+ */
+ClusterTbl *
+PGRset_status_on_cluster_tbl (int status, ClusterTbl * ptr)
+{
+#ifdef PRINT_DEBUG
+	char * func = "PGRset_status_on_cluster_tbl()";
+#endif
+
+	if ((ptr == (ClusterTbl*)NULL) || (ptr->useFlag == status))
+		return ptr;
+#ifdef PRINT_DEBUG
+	show_debug("%s:host:%s port:%d max:%d use:%d status%d",
+		func, ptr->hostName,ptr->port,ptr->max_connect,ptr->useFlag,status);
+#endif
+	ptr->useFlag = status;
+	write_cluster_status_file(ptr);
+	if (status == TBL_INIT)
+	{
+		if (ClusterNum < Max_DB_Server)
+			ClusterNum ++ ;
+	}
+	else if ((status != TBL_STOP) && (ClusterNum > 0))
+		ClusterNum -- ;
+	return ptr;
+}
+
+/* Write one line describing the entry's current status to the status log (StatusFp); other statuses write nothing. */
+static void
+write_cluster_status_file(ClusterTbl * ptr)
+{
+	int			state = ptr->useFlag;
+
+	if (state == TBL_FREE)
+	{
+		PGRwrite_log_file(StatusFp,"port(%d) host:%s free",
+			ptr->port, ptr->hostName);
+	}
+	else if (state == TBL_INIT)
+	{
+		PGRwrite_log_file(StatusFp,"port(%d) host:%s initialize",
+			ptr->port, ptr->hostName);
+	}
+	else if (state == TBL_USE)
+	{
+		PGRwrite_log_file(StatusFp,"port(%d) host:%s start use",
+			ptr->port, ptr->hostName);
+	}
+	else if (state == TBL_ERROR)
+	{
+		PGRwrite_log_file(StatusFp,"port(%d) host:%s error",
+			ptr->port, ptr->hostName);
+	}
+	else if (state == TBL_END)
+	{
+		PGRwrite_log_file(StatusFp,"port(%d) host:%s end",
+			ptr->port, ptr->hostName);
+	}
+}
+
+/*
+ * Find the cluster entry with conf_data's host name and port.  Only entries
+ * with a port > 0 are compared; the scan ends after Max_DB_Server rows or
+ * once more than ClusterNum TBL_USE/TBL_INIT entries have been passed.
+ * Returns the entry or NULL.
+ */
+ClusterTbl *
+PGRsearch_cluster_tbl(ClusterTbl * conf_data)
+{
+	ClusterTbl *entry = Cluster_Tbl;
+	int			active = 0;
+	int			row;
+
+	for (row = 0; (active <= ClusterNum) && (row < Max_DB_Server); row++, entry++)
+	{
+		/* rows without a port hold no cluster */
+		if (entry->port <= 0)
+			continue;
+		if ((!strcmp(entry->hostName,conf_data->hostName)) &&
+			(entry->port == conf_data->port))
+			return entry;
+		if ((entry->useFlag == TBL_USE) || (entry->useFlag == TBL_INIT))
+			active++;
+	}
+
+	return (ClusterTbl *)NULL;
+}
+
+/* Copy conf_data's host name, limits and counters into ptr, then mark ptr TBL_INIT; always STATUS_OK. */
+static int
+set_cluster_tbl(ClusterTbl * ptr , ClusterTbl * conf_data)
+{
+	/* rec_no of the destination entry is left untouched */
+	memcpy(ptr->hostName,conf_data->hostName,sizeof(ptr->hostName));
+	ptr->max_connect = conf_data->max_connect;
+	ptr->port = conf_data->port;
+	ptr->use_num = conf_data->use_num;
+	ptr->rate = conf_data->rate;
+
+	/* done last, so the status change is logged with the new host and port */
+	PGRset_status_on_cluster_tbl (TBL_INIT, ptr);
+	return STATUS_OK;
+}
+
+/*
+ * Return the first TBL_FREE or TBL_ERROR entry among the leading rows of
+ * the cluster table, or NULL when there is none.
+ */
+static ClusterTbl *
+search_free_cluster_tbl(void )
+{
+	ClusterTbl *entry = Cluster_Tbl;
+	int			row;
+
+	/* at most ClusterNum + 1 rows are examined, and never more than Max_DB_Server */
+	for (row = 0; (row <= ClusterNum ) && (row < Max_DB_Server); row++, entry++)
+	{
+		if ((entry->useFlag == TBL_FREE) || (entry->useFlag == TBL_ERROR))
+			return entry;
+	}
+	return (ClusterTbl *)NULL;
+}
+
diff -aruN postgresql-8.2.4/src/pgcluster/pglb/lifecheck.c pgcluster-1.7.0rc7/src/pgcluster/pglb/lifecheck.c
--- postgresql-8.2.4/src/pgcluster/pglb/lifecheck.c 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/lifecheck.c 2007-03-01 16:27:15.000000000 +0100
@@ -0,0 +1,329 @@
+/*--------------------------------------------------------------------
+ * FILE:
+ * lifecheck.c
+ *
+ * NOTE:
+ * This file is composed of the functions to call with the source
+ * at pgreplicate for the lifecheck.
+ *
+ * Portions Copyright (c) 2003-2007, Atsushi Mitani
+ *--------------------------------------------------------------------
+ */
+#include "postgres.h"
+#include "postgres_fe.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "libpq-fe.h"
+#include "libpq-int.h"
+#include "fe-auth.h"
+
+#include
+#include
+#include
+#include
+
+#ifdef HAVE_NETINET_TCP_H
+#include
+#endif
+
+#ifdef HAVE_SYS_SELECT_H
+#include
+#endif
+
+
+#ifdef HAVE_CRYPT_H
+#include
+#endif
+
+
+#ifdef MULTIBYTE
+#include "mb/pg_wchar.h"
+#endif
+
+#include "access/xact.h"
+#include "lib/dllist.h"
+#include "libpq/pqformat.h"
+#include "replicate_com.h"
+#include "pglb.h"
+
+#define PING_DB "template1" /* database that lifecheck_loop() connects to */
+#define PING_QUERY "SELECT 1" /* statement that ping_cluster() executes */
+
+static ClusterTbl * PGR_Cluster_DB_4_Lifecheck = (ClusterTbl*)NULL; /* record being checked; read by set_timeout() */
+
+/*--------------------------------------
+ * PROTOTYPE DECLARATION
+ *--------------------------------------
+ */
+int PGRlifecheck_main(int fork_wait_time); /* forks the lifecheck process */
+PGconn * PGRcreateConn( char * host, char * port,char * database, char * userName, char * password, char * md5Salt, char * cryptSalt );
+
+static bool is_started_loadbalance(void); /* true once a cluster record is TBL_USE */
+static void set_timeout(SIGNAL_ARGS); /* installed for SIGALRM */
+static int lifecheck_loop(void); /* one pass over Cluster_Tbl */
+static int ping_cluster(PGconn * conn); /* runs PING_QUERY */
+static void set_cluster_status(ClusterTbl * host_ptr, int status); /* applies a check result, counting retries */
+
+int
+PGRlifecheck_main(int fork_wait_time)
+{
+    /*
+     * Fork the lifecheck process.  The caller gets STATUS_OK once the child
+     * exists and STATUS_ERROR when fork() fails.  The child never returns:
+     * it installs its signal handlers, sleeps fork_wait_time seconds when
+     * that is positive, and then checks the cluster DBs every
+     * PGR_Lifecheck_Interval seconds.
+     */
+    pid_t       pgid = 0;
+    pid_t       pid = 0;
+
+    pgid = getpgid(0);
+    pid = fork();
+    if (pid < 0)
+    {
+        /* no child was created, so success must not be reported */
+        return STATUS_ERROR;
+    }
+    if (pid > 0)
+        return STATUS_OK;
+
+    /* in child process: stay in the process group of the parent */
+    setpgid(0,pgid);
+
+    PGRsignal(SIGHUP, PGRexit_subprocess);
+    PGRsignal(SIGTERM, PGRexit_subprocess);
+    PGRsignal(SIGINT, PGRexit_subprocess);
+    PGRsignal(SIGQUIT, PGRexit_subprocess);
+    PGRsignal(SIGALRM, set_timeout);
+
+    if (fork_wait_time > 0) {
+        sleep(fork_wait_time);
+    }
+
+    /* no user name set: fall back to LOGNAME, then USER, then "postgres" */
+    if (PGRuserName == NULL)
+    {
+        PGRuserName = getenv("LOGNAME");
+        if (PGRuserName == NULL)
+        {
+            PGRuserName = getenv("USER");
+            if (PGRuserName == NULL)
+                PGRuserName = "postgres";
+        }
+    }
+
+    for (;;)
+    {
+        /* life check to all cluster dbs, once one of them is in use */
+        if (is_started_loadbalance())
+            lifecheck_loop();
+
+        /* wait next lifecheck as interval */
+        sleep(PGR_Lifecheck_Interval);
+    }
+    return STATUS_OK;       /* not reached */
+}
+
+static bool
+is_started_loadbalance(void)
+{
+    /*
+     * True when at least one Cluster_Tbl record ahead of the TBL_END
+     * record is in TBL_USE state; false otherwise or if there is no table.
+     */
+    ClusterTbl *entry;
+
+    if (Cluster_Tbl == NULL)
+        return false;
+
+    for (entry = Cluster_Tbl; entry->useFlag != TBL_END; entry++)
+    {
+        if (entry->useFlag == TBL_USE)
+            return true;
+    }
+
+    return false;
+}
+
+static void
+set_timeout(SIGNAL_ARGS)
+{
+    /* SIGALRM handler: report an error for the DB being checked, if any */
+    ClusterTbl *target = PGR_Cluster_DB_4_Lifecheck;
+    if (target != NULL)
+        set_cluster_status(target, TBL_ERROR);
+    PGRsignal(SIGALRM, set_timeout);    /* install this handler again */
+}
+
+static int
+lifecheck_loop(void)
+{
+    /*
+     * Ping every cluster DB that is in TBL_USE or TBL_INIT state, walking
+     * Cluster_Tbl up to the TBL_END record.  Each check runs under an
+     * alarm of PGR_Lifecheck_Timeout seconds and its result is passed to
+     * set_cluster_status().  Returns STATUS_ERROR when there is no table,
+     * STATUS_OK otherwise.
+     */
+    ClusterTbl * host_ptr = (ClusterTbl*)NULL;
+    char port[8];
+    char * host = NULL;
+    PGconn * conn = NULL;
+
+    host_ptr = Cluster_Tbl;
+    if (host_ptr == NULL)
+        return STATUS_ERROR;
+    alarm(0);
+    for (; host_ptr->useFlag != TBL_END; host_ptr ++)
+    {
+        /*
+         * Only TBL_USE and TBL_INIT records are checked.  The two tests
+         * need &&: joined with || they skipped every single record.
+         */
+        if ((host_ptr->useFlag != TBL_USE) && (host_ptr->useFlag != TBL_INIT))
+            continue;
+        snprintf(port,sizeof(port),"%d", host_ptr->port);
+        host = (char *)(host_ptr->hostName);
+        /* remember which record set_timeout() has to flag */
+        PGR_Cluster_DB_4_Lifecheck = host_ptr;
+
+        /* set alarm as lifecheck timeout */
+        alarm(PGR_Lifecheck_Timeout);
+
+        /* connect DB */
+        conn = PGRcreateConn(host,port, PING_DB ,PGRuserName,"","","");
+        if ((conn != NULL) && (ping_cluster(conn) == STATUS_OK))
+        {
+            set_cluster_status(host_ptr,TBL_USE);
+        }
+        else
+        {
+            set_cluster_status(host_ptr,TBL_ERROR);
+        }
+        /* reset alarm */
+        alarm(0);
+
+        PQfinish(conn);
+    }
+
+    return STATUS_OK;
+}
+
+static int
+ping_cluster(PGconn * conn)
+{
+    /*
+     * Run PING_QUERY on conn.  Returns STATUS_ERROR when the result status
+     * is PGRES_NONFATAL_ERROR or PGRES_FATAL_ERROR, STATUS_OK otherwise.
+     */
+    PGresult   *result;
+    int         rc;
+
+    result = PQexec(conn, PING_QUERY );
+    rc = PQresultStatus(result);
+    if (result != NULL)
+        PQclear(result);
+
+    if ((rc != PGRES_NONFATAL_ERROR) && (rc != PGRES_FATAL_ERROR))
+        return STATUS_OK;
+
+    return STATUS_ERROR;
+}
+
+PGconn *
+PGRcreateConn( char * host, char * port,char * database, char * userName, char * password, char * md5Salt, char * cryptSalt )
+{
+    /*
+     * Open a libpq connection to database on host:port as userName.
+     *
+     * password may be NULL or "" for no password.  A password starting
+     * with "md5" gets the first four bytes of md5Salt appended in the form
+     * "(%d)(%d)(%d)(%d)" when md5Salt is not NULL.  cryptSalt is unused.
+     *
+     * A failed connect is repeated after a usleep(PGR_SEND_WAIT_MSEC).
+     * Every failure uses up one retry, except a "database system is
+     * starting up" refusal, which is simply waited out.  Returns the open
+     * connection, or NULL once more than PGLB_CONNECT_RETRY_TIME retries
+     * have been used up.
+     */
+    int cnt = 0;
+    PGconn * conn = NULL;
+    char pwd[256];
+
+    memset(pwd,0,sizeof(pwd));
+    if ((password != NULL) && (*password != '\0'))
+    {
+        if ((strncmp(password,"md5",3) == 0) && (md5Salt != NULL))
+        {
+            /* bounded: an over-long password must not overrun pwd */
+            snprintf(pwd,sizeof(pwd),"%s(%d)(%d)(%d)(%d)",password,
+                     *md5Salt,*(md5Salt+1),*(md5Salt+2),*(md5Salt+3));
+        }
+        else
+        {
+            /* copy one byte less than pwd holds so it stays terminated */
+            strncpy(pwd,password,sizeof(pwd) - 1);
+        }
+    }
+    conn = PQsetdbLogin(host, port, NULL, NULL, database, userName, pwd);
+    /* check to see that the backend Connection was successfully made */
+    while (PQstatus(conn) == CONNECTION_BAD)
+    {
+        if (conn != NULL)
+        {
+            PQfinish(conn);
+            conn = NULL;
+        }
+        conn = PQsetdbLogin(host, port, NULL, NULL, database, userName, pwd);
+        if (PQstatus(conn) != CONNECTION_BAD)
+            break;              /* this retry worked: do not discard it */
+        if (cnt > PGLB_CONNECT_RETRY_TIME )
+        {
+            if (conn != NULL)
+                PQfinish(conn);
+            return (PGconn *)NULL;
+        }
+
+        usleep(PGR_SEND_WAIT_MSEC);
+        /* h_errno == 2 always counts (presumably TRY_AGAIN - TODO confirm) */
+        if ((h_errno == 2) ||
+            strncasecmp(PQerrorMessage(conn),"FATAL: The database system is starting up",40) )
+        {
+            cnt ++;
+        }
+    }
+    return conn;
+}
+
+static void
+set_cluster_status(ClusterTbl * host_ptr, int status)
+{
+    /*
+     * Record a lifecheck result for host_ptr (NULL is ignored).  TBL_ERROR
+     * only increments retry_count until that count has passed
+     * PGLB_CONNECT_RETRY_TIME; any other status clears retry_count.  When
+     * not held back, status goes to PGRset_status_on_cluster_tbl().
+     */
+    if (host_ptr == NULL)
+        return;
+
+    if (status != TBL_ERROR)
+        host_ptr->retry_count = 0;
+    else if (++host_ptr->retry_count <= PGLB_CONNECT_RETRY_TIME)
+        return;             /* still within the tolerated failures */
+
+    PGRset_status_on_cluster_tbl(status, host_ptr);
+}
diff -aruN postgresql-8.2.4/src/pgcluster/pglb/load_balance.c pgcluster-1.7.0rc7/src/pgcluster/pglb/load_balance.c
--- postgresql-8.2.4/src/pgcluster/pglb/load_balance.c 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/load_balance.c 2007-02-18 22:52:17.000000000 +0100
@@ -0,0 +1,252 @@
+/*--------------------------------------------------------------------
+ * FILE:
+ * load_balance.c
+ *
+ * NOTE:
+ * This file is composed of the functions of load balance modules
+ * with connection pooling or not
+ *
+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
+ *--------------------------------------------------------------------
+ */
+/*
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that copyright notice and this permission
+ * notice appear in supporting documentation, and that the name of the
+ * author not be used in advertising or publicity pertaining to
+ * distribution of the software without specific, written prior
+ * permission. The author makes no representations about the
+ * suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+*/
+#include "postgres.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef HAVE_NETINET_TCP_H
+#include
+#endif
+
+#include "replicate_com.h"
+#include "pglb.h"
+
+/*--------------------------------------
+ * PROTOTYPE DECLARATION
+ *--------------------------------------
+ */
+int PGRload_balance(void); /* forks one child per session */
+int PGRload_balance_with_pool(void); /* signals an existing child with SIGUSR1 */
+char PGRis_connection_full(ClusterTbl * ptr); /* 0 while use_num is below max_connect, else 1 */
+void PGRrelease_connection(ClusterTbl * ptr); /* decrements use_num, not below 0 */
+void PGRchild_wait(int sig); /* installed for SIGCHLD by PGRload_balance() */
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *     PGRload_balance()
+ * NOTES
+ *     Load balance module that normal connection is used (no pooling):
+ *     picks a cluster server and forks a child process for the session.
+ *     The parent comes back once the child's table status is no longer
+ *     TBL_USE.
+ * ARGS
+ *     void
+ * RETURN
+ *     OK: STATUS_OK
+ *     NG: STATUS_ERROR (no cluster server could be selected)
+ *--------------------------------------------------------------------
+ */
+int
+PGRload_balance(void)
+{
+    char * func = "PGRload_balance()";
+    pid_t pid,pgid;
+    int count;
+    int status;
+    ClusterTbl * cluster_p;
+
+    PGRsignal(SIGCHLD, PGRchild_wait);
+
+    /* get the least loaded cluster server info, rescanning on failure */
+    for (count = 0; (cluster_p = PGRscan_cluster()) == NULL; count ++)
+    {
+        if ( count > PGLB_CONNECT_RETRY_TIME)
+        {
+            show_error("%s:no cluster available",func);
+            return STATUS_ERROR;
+        }
+    }
+
+    pgid = getpgid((pid_t)0);
+    pid = fork();
+    if (pid < 0)
+    {
+        show_error("%s:fork() failed. (%s)",func,strerror(errno));
+        exit(1);
+    }
+
+    if (pid == 0)
+    {
+        /* child: serve the session on the selected cluster, then exit */
+        setpgid((pid_t)0,pgid);
+        CurrentCluster = cluster_p;
+
+        if (pool_init_cp())
+        {
+            show_error("%s:pool_init_cp failed",func);
+            exit(1);
+        }
+        /* add this child to the child table unless a status is already there */
+        PGRsem_lock(ClusterSemid,cluster_p->rec_no);
+        if (PGRget_child_status(getpid()) == STATUS_ERROR)
+        {
+            PGRadd_child_tbl(cluster_p, getpid(), TBL_USE);
+        }
+        PGRsem_unlock(ClusterSemid,cluster_p->rec_no);
+
+        PGRdo_child(NOT_USE_CONNECTION_POOL );
+        PGRrelease_connection(cluster_p);
+        PGRset_status_to_child_tbl(getpid(), TBL_FREE);
+        exit(0);
+    }
+
+    /* parent: same registration step, made under the same semaphore */
+    PGRsem_lock(ClusterSemid,cluster_p->rec_no);
+    if (PGRget_child_status(pid) == STATUS_ERROR)
+    {
+        PGRadd_child_tbl(cluster_p, pid, TBL_USE);
+    }
+    PGRsem_unlock(ClusterSemid,cluster_p->rec_no);
+
+    /* poll while the child's table status reads TBL_USE */
+    for (status = PGRget_child_status(pid); status == TBL_USE; usleep(20))
+    {
+        status = PGRget_child_status(pid);
+    }
+
+    return STATUS_OK;
+}
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *     PGRload_balance_with_pool()
+ * NOTES
+ *     Load balance module that connection pooling system is used: picks
+ *     a cluster server, signals one of its child processes with SIGUSR1
+ *     and waits until that child's table status is no longer TBL_USE.
+ * ARGS
+ *     void
+ * RETURN
+ *     OK: STATUS_OK
+ *     NG: STATUS_ERROR (no cluster server or no child process available)
+ *--------------------------------------------------------------------
+ */
+int
+PGRload_balance_with_pool(void)
+{
+    char * func = "PGRload_balance_with_pool()";
+    int count;
+    pid_t pid;
+    ClusterTbl * cluster_p;
+    int status;
+
+    /* get the least loaded cluster server info, rescanning on failure */
+    for (count = 0; (cluster_p = PGRscan_cluster()) == NULL; count ++)
+    {
+        if ( count > PGLB_CONNECT_RETRY_TIME)
+        {
+            show_error("%s:no cluster available",func);
+            PGRreturn_no_connection_error();
+            return STATUS_ERROR;
+        }
+    }
+
+    /* look up a child process for that cluster */
+    pid = PGRscan_child_tbl(cluster_p);
+    if ((pid == 0) || (pid == STATUS_ERROR))
+    {
+        show_error("%s:no child process available",func);
+        return STATUS_ERROR;
+    }
+    kill(pid,SIGUSR1);
+
+    /* poll while the child's table status reads TBL_USE */
+    status = PGRget_child_status(pid);
+    while (status == TBL_USE)
+    {
+        status = PGRget_child_status(pid);
+        usleep(20);
+    }
+
+    return STATUS_OK;
+}
+
+char
+PGRis_connection_full(ClusterTbl * ptr)
+{
+    /*
+     * Returns 0 while ptr->use_num is still below ptr->max_connect and 1
+     * otherwise, including for a NULL ptr.  The comparison is made while
+     * holding semaphore ptr->rec_no of ClusterSemid.
+     */
+    char full;
+
+    if (ptr == NULL)
+        return 1;
+    PGRsem_lock(ClusterSemid,ptr->rec_no);
+    full = (ptr->max_connect > ptr->use_num) ? 0 : 1;
+    PGRsem_unlock(ClusterSemid,ptr->rec_no);
+    return full;
+}
+
+void
+PGRrelease_connection(ClusterTbl * ptr)
+{
+    /*
+     * Decrement ptr->use_num, never below zero, while holding semaphore
+     * MAX_DB_SERVER of ClusterSemid.  A NULL ptr is ignored.
+     */
+    if (ptr == NULL)
+        return;
+    PGRsem_lock(ClusterSemid,MAX_DB_SERVER);
+    if (ptr->use_num > 0)
+        ptr->use_num --;
+    PGRsem_unlock(ClusterSemid,MAX_DB_SERVER);
+}
+
+void
+PGRchild_wait(int sig)
+{
+    /* Reap every child that has already ended, without blocking.  The
+     * status word is valid only when waitpid() returns a pid, so it is no
+     * longer read after a 0 or -1 return (that could spin forever). */
+    int         saved_errno = errno;    /* also runs as SIGCHLD handler */
+    int         ret = 0;
+    pid_t       pid;
+    do {
+        pid = waitpid(-1,&ret,WNOHANG);
+    } while ((pid > 0) || ((pid < 0) && (errno == EINTR)));
+    errno = saved_errno;
+}
diff -aruN postgresql-8.2.4/src/pgcluster/pglb/main.c pgcluster-1.7.0rc7/src/pgcluster/pglb/main.c
--- postgresql-8.2.4/src/pgcluster/pglb/main.c 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/main.c 2007-03-01 16:27:15.000000000 +0100
@@ -0,0 +1,1137 @@
+/*--------------------------------------------------------------------
+ * FILE:
+ * main.c
+ *
+ * NOTE:
+ * This file is composed of the main function of pglb.
+ *
+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
+ *--------------------------------------------------------------------
+ */
+/*
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that copyright notice and this permission
+ * notice appear in supporting documentation, and that the name of the
+ * author not be used in advertising or publicity pertaining to
+ * distribution of the software without specific, written prior
+ * permission. The author makes no representations about the
+ * suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+*/
+#include "postgres.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef HAVE_NETINET_TCP_H
+#include
+#endif
+
+#ifdef HAVE_CRYPT_H
+#include
+#endif
+
+#ifdef HAVE_GETOPT_H
+#include
+#endif
+
+#include "replicate_com.h"
+#include "pglb.h"
+
+
+
+#define IPC_NMAXSEM (32)
+/*--------------------------------------
+ * GLOBAL VARIABLE DECLARATION
+ *--------------------------------------
+ */
+/* for replicate_com.h */
+ConfDataType * ConfData_Top = (ConfDataType *)NULL; /* head of the setup data list walked by init_pglb() */
+ConfDataType * ConfData_End = (ConfDataType *)NULL;
+int MapTableShmid = -1;
+int LifeCheckStartShmid = -1;
+char * LifeCheckStartFlag = NULL;
+int LifeCheckTimeOut = 10;
+FILE * StatusFp = (FILE *)NULL; /* cluster status file, opened for append by init_pglb() */
+char * PGRStatusFileName = NULL; /* when set, used by init_pglb() instead of PGR_Write_Path/PGLB_STATUS_FILE */
+char * PGRLogFileName = NULL;
+char * PGRuserName = NULL; /* user name for lifecheck connections */
+int Log_Print = 0;
+int Debug_Print = 0;
+
+char * ResolvedName = NULL; /* dotted address text built by init_pglb() from the host name entry */
+int Recv_Port_Number = 0; /* set from RECV_PORT_TAG */
+int Recovery_Port_Number = 0; /* set from RECOVERY_PORT_TAG */
+uint16_t LifeCheck_Port_Number = 0;
+int Use_Connection_Pool = 0; /* 1 when USE_CONNECTION_POOL_TAG is "yes" */
+int Max_Pool = 1; /* set from MAX_POOL_TAG; negative values become 1 */
+int Connection_Life_Time = 0; /* set from CONNECTION_LIFE_TIME; negative values become 0 */
+int Max_DB_Server = 0; /* set from MAX_CLUSTER_TAG; init_pglb() exits when it stays <= 0 */
+int MaxBackends = 0;
+ClusterTbl * Cluster_Tbl = (ClusterTbl *)NULL; /* cluster table, attached from shared memory in init_pglb() */
+int ClusterNum = 0;
+int ClusterShmid = 0; /* shmget() id of Cluster_Tbl */
+int ClusterSemid = 0; /* semget() id; MAX_DB_SERVER+1 semaphores, each set to 1 */
+ChildTbl * Child_Tbl = (ChildTbl *)NULL; /* child table, attached from shared memory in init_pglb() */
+int ChildShmid = 0; /* shmget() id of Child_Tbl */
+char * PGR_Data_Path = NULL;
+char * PGR_Write_Path = NULL; /* directory of the status and pid files */
+char * Backend_Socket_Dir = NULL; /* 128 byte buffer, "/tmp" unless BACKEND_SOCKET_DIR_TAG is given */
+FrontSocket Frontend_FD; /* unix_fd and inet_fd, set in load_balance_main() */
+ClusterTbl * CurrentCluster = NULL; /* set in the child forked by PGRload_balance() */
+int PGR_Lifecheck_Timeout = 3; /* accepted range in init_pglb(): 1-3600 */
+int PGR_Lifecheck_Interval = 15; /* accepted range in init_pglb(): 1-3600 */
+
+int fork_wait_time = 0;
+
+extern char *optarg;
+
+/*--------------------------------------
+ * PROTOTYPE DECLARATION
+ *--------------------------------------
+ */
+static int init_pglb(char * path);
+static void pglb_exit(int signal_args);
+static void load_balance_main(void);
+static void daemonize(void);
+static void write_pid_file(void);
+static void stop_pglb(void);
+static int is_exist_pid_file(void);
+static ClusterTbl * scan_cluster_by_pid(pid_t pid);
+static void usage(void);
+static void close_child(int signal_args);
+
+void PGRrecreate_child(int signal_args); /* installed for SIGCHLD when pooling is on */
+void PGRexit_subprocess(int sig);
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *     init_pglb()
+ * NOTES
+ *     Reads the setup file, opens the cluster status file and creates
+ *     the shared memory tables and semaphores used by pglb.
+ * ARGS
+ *     char * path: directory of the setup file, NULL means "." (I)
+ * RETURN
+ *     OK: STATUS_OK
+ *     NG: STATUS_ERROR (some failures call exit(1) instead)
+ *--------------------------------------------------------------------
+ */
+static int
+init_pglb(char * path)
+{
+    char * func = "init_pglb()";
+
+    ConfDataType * conf;
+    ClusterTbl cluster_tbl[MAX_DB_SERVER];
+    int size = 0;
+    int rec_no = 0;
+    int max_rec_no = -1;    /* highest cluster record number in the file */
+    int i;
+    int max_connect = 0;
+    union semun sem_arg;
+    char fname[256];
+
+    /*
+     * read configuration file
+     */
+    if (path == NULL)
+        path = ".";
+    if (PGR_Get_Conf_Data(path,PGLB_CONF_FILE) != STATUS_OK)
+    {
+        show_error("%s:PGR_Get_Conf_Data failed",func);
+        return STATUS_ERROR;
+    }
+    /* records the file never mentions must read as empty, not as garbage */
+    memset(cluster_tbl,0,sizeof(cluster_tbl));
+
+    size = sizeof(LogFileInf);
+    LogFileData = (LogFileInf *) malloc(size);
+    if (LogFileData == NULL)
+    {
+        show_error("%s:malloc() failed. reason: %s", func,strerror(errno));
+        return STATUS_ERROR;
+    }
+    memset(LogFileData,0,size);
+
+    /* cluster db status file open */
+    if (PGRStatusFileName == NULL)
+    {
+        snprintf(fname,sizeof(fname),"%s/%s",PGR_Write_Path,PGLB_STATUS_FILE);
+    }
+    else
+    {
+        /* copy as a string: the name may be shorter than fname */
+        snprintf(fname,sizeof(fname),"%s",PGRStatusFileName);
+    }
+    StatusFp = fopen(fname, "a");
+    if (StatusFp == NULL)
+    {
+        show_error("%s:open() %s file failed. (%s)",
+                   func,fname, strerror(errno));
+        exit(1);
+    }
+
+    Backend_Socket_Dir = malloc(128);
+    if (Backend_Socket_Dir == NULL)
+    {
+        show_error("%s:malloc() failed. (%s)",func,strerror(errno));
+        return STATUS_ERROR;
+    }
+    memset(Backend_Socket_Dir,0,128);
+    /* set initialize data */
+    strcpy(Backend_Socket_Dir,"/tmp");
+    Max_Pool = 1;
+    Connection_Life_Time = 0;
+    Use_Connection_Pool = 0;
+
+    conf = ConfData_Top;
+    while (conf != (ConfDataType *)NULL)
+    {
+        /* get cluster db servers name */
+        if (!strcmp(conf->table,CLUSTER_SERVER_TAG))
+        {
+            rec_no = conf->rec_no;      /* index into cluster_tbl[] */
+            if ((rec_no < 0) || (rec_no >= MAX_DB_SERVER))
+            {
+                show_error("%s:cluster server record %d is out of range",func,rec_no);
+                return STATUS_ERROR;
+            }
+            if (max_rec_no < rec_no)
+                max_rec_no = rec_no;
+            if (!strcmp(conf->key,HOST_NAME_TAG))
+            {
+                strncpy(cluster_tbl[rec_no].hostName,conf->value,sizeof(cluster_tbl[rec_no].hostName) - 1);
+                conf = (ConfDataType*)conf->next;
+                continue;
+            }
+            if (!strcmp(conf->key,PORT_TAG))
+            {
+                cluster_tbl[rec_no].port = atoi(conf->value);
+                conf = (ConfDataType*)conf->next;
+                continue;
+            }
+            if (!strcmp(conf->key,MAX_CONNECT_TAG))
+            {
+                cluster_tbl[rec_no].max_connect = atoi(conf->value);
+                conf = (ConfDataType*)conf->next;
+                continue;
+            }
+        }
+        /* get logging file data */
+        else if (!strcmp(conf->table, LOG_INFO_TAG))
+        {
+            if (!strcmp(conf->key, FILE_NAME_TAG))
+            {
+                strncpy(LogFileData->file_name, conf->value ,sizeof(LogFileData->file_name) - 1);
+                LogFileData->fp = NULL;
+                conf = (ConfDataType*)conf->next;
+                continue;
+            }
+            if (!strcmp(conf->key, FILE_SIZE_TAG))
+            {
+                int pos,len;
+                char * ptr;
+                int unit = 1;
+                len = strlen(conf->value);
+                ptr = conf->value;
+                for (pos = 0; pos < len ; pos ++,ptr++)
+                {
+                    if ((! isdigit((unsigned char) *ptr)) && (! isspace((unsigned char) *ptr)))
+                    {
+                        switch (*ptr)
+                        {
+                            case 'K':
+                            case 'k':
+                                unit = 1024;
+                                break;
+                            case 'M':
+                            case 'm':
+                                unit = 1024*1024;
+                                break;
+                            case 'G':
+                            case 'g':
+                                unit = 1024*1024*1024;
+                                break;
+                        }
+                        *ptr = '\0';
+                        break;
+                    }
+                }
+                LogFileData->max_size = atoi(conf->value) * unit;
+                conf = (ConfDataType*)conf->next;
+                continue;
+            }
+            if (!strcmp(conf->key, LOG_ROTATION_TAG))
+            {
+                LogFileData->rotation = atoi(conf->value);
+                conf = (ConfDataType*)conf->next;
+                continue;
+            }
+        }
+        else
+        {
+            if (!strcmp(conf->key,HOST_NAME_TAG))
+            {
+                int ip;
+                ip=PGRget_ip_by_name(conf->value);
+                if (ResolvedName == NULL)
+                {
+                    ResolvedName = malloc(ADDRESS_LENGTH);
+                }
+                if (ResolvedName == NULL)
+                {
+                    /* a bare "continue" here never advanced conf: endless loop */
+                    show_error("%s:malloc() failed. (%s)",func,strerror(errno));
+                    return STATUS_ERROR;
+                }
+                memset(ResolvedName,0,ADDRESS_LENGTH);
+
+                sprintf(ResolvedName,
+                        "%d.%d.%d.%d",
+                        (ip ) & 0xff ,
+                        (ip >> 8) & 0xff ,
+                        (ip >> 16) & 0xff ,
+                        (ip >> 24) & 0xff );
+                conf = (ConfDataType*)conf->next;
+                continue;
+            }
+            /* get port number for receive querys */
+            else if (!strcmp(conf->key,RECV_PORT_TAG))
+            {
+                Recv_Port_Number = atoi(conf->value);
+                conf = (ConfDataType*)conf->next;
+                continue;
+            }
+            /* get port number for recovery session */
+            else if (!strcmp(conf->key,RECOVERY_PORT_TAG))
+            {
+                Recovery_Port_Number = atoi(conf->value);
+                conf = (ConfDataType*)conf->next;
+                continue;
+            }
+            else if (!strcmp(conf->key,MAX_CLUSTER_TAG))
+            {
+                Max_DB_Server = atoi(conf->value);
+                conf = (ConfDataType*)conf->next;
+                continue;
+            }
+            else if (!strcmp(conf->key,USE_CONNECTION_POOL_TAG))
+            {
+                if (!strcmp(conf->value,"yes"))
+                {
+                    Use_Connection_Pool = 1;
+                }
+                conf = (ConfDataType*)conf->next;
+                continue;
+            }
+            else if (!strcmp(conf->key,MAX_POOL_TAG))
+            {
+                Max_Pool = atoi(conf->value);
+                if (Max_Pool < 0)
+                    Max_Pool = 1;
+                conf = (ConfDataType*)conf->next;
+                continue;
+            }
+            else if (!strcmp(conf->key,CONNECTION_LIFE_TIME))
+            {
+                Connection_Life_Time = atoi(conf->value);
+                if (Connection_Life_Time < 0)
+                    Connection_Life_Time = 0;
+                conf = (ConfDataType*)conf->next;
+                continue;
+            }
+            else if (!strcmp(conf->key,BACKEND_SOCKET_DIR_TAG))
+            {
+                strncpy(Backend_Socket_Dir,conf->value,127);    /* byte 127 stays 0 */
+                conf = (ConfDataType*)conf->next;
+                continue;
+            }
+            else if (!STRCMP(conf->key,LIFECHECK_TIMEOUT_TAG))
+            {
+                /* get lifecheck timeout */
+                PGR_Lifecheck_Timeout = PGRget_time_value(conf->value);
+                if ((PGR_Lifecheck_Timeout < 1) || (PGR_Lifecheck_Timeout > 3600))
+                {
+                    show_error("%s is out of range. It should be between 1sec-1hr.\n",LIFECHECK_TIMEOUT_TAG);
+                    return STATUS_ERROR;
+                }
+                conf = (ConfDataType*)conf->next;
+                continue;
+            }
+            else if (!STRCMP(conf->key,LIFECHECK_INTERVAL_TAG))
+            {
+                /* get lifecheck interval */
+                PGR_Lifecheck_Interval = PGRget_time_value(conf->value);
+                if ((PGR_Lifecheck_Interval < 1) || (PGR_Lifecheck_Interval > 3600))
+                {
+                    show_error("%s is out of range. It should between 1sec-1hr.\n",LIFECHECK_INTERVAL_TAG);
+                    return STATUS_ERROR;
+                }
+                conf = (ConfDataType*)conf->next;
+                continue;
+            }
+        }
+        conf = (ConfDataType*)conf->next;
+    }
+    if (Max_DB_Server <= 0)
+    {
+        show_error("%s:Max_DB_Server is wrong value. %s/%s file should be broken",func, path, PGLB_CONF_FILE);
+        exit(1);
+    }
+    /* shared memory allocation for cluster table, plus its TBL_END record */
+    size = sizeof(ClusterTbl) * (Max_DB_Server + 1);
+
+    ClusterShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
+    if (ClusterShmid < 0)
+    {
+        show_error("%s:ClusterShm shmget() failed. (%s)", func,strerror(errno));
+        return STATUS_ERROR;
+    }
+    Cluster_Tbl = (ClusterTbl *)shmat(ClusterShmid,0,0);
+    if (Cluster_Tbl == (ClusterTbl *)-1)
+    {
+        show_error("%s:shmat() failed. (%s)", func,strerror(errno));
+        return STATUS_ERROR;
+    }
+    memset(Cluster_Tbl,0,size);
+
+    if ((ClusterSemid = semget(IPC_PRIVATE,MAX_DB_SERVER+1,IPC_CREAT | IPC_EXCL | 0600)) < 0)
+    {
+        show_error("%s:semget() failed. (%s)",func,strerror(errno));
+        return STATUS_ERROR;
+    }
+    for ( i = 0 ; i <= MAX_DB_SERVER ; i ++)
+    {
+        sem_arg.val = 1;
+        semctl(ClusterSemid, i, SETVAL, sem_arg);
+    }
+    ClusterNum = 0;
+    /* set cluster db server name into cluster db server table */
+    for ( i = 0 ; i < Max_DB_Server ; i ++)
+        (Cluster_Tbl + i)->rec_no = i;
+    (Cluster_Tbl + i)->useFlag = TBL_END;   /* i == Max_DB_Server: the extra record */
+    for ( i = 0 ; i <= max_rec_no ; i ++)
+    {
+        cluster_tbl[i].use_num = 0;
+        cluster_tbl[i].rate = 0;
+        if (cluster_tbl[i].max_connect < 0)
+            cluster_tbl[i].max_connect = 0;
+        if (max_connect < cluster_tbl[i].max_connect)
+            max_connect = cluster_tbl[i].max_connect;
+        PGRadd_cluster_tbl(&cluster_tbl[i]);
+    }
+
+    /* shared memory allocation for children table */
+    size = sizeof(ChildTbl) * (Max_DB_Server + 1) * max_connect * Max_Pool;
+#ifdef PRINT_DEBUG
+    show_debug("%s:Child_Tbl size is[%d]",func,size);
+#endif
+
+    ChildShmid = shmget(IPC_PRIVATE,size,IPC_CREAT | IPC_EXCL | 0600);
+    if (ChildShmid < 0)
+    {
+        show_error("%s:ChildShm shmget() failed. (%s)",func, strerror(errno));
+        return STATUS_ERROR;
+    }
+    Child_Tbl = (ChildTbl *)shmat(ChildShmid,0,0);
+    if (Child_Tbl == (ChildTbl *)-1)
+    {
+        show_error("%s:shmat() failed. (%s)", func,strerror(errno));
+        return STATUS_ERROR;
+    }
+    memset(Child_Tbl, 0, size);
+    (Child_Tbl + ( Max_DB_Server * max_connect * Max_Pool) -1)->useFlag = TBL_END;
+
+    PGR_Free_Conf_Data();
+
+    return STATUS_OK;
+}
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *     pglb_exit()
+ * NOTES
+ *     Closing of pglb process: forwards the signal to the process group,
+ *     waits for the children, removes the shared memory, semaphores,
+ *     sockets and pid file, then exits with status 0.
+ * ARGS
+ *     int signal_args: signal number (I)
+ * RETURN
+ *     none
+ *--------------------------------------------------------------------
+ */
+static void
+pglb_exit(int signal_args)
+{
+    char fname[256];
+
+    /* guard: Child_Tbl is NULL until init_pglb() has attached it */
+    if (Child_Tbl != NULL)
+        Child_Tbl->useFlag = TBL_END;
+    PGRsignal(SIGCHLD,SIG_IGN);
+    PGRsignal(signal_args,SIG_IGN);
+    kill (0,signal_args);
+    while (wait(NULL) > 0 )
+        ;
+
+    if (ClusterShmid > 0)
+    {
+        shmdt((char *)Cluster_Tbl);
+        shmctl(ClusterShmid,IPC_RMID,(struct shmid_ds *)NULL);
+        ClusterShmid = 0;
+        Cluster_Tbl = NULL;
+    }
+    if (ChildShmid > 0)
+    {
+        shmdt((char *)Child_Tbl);
+        shmctl(ChildShmid,IPC_RMID,(struct shmid_ds *)NULL);
+        ChildShmid = 0;
+        Child_Tbl = NULL;
+    }
+    if (ClusterSemid > 0)
+    {
+        semctl(ClusterSemid, 0, IPC_RMID);
+        ClusterSemid = 0;
+    }
+
+    if (StatusFp != NULL)
+    {
+        fflush(StatusFp);
+        fclose(StatusFp);
+    }
+    if (Frontend_FD.unix_fd > 0)    /* < 0: socket was never created, keep the file */
+    {
+        close(Frontend_FD.unix_fd);
+        Frontend_FD.unix_fd = 0;
+        snprintf(fname, sizeof(fname), "%s/.s.PGSQL.%d", Backend_Socket_Dir,Recv_Port_Number);
+        unlink(fname);
+    }
+    if (Frontend_FD.inet_fd > 0)
+    {
+        close(Frontend_FD.inet_fd);
+        Frontend_FD.inet_fd = 0;
+    }
+    snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGLB_PID_FILE);
+    unlink(fname);
+
+    if (ResolvedName != NULL)
+    {
+        free(ResolvedName);
+        ResolvedName = NULL;
+    }
+    exit(0);
+}
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *     load_balance_main()
+ * NOTES
+ *     This is a main module of load balance function: creates the unix
+ *     domain and receive sockets, pre-forks children when pooling is
+ *     on, then calls the load balance module whenever select() reports
+ *     activity.  It loops forever; set-up failures end in pglb_exit().
+ * ARGS
+ *     void
+ * RETURN
+ *     none
+ *--------------------------------------------------------------------
+ */
+static void
+load_balance_main(void)
+{
+    char * func = "load_balance_main()";
+    int failures = 0;
+    int ready;
+
+    Frontend_FD.unix_fd = PGRcreate_unix_domain_socket(Backend_Socket_Dir, Recv_Port_Number);
+    if (Frontend_FD.unix_fd < 0)
+    {
+        show_error("%s:PGRcreate_unix_domain_socket failed",func);
+        pglb_exit(SIGTERM);
+    }
+    Frontend_FD.inet_fd = PGRcreate_recv_socket(ResolvedName, Recv_Port_Number);
+    if (Frontend_FD.inet_fd < 0)
+    {
+        show_error("%s:PGRcreate_recv_socket failed",func);
+        pglb_exit(SIGTERM);
+    }
+
+    if (Use_Connection_Pool)
+    {
+        /* pooled mode: pre-fork the children, re-create them on SIGCHLD */
+        PGRsignal(SIGCHLD,PGRrecreate_child);
+        if (PGRpre_fork_children(Cluster_Tbl) != STATUS_OK)
+        {
+            show_error("%s:PGRpre_fork_children failed",func);
+            pglb_exit(SIGTERM);
+        }
+    }
+
+    for (;;)
+    {
+        fd_set rmask;
+        struct timeval timeout;
+        int status;
+
+        timeout.tv_sec = 60;
+        timeout.tv_usec = 0;
+
+        /*
+         * Wait for something to happen.
+         */
+        FD_ZERO(&rmask);
+        FD_SET(Frontend_FD.unix_fd,&rmask);
+        if(Frontend_FD.inet_fd)
+            FD_SET(Frontend_FD.inet_fd,&rmask);
+        ready = select(Max(Frontend_FD.unix_fd, Frontend_FD.inet_fd) + 1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
+        if (ready <= 0)
+        {
+            /* timeout or select() error: just wait again */
+            continue;
+        }
+
+        if(PGRis_cluster_alive() == STATUS_ERROR)
+        {
+            show_error("%s:all clusters were dead.",func);
+            PGRreturn_no_connection_error();
+            failures = 0;
+            continue;
+        }
+
+        status = Use_Connection_Pool ? PGRload_balance_with_pool() : PGRload_balance();
+        if (status == STATUS_OK)
+        {
+            failures = 0;
+            continue;
+        }
+
+        show_error("%s:load balance process failed",func);
+        if ( failures > PGLB_CONNECT_RETRY_TIME)
+        {
+            /* too many failures in a row: report it and start counting anew */
+            show_error("%s:no cluster available",func);
+            PGRreturn_connection_full_error();
+            failures = 0;
+        }
+        failures ++;
+    }
+}
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *     daemonize()
+ * NOTES
+ *     Fork into the background, setsid(), send stdio to /dev/null
+ * ARGS
+ *     void
+ * RETURN
+ *     none
+ *--------------------------------------------------------------------
+ */
+static void
+daemonize(void)
+{
+    char * func = "daemonize()";
+    int i;
+    pid_t pid;
+
+    pid = fork();
+    if (pid == (pid_t) -1)
+    {
+        show_error("%s:fork() failed. (%s)",func, strerror(errno));
+        exit(1);
+    }
+    if (pid > 0)
+        exit(0);    /* parent */
+
+#ifdef HAVE_SETSID
+    if (setsid() < 0)
+    {
+        show_error("%s:setsid() failed. (%s)", func,strerror(errno));
+        exit(1);
+    }
+#endif
+
+    i = open("/dev/null", O_RDWR);
+    if (i < 0)
+        return;     /* nothing to redirect to: keep the inherited stdio */
+    dup2(i, 0);
+    dup2(i, 1);
+    dup2(i, 2);
+    if (i > 2)
+        close(i);   /* i <= 2 is one of the stdio descriptors itself */
+}
+
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *     write_pid_file()
+ * NOTES
+ *     Writes the process ID, as decimal text, to the pid file
+ *     PGR_Write_Path/PGLB_PID_FILE; exits with 1 when that fails.
+ * ARGS
+ *     void
+ * RETURN
+ *     none
+ *--------------------------------------------------------------------
+ */
+static void
+write_pid_file(void)
+{
+    char * func = "write_pid_file()";
+    char fname[256];
+    FILE *pid_fp;
+
+    snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGLB_PID_FILE);
+    pid_fp = fopen(fname, "w");
+    if (pid_fp == NULL)
+    {
+        show_error("%s:open() %s file failed. (%s)",
+                   func,fname, strerror(errno));
+        exit(1);
+    }
+    /* the pid goes out as decimal digits, without a trailing newline */
+    fprintf(pid_fp, "%d", getpid());
+    /* only the fclose() result is checked for this write */
+    if (fclose(pid_fp) != 0)
+    {
+        show_error("%s:fwrite() %s file failed. (%s)",
+                   func,fname, strerror(errno));
+        exit(1);
+    }
+}
+
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *     stop_pglb()
+ * NOTES
+ *     Stop the pglb process: SIGTERM to the pid read from the pid file
+ * ARGS
+ *     void
+ * RETURN
+ *     none
+ *--------------------------------------------------------------------
+ */
+static void
+stop_pglb(void)
+{
+    char * func = "stop_pglb()";
+    FILE *fd;
+    char fname[256];
+    char pidbuf[128] = "";      /* the whole buffer starts zero-filled */
+    pid_t pid;
+    if (PGR_Write_Path == NULL)
+        PGR_Write_Path = ".";
+    snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGLB_PID_FILE);
+    fd = fopen(fname, "r");
+    if (!fd)
+    {
+        show_error("%s:open() %s file failed. (%s)", func,fname, strerror(errno));
+        exit(1);
+    }
+    fread(pidbuf, 1, sizeof(pidbuf) - 1, fd);   /* last byte stays 0 for atoi() */
+    fclose(fd);
+    pid = atoi(pidbuf);
+    if (pid <= 0)   /* kill() would treat 0 or a negative pid as a process group */
+    {
+        show_error("%s:no valid pid in %s file",func,fname);
+        exit(1);
+    }
+    if (kill (pid,SIGTERM) == -1)
+    {
+        show_error("%s:could not stop pid: %d (%s)",func,pid,strerror(errno));
+        exit(1);
+    }
+}
+
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *     is_exist_pid_file()
+ * NOTES
+ *     Check existence of the pid file PGR_Write_Path/PGLB_PID_FILE.
+ *     The test is a single stat() call, so any stat() failure, not
+ *     only a missing file, is reported as "does not exist".
+ * ARGS
+ *     void
+ * RETURN
+ *     1: the pid file exists
+ *     0: the pid file does not exist
+ *--------------------------------------------------------------------
+ */
+static int
+is_exist_pid_file(void)
+{
+    char fname[256];
+    struct stat pid_stat;
+
+    snprintf(fname, sizeof(fname), "%s/%s", PGR_Write_Path, PGLB_PID_FILE);
+    if (stat(fname,&pid_stat) != 0)
+    {
+        /* pid file does not exist */
+        return 0;
+    }
+
+    /* pid file exists */
+    return 1;
+}
+
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *     PGRrecreate_child()
+ * NOTES
+ *     waits for each ended child and creates a child again in its place
+ * ARGS
+ *     int signal_args: signal number (expecting the SIGCHLD)
+ * RETURN
+ *     none
+ *--------------------------------------------------------------------
+ */
+void
+PGRrecreate_child(int signal_args)
+{
+    pid_t pid = 0;
+    int status;
+    ClusterTbl * cluster_p;
+    do
+    {
+        errno = 0;
+        for (;;)
+        {
+#ifdef HAVE_WAITPID
+            pid = waitpid(-1, &status, WNOHANG);
+#else
+            pid = wait3(&status, WNOHANG, NULL);
+#endif
+            if (pid <= 0)
+                break;
+            cluster_p = scan_cluster_by_pid(pid);
+            pid = PGRcreate_child(cluster_p);   /* the replacement child */
+        }
+    } while ((pid < 0) && (errno == EINTR));    /* interrupted: wait again */
+}
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *     close_child()
+ * NOTES
+ *     Hung up child process of the cluster flagged TBL_ERROR_NOTICE
+ * ARGS
+ *     int signal_args: signal number (expecting the SIGUSR2)
+ * RETURN
+ *     none
+ *--------------------------------------------------------------------
+ */
+static void
+close_child(int signal_args)
+{
+    char * func = "close_child()";
+    ChildTbl * child;
+    ClusterTbl * cluster;
+    int rec_no = -1;
+
+    if (( Cluster_Tbl == NULL) || (Child_Tbl == NULL))
+    {
+        show_error("%s:Cluster_Tbl or Child_Tbl is not initialize",func);
+        return ;
+    }
+    /* find the first cluster flagged TBL_ERROR_NOTICE, set it to TBL_ERROR */
+    for (cluster = Cluster_Tbl; cluster->useFlag != TBL_END; cluster++)
+    {
+        if (cluster->useFlag == TBL_ERROR_NOTICE)
+        {
+            rec_no = cluster->rec_no;
+            PGRset_status_on_cluster_tbl(TBL_ERROR,cluster);
+            break;
+        }
+    }
+    if (rec_no < 0)
+        return;
+    /* send SIGTERM to every child record that carries that rec_no */
+    for (child = Child_Tbl; child->useFlag != TBL_END; child++)
+    {
+        /*
+         * pid must be positive: the table starts zero-filled, and
+         * kill(0, ...) would signal the whole process group, pglb included.
+         */
+        if ((child->rec_no == rec_no) && (child->pid > 0))
+        {
+            if (kill (child->pid,SIGTERM) == -1)
+            {
+                show_error("%s:could not stop pid: %d (%s)",func,child->pid,strerror(errno));
+                return;
+            }
+            PGRchild_wait(signal_args);
+            child->useFlag = DATA_FREE;
+        }
+    }
+    PGRsignal(SIGUSR2, close_child);
+}
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *     scan_cluster_by_pid()
+ * NOTES
+ *     map a child process id to the cluster server record it serves
+ * ARGS
+ *     pid_t pid: child process id (I)
+ * RETURN
+ *     OK: pointer of cluster table
+ *     NG: NULL
+ *--------------------------------------------------------------------
+ */
+static ClusterTbl *
+scan_cluster_by_pid(pid_t pid)
+{
+	char * func = "scan_cluster_by_pid()";
+	ChildTbl * owner;
+	ClusterTbl * entry;
+	int visited;
+
+	if (Child_Tbl == NULL)
+	{
+		show_error("%s:Child Table is not initialize",func);
+		return NULL;
+	}
+	if (Cluster_Tbl == NULL)
+	{
+		show_error("%s:Cluster Table is not initialize",func);
+		return NULL;
+	}
+
+	/*
+	 * Step 1: walk the child table up to its TBL_END sentinel looking
+	 * for the entry that carries this pid.
+	 */
+	for (owner = Child_Tbl; owner->useFlag != TBL_END; owner++)
+	{
+		if (owner->pid == pid)
+			break;
+	}
+	if (owner->useFlag == TBL_END)
+	{
+		show_error("%s:pid:%d not found in child table",func,pid);
+		return NULL;
+	}
+
+	/*
+	 * Step 2: find the cluster record with the same rec_no, giving up
+	 * at the TBL_END sentinel or after ClusterNum records.
+	 */
+	entry = Cluster_Tbl;
+	for (visited = 0; (entry->useFlag != TBL_END) && (visited < ClusterNum); visited++)
+	{
+		if (entry->rec_no == owner->rec_no)
+			return entry;
+		entry++;
+	}
+	return NULL;
+}
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *     usage()
+ * NOTES
+ *     print the pglb command line help (all getopt options) to stderr
+ * ARGS
+ *     void
+ * RETURN
+ *     none
+ *--------------------------------------------------------------------
+ */
+static void
+usage(void)
+{
+	char * path = getenv("PGDATA");	/* shown as the default config directory */
+
+	if (path == NULL)
+		path = ".";
+	fprintf(stderr,"pglb version [%s]\nA load balancer for PostgreSQL\n\n",PGLB_VERSION);
+	fprintf(stderr,"usage: pglb [-D path_of_config_file] [-W path_of_work_files] [-U user_name] [-w fork_wait_time] [-l][-n][-v][-h][stop | restart]\n");
+	fprintf(stderr," config file default path: %s/%s\n",path, PGLB_CONF_FILE);
+	fprintf(stderr," -U: user name.\n");
+	fprintf(stderr," -w: fork wait time (a negative value is treated as 0).\n");
+	fprintf(stderr," -l: print error logs in the log file.\n");
+	fprintf(stderr," -n: don't run in daemon mode.\n");
+	fprintf(stderr," -v: debug mode. need '-n' flag\n");
+	fprintf(stderr," -h: print this help\n");
+	fprintf(stderr," stop: stop pglb\n");
+	fprintf(stderr," restart: restart pglb\n");
+}
+
+/*--------------------------------------------------------------------
+ * SYMBOL
+ *     main()
+ * NOTES
+ *     main module of pglb
+ * ARGS
+ *     int argc: number of parameter
+ *     char ** argv: value of parameter
+ * RETURN
+ *     STATUS_OK if load_balance_main() and pglb_exit() return
+ *--------------------------------------------------------------------
+ */
+int
+main(int argc, char ** argv)
+{
+	int opt = 0;
+	char * r_path = NULL;
+	char * w_path = NULL;
+	int detach = 1;
+
+	PGRsignal(SIGHUP, pglb_exit);
+	PGRsignal(SIGINT, pglb_exit);
+	PGRsignal(SIGQUIT, pglb_exit);
+	PGRsignal(SIGTERM, pglb_exit);
+	PGRsignal(SIGALRM, SIG_IGN); /* ignored */
+	PGRsignal(SIGPIPE, SIG_IGN); /* ignored */
+	PGRsignal(SIGTTIN, SIG_IGN); /* ignored */
+	PGRsignal(SIGTTOU, SIG_IGN); /* ignored */
+	PGRsignal(SIGCHLD,PGRchild_wait);
+	PGRsignal(SIGUSR1, SIG_IGN); /* ignored */
+	PGRsignal(SIGUSR2, close_child); /* close child process */
+	r_path = getenv("PGDATA");	/* default config directory, overridden by -D */
+	if (r_path == NULL)
+		r_path = ".";
+
+	while ((opt = getopt(argc, argv, "U:D:W:w:lvnh")) != -1)
+	{
+		switch (opt)
+		{
+			case 'U':
+				if (!optarg)
+				{
+					usage();
+					exit(1);
+				}
+				PGRuserName = strdup(optarg);
+				break;
+			case 'D':
+				if (!optarg)
+				{
+					usage();
+					exit(1);
+				}
+				r_path = optarg;
+				break;
+			case 'W':
+				if (!optarg)
+				{
+					usage();
+					exit(1);
+				}
+				w_path = optarg;
+				break;
+			case 'w':
+				fork_wait_time = atoi(optarg);
+				if (fork_wait_time < 0)	/* negative wait times are clamped to 0 */
+					fork_wait_time = 0;
+				break;
+			case 'l':
+				Log_Print = 1;
+				break;
+			case 'v':
+				Debug_Print = 1;
+				break;
+			case 'n':
+				detach = 0;
+				break;
+			case 'h':
+				usage();
+				exit(0);
+				break;
+			default:
+				usage();
+				exit(1);
+		}
+	}
+	PGR_Data_Path = r_path;
+	/*
+	 * Work files (e.g. the pid file) go to the -W directory when it was
+	 * given, otherwise to the same directory as the configuration file.
+	 */
+	PGR_Write_Path = (w_path != NULL) ? w_path : PGR_Data_Path;
+
+	/* "stop" ends here; "restart" stops the running pglb and then starts anew */
+	if (optind == (argc-1) &&
+		((!strcmp(argv[optind],"stop")) ||
+		 (!strcmp(argv[optind],"restart"))))
+	{
+		stop_pglb();
+		if (!strcmp(argv[optind],"stop"))
+		{
+			exit(0);
+		}
+	}
+	else if (optind == argc)
+	{
+		/* plain start: refuse to run while a pid file is present */
+		if (is_exist_pid_file())
+		{
+			fprintf(stderr,"pid file %s/%s found. is another pglb running?\n", PGR_Write_Path, PGLB_PID_FILE);
+			exit(1);
+		}
+	}
+	else if (optind < argc)
+	{
+		usage();
+		exit(1);
+	}
+
+	if (detach)
+	{
+		daemonize();
+	}
+	write_pid_file();
+
+	/* a failed initialization must be visible to the caller: exit non-zero */
+	if (init_pglb(PGR_Data_Path) != STATUS_OK)
+	{
+		exit(1);
+	}
+
+	/* call recovery process */
+	PGRrecovery_main(fork_wait_time);
+
+	/* call lifecheck process */
+	PGRlifecheck_main(fork_wait_time);
+
+	/* start loadbalance module */
+	load_balance_main();
+	pglb_exit(0);
+	return STATUS_OK;
+}
+
+void
+PGRexit_subprocess(int sig)
+{
+ pglb_exit(sig); /* thin wrapper: forwards sig unchanged to pglb_exit() */
+}
diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pglb.conf.sample pgcluster-1.7.0rc7/src/pgcluster/pglb/pglb.conf.sample
--- postgresql-8.2.4/src/pgcluster/pglb/pglb.conf.sample 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pglb.conf.sample 2007-02-18 22:52:17.000000000 +0100
@@ -0,0 +1,73 @@
+#============================================================
+# Load Balance Server configuration file
+#-------------------------------------------------------------
+# file: pglb.conf
+#-------------------------------------------------------------
+# This file controls:
+# o which hosts are db cluster server
+# o which port is used to connect to each db cluster server
+# o how many connections are allowed on each DB server
+#============================================================
+#-------------------------------------------------------------
+# set cluster DB server information
+# o Host_Name : Hostname of Cluster
+# Please write a host name by FQDN or IP address.
+# o Port : Connection port for postmaster
+# o Max_Connect : Maximum number of connections to postmaster
+#-------------------------------------------------------------
+#
+# master.pgcluster.org
+# 5432
+# 32
+#
+#
+# post2.pgcluster.org
+# 5432
+# 32
+#
+#
+# post3.pgcluster.org
+# 5432
+# 32
+#
+#-------------------------------------------------------------
+# set Load Balance server information
+# o Host_Name : The host name of this load balance server
+# Please write a host name by FQDN or IP address.
+# o Backend_Socket_Dir : Unix domain socket path for the backend
+# o Receive_Port : Connection port from client
+# o Recovery_Port : Connection port for recovery process
+# o Max_Cluster_Num : Maximum number of cluster DB servers
+# o Use_Connection_Pooling : Use connection pool [yes/no]
+# o Lifecheck_Timeout : Timeout of the lifecheck response
+# o Lifecheck_Interval : Interval time of the lifecheck
+# (range 1s - 1h)
+# 10s -- 10 seconds
+# 10min -- 10 minutes
+# 1h -- 1 hour
+#-------------------------------------------------------------
+ loadbalancer.pgcluster.org
+ /tmp
+ 5432
+ 6001
+ 128
+ no
+ 3s
+ 15s
+#-------------------------------------------------------------
+# Log file settings
+#
+# o File_Name : Log file name with full path
+# o File_Size : Maximum size of each log files
+# Please specify in a number and unit(K or M)
+# 10 -- 10 Byte
+# 10K -- 10 KByte
+# 10M -- 10 MByte
+# o Rotate : Rotation times
+# If specified 0, old versions are removed.
+#-------------------------------------------------------------
+
+ /tmp/pglb.log
+ 1M
+ 3
+
diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pglb.h pgcluster-1.7.0rc7/src/pgcluster/pglb/pglb.h
--- postgresql-8.2.4/src/pgcluster/pglb/pglb.h 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pglb.h 2007-03-01 16:27:49.000000000 +0100
@@ -0,0 +1,472 @@
+/*--------------------------------------------------------------------
+ * FILE:
+ * pglb.h
+ *
+ * Portions Copyright (c) 2003-2006 Atsushi Mitani
+ *--------------------------------------------------------------------
+ */
+#ifndef PGLB_H
+#define PGLB_H
+
+#define PGLB_VERSION "1.7.0rc7"
+
+#include "../libpgc/libpgc.h"
+
+/*
+ * from pool.h
+ */
+
+/*
+ * define this if you do not want to issue RESET ALL at each new
+ * connection. Also you need to define this for 7.1 or prior
+ * PostgreSQL since they do not support RESET ALL
+ */
+#undef NO_RESET_ALL
+
+/* undef this if you have problems with non blocking accept() */
+#define NONE_BLOCK
+
+#define POOLMAXPATHLEN 8192
+
+/* configuration file name */
+#define POOL_CONF_FILE_NAME "pgpool.conf"
+
+/* pid file directory */
+#define DEFAULT_LOGDIR "/tmp"
+
+/* Unix domain socket directory */
+#define DEFAULT_SOCKET_DIR "/tmp"
+
+/* pid file name */
+#define PID_FILE_NAME "pgpool.pid"
+
+/* strict mode comment in SQL */
+#define STRICT_MODE_STR "/*STRICT*/"
+#define STRICT_MODE(s) (strncasecmp((s), STRICT_MODE_STR, strlen(STRICT_MODE_STR)) == 0)
+
+typedef enum {
+ POOL_CONTINUE = 0,
+ POOL_IDLE,
+ POOL_END,
+ POOL_ERROR,
+ POOL_FATAL
+} POOL_STATUS;
+
+/* protocol major version numbers */
+#define PROTO_MAJOR_V2 2
+#define PROTO_MAJOR_V3 3
+
+/*
+ * startup packet definitions (v2) stolen from PostgreSQL
+ */
+#define SM_DATABASE 64
+#define SM_USER 32
+#define SM_OPTIONS 64
+#define SM_UNUSED 64
+#define SM_TTY 64
+
+typedef struct PGR_StartupPacket_v2
+{
+ int protoVersion; /* Protocol version */
+ char database[SM_DATABASE]; /* Database name */
+ char user[SM_USER]; /* User name */
+ char options[SM_OPTIONS]; /* Optional additional args */
+ char unused[SM_UNUSED]; /* Unused */
+ char tty[SM_TTY]; /* Tty for debug output */
+} PGR_StartupPacket_v2;
+
+/* startup packet info */
+typedef struct
+{
+ char *startup_packet; /* raw startup packet without packet length (malloced area) */
+ int len; /* raw startup packet length */
+ int major; /* protocol major version */
+ int minor; /* protocol minor version */
+ char *database; /* database name in startup_packet (malloced area) */
+ char *user; /* user name in startup_packet (malloced area) */
+} PGR_StartupPacket;
+
+typedef struct CancelPacket
+{
+ int protoVersion; /* Protocol version */
+ int pid; /* backend process id */
+ int key; /* cancel key */
+} CancelPacket;
+
+/*
+ * configuration parameters
+ */
+typedef struct {
+ int inetdomain; /* should we make an INET domain socket too? */
+ int port; /* port # to bind */
+ char *socket_dir; /* pgpool socket directory */
+ char *backend_host_name; /* backend host name */
+ int backend_port; /* backend port # */
+ char *secondary_backend_host_name; /* secondary backend host name */
+ int secondary_backend_port; /* secondary backend port # */
+ int num_init_children; /* # of children initially pre-forked */
+ int child_life_time; /* if idle for this seconds, child exits */
+ int connection_life_time; /* if idle for this seconds, connection closes */
+ int max_pool; /* max # of connection pool per child */
+ char *logdir; /* logging directory */
+ char *backend_socket_dir; /* Unix domain socket directory for the PostgreSQL server */
+ int replication_mode; /* replication mode */
+ int replication_strict; /* if non 0, wait for completion of the
+ query sent to master to avoid deadlock */
+ /*
+ * if secondary does not respond in this many milliseconds, abort this session.
+ * this is not compatible with replication_strict = 1. 0 means no timeout.
+ */
+ int replication_timeout;
+
+ int load_balance_mode; /* load balance mode */
+
+ /* the following do not exist in the configuration file */
+ char *current_backend_host_name; /* current backend host name */
+ int current_backend_port; /* current backend port # */
+ int replication_enabled; /* replication mode enabled */
+
+ int replication_stop_on_mismatch; /* if there's a data mismatch between master and secondary
+ * start degeneration to stop replication mode
+ */
+} POOL_CONFIG;
+
+#define MAX_PASSWORD_SIZE (1024)
+
+typedef struct {
+ int num; /* number of entries */
+ char **names; /* parameter names */
+ char **values; /* values */
+} ParamStatus;
+
+/*
+ * stream connection structure
+ */
+typedef struct {
+ int fd; /* fd for connection */
+ FILE *write_fd; /* stream write connection */
+
+ char *hp; /* pending data buffer head address */
+ int po; /* pending data offset */
+ int bufsz; /* pending data buffer size */
+ int len; /* pending data length */
+
+ char *sbuf; /* buffer for pool_read_string */
+ int sbufsz; /* its size in bytes */
+
+ char *buf2; /* buffer for pool_read2 */
+ int bufsz2; /* its size in bytes */
+
+ int isbackend; /* this connection is for backend if non 0 */
+ int issecondary_backend; /* this connection is for secondary backend if non 0 */
+
+ char tstate; /* transaction state (V3 only) */
+
+ /*
+ * the following are remembered so that an authenticated connection can be re-used
+ */
+ int auth_kind; /* 3: clear text password, 4: crypt password, 5: md5 password */
+ int pwd_size; /* password (sent back from frontend) size in host order */
+ char password[MAX_PASSWORD_SIZE]; /* password (sent back from frontend) */
+ char salt[4]; /* password salt */
+
+ /*
+ * the following are used to remember the current session parameter status.
+ * a re-used connection will need them (V3 only)
+ */
+ ParamStatus params;
+
+ int no_forward; /* if non 0, do not write to frontend */
+
+} POOL_CONNECTION;
+
+/*
+ * connection pool structure
+ */
+typedef struct {
+ PGR_StartupPacket *sp; /* startup packet info */
+ int pid; /* backend pid */
+ int key; /* cancel key */
+ POOL_CONNECTION *con;
+ time_t closetime; /* absolute time in second when the connection closed
+ * if 0, that means the connection is under use.
+ */
+} POOL_CONNECTION_POOL_SLOT;
+
+#define MAX_CONNECTION_SLOTS 2
+
+typedef struct {
+ int num; /* number of slots */
+ POOL_CONNECTION_POOL_SLOT *slots[MAX_CONNECTION_SLOTS];
+} POOL_CONNECTION_POOL;
+
+#define MASTER_CONNECTION(p) ((p)->slots[0])
+#define SECONDARY_CONNECTION(p) ((p)->slots[1])
+#define MASTER(p) MASTER_CONNECTION(p)->con
+#define SECONDARY(p) SECONDARY_CONNECTION(p)->con
+#define MAJOR(p) MASTER_CONNECTION(p)->sp->major
+#define TSTATE(p) MASTER(p)->tstate
+
+#define Max(x, y) ((x) > (y) ? (x) : (y))
+#define Min(x, y) ((x) < (y) ? (x) : (y))
+
+/*
+ * pglb
+ */
+
+typedef struct {
+ int useFlag;
+ int sock;
+}SocketTbl;
+
+typedef struct {
+ int useFlag; /* entry state, one of the TBL_* values; TBL_END terminates the table */
+ char hostName[HOSTNAME_MAX_LENGTH];
+ unsigned short port;
+ short max_connect;
+ int use_num;
+ int rate;
+ int rec_no; /* record number; ChildTbl.rec_no is matched against it */
+ int retry_count;
+}ClusterTbl;
+
+typedef struct {
+ long mtype;
+ char mdata[1];
+}MsgData;
+
+typedef struct {
+ int useFlag; /* entry state; TBL_END terminates the table */
+ int rec_no; /* rec_no of the ClusterTbl entry this child belongs to */
+ pid_t pid; /* process id of the child */
+}ChildTbl;
+
+#define UNIX_DOMAIN_FD (0)
+#define INET_DOMAIN_FD (1)
+typedef struct {
+ int unix_fd;
+ int inet_fd;
+}FrontSocket;
+
+#define pool_config_inetdomain (0)
+#define pool_config_replication_mode (0)
+#define pool_config_replication_strict (0)
+#define pool_config_replication_timeout (0)
+#define pool_config_replication_enabled (0)
+#define pool_config_load_balance_mode (0)
+#define pool_config_replication_stop_on_mismatch (0)
+#define pool_config_port (Recv_Port_Number)
+#define pool_config_socket_dir (Backend_Socket_Dir)
+#define pool_config_backend_host_name (CurrentCluster->hostName)
+#define pool_config_backend_port (CurrentCluster->port)
+#define pool_config_secondary_backend_host_name (CurrentCluster->hostName)
+#define pool_config_secondary_backend_port (CurrentCluster->port)
+#define pool_config_num_init_children (CurrentCluster->max_connect)
+#define pool_config_child_life_time (Connection_Life_Time)
+#define pool_config_connection_life_time (Connection_Life_Time)
+#define pool_config_max_pool (Max_Pool)
+#define pool_config_logdir "./"
+#define pool_config_backend_socket_dir (Backend_Socket_Dir)
+#define pool_config_current_backend_host_name (CurrentCluster->hostName)
+#define pool_config_current_backend_port (CurrentCluster->port)
+#define REPLICATION (0)
+#define IN_LOAD_BALANCE (0)
+
+/*
+ * for pglb
+ */
+#define MAX_DB_SERVER (32)
+#define PGLB_MAX_SOCKET_QUEUE (10000)
+#define CLUSTER_TBL_SHM_KEY (1010)
+#define PGLB_CONNECT_RETRY_TIME (3)
+#define DEFAULT_CONNECT_NUM (32)
+#define DEFAULT_PORT (5432)
+#define BUF_SIZE (16384)
+#define TBL_FREE (0)
+#define TBL_INIT (1)
+#define TBL_USE (2)
+#define TBL_STOP (3)
+#define TBL_ACCEPT (10)
+#define TBL_ERROR_NOTICE (98)
+#define TBL_ERROR (99)
+#define TBL_END (-1)
+#define STATUS_OK (0)
+#define STATUS_ERROR (-1)
+#ifdef RECOVERY_PREPARE_REQ
+#define ADD_DB RECOVERY_PREPARE_REQ
+#else
+#define ADD_DB (1)
+#endif
+#ifdef RECOVERY_PGDATA_ANS
+#define STOP_DB RECOVERY_PGDATA_ANS
+#else
+#define STOP_DB (3)
+#endif
+#ifdef RECOVERY_FINISH
+#define START_DB RECOVERY_FINISH
+#else
+#define START_DB (9)
+#endif
+#define DELETE_DB (99)
+#define QUERY_TERMINATE (0x00)
+#define RESPONSE_TERMINATE (0x5a)
+#define PGLB_CONF_FILE "pglb.conf"
+#define PGLB_PID_FILE "pglb.pid"
+#define PGLB_STATUS_FILE "pglb.sts"
+#define PGLB_LOG_FILE "pglb.log"
+#define CLUSTER_SERVER_TAG "Cluster_Server_Info"
+#define MAX_CONNECT_TAG "Max_Connect"
+#define RECOVERY_PORT_TAG "Recovery_Port"
+#define RECV_PORT_TAG "Receive_Port"
+#define MAX_CLUSTER_TAG "Max_Cluster_Num"
+#define USE_CONNECTION_POOL_TAG "Use_Connection_Pooling"
+#define MAX_POOL_TAG "Max_Pool_Each_Server"
+#define BACKEND_SOCKET_DIR_TAG "Backend_Socket_Dir"
+#define CONNECTION_LIFE_TIME "Connection_Life_Time"
+#define NOT_USE_CONNECTION_POOL (0)
+#define USE_CONNECTION_POOL (1)
+
+#define PGR_SEND_RETRY_CNT (100)
+#define PGR_SEND_WAIT_MSEC (500)
+#define PGR_RECV_RETRY_CNT (100)
+#define PGR_RECV_WAIT_MSEC (500)
+
+extern int Recv_Port_Number;
+extern int Recovery_Port_Number;
+extern uint16_t LifeCheck_Port_Number;
+extern int Use_Connection_Pool;
+extern int Max_Pool;
+extern int Connection_Life_Time;
+extern int Msg_Id;
+extern ClusterTbl * Cluster_Tbl;
+extern int Max_DB_Server;
+extern int MaxBackends;
+extern char * Backend_Socket_Dir;
+extern int ClusterShmid;
+extern int ClusterSemid;
+extern int ChildShmid;
+extern int ClusterNum;
+extern ChildTbl * Child_Tbl;
+extern char * PGR_Data_Path;
+extern char * PGR_Write_Path;
+extern char * Backend_Socket_Dir;
+extern FrontSocket Frontend_FD;
+extern FILE * StatusFp;
+extern char * ResolvedName;
+extern char * PGRuserName;
+
+/* for child.c */
+extern POOL_CONNECTION * Frontend;
+extern ClusterTbl * CurrentCluster;
+
+extern char * Function;
+
+extern POOL_CONNECTION_POOL *pool_connection_pool; /* connection pool */
+
+/* extern of main.c */
+extern void PGRrecreate_child(int signal_args);
+extern void PGRexit_subprocess(int sig);
+
+/* extern of child.c */
+extern int PGRpre_fork_children(ClusterTbl * ptr);
+extern int PGRpre_fork_child(ClusterTbl * ptr);
+extern int PGRdo_child( int use_pool);
+extern int PGRcreate_child(ClusterTbl * cluster_p);
+extern pid_t PGRscan_child_tbl(ClusterTbl * cluster_p);
+extern void notice_backend_error(void);
+extern void do_pooling_child(int sig);
+extern int PGRset_status_to_child_tbl(pid_t pid, int status);
+extern int PGRadd_child_tbl(ClusterTbl * cluster_p, pid_t pid, int status);
+extern int PGRget_child_status(pid_t pid);
+extern void PGRreturn_connection_full_error(void);
+extern void PGRreturn_no_connection_error(void);
+extern void PGRquit_children_on_cluster(int rec_no);
+
+/* extern of cluster_table.c */
+extern int PGRis_cluster_alive(void) ;
+extern ClusterTbl * PGRscan_cluster(void);
+extern void PGRset_key_of_cluster(ClusterTbl * ptr, RecoveryPacket * packet);
+extern ClusterTbl * PGRadd_cluster_tbl (ClusterTbl * conf_data);
+extern ClusterTbl * PGRset_status_on_cluster_tbl (int status, ClusterTbl * ptr);
+extern ClusterTbl * PGRsearch_cluster_tbl(ClusterTbl * conf_data);
+
+/* extern of load_balance.c */
+extern int PGRload_balance(void);
+extern int PGRload_balance_with_pool(void);
+extern char PGRis_connection_full(ClusterTbl * ptr);
+extern void PGRuse_connection(ClusterTbl * ptr);
+extern void PGRrelease_connection(ClusterTbl * ptr);
+extern void PGRchild_wait(int sig);
+
+/* extern of recovery.c (parameter spelled as in PGRlifecheck_main's prototype) */
+extern void PGRrecovery_main(int fork_wait_time);
+
+/* extern of socket.c */
+extern int PGRcreate_unix_domain_socket(char * sock_dir, unsigned short port);
+extern int PGRcreate_recv_socket(char * hostName , unsigned short portNumber);
+extern int PGRcreate_acception(int fd, char * hostName , unsigned short portNumber);
+extern void PGRclose_sock(int * sock);
+extern int PGRread_byte(int sock,char * buf,int len, int flag);
+extern int PGRcreate_cluster_socket( int * sock, ClusterTbl * ptr );
+
+/* extern of pool_auth.c */
+extern int pool_do_auth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp);
+extern int pool_do_reauth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp);
+extern int pool_read_message_length(POOL_CONNECTION_POOL *cp);
+extern signed char pool_read_kind(POOL_CONNECTION_POOL *cp);
+
+/* extern of pool_connection_pool.c */
+extern int pool_init_cp(void);
+extern POOL_CONNECTION_POOL *pool_get_cp(char *user, char *database, int protoMajor);
+extern void pool_discard_cp(char *user, char *database, int protoMajor);
+extern POOL_CONNECTION_POOL *pool_create_cp(void);
+extern void pool_connection_pool_timer(POOL_CONNECTION_POOL *backend);
+extern void pool_backend_timer_handler(int sig);
+extern int connect_inet_domain_socket(int secondary_backend);
+extern int connect_unix_domain_socket(int secondary_backend);
+extern char PGRis_same_host(char * host1, char * host2);
+extern void pool_finish(void);
+
+/* extern of pool_process_query.c */
+extern POOL_STATUS pool_process_query(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend, int connection_reuse);
+extern POOL_STATUS ErrorResponse(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
+extern void pool_enable_timeout(void); /* (void): a real prototype, so stray arguments are rejected */
+extern void pool_disable_timeout(void); /* (void): a real prototype, so stray arguments are rejected */
+extern int pool_check_fd(POOL_CONNECTION *cp, int notimeout);
+extern void pool_send_frontend_exits(POOL_CONNECTION_POOL *backend);
+extern POOL_STATUS SimpleForwardToFrontend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
+extern POOL_STATUS SimpleForwardToBackend(char kind, POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
+extern POOL_STATUS ParameterStatus(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *backend);
+extern void pool_send_error_message(POOL_CONNECTION *frontend, int protoMajor, char *code, char *message, char *detail, char *hint, char *file, int line);
+
+/* extern of pool_params.c */
+extern int pool_init_params(ParamStatus *params);
+extern void pool_discard_params(ParamStatus *params);
+extern char *pool_find_name(ParamStatus *params, char *name, int *pos);
+extern int pool_get_param(ParamStatus *params, int index, char **name, char **value);
+extern int pool_add_param(ParamStatus *params, char *name, char *value);
+extern void pool_param_debug_print(ParamStatus *params);
+
+/* extern of pool_stream.c */
+extern POOL_CONNECTION *pool_open(int fd);
+extern void pool_close(POOL_CONNECTION *cp);
+extern int pool_read(POOL_CONNECTION *cp, void *buf, int len);
+extern char *pool_read2(POOL_CONNECTION *cp, int len);
+extern int pool_write(POOL_CONNECTION *cp, void *buf, int len);
+extern int pool_flush(POOL_CONNECTION *cp);
+extern int pool_write_and_flush(POOL_CONNECTION *cp, void *buf, int len);
+extern char *pool_read_string(POOL_CONNECTION *cp, int *len, int line);
+
+/*
+ * external prototype in show.c
+ */
+extern void show_error(const char * fmt,...);
+extern void show_debug(const char * fmt,...);
+extern void PGRwrite_log_file(FILE * fp, const char * fmt,...);
+
+/*
+ * external prototype in lifecheck.c
+ */
+extern int PGRlifecheck_main(int fork_wait_time);
+
+#endif /* PGLB_H */
diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pool_auth.c pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_auth.c
--- postgresql-8.2.4/src/pgcluster/pglb/pool_auth.c 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_auth.c 2007-02-18 22:52:17.000000000 +0100
@@ -0,0 +1,959 @@
+/*--------------------------------------------------------------------
+ * FILE:
+ * pool_auth.c
+ *
+ * NOTE:
+ * authentication stuff
+ *
+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
+ * Portions Copyright (c) 2003-2006, Tatsuo Ishii
+ *--------------------------------------------------------------------
+ */
+/*
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that copyright notice and this permission
+ * notice appear in supporting documentation, and that the name of the
+ * author not be used in advertising or publicity pertaining to
+ * distribution of the software without specific, written prior
+ * permission. The author makes no representations about the
+ * suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+*/
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "replicate_com.h"
+#include "pglb.h"
+
+int pool_do_auth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp);
+int pool_do_reauth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp);
+int pool_read_message_length(POOL_CONNECTION_POOL *cp);
+signed char pool_read_kind(POOL_CONNECTION_POOL *cp);
+
+static POOL_STATUS pool_send_auth_ok(POOL_CONNECTION *frontend, int pid, int key, int protoMajor);
+static int do_clear_text_password(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor);
+static int do_crypt(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor);
+static int do_md5(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor);
+
+/*
+* do authentication against backend. returns 0 on success, -1 on any failure.
+*/
+int pool_do_auth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp)
+{
+	char * func = "pool_do_auth()";
+	int status;
+	signed char kind;
+	int pid, pid1;
+	int key, key1;
+	int protoMajor;
+	int length;
+
+	protoMajor = MAJOR(cp);
+
+	kind = pool_read_kind(cp);
+	if (kind < 0)
+	{
+		return -1;
+	}
+
+	/* error response? */
+	if (kind == 'E')
+	{
+		/* we assume error response at this stage is likely version
+		 * protocol mismatch (v3 frontend vs. v2 backend). So we throw
+		 * a V2 protocol error response in the hope that v3 frontend
+		 * will negotiate again using v2 protocol.
+		 */
+		show_error("%s:pool_do_auth: maybe protocol version mismatch (current version %d)",func, protoMajor);
+		ErrorResponse(frontend, cp);
+		return -1;
+	}
+	else if (kind != 'R')
+	{
+		show_error("%s:pool_do_auth: expect \"R\" got %c",func, kind);
+		return -1;
+	}
+
+	/*
+	 * message length (v3 only) */
+	if (protoMajor == PROTO_MAJOR_V3 && pool_read_message_length(cp) < 0)
+	{
+		return -1;
+	}
+
+	/*
+	 * read authentication request kind.
+	 *
+	 * 0: authentication ok
+	 * 1: kerberos v4
+	 * 2: kerberos v5
+	 * 3: clear text password
+	 * 4: crypt password
+	 * 5: md5 password
+	 * 6: scm credential
+	 *
+	 * in replication mode, we only supports kind = 0, 3. this is because to "salt"
+	 * cannot be replicated among master and secondary.
+	 * in non replication mode, we supports kind = 0, 3, 4, 5
+	 */
+
+	status = pool_read(MASTER(cp), &pid, sizeof(pid));	/* "pid" holds the request kind for now */
+	if (status < 0)
+	{
+		show_error("%s:pool_do_auth: read authentication kind failed",func);
+		return -1;
+	}
+
+	if (REPLICATION)
+	{
+		status = pool_read(SECONDARY(cp), &pid1, sizeof(pid1));
+
+		if (status < 0)
+		{
+			show_error("%s:pool_do_auth: read authentication kind from secondary failed",func);
+			return -1;
+		}
+	}
+
+	pid = ntohl(pid);
+
+	/* trust? */
+	if (pid == 0)
+	{
+		if (protoMajor == PROTO_MAJOR_V3)
+		{
+			int msglen;
+
+			pool_write(frontend, "R", 1);
+			msglen = htonl(8);
+			pool_write(frontend, &msglen, sizeof(msglen));
+			msglen = htonl(0);
+			if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
+			{
+				return -1;
+			}
+		}
+		MASTER(cp)->auth_kind = 0;
+	}
+
+	/* clear text password authentication? */
+	else if (pid == 3)
+	{
+#ifdef PRINT_DEBUG
+		show_debug("%s:trying clear text password authentication",func);
+#endif
+
+		pid = do_clear_text_password(MASTER(cp), frontend, 0, protoMajor);
+
+		if (pid >= 0 && REPLICATION)
+		{
+			pid = do_clear_text_password(SECONDARY(cp), frontend, 0, protoMajor);
+		}
+	}
+
+	/* crypt authentication? */
+	else if (pid == 4)
+	{
+#ifdef PRINT_DEBUG
+		show_debug("%s:trying crypt authentication",func);
+#endif
+
+		pid = do_crypt(MASTER(cp), frontend, 0, protoMajor);
+
+		if (pid >= 0 && REPLICATION)
+		{
+			pid = do_crypt(SECONDARY(cp), frontend, 0, protoMajor);
+		}
+	}
+
+	/* md5 authentication? */
+	else if (pid == 5)
+	{
+#ifdef PRINT_DEBUG
+		show_debug("%s:trying md5 authentication",func);
+#endif
+
+		pid = do_md5(MASTER(cp), frontend, 0, protoMajor);
+
+		if (pid >= 0 && REPLICATION)
+		{
+			pid = do_md5(SECONDARY(cp), frontend, 0, protoMajor);
+		}
+	}
+
+	if (pid != 0)	/* also reached for request kinds not handled above (1, 2, 6, ...) */
+	{
+		show_error("%s:pool_do_auth: backend does not return authenticaton ok",func);
+		return -1;
+	}
+
+	/*
+	 * authentication ok. now read pid and secret key from the
+	 * backend
+	 */
+	kind = pool_read_kind(cp);
+	if (kind < 0)
+	{
+		return -1;
+	}
+
+	/* error response? */
+	if (kind == 'E')
+	{
+		if (protoMajor == PROTO_MAJOR_V2)
+			ErrorResponse(frontend, cp);
+		else
+			SimpleForwardToFrontend(kind, frontend, cp);
+		return -1;
+	}
+	else if (kind != 'K')
+	{
+		if (protoMajor == PROTO_MAJOR_V3)
+		{
+			/* process parameter status */
+			while (kind == 'S')
+			{
+				if (ParameterStatus(frontend, cp) != POOL_CONTINUE)
+					return -1;
+
+				pool_flush(frontend);
+
+				kind = pool_read_kind(cp);
+				if (kind < 0)
+				{
+					show_error("%s:pool_do_auth: failed to read kind while processing ParamterStatus",func);
+					return -1;
+				}
+			}
+		}
+		else
+		{
+			show_error("%s:pool_do_auth: expect \"K\" got %c",func, kind);
+			return -1;
+		}
+	}
+
+	/*
+	 * message length (V3 only)
+	 */
+	if (protoMajor == PROTO_MAJOR_V3 && (length = pool_read_message_length(cp)) != 12)
+	{
+		show_error("%s:pool_do_auth: invalid messages length(%d) for BackendKeyData",func, length);
+		return -1;
+	}
+
+	/*
+	 * OK, read pid and secret key (stored and forwarded without byte swapping)
+	 */
+	if (pool_read(MASTER(cp), &pid, sizeof(pid)) < 0 ||
+		pool_read(MASTER(cp), &key, sizeof(key)) < 0)
+	{
+		show_error("%s:pool_do_auth: failed to read pid and key from backend",func);
+		return -1;
+	}
+	MASTER_CONNECTION(cp)->pid = pid;
+	MASTER_CONNECTION(cp)->key = key;
+
+	if (REPLICATION)
+	{
+		/* the secondary reports its own pid/key: store those, not the master's */
+		if (pool_read(SECONDARY(cp), &pid1, sizeof(pid1)) < 0 ||
+			pool_read(SECONDARY(cp), &key1, sizeof(key1)) < 0)
+			return -1;
+		SECONDARY_CONNECTION(cp)->pid = pid1;
+		SECONDARY_CONNECTION(cp)->key = key1;
+	}
+
+	return (pool_send_auth_ok(frontend, pid, key, protoMajor));
+}
+
+/*
+* re-authenticate the frontend on a reused connection. returns 0 on success, non 0 on failure.
+*/
+int pool_do_reauth(POOL_CONNECTION *frontend, POOL_CONNECTION_POOL *cp)
+{
+	char * func = "pool_do_reauth()";
+	int protoMajor = MAJOR(cp);
+	int auth_result;
+	int zero_body;
+	int body_len;
+
+	/* replay the authentication method remembered on the master connection */
+	switch(MASTER(cp)->auth_kind)
+	{
+		case 0:
+			/* trust */
+			auth_result = 0;
+			break;
+
+		case 3:
+			/* clear text password */
+			auth_result = do_clear_text_password(MASTER(cp), frontend, 1, protoMajor);
+			break;
+
+		case 4:
+			/* crypt password */
+			auth_result = do_crypt(MASTER(cp), frontend, 1, protoMajor);
+			break;
+
+		case 5:
+			/* md5 password */
+			auth_result = do_md5(MASTER(cp), frontend, 1, protoMajor);
+			break;
+
+		default:
+			show_error("%s: unknown authentication request code %d",
+					   func,MASTER(cp)->auth_kind);
+			return -1;
+	}
+
+	/* guard clause: any non-zero result is an authentication failure */
+	if (auth_result != 0)
+	{
+#ifdef PRINT_DEBUG
+		show_debug("%s: authentication failed",func);
+#endif
+		return -1;
+	}
+
+	if (protoMajor == PROTO_MAJOR_V3)
+	{
+		/* V3 only: send 'R', message length 8, request code 0 */
+		body_len = htonl(8);
+		zero_body = htonl(0);
+		pool_write(frontend, "R", 1);
+		pool_write(frontend, &body_len, sizeof(body_len));
+		if (pool_write_and_flush(frontend, &zero_body, sizeof(zero_body)) < 0)
+		{
+			return -1;
+		}
+	}
+
+	/* finally hand the stored backend pid and cancel key to the frontend */
+	return (pool_send_auth_ok(frontend, MASTER_CONNECTION(cp)->pid, MASTER_CONNECTION(cp)->key, protoMajor) != POOL_CONTINUE);
+}
+
+/*
+ * Tell the frontend that authentication succeeded and pass on the backend
+ * key data (pid and secret key).  Returns 0 on success, -1 when flushing
+ * to the frontend fails.
+ */
+static POOL_STATUS pool_send_auth_ok(POOL_CONNECTION *frontend, int pid, int key, int protoMajor)
+{
+	int netval;
+
+	if (protoMajor == PROTO_MAJOR_V2)
+	{
+		/* V2 only: "Authentication OK" is 'R' followed by a zero auth code */
+		netval = htonl(0);
+		pool_write(frontend, "R", 1);
+		if (pool_write_and_flush(frontend, &netval, sizeof(netval)) < 0)
+		{
+			return -1;
+		}
+	}
+
+	/* backend key data: 'K', [V3 only: length 12], pid, key */
+	pool_write(frontend, "K", 1);
+	if (protoMajor == PROTO_MAJOR_V3)
+	{
+		netval = htonl(12);
+		pool_write(frontend, &netval, sizeof(netval));
+	}
+	pool_write(frontend, &pid, sizeof(pid));
+	/* pid and key are written exactly as passed in, no byte swapping here */
+	if (pool_write_and_flush(frontend, &key, sizeof(key)) < 0)
+	{
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Perform clear text password authentication for one backend connection.
+ * When reauth is set, the frontend's password is only compared with the copy saved on the backend.
+ * Returns 0 on success, non 0 otherwise (-1 on errors, else the auth code read from the backend). */
+static int do_clear_text_password(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor)
+{
+ char * func = "do_clear_text_password()";
+ static int size; /* password packet length exactly as read from the frontend; always used through ntohl() */
+ static char password[MAX_PASSWORD_SIZE]; /* static: a call for a secondary backend reads nothing and reuses what an earlier call stored */
+ char response;
+ int kind;
+ int len;
+
+ /* master? only then talk to the frontend; for a secondary backend this whole block is skipped */
+ if (!backend->issecondary_backend)
+ {
+ pool_write(frontend, "R", 1); /* authentication request */
+ if (protoMajor == PROTO_MAJOR_V3)
+ {
+ len = htonl(8);
+ pool_write(frontend, &len, sizeof(len));
+ }
+ kind = htonl(3); /* clear text password authentication */
+ pool_write_and_flush(frontend, &kind, sizeof(kind)); /* indicating clear text password authentication */
+
+ /* read password packet */
+ if (protoMajor == PROTO_MAJOR_V2)
+ {
+ if (pool_read(frontend, &size, sizeof(size)))
+ {
+ show_error("%s: failed to read password packet size",func);
+ return -1;
+ }
+ }
+ else
+ {
+ char k;
+
+ if (pool_read(frontend, &k, sizeof(k)))
+ {
+ show_error("%s: failed to read password packet \"p\"",func);
+ return -1;
+ }
+ if (k != 'p')
+ {
+ show_error("%s:packet does not start with \"p\"",func);
+ return -1;
+ }
+ if (pool_read(frontend, &size, sizeof(size)))
+ {
+ show_error("%s: failed to read password packet size",func);
+ return -1;
+ }
+ }
+ /* the password payload is the packet length minus the 4 bytes of the length word */
+ if ((ntohl(size) - 4) > sizeof(password))
+ {
+ show_error("%s: password is too long (size: %d)",func, ntohl(size) - 4);
+ return -1;
+ }
+
+ if (pool_read(frontend, password, ntohl(size) - 4))
+ {
+ show_error("%s: failed to read password (size: %d)",func, ntohl(size) - 4);
+ return -1;
+ }
+ }
+
+ /* connection reusing? then compare with the saved password and return without contacting the backend */
+ if (reauth)
+ {
+ if ((ntohl(size) - 4) != backend->pwd_size)
+ {
+#ifdef PRINT_DEBUG
+ show_debug("%s; password size does not match in re-authetication",func);
+#endif
+ return -1;
+ }
+
+ if (memcmp(password, backend->password, backend->pwd_size) != 0)
+ {
+#ifdef PRINT_DEBUG
+ show_debug("%s; password does not match in re-authetication",func);
+#endif
+ return -1;
+ }
+
+ return 0;
+ }
+
+ /* send password packet to backend (the length word is forwarded unchanged) */
+ if (protoMajor == PROTO_MAJOR_V3)
+ pool_write(backend, "p", 1);
+ pool_write(backend, &size, sizeof(size));
+ pool_write_and_flush(backend, password, ntohl(size) -4);
+ if (pool_read(backend, &response, sizeof(response)))
+ {
+ show_error("%s: failed to read authentication response",func);
+ return -1;
+ }
+
+ if (response != 'R')
+ {
+#ifdef PRINT_DEBUG
+ show_debug("%s: backend does not return R while processing clear text password authentication",func);
+#endif
+ return -1;
+ }
+
+ if (protoMajor == PROTO_MAJOR_V3)
+ {
+ if (pool_read(backend, &len, sizeof(len)))
+ {
+ show_error("%s: failed to read authentication packet size",func);
+ return -1;
+ }
+
+ if (ntohl(len) != 8)
+ {
+ show_error("%s: incorrect authentication packet size (%d)",func, ntohl(len));
+ return -1;
+ }
+ }
+
+ /* expect to read "Authentication OK" response. kind should be 0... */
+ if (pool_read(backend, &kind, sizeof(kind)))
+ {
+#ifdef PRINT_DEBUG
+ show_debug("%s: failed to read Authentication OK response",func);
+#endif
+ return -1;
+ }
+
+ /* if authenticated, save info (the reauth branch above always returns, so !reauth holds here) */
+ if (!reauth && kind == 0)
+ {
+ if (!backend->issecondary_backend && protoMajor == PROTO_MAJOR_V3)
+ {
+ int msglen;
+
+ pool_write(frontend, "R", 1);
+ msglen = htonl(8);
+ pool_write(frontend, &msglen, sizeof(msglen));
+ msglen = htonl(0);
+ if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
+ {
+ return -1;
+ }
+ }
+
+ backend->auth_kind = 3;
+ backend->pwd_size = ntohl(size) - 4;
+ memcpy(backend->password, password, backend->pwd_size);
+ }
+ return kind; /* the auth code read from the backend; 0 is the expected "Authentication OK" */
+}
+
+/* Perform crypt authentication for one backend connection.
+ * When reauth is set, the saved salt is replayed and the frontend's answer is only compared with the saved password.
+ * Returns 0 on success, non 0 otherwise (-1 on errors, else the auth code read from the backend). */
+static int do_crypt(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor)
+{
+ char * func = "do_crypt()";
+ char salt[2];
+ static int size; /* password packet length exactly as read from the frontend; always used through ntohl() */
+ static char password[MAX_PASSWORD_SIZE]; /* static: a call for a secondary backend reads nothing and reuses what an earlier call stored */
+ char response;
+ int kind;
+ int len;
+
+ if (!reauth)
+ {
+ /* read salt from the backend */
+ if (pool_read(backend, salt, sizeof(salt)))
+ {
+ show_error("%s: failed to read salt",func);
+ return -1;
+ }
+ }
+ else
+ {
+ memcpy(salt, backend->salt, sizeof(salt)); /* reuse the salt saved at the end of a successful first authentication */
+ }
+
+ /* master? only then talk to the frontend; for a secondary backend this whole block is skipped */
+ if (!backend->issecondary_backend)
+ {
+ pool_write(frontend, "R", 1); /* authentication request */
+ if (protoMajor == PROTO_MAJOR_V3)
+ {
+ len = htonl(10); /* length word + auth code + the 2 salt bytes written below */
+ pool_write(frontend, &len, sizeof(len));
+ }
+ kind = htonl(4); /* crypt authentication */
+ pool_write(frontend, &kind, sizeof(kind)); /* indicating crypt authentication */
+ pool_write_and_flush(frontend, salt, sizeof(salt)); /* salt */
+
+ /* read password packet */
+ if (protoMajor == PROTO_MAJOR_V2)
+ {
+ if (pool_read(frontend, &size, sizeof(size)))
+ {
+ show_error("%s: failed to read password packet size",func);
+ return -1;
+ }
+ }
+ else
+ {
+ char k;
+
+ if (pool_read(frontend, &k, sizeof(k)))
+ {
+ show_error("%s: failed to read password packet",func);
+ return -1;
+ }
+ if (k != 'p')
+ {
+ show_error("%s: password packet does not start with \"p\"",func);
+ return -1;
+ }
+ if (pool_read(frontend, &size, sizeof(size)))
+ {
+ show_error("%s: failed to read password packet size",func);
+ return -1;
+ }
+ }
+ /* the password payload is the packet length minus the 4 bytes of the length word */
+ if ((ntohl(size) - 4) > sizeof(password))
+ {
+ show_error("%s: password is too long(size: %d)", func,ntohl(size) - 4);
+ return -1;
+ }
+
+ if (pool_read(frontend, password, ntohl(size) - 4))
+ {
+ show_error("%s: failed to read password (size: %d)", func,ntohl(size) - 4);
+ return -1;
+ }
+ }
+
+ /* connection reusing? then compare with the saved password and return without contacting the backend */
+ if (reauth)
+ {
+#ifdef PRINT_DEBUG
+ show_debug("%s:size: %d saved_size: %d",func, (ntohl(size) - 4), backend->pwd_size);
+#endif
+ if ((ntohl(size) - 4) != backend->pwd_size)
+ {
+#ifdef PRINT_DEBUG
+ show_debug("%s: password size does not match in re-authetication",func);
+#endif
+ return -1;
+ }
+
+ if (memcmp(password, backend->password, backend->pwd_size) != 0)
+ {
+#ifdef PRINT_DEBUG
+ show_debug("%s: password does not match in re-authetication",func);
+#endif
+ return -1;
+ }
+
+ return 0;
+ }
+
+ /* send password packet to backend (the length word is forwarded unchanged) */
+ if (protoMajor == PROTO_MAJOR_V3)
+ pool_write(backend, "p", 1);
+ pool_write(backend, &size, sizeof(size));
+ pool_write_and_flush(backend, password, ntohl(size) -4);
+ if (pool_read(backend, &response, sizeof(response)))
+ {
+ show_error("%s: failed to read authentication response",func);
+ return -1;
+ }
+
+ if (response != 'R')
+ {
+#ifdef PRINT_DEBUG
+ show_debug("%s: backend does not return R while processing crypt authentication(%02x) secondary: %d",func, response, backend->issecondary_backend);
+#endif
+ return -1;
+ }
+
+ if (protoMajor == PROTO_MAJOR_V3)
+ {
+ if (pool_read(backend, &len, sizeof(len)))
+ {
+ show_error("%s: failed to read authentication packet size",func);
+ return -1;
+ }
+
+ if (ntohl(len) != 8)
+ {
+ show_error("%s: incorrect authentication packet size (%d)",func, ntohl(len));
+ return -1;
+ }
+ }
+
+ /* expect to read "Authentication OK" response. kind should be 0... */
+ if (pool_read(backend, &kind, sizeof(kind)))
+ {
+#ifdef PRINT_DEBUG
+ show_debug("%s: failed to read Authentication OK response",func);
+#endif
+ return -1;
+ }
+
+ /* if authenticated, save info. NOTE(review): the frontend write below is not guarded by !issecondary_backend as in do_clear_text_password() - confirm intended */
+ if (!reauth && kind == 0)
+ {
+ if (protoMajor == PROTO_MAJOR_V3)
+ {
+ int msglen;
+
+ pool_write(frontend, "R", 1);
+ msglen = htonl(8);
+ pool_write(frontend, &msglen, sizeof(msglen));
+ msglen = htonl(0);
+ if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
+ {
+ return -1;
+ }
+ }
+ backend->auth_kind = 4;
+ backend->pwd_size = ntohl(size) - 4;
+ memcpy(backend->password, password, backend->pwd_size);
+ memcpy(backend->salt, salt, sizeof(salt)); /* kept so that a later reauth can send the same salt again */
+ }
+ return kind; /* the auth code read from the backend; 0 is the expected "Authentication OK" */
+}
+
+/* Perform MD5 authentication for one backend connection.
+ * When reauth is set, the saved salt is replayed and the frontend's answer is only compared with the saved password.
+ * Returns 0 on success, non 0 otherwise (-1 on errors, else the auth code read from the backend). */
+static int do_md5(POOL_CONNECTION *backend, POOL_CONNECTION *frontend, int reauth, int protoMajor)
+{
+ char * func = "do_md5()";
+ char salt[4];
+ static int size; /* password packet length exactly as read from the frontend; always used through ntohl() */
+ static char password[MAX_PASSWORD_SIZE]; /* static: a call for a secondary backend reads nothing and reuses what an earlier call stored */
+ char response;
+ int kind;
+ int len;
+
+ if (!reauth)
+ {
+ /* read salt from the backend */
+ if (pool_read(backend, salt, sizeof(salt)))
+ {
+ show_error("%s: failed to read salt",func);
+ return -1;
+ }
+ }
+ else
+ {
+ memcpy(salt, backend->salt, sizeof(salt)); /* reuse the salt saved at the end of a successful first authentication */
+ }
+
+ /* master? only then talk to the frontend; for a secondary backend this whole block is skipped */
+ if (!backend->issecondary_backend)
+ {
+ pool_write(frontend, "R", 1); /* authentication request */
+ if (protoMajor == PROTO_MAJOR_V3)
+ {
+ len = htonl(12); /* length word + auth code + the 4 salt bytes written below */
+ pool_write(frontend, &len, sizeof(len));
+ }
+ kind = htonl(5);
+ pool_write(frontend, &kind, sizeof(kind)); /* indicating MD5 */
+ pool_write_and_flush(frontend, salt, sizeof(salt)); /* salt */
+
+ /* read password packet */
+ if (protoMajor == PROTO_MAJOR_V2)
+ {
+ if (pool_read(frontend, &size, sizeof(size)))
+ {
+ show_error("%s: failed to read password packet size",func);
+ return -1;
+ }
+ }
+ else
+ {
+ char k;
+
+ if (pool_read(frontend, &k, sizeof(k)))
+ {
+ show_error("%s: failed to read password packet \"p\"",func);
+ return -1;
+ }
+ if (k != 'p')
+ {
+ show_error("%s: password packet does not start with \"p\"",func);
+ return -1;
+ }
+ if (pool_read(frontend, &size, sizeof(size)))
+ {
+ show_error("%s: failed to read password packet size",func);
+ return -1;
+ }
+ }
+ /* the password payload is the packet length minus the 4 bytes of the length word */
+ if ((ntohl(size) - 4) > sizeof(password))
+ {
+ show_error("%s: password is too long(size: %d)",func, ntohl(size) - 4);
+ return -1;
+ }
+
+ if (pool_read(frontend, password, ntohl(size) - 4))
+ {
+ show_error("%s: failed to read password (size: %d)",func, ntohl(size) - 4);
+ return -1;
+ }
+ }
+
+ /* connection reusing? then compare with the saved password and return without contacting the backend */
+ if (reauth)
+ {
+ if ((ntohl(size) - 4) != backend->pwd_size)
+ {
+#ifdef PRINT_DEBUG
+ show_debug("%s; password size does not match in re-authetication",func);
+#endif
+ return -1;
+ }
+
+ if (memcmp(password, backend->password, backend->pwd_size) != 0)
+ {
+#ifdef PRINT_DEBUG
+ show_debug("%s; password does not match in re-authetication",func);
+#endif
+ return -1;
+ }
+
+ return 0;
+ }
+
+ /* send password packet to backend (the length word is forwarded unchanged) */
+ if (protoMajor == PROTO_MAJOR_V3)
+ pool_write(backend, "p", 1);
+ pool_write(backend, &size, sizeof(size));
+ pool_write_and_flush(backend, password, ntohl(size) -4);
+ if (pool_read(backend, &response, sizeof(response)))
+ {
+ show_error("%s: failed to read authentication response",func);
+ return -1;
+ }
+
+ if (response != 'R')
+ {
+#ifdef PRINT_DEBUG
+ show_debug("%s: backend does not return R while processing MD5 authentication %c", func,response);
+#endif
+ return -1;
+ }
+
+ if (protoMajor == PROTO_MAJOR_V3)
+ {
+ if (pool_read(backend, &len, sizeof(len)))
+ {
+ show_error("%s: failed to read authentication packet size",func);
+ return -1;
+ }
+
+ if (ntohl(len) != 8)
+ {
+ show_error("%s: incorrect authentication packet size (%d)",func, ntohl(len));
+ return -1;
+ }
+ }
+
+ /* expect to read "Authentication OK" response. kind should be 0... */
+ if (pool_read(backend, &kind, sizeof(kind)))
+ {
+#ifdef PRINT_DEBUG
+ show_debug("%s: failed to read Authentication OK response",func);
+#endif
+ return -1;
+ }
+
+ /* if authenticated, save info. NOTE(review): the frontend write below is not guarded by !issecondary_backend as in do_clear_text_password() - confirm intended */
+ if (!reauth && kind == 0)
+ {
+ if (protoMajor == PROTO_MAJOR_V3)
+ {
+ int msglen;
+
+ pool_write(frontend, "R", 1);
+ msglen = htonl(8);
+ pool_write(frontend, &msglen, sizeof(msglen));
+ msglen = htonl(0);
+ if (pool_write_and_flush(frontend, &msglen, sizeof(msglen)) < 0)
+ {
+ return -1;
+ }
+ }
+ backend->auth_kind = 5;
+ backend->pwd_size = ntohl(size) - 4;
+ memcpy(backend->password, password, backend->pwd_size);
+ memcpy(backend->salt, salt, sizeof(salt)); /* kept so that a later reauth can send the same salt again */
+ }
+ return kind; /* the auth code read from the backend; 0 is the expected "Authentication OK" */
+}
+
+/*
+ * Read the message length (V3 only) from the master backend and, when
+ * REPLICATION is on, from the secondary too.  Returns the length in host
+ * byte order, or -1 on a read error, a mismatch or a negative length.
+ */
+int pool_read_message_length(POOL_CONNECTION_POOL *cp)
+{
+	char * func = "pool_read_message_length()";
+	int master_len, secondary_len;
+
+	if (pool_read(MASTER(cp), &master_len, sizeof(master_len)) < 0)
+	{
+		show_error("%s: error while reading message length",func);
+		return -1;
+	}
+	master_len = ntohl(master_len);
+
+	if (REPLICATION)
+	{
+		/* the secondary has to announce exactly the same length */
+		if (pool_read(SECONDARY(cp), &secondary_len, sizeof(secondary_len)) < 0)
+		{
+			show_error("%s: error while reading message length from secondary backend",func);
+			return -1;
+		}
+		secondary_len = ntohl(secondary_len);
+
+		if (master_len != secondary_len)
+		{
+			show_error("%s: length does not match between backends master(%d) secondary(%d)",
+					   func,master_len, secondary_len);
+			return -1;
+		}
+	}
+
+	if (master_len < 0)
+	{
+		show_error("%s:read_message_length: invalid message length (%d)", func, master_len);
+		return -1;
+	}
+
+	return master_len;
+}
+
+signed char pool_read_kind(POOL_CONNECTION_POOL *cp)
+{
+	char * func = "pool_read_kind()";
+	char master_kind, secondary_kind;
+
+	/* one message kind byte from the master; -1 reports a failure */
+	if (pool_read(MASTER(cp), &master_kind, sizeof(master_kind)) < 0)
+	{
+		show_error("%s:read_message_kind: error while reading message kind",func);
+		return -1;
+	}
+
+	if (REPLICATION)
+	{
+		/* the secondary must answer with the very same kind byte */
+		if (pool_read(SECONDARY(cp), &secondary_kind, sizeof(secondary_kind)) < 0)
+		{
+			show_error("%s: error while reading message kind from secondary backend",func);
+			return -1;
+		}
+
+		if (master_kind != secondary_kind)
+		{
+			show_error("%s: kind does not match between backends master(%d) secondary(%d)",
+					   func, master_kind, secondary_kind);
+			return -1;
+		}
+	}
+
+	/* the master's kind byte is what callers get */
+	return master_kind;
+}
diff -aruN postgresql-8.2.4/src/pgcluster/pglb/pool_connection_pool.c pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_connection_pool.c
--- postgresql-8.2.4/src/pgcluster/pglb/pool_connection_pool.c 1970-01-01 01:00:00.000000000 +0100
+++ pgcluster-1.7.0rc7/src/pgcluster/pglb/pool_connection_pool.c 2007-02-18 22:52:17.000000000 +0100
@@ -0,0 +1,535 @@
+/*--------------------------------------------------------------------
+ * FILE:
+ * pool_connection_pool.c
+ *
+ * NOTE:
+ * connection pool stuff
+ *
+ * Portions Copyright (c) 2003-2006, Atsushi Mitani
+ * Portions Copyright (c) 2003-2006, Tatsuo Ishii
+ *--------------------------------------------------------------------
+ */
+/*
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose and without fee is hereby
+ * granted, provided that the above copyright notice appear in all
+ * copies and that both that copyright notice and this permission
+ * notice appear in supporting documentation, and that the name of the
+ * author not be used in advertising or publicity pertaining to
+ * distribution of the software without specific, written prior
+ * permission. The author makes no representations about the
+ * suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ */
+#include "postgres.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include