# TODO
# - make use of offline maven
# - hadoop-fuse?
# - hadoop-native.spec?
# - pick snippets from http://issues.apache.org/jira/browse/HADOOP-5615
# - http://issues.apache.org/jira/browse/HADOOP-6255
# - https://wiki.ubuntu.com/HadoopPackagingSpec
#
# Conditional build:
%bcond_with	apidocs		# build API documentation (needs Java 5 and Apache Forrest)
#
Summary:	Hadoop Distributed File System and MapReduce implementation
Name:		hadoop
Version:	0.20.2
Release:	0.1
License:	Apache v2.0
Group:		Daemons
URL:		http://hadoop.apache.org/common/
Source0:	http://www.apache.org/dist/hadoop/core/%{name}-%{version}/hadoop-%{version}.tar.gz
# Source0-md5:	8f40198ed18bef28aeea1401ec536cb9
BuildRequires:	ant
BuildRequires:	jdk
BuildRequires:	jpackage-utils
BuildRequires:	rpmbuild(macros) >= 1.202
Requires(postun):	/usr/sbin/groupdel
Requires(postun):	/usr/sbin/userdel
Requires(pre):	/bin/id
Requires(pre):	/usr/bin/getgid
Requires(pre):	/usr/sbin/groupadd
Requires(pre):	/usr/sbin/useradd
Requires:	jpackage-utils
Requires:	jre
Provides:	group(hadoop)
Provides:	user(hadoop)
BuildArch:	noarch
BuildRoot:	%{tmpdir}/%{name}-%{version}-root-%(id -u -n)

%define		_appdir		%{_datadir}/%{name}

%description
Apache Hadoop Core is a software platform that lets one easily write
and run applications that process vast amounts of data.

Here's what makes Hadoop especially useful:
- Scalable: Hadoop can reliably store and process petabytes.
- Economical: It distributes the data and processing across clusters
  of commonly available computers. These clusters can number into the
  thousands of nodes.
- Efficient: By distributing the data, Hadoop can process it in
  parallel on the nodes where the data is located, which makes
  processing extremely fast.
- Reliable: Hadoop automatically maintains multiple copies of data and
  automatically redeploys computing tasks when nodes fail.

Hadoop implements MapReduce, using the Hadoop Distributed File System
(HDFS). MapReduce divides applications into many small blocks of work.
HDFS creates multiple replicas of data blocks for reliability, placing
them on compute nodes around the cluster. MapReduce can then process
the data where it is located.

%prep
%setup -q

# hadoop-env.sh defaults
%{__sed} -i -e '
# set JAVA_HOME from jpackage-utils
s|.*JAVA_HOME=.*|. %{_javadir}-utils/java-functions; set_jvm|
s|.*HADOOP_CLASSPATH=.*|export HADOOP_CLASSPATH=$HADOOP_CONF_DIR:$(build-classpath hadoop)|
s|.*HADOOP_LOG_DIR=.*|export HADOOP_LOG_DIR=%{_var}/log/hadoop|
s|.*HADOOP_PID_DIR=.*|export HADOOP_PID_DIR=%{_var}/run/hadoop|
' conf/hadoop-env.sh
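# A sketch of what conf/hadoop-env.sh should contain after the
# substitutions above, assuming stock macro values (/usr/share/java for
# the java dir, /var for the var dir) and a stock jpackage-utils:
#
#	. /usr/share/java-utils/java-functions; set_jvm
#	export HADOOP_CLASSPATH=$HADOOP_CONF_DIR:$(build-classpath hadoop)
#	export HADOOP_LOG_DIR=/var/log/hadoop
#	export HADOOP_PID_DIR=/var/run/hadoop
#
# set_jvm comes from jpackage-utils' java-functions and selects a JVM,
# setting JAVA_HOME; build-classpath resolves jar names under
# /usr/share/java into a colon-separated classpath.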
%build
%ant package \
	-Dversion=%{version} \
%if %{with apidocs}
	-Djava5.home=%{java_home} \
	-Dforrest.home=../apache-forrest-0.8
%endif

%install
rm -rf $RPM_BUILD_ROOT
install -d $RPM_BUILD_ROOT{%{_appdir},%{_var}/{log,run}/hadoop}

cp -a bin c++ conf ivy lib webapps $RPM_BUILD_ROOT%{_appdir}
cp -a *.jar *.xml $RPM_BUILD_ROOT%{_appdir}

# we're noarch - drop arch-specific binaries
rm -rvf $RPM_BUILD_ROOT%{_appdir}/lib/native/
rm -rvf $RPM_BUILD_ROOT%{_appdir}/c++/Linux-amd64-64
rm -rvf $RPM_BUILD_ROOT%{_appdir}/c++/Linux-i386-32
rm -rvf $RPM_BUILD_ROOT%{_appdir}/librecordio/librecordio.a

%clean
rm -rf $RPM_BUILD_ROOT

%pre
%groupadd -g 245 -r hadoop
%useradd -u 245 -m -r -g hadoop -c 'HDFS Runtime User' -s /bin/sh hadoop

%postun
if [ "$1" = "0" ]; then
	%userremove hadoop
	%groupremove hadoop
fi

%files
%defattr(644,root,root,755)
%doc CHANGES.txt NOTICE.txt README.txt
%dir %{_appdir}
%dir %{_appdir}/bin
%attr(755,root,root) %{_appdir}/bin/*
%dir %{_appdir}/conf
%config(noreplace) %verify(not md5 mtime size) %{_appdir}/conf/*
%{_appdir}/webapps
%{_appdir}/hadoop-*.jar
%{_appdir}/ivy
%{_appdir}/ivy.xml
%dir %{_appdir}/lib
%{_appdir}/lib/jdiff
%{_appdir}/lib/*.jar
%{_appdir}/lib/jsp-2.1
%attr(775,root,hadoop) %{_var}/run/hadoop
%attr(775,root,hadoop) %{_var}/log/hadoop
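# Build sketch (example invocations; the apidocs bcond is declared at
# the top of this spec, and -Dforrest.home above expects an
# apache-forrest-0.8 tree unpacked next to the source directory):
#
#	rpmbuild -ba hadoop.spec			# default: no API docs
#	rpmbuild -ba --with apidocs hadoop.spec		# build Forrest docs too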