# cat /etc/redhat-release
Scientific Linux SL release 5.7 (Boron)

* Install the additional repositories: EPEL, Certification Authority, UMD
# yum install yum-priorities yum-protectbase # cd /etc/yum.repos.d/ # rpm -ivh http://mirror.switch.ch/ftp/mirror/epel//5/x86_64/epel-release-5-4.noarch.rpm # wget http://repo-pd.italiangrid.it/mrepo/repos/egi-trustanchors.repo # rpm -ivh http://repo-pd.italiangrid.it/mrepo/EMI/1/sl5/x86_64/updates/emi-release-1.0.1-1.sl5.noarch.rpm # wget http://repo-pd.italiangrid.it/mrepo/repos/igi/sl5/x86_64/igi-emi.repo
# getenforce Disabled
# ls /etc/yum.repos.d/
egi-trustanchors.repo emi1-third-party.repo emi1-base.repo emi1-updates.repo epel.repo epel-testing.repo igi-emi.repo sl-contrib.repo sl-fastbugs.repo sl-security.repo sl-testing.repo sl-debuginfo.repo sl.repo sl-srpms.repo

IMPORTANT: remove the dag repository if present.
# yum clean all
Loaded plugins: downloadonly, kernel-module, priorities, protect-packages, protectbase, security, verify, versionlock
Cleaning up Everything
# yum install emi-torque-server emi-torque-utils
# yum install yaim-addons
# yum install nfs-utils

See here for details.
# cp -r /opt/glite/yaim/examples/siteinfo/* .
# mkdir vo.d

Here is an example for some VOs. Information about the various VOs is available at the CENTRAL OPERATIONS PORTAL.
# rpm -qa | grep munge munge-libs-0.5.8-8.el5 munge-0.5.8-8.el5
# /usr/sbin/create-munge-key # ls -ltr /etc/munge/ total 4 -r-------- 1 munge munge 1024 Jan 13 14:32 munge.key
# chown munge:munge /etc/munge/munge.key
# service munge start Starting MUNGE: [ OK ] # chkconfig munge on
# /opt/glite/yaim/bin/yaim -v -s site-info_batch.def -n TORQUE_server -n TORQUE_utils

See details.
# /opt/glite/yaim/bin/yaim -c -s site-info_batch.def -n TORQUE_server -n TORQUE_utils

See details.
2012-04-24 15:37:29 lcg-info-dynamic-scheduler: LRMS backend command returned nonzero exit status
2012-04-24 15:37:29 lcg-info-dynamic-scheduler: Exiting without output, GIP will use static values
Can not obtain pbs version from host [...]

Instead, on the torque server:
04/24/2012 14:00:46;0080;PBS_Server;Req;req_reject;Reject reply code=15021(Invalid credential), aux=0, type=StatusJob, from tomcat@cream-01.cnaf.infn.it
04/24/2012 14:01:02;0080;PBS_Server;Req;req_reject;Reject reply code=15021(Invalid credential), aux=0, type=StatusJob, from ldap@cream-01.cnaf.infn.it

The solution is to add the tomcat and ldap users/groups to the torque host and restart pbs_server, as they exist only on the CreamCE host.
# echo 'tomcat:x:91:91:Tomcat:/usr/share/tomcat5:/bin/sh' >> /etc/passwd # echo 'ldap:x:55:55:LDAP User:/var/lib/ldap:/bin/false' >> /etc/passwd # echo 'tomcat:x:91:' >> /etc/group # echo 'ldap:x:55:' >> /etc/group
mkdir /opt/exp_soft/
/opt/exp_soft/ *.cnaf.infn.it(rw,sync,no_root_squash)
# service nfs status rpc.mountd is stopped nfsd is stopped # service portmap status portmap is stopped # service portmap start Starting portmap: [ OK ] # service nfs start Starting NFS services: [ OK ] Starting NFS daemon: [ OK ] Starting NFS mountd: [ OK ] Starting RPC idmapd: [ OK ] # chkconfig nfs on # chkconfig portmap on
# exportfs -ra

or simply restart the nfs daemon.
GlueCEStateWaitingJobs: 444444

and in the log /var/log/bdii/bdii-update.log on the CE you notice errors like the following:
Traceback (most recent call last):
  File "/usr/libexec/lcg-info-dynamic-scheduler", line 435, in ?
    wrt = qwt * nwait
TypeError: unsupported operand type(s) for *: 'NoneType' and 'int'

Probably the queues have no "resources_default.walltime" parameter configured. Define it for each queue by running, for example:
# qmgr -c "set queue prod resources_default.walltime = 01:00:00" # qmgr -c "set queue cert resources_default.walltime = 01:00:00" # qmgr -c "set queue cloudtf resources_default.walltime = 01:00:00"
# qmgr -c "set server authorized_users += *@cream-02.cnaf.infn.it"

Regarding the ssh configuration, have a look at NotesAboutInstallationAndConfigurationOfCREAMForTORQUE.
# qmgr -c 'p s'
# pbsnodes -a
CLASSWEIGHT 1
QOSWEIGHT 1
QOSCFG[normal] MAXJOB=7
CLASSCFG[prod] QDEF=normal
CLASSCFG[cert] PRIORITY=5000

After the modification, restart maui. To prevent yaim from overwriting this file during host reconfiguration, set:
CONFIG_MAUI="no"

in your site.def (the first time you launch the yaim script, it has to be set to "yes").
