# cat /etc/redhat-release
Scientific Linux SL release 5.7 (Boron)

* Install the additional repositories: EPEL, Certification Authority, UMD
# yum install yum-priorities yum-protectbase
# cd /etc/yum.repos.d/
# rpm -ivh http://mirror.switch.ch/ftp/mirror/epel//5/x86_64/epel-release-5-4.noarch.rpm
# wget http://repo-pd.italiangrid.it/mrepo/repos/egi-trustanchors.repo
# rpm -ivh http://repo-pd.italiangrid.it/mrepo/EMI/1/sl5/x86_64/updates/emi-release-1.0.1-1.sl5.noarch.rpm
# wget http://repo-pd.italiangrid.it/mrepo/repos/igi/sl5/x86_64/igi-emi.repo
# getenforce
Disabled
# ls /etc/yum.repos.d/
egi-trustanchors.repo  emi1-third-party.repo  emi1-base.repo  emi1-updates.repo  igi-emi.repo  epel.repo  epel-testing.repo  sl-contrib.repo  sl-fastbugs.repo  sl-security.repo  sl-testing.repo  sl-debuginfo.repo  sl.repo  sl-srpms.repo

IMPORTANT: remove the dag repository if present.
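For example, assuming the dag repository is configured in a file named dag.repo (the actual file name may differ on your host), you can remove it or disable it:

# rm /etc/yum.repos.d/dag.repo        (or set "enabled=0" inside that file)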
# yum clean all
Loaded plugins: downloadonly, kernel-module, priorities, protect-packages, protectbase, security, verify, versionlock
Cleaning up Everything
# yum install ca-policy-egi-core
# yum install xml-commons-apis
# yum install emi-cream-ce
# yum install emi-torque-server emi-torque-utils
# yum install glite-dgas-common glite-dgas-hlr-clients glite-dgas-hlr-sensors glite-dgas-hlr-sensors-producers yaim-dgas
# yum install nfs-utils

See here for details.
# cp -r /opt/glite/yaim/examples/siteinfo/* .
# mkdir vo.d

Here is an example for some VOs. Information about the various VOs is available on the CENTRAL OPERATIONS PORTAL.
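As a purely illustrative sketch (the VO name, VOMS server and DNs below are placeholders, not values from this guide), a file under vo.d/ sets the usual YAIM VO variables without the VO_<name>_ prefix; replace the values with the ones published on the operations portal for the real VO:

# vo.d/myvo.example.org -- placeholder values only
SW_DIR=$VO_SW_DIR/myvo
DEFAULT_SE=$SE_HOST
VOMS_SERVERS="'vomss://voms.example.org:8443/voms/myvo.example.org?/myvo.example.org'"
VOMSES="'myvo.example.org voms.example.org 15000 /C=IT/O=EXAMPLE/CN=voms.example.org myvo.example.org'"
VOMS_CA_DN="'/C=IT/O=EXAMPLE/CN=Example CA'"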
#
# YAIM creamCE specific variables
#
# LSF settings: path where lsf.conf is located
#BATCH_CONF_DIR=lsf_install_path/conf
#
# CE-monitor host (by default CE-monitor is installed on the same machine as
# cream-CE)
CEMON_HOST=$CE_HOST
#
# CREAM database user
CREAM_DB_USER=*********
#
CREAM_DB_PASSWORD=*********
# Machine hosting the BLAH blparser.
# In this machine batch system logs must be accessible.
#BLPARSER_HOST=set_to_fully_qualified_host_name_of_machine_hosting_blparser_server
BLPARSER_HOST=$CE_HOST
#
# YAIM DGAS Sensors specific variables
#
################################
# DGAS configuration variables #
################################
# For any details about DGAS variables please refer to the guide:
# http://igrelease.forge.cnaf.infn.it/doku.php?id=doc:guides:dgas
# Reference Resource HLR for the site.
DGAS_HLR_RESOURCE="prod-hlr-01.pd.infn.it"
# Specify the type of job which the CE has to process.
# Set "all" on the main CE of the site, "grid" on the others.
# Default value: all
#DGAS_JOBS_TO_PROCESS="all"
# This parameter can be used to specify the list of VOs to publish.
# If the parameter is specified, the sensors (pushd) will forward
# to the Site HLR just the records belonging to one of the specified VOs.
# Leave it commented if you want to send records for ALL VOs.
# Default value: parameter not specified
#DGAS_VO_TO_PROCESS="vo1;vo2;vo3..."
# Bound date on jobs backward processing.
# The backward processing does not consider jobs prior to that date.
# Default value: 2009-01-01.
#DGAS_IGNORE_JOBS_LOGGED_BEFORE="2011-11-01"
# Main CE of the site.
# ATTENTION: set this variable only in the case of a site with a single LRMS
# shared by more than one CE or local submission host (i.e. a host from which
# you may submit jobs directly to the batch system).
# In this case, the DGAS_USE_CE_HOSTNAME parameter must be set to the same value
# on all hosts sharing the LRMS, and this value can be arbitrarily chosen among
# these submitting hostnames (you may choose the best one).
# Otherwise leave it commented.
# We have 2 CEs, cremino is the main one.
DGAS_USE_CE_HOSTNAME="cremino.cnaf.infn.it"
# Path for the batch-system log files.
# * for torque/pbs:
#   DGAS_ACCT_DIR=/var/torque/server_priv/accounting
# * for LSF:
#   DGAS_ACCT_DIR=lsf_install_path/work/cluster_name/logdir
# * for SGE:
#   DGAS_ACCT_DIR=/opt/sge/default/common/
DGAS_ACCT_DIR=/var/torque/server_priv/accounting
# Full path to the 'condor_history' command, used to gather DGAS usage records
# when Condor is used as a batch system. Otherwise leave it commented.
#DGAS_CONDOR_HISTORY_COMMAND=""
---+++ host certificate

# ll /etc/grid-security/host*
-rw-r--r-- 1 root root 1440 Oct 18 09:31 /etc/grid-security/hostcert.pem
-r-------- 1 root root  887 Oct 18 09:31 /etc/grid-security/hostkey.pem
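If the ownership and permissions differ from those shown above, they can be adjusted, for example:

# chown root:root /etc/grid-security/hostcert.pem /etc/grid-security/hostkey.pem
# chmod 644 /etc/grid-security/hostcert.pem
# chmod 400 /etc/grid-security/hostkey.pem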
# rpm -qa | grep munge
munge-libs-0.5.8-8.el5
munge-0.5.8-8.el5
# /usr/sbin/create-munge-key
# ls -ltr /etc/munge/
total 4
-r-------- 1 munge munge 1024 Jan 13 14:32 munge.key
# chown munge:munge /etc/munge/munge.key
# service munge start
Starting MUNGE:                                            [  OK  ]
# chkconfig munge on
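Note that the same munge.key must be present on every host of the batch cluster (CE and worker nodes). A minimal sketch for distributing it, with hypothetical worker node names:

# for wn in wn01.cnaf.infn.it wn02.cnaf.infn.it; do scp -p /etc/munge/munge.key root@${wn}:/etc/munge/; done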
# /opt/glite/yaim/bin/yaim -v -s site-info_cremino.def -n creamCE -n TORQUE_server -n TORQUE_utils -n DGAS_sensors

See details.
# /opt/glite/yaim/bin/yaim -c -s site-info_cremino.def -n creamCE -n TORQUE_server -n TORQUE_utils -n DGAS_sensors

See details.
VO_SW_DIR=/opt/exp_soft
# mkdir /opt/exp_soft/
/opt/exp_soft/ *.cnaf.infn.it(rw,sync,no_root_squash)
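On the worker nodes the exported software area then has to be mounted. A minimal sketch, assuming the host configured here (cremino.cnaf.infn.it) is the NFS server, to be run on each WN:

# mkdir -p /opt/exp_soft
# echo "cremino.cnaf.infn.it:/opt/exp_soft /opt/exp_soft nfs defaults 0 0" >> /etc/fstab
# mount /opt/exp_soft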
# service nfs status
rpc.mountd is stopped
nfsd is stopped
# service portmap status
portmap is stopped
# service portmap start
Starting portmap:                                          [  OK  ]
# service nfs start
Starting NFS services:                                     [  OK  ]
Starting NFS daemon:                                       [  OK  ]
Starting NFS mountd:                                       [  OK  ]
Starting RPC idmapd:                                       [  OK  ]
# chkconfig nfs on
# chkconfig portmap on
# exportfs -ra

or simply restart the NFS daemon.
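To verify that the directory is actually exported, you can list the active exports, for example:

# exportfs -v
# showmount -e localhost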
GlueCEStateWaitingJobs: 444444

and in the log /var/log/bdii/bdii-update.log you notice errors like the following:
Traceback (most recent call last):
  File "/usr/libexec/lcg-info-dynamic-scheduler", line 435, in ?
    wrt = qwt * nwait
TypeError: unsupported operand type(s) for *: 'NoneType' and 'int'

probably the queues have no "resources_default.walltime" parameter configured. So define it for each queue by launching, for example:
# qmgr -c "set queue prod resources_default.walltime = 01:00:00" # qmgr -c "set queue cert resources_default.walltime = 01:00:00" # qmgr -c "set queue cloudtf resources_default.walltime = 01:00:00"
# qmgr -c 'p s'
# pbsnodes -a
In the /var/spool/maui/maui.cfg file:
CLASSWEIGHT 1
QOSWEIGHT 1
QOSCFG[normal] MAXJOB=7
CLASSCFG[prod] QDEF=normal
CLASSCFG[cert] PRIORITY=5000

After the modification, restart maui. To prevent yaim from overwriting this file during host reconfiguration, set:
CONFIG_MAUI="no"in your site.def (the first time you launch the yaim script, it has to be set to "yes"
| Date | Comment | By |
|---|---|---|
| 2012-02-08 | added walltime workaround | Alessandro Paolini |
| 2012-02-02 | modified software area settings | Alessandro Paolini |
| 2012-01-25 | installation notes completed | Alessandro Paolini |
| 2012-01-16 | First draft | Alessandro Paolini |