Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion opal/mca/pmix/pmix2x/pmix/NEWS
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
Copyright (c) 2017 IBM Corporation. All rights reserved.
$COPYRIGHT$

Additional copyrights may follow
Expand All @@ -23,6 +24,32 @@ current release as well as the "stable" bug fix release branch.
Master (not on release branches yet)
------------------------------------

1.2.2 -- 21 March 2017
----------------------
- Compiler fix for Sun/Oracle CC (PR #322)
- Fix missing include (PR #326)
- Improve error checking around posix_fallocate (PR #329)
- Fix possible memory corruption (PR #331)


1.2.1 -- 21 Feb. 2017
----------------------
- dstore: Fix data corruption bug in key overwrite cases
- dstore: Performance and scalability fixes
- sm: Use posix_fallocate() before mmap
- pmi1/pmi2: Restore support
- dstore: Fix extension slot size allocation (Issue #280)


1.2.0 -- 14 Dec. 2016
----------------------
- Add shared memory data storage (dstore) option. Default: enabled
Configure option: --disable-dstore
- PMIx_Commit performance improvements
- Disable errhandler support
- Keep job info in the shared memory dstore
- PMIx_Get performance and memory improvements

1.1.5
-----
- Add pmix_version.h to support direct detection of PMIx library version
Expand Down
6 changes: 3 additions & 3 deletions opal/mca/pmix/pmix2x/pmix/VERSION
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@ release=0
# The only requirement is that it must be entirely printable ASCII
# characters and have no white space.

greek=
greek=a1

# If repo_rev is empty, then the repository version number will be
# obtained during "make dist" via the "git describe --tags --always"
# command, or with the date (if "git describe" fails) in the form of
# "date<date>".

repo_rev=git4cdd5e0
repo_rev=gitc442ba8

# If tarball_version is not empty, it is used as the version string in
# the tarball filename, regardless of all other versions listed in
Expand All @@ -44,7 +44,7 @@ tarball_version=

# The date when this release was created

date="Mar 11, 2017"
date="Apr 02, 2017"

# The shared library version of each of PMIx's public libraries.
# These versions are maintained in accordance with the "Library
Expand Down
8 changes: 5 additions & 3 deletions opal/mca/pmix/pmix2x/pmix/config/pmix_functions.m4
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ dnl All rights reserved.
dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
dnl Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved.
dnl Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
dnl Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
dnl Copyright (c) 2017 Research Organization for Information Science
dnl and Technology (RIST). All rights reserved.
dnl
dnl $COPYRIGHT$
dnl
Expand Down Expand Up @@ -278,7 +280,7 @@ for val in ${$1}; do
# http://www.open-mpi.org/community/lists/devel/2012/08/11362.php).

case $val in
-Xclang)
-Xclang|-Xg)
pmix_found=0
pmix_i=`expr $pmix_count + 1`
;;
Expand Down Expand Up @@ -366,7 +368,7 @@ AC_DEFUN([PMIX_FLAGS_UNIQ],[
# https://github.com/open-mpi/ompi/issues/324).

case $val in
-Xclang)
-Xclang|-Xg)
pmix_found=0
pmix_i=`expr $pmix_count + 1`
;;
Expand Down
7 changes: 5 additions & 2 deletions opal/mca/pmix/pmix2x/pmix/examples/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

AM_CPPFLAGS = -I$(top_builddir)/src -I$(top_builddir)/src/include -I$(top_builddir)/include -I$(top_builddir)/include/pmix

noinst_PROGRAMS = client dmodex dynamic fault pub tool debugger debuggerd alloc
noinst_PROGRAMS = client dmodex dynamic fault pub tool debugger debuggerd alloc jctrl
if !WANT_HIDDEN
# these examples use internal symbols
# use --disable-visibility
Expand All @@ -40,11 +40,14 @@ debuggerd_SOURCES = debuggerd.c
debuggerd_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
debuggerd_LDADD = $(top_builddir)/src/libpmix.la


alloc_SOURCES = alloc.c
alloc_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
alloc_LDADD = $(top_builddir)/src/libpmix.la

jctrl_SOURCES = jctrl.c
jctrl_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
jctrl_LDADD = $(top_builddir)/src/libpmix.la

dmodex_SOURCES = dmodex.c
dmodex_LDFLAGS = $(PMIX_PKG_CONFIG_LDFLAGS)
dmodex_LDADD = $(top_builddir)/src/libpmix.la
Expand Down
229 changes: 229 additions & 0 deletions opal/mca/pmix/pmix2x/pmix/examples/jctrl.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/

#define _GNU_SOURCE
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include <signal.h>

#include <pmix.h>

static pmix_proc_t myproc;

/* Default event notification handler, handed to PMIx below when we
 * register for general events. Registering a default handler is not
 * technically required, but it is usually good practice to catch any
 * events that occur.
 *
 * The handler takes no action of its own: when a completion callback
 * was supplied, it is invoked with PMIX_EVENT_ACTION_COMPLETE and the
 * caller's cbdata; otherwise the function simply returns. */
static void notification_fn(size_t evhdlr_registration_id,
                            pmix_status_t status,
                            const pmix_proc_t *source,
                            pmix_info_t info[], size_t ninfo,
                            pmix_info_t results[], size_t nresults,
                            pmix_event_notification_cbfunc_fn_t cbfunc,
                            void *cbdata)
{
    /* nothing to report back to if no completion callback was given */
    if (NULL == cbfunc) {
        return;
    }

    cbfunc(PMIX_EVENT_ACTION_COMPLETE, NULL, 0, NULL, NULL, cbdata);
}

/* Completion callback for event handler registration. Registration is
 * asynchronous because it may involve the PMIx server registering with
 * the host RM for external events, so the outcome is delivered here:
 * the status of the request (success or an error) plus a numerical
 * index for the registered handler. That index is what would later be
 * used to deregister the handler - if we don't explicitly deregister
 * it, the PMIx server will do so when it sees us exit.
 *
 * cbdata must point at the int the caller is polling; the status is
 * stored there. A failure is also reported on stderr. */
static void evhandler_reg_callbk(pmix_status_t status,
                                 size_t evhandler_ref,
                                 void *cbdata)
{
    volatile int *waiter = (volatile int*)cbdata;
    unsigned long ref = (unsigned long)evhandler_ref;

    if (PMIX_SUCCESS != status) {
        fprintf(stderr, "Client %s:%d EVENT HANDLER REGISTRATION FAILED WITH STATUS %d, ref=%lu\n",
                myproc.nspace, myproc.rank, status, ref);
    }

    /* publish the result last so the polling caller sees it after the report */
    *waiter = status;
}

/* Completion callback shared by the non-blocking job-control and
 * process-monitor requests issued from main(). cbdata must point at the
 * int the caller is polling. The returned info array is not examined:
 * the provider's release function (if any) is invoked first, and then
 * the request status is stored in the caller's flag. */
static void infocbfunc(pmix_status_t status,
                       pmix_info_t *info, size_t ninfo,
                       void *cbdata,
                       pmix_release_cbfunc_t release_fn,
                       void *release_cbdata)
{
    volatile int *waiter = (volatile int*)cbdata;

    /* let the provider of the info array reclaim it */
    if (release_fn != NULL) {
        release_fn(release_cbdata);
    }

    /* hand the result to the polling caller */
    *waiter = status;
}

int main(int argc, char **argv)
{
int rc;
pmix_value_t value;
pmix_value_t *val = &value;
pmix_proc_t proc;
uint32_t nprocs, n;
pmix_info_t *info, *iptr;
bool flag;
volatile int active;
pmix_data_array_t *dptr;

/* init us - note that the call to "init" includes the return of
* any job-related info provided by the RM. */
if (PMIX_SUCCESS != (rc = PMIx_Init(&myproc, NULL, 0))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Init failed: %d\n", myproc.nspace, myproc.rank, rc);
exit(0);
}
fprintf(stderr, "Client ns %s rank %d: Running\n", myproc.nspace, myproc.rank);


/* register our default event handler - again, this isn't strictly
* required, but is generally good practice */
active = -1;
PMIx_Register_event_handler(NULL, 0, NULL, 0,
notification_fn, evhandler_reg_callbk, (void*)&active);
while (-1 == active) {
sleep(1);
}
if (0 != active) {
fprintf(stderr, "[%s:%d] Default handler registration failed\n", myproc.nspace, myproc.rank);
exit(active);
}

/* job-related info is found in our nspace, assigned to the
* wildcard rank as it doesn't relate to a specific rank. Setup
* a name to retrieve such values */
PMIX_PROC_CONSTRUCT(&proc);
(void)strncpy(proc.nspace, myproc.nspace, PMIX_MAX_NSLEN);
proc.rank = PMIX_RANK_WILDCARD;

/* get our universe size */
if (PMIX_SUCCESS != (rc = PMIx_Get(&proc, PMIX_UNIV_SIZE, NULL, 0, &val))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Get universe size failed: %d\n", myproc.nspace, myproc.rank, rc);
goto done;
}
nprocs = val->data.uint32;
PMIX_VALUE_RELEASE(val);
fprintf(stderr, "Client %s:%d universe size %d\n", myproc.nspace, myproc.rank, nprocs);

/* inform the RM that we are preemptible, and that our checkpoint methods are
* "signal" on SIGUSR2 and event on PMIX_JCTRL_CHECKPOINT */
PMIX_INFO_CREATE(info, 2);
flag = true;
PMIX_INFO_LOAD(&info[0], PMIX_JOB_CTRL_PREEMPTIBLE, (void*)&flag, PMIX_BOOL);
/* can't use "load" to load a pmix_data_array_t */
(void)strncpy(info[1].key, PMIX_JOB_CTRL_CHECKPOINT_METHOD, PMIX_MAX_KEYLEN);
info[1].value.type = PMIX_DATA_ARRAY;
dptr = (pmix_data_array_t*)malloc(sizeof(pmix_data_array_t));
info[1].value.data.darray = dptr;
dptr->type = PMIX_INFO;
dptr->size = 2;
PMIX_INFO_CREATE(dptr->array, dptr->size);
rc = SIGUSR2;
iptr = (pmix_info_t*)dptr->array;
PMIX_INFO_LOAD(&iptr[0], PMIX_JOB_CTRL_CHECKPOINT_SIGNAL, &rc, PMIX_INT);
rc = PMIX_JCTRL_CHECKPOINT;
PMIX_INFO_LOAD(&iptr[1], PMIX_JOB_CTRL_CHECKPOINT_EVENT, &rc, PMIX_STATUS);

/* since this is informational and not a requested operation, the target parameter
* doesn't mean anything and can be ignored */
active = -1;
if (PMIX_SUCCESS != (rc = PMIx_Job_control_nb(NULL, 0, info, 2, infocbfunc, (void*)&active))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, myproc.rank, rc);
goto done;
}
while (-1 == active) {
sleep(1);
}
PMIX_INFO_FREE(info, 2);
if (0 != active) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Job_control_nb failed: %d\n", myproc.nspace, myproc.rank, rc);
exit(active);
}

/* now request that this process be monitored using heartbeats */
PMIX_INFO_CREATE(iptr, 1);
PMIX_INFO_LOAD(&iptr[0], PMIX_MONITOR_HEARTBEAT, NULL, PMIX_POINTER);

PMIX_INFO_CREATE(info, 3);
PMIX_INFO_LOAD(&info[0], PMIX_MONITOR_ID, "MONITOR1", PMIX_STRING);
n = 5; // require a heartbeat every 5 seconds
PMIX_INFO_LOAD(&info[1], PMIX_MONITOR_HEARTBEAT_TIME, &n, PMIX_UINT32);
n = 2; // two heartbeats can be missed before declaring us "stalled"
PMIX_INFO_LOAD(&info[2], PMIX_MONITOR_HEARTBEAT_DROPS, &n, PMIX_UINT32);

/* make the request */
active = -1;
if (PMIX_SUCCESS != (rc = PMIx_Process_monitor_nb(iptr, PMIX_MONITOR_HEARTBEAT_ALERT,
info, 3, infocbfunc, (void*)&active))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, myproc.rank, rc);
goto done;
}
while (-1 == active) {
sleep(1);
}
PMIX_INFO_FREE(iptr, 1);
PMIX_INFO_FREE(info, 3);
if (0 != active) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Process_monitor_nb failed: %d\n", myproc.nspace, myproc.rank, rc);
exit(active);
}

/* send a heartbeat */
PMIx_Heartbeat();

/* call fence to synchronize with our peers - no need to
* collect any info as we didn't "put" anything */
PMIX_INFO_CREATE(info, 1);
flag = false;
PMIX_INFO_LOAD(info, PMIX_COLLECT_DATA, &flag, PMIX_BOOL);
if (PMIX_SUCCESS != (rc = PMIx_Fence(&proc, 1, info, 1))) {
fprintf(stderr, "Client ns %s rank %d: PMIx_Fence failed: %d\n", myproc.nspace, myproc.rank, rc);
goto done;
}
PMIX_INFO_FREE(info, 1);


done:
/* finalize us */
fprintf(stderr, "Client ns %s rank %d: Finalizing\n", myproc.nspace, myproc.rank);
if (PMIX_SUCCESS != (rc = PMIx_Finalize(NULL, 0))) {
fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize failed: %d\n", myproc.nspace, myproc.rank, rc);
} else {
fprintf(stderr, "Client ns %s rank %d:PMIx_Finalize successfully completed\n", myproc.nspace, myproc.rank);
}
fflush(stderr);
return(0);
}
10 changes: 8 additions & 2 deletions opal/mca/pmix/pmix2x/pmix/include/pmix.h
Original file line number Diff line number Diff line change
Expand Up @@ -523,8 +523,14 @@ pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pmix_status_t
pmix_info_cbfunc_t cbfunc, void *cbdata);

/* define a special macro to simplify sending of a heartbeat */
#define PMIx_Heartbeat() \
PMIx_Process_monitor_nb(PMIX_SEND_HEARTBEAT, NULL, 0, NULL, NULL)
/* PMIx_Heartbeat() expands to a statement (do { ... } while(0)), not an
 * expression, so it yields no value. It builds a temporary pmix_info_t
 * keyed with PMIX_SEND_HEARTBEAT, passes it to PMIx_Process_monitor_nb
 * with no directives and no callback, and then destructs the temporary.
 * The status returned by PMIx_Process_monitor_nb is discarded. */
#define PMIx_Heartbeat() \
do { \
pmix_info_t _in; \
PMIX_INFO_CONSTRUCT(&_in); \
PMIX_INFO_LOAD(&_in, PMIX_SEND_HEARTBEAT, NULL, PMIX_POINTER); \
PMIx_Process_monitor_nb(&_in, PMIX_SUCCESS, NULL, 0, NULL, NULL); \
PMIX_INFO_DESTRUCT(&_in); \
} while(0)

#if defined(c_plusplus) || defined(__cplusplus)
}
Expand Down
Loading