From 476af053a60003c4c017f320d55860d7ecb29ac9 Mon Sep 17 00:00:00 2001 From: William Brown Date: Feb 14 2016 21:54:44 +0000 Subject: Ticket 48384 - Fix dblayer_is_cachesize_sane and dblayer_sys_pages for linux Bug Description: At this point in time, the current algorithm to determine if the cachesizing is sane is based on: issane = (int)((*cachesize / pagesize) <= (pages - procpages)); However, the values of pages and procpages are suspect: Consider: dblayer_is_cachesize_sane pages=3050679 / procpages=268505910 This isn't a typo: procpages often exceeds pages. This is because procpages is derived from /proc/pid/status, vmsize, which is the maximum amount of ram a process *could* allocate. Additionally, the value of pages may exceed our vmsize, so we may be overeagerly allocating memory that we don't actually have access to. Vmsize also includes swap space, so we might be trying to alloc memory into swap too. dblayer_is_cachesize_sane also only takes into account pages (total on system) and the current process' allocation: This pays no regard to: * How much ram is *actually* free on the system with respect to other processes * The value of getrlimit via availpages. The first condition is especially bad, because our system may be approaching an OOM condition, and we brazenly allocate a swathe of pages which triggers this. Fix Description: First, this fix corrects procpages to be based on vmrss, which is the actual working set size of the process, rather than the maximum possible allocation in vmsize. The value of pages is taken from the smaller of: * vmsize * systeminfo total ram (excluding swap) The value of availpages is derived from the smallest of: * pages * getrlimit * freepages (with consideration of all processes on system) The check for issane now checks that the cachepage request is smaller than availpages, which guarantees: * Our system actually has the ram free to accommodate them without swapping or triggering an OOM condition. 
* We respect rlimits in the allocation. Next, this moves the cachesize_is_sane and sys_pages utilities to util.c. This way we can begin to reference these checks in other areas of the code. We also change the way that we calculate free and total memory. Linux as it seems does not offer a complete API for sysinfo, so the only way to really get these is to read /proc/meminfo. This fixes the live calls from cn=config to only checkmemory allocation based on the difference, due to the fact the current allocation is already passed and may be consuming ram. https://fedorahosted.org/389/ticket/48384 Author: wibrown Review by: nhosoi, tbordaz, lkrispen (Thanks!) --- diff --git a/ldap/servers/slapd/back-ldbm/cache.c b/ldap/servers/slapd/back-ldbm/cache.c index 65439e7..f6a9cf5 100644 --- a/ldap/servers/slapd/back-ldbm/cache.c +++ b/ldap/servers/slapd/back-ldbm/cache.c @@ -678,7 +678,10 @@ static void entrycache_set_max_size(struct cache *cache, size_t bytes) cache_make_hashes(cache, CACHE_TYPE_ENTRY); } cache_unlock(cache); - if (! dblayer_is_cachesize_sane(&bytes)) { + /* This may already have been called by one of the functions in + * ldbm_instance_config + */ + if (! util_is_cachesize_sane(&bytes)) { LDAPDebug(LDAP_DEBUG_ANY, "WARNING -- Possible CONFIGURATION ERROR -- cachesize " "(%lu) may be configured to use more than the available " @@ -1613,7 +1616,10 @@ dncache_set_max_size(struct cache *cache, size_t bytes) cache_make_hashes(cache, CACHE_TYPE_DN); } cache_unlock(cache); - if (! dblayer_is_cachesize_sane(&bytes)) { + /* This may already have been called by one of the functions in + * ldbm_instance_config + */ + if (! 
util_is_cachesize_sane(&bytes)) { LDAPDebug1Arg(LDAP_DEBUG_ANY, "WARNING -- Possible CONFIGURATION ERROR -- cachesize " "(%lu) may be configured to use more than the available " diff --git a/ldap/servers/slapd/back-ldbm/dblayer.c b/ldap/servers/slapd/back-ldbm/dblayer.c index e65f3cf..bc290cb 100644 --- a/ldap/servers/slapd/back-ldbm/dblayer.c +++ b/ldap/servers/slapd/back-ldbm/dblayer.c @@ -68,7 +68,6 @@ #include #include #include -#include #if 1000*DB_VERSION_MAJOR + 100*DB_VERSION_MINOR >= 4100 #define DB_OPEN(oflags, db, txnid, file, database, type, flags, mode, rval) \ @@ -869,190 +868,6 @@ static void dblayer_init_dbenv(DB_ENV *pEnv, dblayer_private *priv) #endif } -/* returns system pagesize (in bytes) and the number of pages of physical - * RAM this machine has. - * as a bonus, if 'procpages' is non-NULL, it will be filled in with the - * approximate number of pages this process is using! - * on platforms that we haven't figured out how to do this yet, both fields - * are filled with zero and you're on your own. - * - * platforms supported so far: - * Solaris, Linux, Windows - */ -#ifdef OS_solaris -#include -#endif -#ifdef LINUX -#include -#include /* undocumented (?) */ -#endif -#if defined ( hpux ) -#include -#endif - -static size_t dblayer_getvirtualmemsize() -{ - struct rlimit rl; - - /* the maximum size of a process's total available memory, in bytes */ - getrlimit(RLIMIT_AS, &rl); - return rl.rlim_cur; -} - -/* pages = number of pages of physical ram on the machine (corrected for 32-bit build on 64-bit machine). - * procpages = pages currently used by this process (or working set size, sometimes) - * availpages = some notion of the number of pages 'free'. Typically this number is not useful. 
- */ -void dblayer_sys_pages(size_t *pagesize, size_t *pages, size_t *procpages, size_t *availpages) -{ - *pagesize = *pages = *availpages = 0; - if (procpages) - *procpages = 0; - -#ifdef OS_solaris - *pagesize = (int)sysconf(_SC_PAGESIZE); - *pages = (int)sysconf(_SC_PHYS_PAGES); - *availpages = dblayer_getvirtualmemsize() / *pagesize; - /* solaris has THE most annoying way to get this info */ - if (procpages) { - struct prpsinfo psi; - char fn[40]; - int fd; - - sprintf(fn, "/proc/%d", getpid()); - fd = open(fn, O_RDONLY); - if (fd >= 0) { - memset(&psi, 0, sizeof(psi)); - if (ioctl(fd, PIOCPSINFO, (void *)&psi) == 0) - *procpages = psi.pr_size; - close(fd); - } - } -#endif - -#ifdef LINUX - { - struct sysinfo si; - size_t pages_per_mem_unit = 0; - size_t mem_units_per_page = 0; /* We don't know if these units are really pages */ - - sysinfo(&si); - *pagesize = getpagesize(); - if (si.mem_unit > *pagesize) { - pages_per_mem_unit = si.mem_unit / *pagesize; - *pages = si.totalram * pages_per_mem_unit; - } else { - mem_units_per_page = *pagesize / si.mem_unit; - *pages = si.totalram / mem_units_per_page; - } - *availpages = dblayer_getvirtualmemsize() / *pagesize; - /* okay i take that back, linux's method is more retarded here. - * hopefully linux doesn't have the FILE* problem that solaris does - * (where you can't use FILE if you have more than 256 fd's open) - */ - if (procpages) { - FILE *f; - char fn[40], s[80]; - - sprintf(fn, "/proc/%d/status", getpid()); - f = fopen(fn, "r"); - if (!f) /* fopen failed */ - return; - while (! feof(f)) { - fgets(s, 79, f); - if (feof(f)) - break; - if (strncmp(s, "VmSize:", 7) == 0) { - sscanf(s+7, "%lu", (long unsigned int *)procpages); - break; - } - } - fclose(f); - /* procpages is now in 1k chunks, not pages... 
*/ - *procpages /= (*pagesize / 1024); - } - } -#endif - -#if defined ( hpux ) - { - struct pst_static pst; - int rval = pstat_getstatic(&pst, sizeof(pst), (size_t)1, 0); - if (rval < 0) /* pstat_getstatic failed */ - return; - *pagesize = pst.page_size; - *pages = pst.physical_memory; - *availpages = dblayer_getvirtualmemsize() / *pagesize; - if (procpages) - { -#define BURST (size_t)32 /* get BURST proc info at one time... */ - struct pst_status psts[BURST]; - int i, count; - int idx = 0; /* index within the context */ - int mypid = getpid(); - - *procpages = 0; - /* loop until count == 0, will occur all have been returned */ - while ((count = pstat_getproc(psts, sizeof(psts[0]), BURST, idx)) > 0) { - /* got count (max of BURST) this time. process them */ - for (i = 0; i < count; i++) { - if (psts[i].pst_pid == mypid) - { - *procpages = (size_t)(psts[i].pst_dsize + psts[i].pst_tsize + psts[i].pst_ssize); - break; - } - } - if (i < count) - break; - - /* - * now go back and do it again, using the next index after - * the current 'burst' - */ - idx = psts[count-1].pst_idx + 1; - } - } - } -#endif - /* If this is a 32-bit build, it might be running on a 64-bit machine, - * in which case, if the box has tons of ram, we can end up telling - * the auto cache code to use more memory than the process can address. - * so we cap the number returned here. - */ -#if defined(__LP64__) || defined (_LP64) -#else - { - size_t one_gig_pages = GIGABYTE / *pagesize; - if (*pages > (2 * one_gig_pages) ) { - LDAPDebug(LDAP_DEBUG_TRACE,"More than 2Gbytes physical memory detected. 
Since this is a 32-bit process, truncating memory size used for auto cache calculations to 2Gbytes\n", - 0, 0, 0); - *pages = (2 * one_gig_pages); - } - } -#endif -} - - -int dblayer_is_cachesize_sane(size_t *cachesize) -{ - size_t pages = 0, pagesize = 0, procpages = 0, availpages = 0; - int issane = 1; - - dblayer_sys_pages(&pagesize, &pages, &procpages, &availpages); - if (!pagesize || !pages) - return 1; /* do nothing when we can't get the avail mem */ - /* If the requested cache size is larger than the remaining pysical memory - * after the current working set size for this process has been subtracted, - * then we say that's insane and try to correct. - */ - issane = (int)((*cachesize / pagesize) <= (pages - procpages)); - if (!issane) { - *cachesize = (size_t)((pages - procpages) * pagesize); - } - - return issane; -} - static void dblayer_dump_config_tracing(dblayer_private *priv) { @@ -1567,7 +1382,7 @@ dblayer_start(struct ldbminfo *li, int dbmode) /* Sanity check on cache size on platforms which allow us to figure out * the available phys mem */ - if (!dblayer_is_cachesize_sane(&(priv->dblayer_cachesize))) { + if (!util_is_cachesize_sane(&(priv->dblayer_cachesize))) { /* Oops---looks like the admin misconfigured, let's warn them */ LDAPDebug(LDAP_DEBUG_ANY,"WARNING---Likely CONFIGURATION ERROR---" "dbcachesize is configured to use more than the available " @@ -1887,8 +1702,7 @@ check_and_set_import_cache(struct ldbminfo *li) size_t page_delta = 0; char s[64]; /* big enough to hold %ld */ - dblayer_sys_pages(&pagesize, &pages, &procpages, &availpages); - if (0 == pagesize || 0 == pages) { + if (util_info_sys_pages(&pagesize, &pages, &procpages, &availpages) != 0 || 0 == pagesize || 0 == pages) { LDAPDebug2Args(LDAP_DEBUG_ANY, "check_and_set_import_cache: " "Failed to get pagesize: %ld or pages: %ld\n", pagesize, pages); @@ -1928,7 +1742,12 @@ check_and_set_import_cache(struct ldbminfo *li) } else { /* autosizing importCache */ /* ./125 instead of ./100 is 
for adjusting the BDB overhead. */ +#ifdef LINUX + /* On linux, availpages is correct so we should use it! */ + import_pages = (li->li_import_cache_autosize * availpages) / 125; +#else import_pages = (li->li_import_cache_autosize * pages) / 125; +#endif } page_delta = pages - import_pages; diff --git a/ldap/servers/slapd/back-ldbm/ldbm_config.c b/ldap/servers/slapd/back-ldbm/ldbm_config.c index e265bb9..341fdff 100644 --- a/ldap/servers/slapd/back-ldbm/ldbm_config.c +++ b/ldap/servers/slapd/back-ldbm/ldbm_config.c @@ -402,6 +402,17 @@ static int ldbm_config_dbcachesize_set(void *arg, void *value, char *errorbuf, i struct ldbminfo *li = (struct ldbminfo *) arg; int retval = LDAP_SUCCESS; size_t val = (size_t)value; + size_t delta = (size_t)value; + + /* There is an error here. We check the new val against our current mem-alloc + * Issue is that we already are using system pages, so while our value *might* + * be valid, we may reject it here due to the current procs page usage. + * + * So how do we solve this? If we are setting a SMALLER value than we + * currently have ALLOW it, because we already passed the cache sanity. + * If we are setting a LARGER value, we check the delta of the two, and make + * sure that it is sane. 
+ */ if (apply) { /* Stop the user configuring a stupidly small cache */ @@ -411,12 +422,15 @@ static int ldbm_config_dbcachesize_set(void *arg, void *value, char *errorbuf, i LDAPDebug( LDAP_DEBUG_ANY,"WARNING: cache too small, increasing to %dK bytes\n", DBDEFMINSIZ/1000, 0, 0); val = DBDEFMINSIZ; - } else if (!dblayer_is_cachesize_sane(&val)){ - PR_snprintf(errorbuf, SLAPI_DSE_RETURNTEXT_SIZE, - "Error: dbcachememsize value is too large."); - LDAPDebug( LDAP_DEBUG_ANY,"Error: dbcachememsize value is too large.\n", - 0, 0, 0); - return LDAP_UNWILLING_TO_PERFORM; + } else if (val > li->li_dbcachesize) { + delta = val - li->li_dbcachesize; + if (!util_is_cachesize_sane(&delta)){ + PR_snprintf(errorbuf, SLAPI_DSE_RETURNTEXT_SIZE, + "Error: dbcachememsize value is too large."); + LDAPDebug( LDAP_DEBUG_ANY,"Error: dbcachememsize value is too large.\n", + 0, 0, 0); + return LDAP_UNWILLING_TO_PERFORM; + } } if (CONFIG_PHASE_RUNNING == phase) { li->li_new_dbcachesize = val; @@ -469,14 +483,28 @@ static int ldbm_config_dbncache_set(void *arg, void *value, char *errorbuf, int struct ldbminfo *li = (struct ldbminfo *) arg; int retval = LDAP_SUCCESS; size_t val = (size_t) ((uintptr_t)value); + size_t delta = 0; + + /* There is an error here. We check the new val against our current mem-alloc + * Issue is that we already are using system pages, so while our value *might* + * be valid, we may reject it here due to the current procs page usage. + * + * So how do we solve this? If we are setting a SMALLER value than we + * currently have ALLOW it, because we already passed the cache sanity. + * If we are setting a LARGER value, we check the delta of the two, and make + * sure that it is sane. 
+ */ if (apply) { - if (!dblayer_is_cachesize_sane(&val)){ - PR_snprintf(errorbuf, SLAPI_DSE_RETURNTEXT_SIZE, - "Error: dbncache size value is too large."); - LDAPDebug( LDAP_DEBUG_ANY,"Error: dbncache size value is too large.\n", - val, 0, 0); - return LDAP_UNWILLING_TO_PERFORM; + if (val > li->li_dbncache) { + delta = val - li->li_dbncache; + if (!util_is_cachesize_sane(&delta)){ + PR_snprintf(errorbuf, SLAPI_DSE_RETURNTEXT_SIZE, + "Error: dbncache size value is too large."); + LDAPDebug( LDAP_DEBUG_ANY,"Error: dbncache size value is too large.\n", + val, 0, 0); + return LDAP_UNWILLING_TO_PERFORM; + } } if (CONFIG_PHASE_RUNNING == phase) { @@ -1037,14 +1065,28 @@ static int ldbm_config_db_cache_set(void *arg, void *value, char *errorbuf, int struct ldbminfo *li = (struct ldbminfo *) arg; int retval = LDAP_SUCCESS; size_t val = (size_t) ((uintptr_t)value); + size_t delta = 0; + + /* There is an error here. We check the new val against our current mem-alloc + * Issue is that we already are using system pages, so while our value *might* + * be valid, we may reject it here due to the current procs page usage. + * + * So how do we solve this? If we are setting a SMALLER value than we + * currently have ALLOW it, because we already passed the cache sanity. + * If we are setting a LARGER value, we check the delta of the two, and make + * sure that it is sane. 
+ */ if (apply) { - if (!dblayer_is_cachesize_sane(&val)){ - PR_snprintf(errorbuf, SLAPI_DSE_RETURNTEXT_SIZE, - "Error: db cachesize value is too large"); - LDAPDebug( LDAP_DEBUG_ANY,"Error: db cachesize value is too large.\n", - val, 0, 0); - return LDAP_UNWILLING_TO_PERFORM; + if (val > li->li_dblayer_private->dblayer_cache_config) { + delta = val - li->li_dblayer_private->dblayer_cache_config; + if (!util_is_cachesize_sane(&delta)){ + PR_snprintf(errorbuf, SLAPI_DSE_RETURNTEXT_SIZE, + "Error: db cachesize value is too large"); + LDAPDebug( LDAP_DEBUG_ANY,"Error: db cachesize value is too large.\n", + val, 0, 0); + return LDAP_UNWILLING_TO_PERFORM; + } } li->li_dblayer_private->dblayer_cache_config = val; } @@ -1158,13 +1200,26 @@ static int ldbm_config_import_cachesize_set(void *arg, void *value, char *errorb { struct ldbminfo *li = (struct ldbminfo *)arg; size_t val = (size_t)value; + size_t delta = (size_t)value; + /* There is an error here. We check the new val against our current mem-alloc + * Issue is that we already are using system pages, so while our value *might* + * be valid, we may reject it here due to the current procs page usage. + * + * So how do we solve this? If we are setting a SMALLER value than we + * currently have ALLOW it, because we already passed the cache sanity. + * If we are setting a LARGER value, we check the delta of the two, and make + * sure that it is sane. 
+ */ if (apply){ - if (!dblayer_is_cachesize_sane(&val)){ - PR_snprintf(errorbuf, SLAPI_DSE_RETURNTEXT_SIZE, - "Error: import cachesize value is too large."); - LDAPDebug( LDAP_DEBUG_ANY,"Error: import cachesize value is too large.\n", - 0, 0, 0); - return LDAP_UNWILLING_TO_PERFORM; + if (val > li->li_import_cachesize) { + delta = val - li->li_import_cachesize; + if (!util_is_cachesize_sane(&delta)){ + PR_snprintf(errorbuf, SLAPI_DSE_RETURNTEXT_SIZE, + "Error: import cachesize value is too large."); + LDAPDebug( LDAP_DEBUG_ANY,"Error: import cachesize value is too large.\n", + 0, 0, 0); + return LDAP_UNWILLING_TO_PERFORM; + } } li->li_import_cachesize = val; } diff --git a/ldap/servers/slapd/back-ldbm/ldbm_instance_config.c b/ldap/servers/slapd/back-ldbm/ldbm_instance_config.c index e9db22b..1f750d4 100644 --- a/ldap/servers/slapd/back-ldbm/ldbm_instance_config.c +++ b/ldap/servers/slapd/back-ldbm/ldbm_instance_config.c @@ -92,17 +92,29 @@ ldbm_instance_config_cachememsize_set(void *arg, void *value, char *errorbuf, in ldbm_instance *inst = (ldbm_instance *) arg; int retval = LDAP_SUCCESS; size_t val = (size_t) value; - size_t chkval = val; + size_t delta = 0; /* Do whatever we can to make sure the data is ok. */ + /* There is an error here. We check the new val against our current mem-alloc + * Issue is that we already are using system pages, so while our value *might* + * be valid, we may reject it here due to the current procs page usage. + * + * So how do we solve this? If we are setting a SMALLER value than we + * currently have ALLOW it, because we already passed the cache sanity. + * If we are setting a LARGER value, we check the delta of the two, and make + * sure that it is sane. 
+ */ if (apply) { - if (!dblayer_is_cachesize_sane(&chkval)){ - PR_snprintf(errorbuf, SLAPI_DSE_RETURNTEXT_SIZE, - "Error: cachememsize value is too large."); - LDAPDebug( LDAP_DEBUG_ANY,"Error: cachememsize value is too large.\n", - 0, 0, 0); - return LDAP_UNWILLING_TO_PERFORM; + if (val > inst->inst_cache.c_maxsize) { + delta = val - inst->inst_cache.c_maxsize; + if (!util_is_cachesize_sane(&delta)){ + PR_snprintf(errorbuf, SLAPI_DSE_RETURNTEXT_SIZE, + "Error: cachememsize value is too large."); + LDAPDebug( LDAP_DEBUG_ANY,"Error: cachememsize value is too large.\n", + 0, 0, 0); + return LDAP_UNWILLING_TO_PERFORM; + } } cache_set_max_size(&(inst->inst_cache), val, CACHE_TYPE_ENTRY); } @@ -124,17 +136,29 @@ ldbm_instance_config_dncachememsize_set(void *arg, void *value, char *errorbuf, ldbm_instance *inst = (ldbm_instance *) arg; int retval = LDAP_SUCCESS; size_t val = (size_t)value; - size_t chkval = val; + size_t delta = 0; /* Do whatever we can to make sure the data is ok. */ + /* There is an error here. We check the new val against our current mem-alloc + * Issue is that we already are using system pages, so while our value *might* + * be valid, we may reject it here due to the current procs page usage. + * + * So how do we solve this? If we are setting a SMALLER value than we + * currently have ALLOW it, because we already passed the cache sanity. + * If we are setting a LARGER value, we check the delta of the two, and make + * sure that it is sane. 
+ */ if (apply) { - if (!dblayer_is_cachesize_sane(&chkval)){ - PR_snprintf(errorbuf, SLAPI_DSE_RETURNTEXT_SIZE, - "Error: dncachememsize value is too large."); - LDAPDebug( LDAP_DEBUG_ANY,"Error: dncachememsize value is too large.\n", - 0, 0, 0); - return LDAP_UNWILLING_TO_PERFORM; + if (val > inst->inst_dncache.c_maxsize) { + delta = val - inst->inst_dncache.c_maxsize; + if (!util_is_cachesize_sane(&delta)){ + PR_snprintf(errorbuf, SLAPI_DSE_RETURNTEXT_SIZE, + "Error: dncachememsize value is too large."); + LDAPDebug( LDAP_DEBUG_ANY,"Error: dncachememsize value is too large.\n", + 0, 0, 0); + return LDAP_UNWILLING_TO_PERFORM; + } } cache_set_max_size(&(inst->inst_dncache), val, CACHE_TYPE_DN); } diff --git a/ldap/servers/slapd/back-ldbm/proto-back-ldbm.h b/ldap/servers/slapd/back-ldbm/proto-back-ldbm.h index 37f9f20..86e2237 100644 --- a/ldap/servers/slapd/back-ldbm/proto-back-ldbm.h +++ b/ldap/servers/slapd/back-ldbm/proto-back-ldbm.h @@ -131,8 +131,6 @@ int dblayer_terminate(struct ldbminfo *li); int dblayer_close_indexes(backend *be); int dblayer_open_file(backend *be, char* indexname, int create, struct attrinfo *ai, DB **ppDB); int dblayer_close_file(DB **db); -void dblayer_sys_pages(size_t *pagesize, size_t *pages, size_t *procpages, size_t *availpages); -int dblayer_is_cachesize_sane(size_t *cachesize); void dblayer_remember_disk_filled(struct ldbminfo *li); int dblayer_open_huge_file(const char *path, int oflag, int mode); int dblayer_instance_start(backend *be, int normal_mode); diff --git a/ldap/servers/slapd/back-ldbm/start.c b/ldap/servers/slapd/back-ldbm/start.c index 53c47bd..5058942 100644 --- a/ldap/servers/slapd/back-ldbm/start.c +++ b/ldap/servers/slapd/back-ldbm/start.c @@ -118,7 +118,11 @@ ldbm_back_start( Slapi_PBlock *pb ) } else { size_t pagesize, pages, procpages, availpages; - dblayer_sys_pages(&pagesize, &pages, &procpages, &availpages); + if (util_info_sys_pages(&pagesize, &pages, &procpages, &availpages) != 0) { + LDAPDebug( 
LDAP_DEBUG_ANY, "start: Unable to determine system page limits\n", + 0, 0, 0 ); + return SLAPI_FAIL_GENERAL; + } if (pagesize) { char s[32]; /* big enough to hold %ld */ unsigned long cache_size_to_configure = 0; diff --git a/ldap/servers/slapd/slapi-private.h b/ldap/servers/slapd/slapi-private.h index 63fe8cc..fb7b5f8 100644 --- a/ldap/servers/slapd/slapi-private.h +++ b/ldap/servers/slapd/slapi-private.h @@ -1334,6 +1334,27 @@ char *slapi_getSSLVersion_str(PRUint16 vnum, char *buf, size_t bufsize); time_t slapi_parse_duration(const char *value); int slapi_is_duration_valid(const char *value); +/** + * Populate the pointers with the system memory information. + * At this time, Linux is the only "reliable" system for returning these values + * + * \param pagesize Will return the system page size in bytes. + * \param pages The total number of memory pages on the system. May include swap pages depending on OS. + * \param procpages Number of memory pages our current process is consuming. May not be accurate on all platforms as this could be the VMSize rather than the actual number of consumed pages. + * \param availpages Number of available pages of memory on the system. Not all operating systems set this correctly. + * + * \return 0 on success, non-zero on failure to determine memory sizings. + */ +int util_info_sys_pages(size_t *pagesize, size_t *pages, size_t *procpages, size_t *availpages); + +/** + * Determine if the requested cachesize will exceed the system memory limits causing an out of memory condition + * + * \param cachesize. The requested allocation. If this value is greater than the memory available, this value will be REDUCED to be valid. + * + * \return 0 if the size is "sane". 
1 if the value will cause OOM and has been REDUCED + */ +int util_is_cachesize_sane(size_t *cachesize); #ifdef __cplusplus } diff --git a/ldap/servers/slapd/util.c b/ldap/servers/slapd/util.c index dbe69f0..b7ecde1 100644 --- a/ldap/servers/slapd/util.c +++ b/ldap/servers/slapd/util.c @@ -25,6 +25,7 @@ #include "prinrval.h" #include "snmp_collator.h" #include +#include #define UTIL_ESCAPE_NONE 0 #define UTIL_ESCAPE_HEX 1 @@ -39,6 +40,24 @@ #define FILTER_BUF 128 /* initial buffer size for attr value */ #define BUF_INCR 16 /* the amount to increase the FILTER_BUF once it fills up */ +/* Used by our util_info_sys_pages function + * + * platforms supported so far: + * Solaris, Linux, Windows + */ +#ifdef OS_solaris +#include +#endif +#ifdef LINUX +#include +#endif +#if defined ( hpux ) +#include +#endif + + + + static int special_np(unsigned char c) { if (c == '\\') { @@ -1432,3 +1451,349 @@ slapi_uniqueIDRdnSize() } return util_uniqueidlen; } + + +/** + * Get the virtual memory size as defined by system rlimits. + * + * \return size_t bytes available + */ +static size_t util_getvirtualmemsize() +{ + struct rlimit rl; + /* the maximum size of a process's total available memory, in bytes */ + if (getrlimit(RLIMIT_AS, &rl) != 0) { + /* We received an error condition. There are a number of possible + * reasons we have have gotten here, but most likely is EINVAL, where + * rlim->rlim_cur was greater than rlim->rlim_max. + * As a result, we should return a 0, to tell the system we can't alloc + * memory. + */ + int errsrv = errno; + slapi_log_error(SLAPI_LOG_FATAL,"util_getvirtualmemsize", "ERROR: getrlimit returned non-zero. errno=%u\n", errsrv); + return 0; + } + return rl.rlim_cur; +} + +/* pages = number of pages of physical ram on the machine (corrected for 32-bit build on 64-bit machine). + * procpages = pages currently used by this process (or working set size, sometimes) + * availpages = some notion of the number of pages 'free'. 
Typically this number is not useful. + */ +int util_info_sys_pages(size_t *pagesize, size_t *pages, size_t *procpages, size_t *availpages) +{ + *pagesize = 0; + *pages = 0; + *availpages = 0; + if (procpages) + *procpages = 0; + +#ifdef OS_solaris + *pagesize = (int)sysconf(_SC_PAGESIZE); + *pages = (int)sysconf(_SC_PHYS_PAGES); + *availpages = util_getvirtualmemsize() / *pagesize; + /* solaris has THE most annoying way to get this info */ + if (procpages) { + struct prpsinfo psi; + char fn[40]; + int fd; + + sprintf(fn, "/proc/%d", getpid()); + fd = open(fn, O_RDONLY); + if (fd >= 0) { + memset(&psi, 0, sizeof(psi)); + if (ioctl(fd, PIOCPSINFO, (void *)&psi) == 0) + *procpages = psi.pr_size; + close(fd); + } + } +#endif + +#ifdef LINUX + { + /* + * On linux because of the way that the virtual memory system works, we + * don't really need to think about other processes, or fighting them. + * But that's not without quirks. + * + * We are given a virtual memory space, represented by vsize (man 5 proc) + * This space is a "funny number". It's a best effort based system + * where linux instead of telling us how much memory *actually* exists + * for us to use, gives us a virtual memory allocation which is the + * value of ram + swap. + * + * But none of these pages even exist or belong to us on the real system + * until will malloc them AND write a non-zero to them. + * + * The biggest issue with this is that vsize does NOT consider the + * effect other processes have on the system. So a process can malloc + * 2 Gig from the host, and our vsize doesn't reflect that until we + * suddenly can't malloc anything. + * + * We can see exactly what we are using inside of the vmm by + * looking at rss (man 5 proc). This shows us the current actual + * allocation of memory we are using. This is a good thing. 
+ * + * We obviously don't want to have any pages in swap, but sometimes we + * can't help that: And there is also no guarantee that while we have + * X bytes in vsize, that we can even allocate any of them. Plus, we + * don't know if we are about to allocate to swap or not .... or get us + * killed in a blaze of oom glory. + * + * So there are now two strategies avaliable in this function. + * The first is to blindly accept what the VMM tells us about vsize + * while we hope and pray that we don't get nailed because we used + * too much. + * + * The other is a more conservative approach: We check vsize from + * proc/pid/status, and we check /proc/meminfo for freemem + * Which ever value is "lower" is the upper bound on pages we could + * potentially allocate: generally, this will be MemAvailable. + */ + + size_t vmsize = 0; + size_t freesize = 0; + + *pagesize = getpagesize(); + + /* Get the amount of freeram, rss, and the vmsize */ + + FILE *f; + char fn[40], s[80]; + + sprintf(fn, "/proc/%d/status", getpid()); + f = fopen(fn, "r"); + if (!f) { /* fopen failed */ + /* We should probably make noise here! */ + int errsrv = errno; + slapi_log_error(SLAPI_LOG_FATAL,"util_info_sys_pages", "ERROR: Unable to open file /proc/%d/status. errno=%u\n", getpid(), errsrv); + return 1; + } + while (! feof(f)) { + fgets(s, 79, f); + if (feof(f)) { + break; + } + /* VmRSS shows us what we are ACTUALLY using for proc pages + * Rather than "funny" pages. + */ + if (strncmp(s, "VmSize:", 7) == 0) { + sscanf(s+7, "%lu", (long unsigned int *)&vmsize); + } + if (strncmp(s, "VmRSS:", 6) == 0) { + sscanf(s+6, "%lu", (long unsigned int *)procpages); + } + } + fclose(f); + + FILE *fm; + char *fmn = "/proc/meminfo"; + fm = fopen(fmn, "r"); + if (!fm) { + int errsrv = errno; + slapi_log_error(SLAPI_LOG_FATAL,"util_info_sys_pages", "ERROR: Unable to open file /proc/meminfo. errno=%u\n", errsrv); + return 1; + } + while (! feof(fm)) { + fgets(s, 79, fm); + /* Is this really needed? 
*/ + if (feof(fm)) { + break; + } + if (strncmp(s, "MemTotal:", 9) == 0) { + sscanf(s+9, "%lu", (long unsigned int *)pages); + } + if (strncmp(s, "MemAvailable:", 13) == 0) { + sscanf(s+13, "%lu", (long unsigned int *)&freesize); + } + } + fclose(fm); + + + *pages /= (*pagesize / 1024); + freesize /= (*pagesize / 1024); + /* procpages is now in kb not pages... */ + *procpages /= (*pagesize / 1024); + /* This is in bytes, make it pages */ + *availpages = util_getvirtualmemsize() / *pagesize; + /* Now we have vmsize, the availpages from getrlimit, our freesize */ + vmsize /= (*pagesize / 1024); + + /* Pages is the total ram on the system. We should smaller of: + * - vmsize + * - pages + */ + LDAPDebug(LDAP_DEBUG_TRACE,"util_info_sys_pages pages=%lu, vmsize=%lu, \n", + (unsigned long) *pages, (unsigned long) vmsize,0); + if (vmsize < *pages) { + LDAPDebug(LDAP_DEBUG_TRACE,"util_info_sys_pages using vmsize for pages \n",0,0,0); + *pages = vmsize; + } else { + LDAPDebug(LDAP_DEBUG_TRACE,"util_info_sys_pages using pages for pages \n",0,0,0); + } + + /* Availpages is how much we *could* alloc. 
We should take the smallest: + * - pages + * - getrlimit (availpages) + * - freesize + */ + LDAPDebug(LDAP_DEBUG_TRACE,"util_info_sys_pages pages=%lu, getrlim=%lu, freesize=%lu\n", + (unsigned long)*pages, (unsigned long)*availpages, (unsigned long)freesize); + if (*pages < *availpages && *pages < freesize) { + LDAPDebug(LDAP_DEBUG_TRACE,"util_info_sys_pages using pages for availpages \n",0,0,0); + *availpages = *pages; + } else if ( freesize < *pages && freesize < *availpages ) { + LDAPDebug(LDAP_DEBUG_TRACE,"util_info_sys_pages using freesize for availpages \n",0,0,0); + *availpages = freesize; + } else { + LDAPDebug(LDAP_DEBUG_TRACE,"util_info_sys_pages using getrlim for availpages \n",0,0,0); + } + + + } +#endif /* linux */ + + + +#if defined ( hpux ) + { + struct pst_static pst; + int rval = pstat_getstatic(&pst, sizeof(pst), (size_t)1, 0); + if (rval < 0) { /* pstat_getstatic failed */ + return 1; + } + *pagesize = pst.page_size; + *pages = pst.physical_memory; + *availpages = util_getvirtualmemsize() / *pagesize; + if (procpages) + { +#define BURST (size_t)32 /* get BURST proc info at one time... */ + struct pst_status psts[BURST]; + int i, count; + int idx = 0; /* index within the context */ + int mypid = getpid(); + + *procpages = 0; + /* loop until count == 0, will occur all have been returned */ + while ((count = pstat_getproc(psts, sizeof(psts[0]), BURST, idx)) > 0) { + /* got count (max of BURST) this time. 
process them */ + for (i = 0; i < count; i++) { + if (psts[i].pst_pid == mypid) + { + *procpages = (size_t)(psts[i].pst_dsize + psts[i].pst_tsize + psts[i].pst_ssize); + break; + } + } + if (i < count) + break; + + /* + * now go back and do it again, using the next index after + * the current 'burst' + */ + idx = psts[count-1].pst_idx + 1; + } + } + } +#endif + /* If this is a 32-bit build, it might be running on a 64-bit machine, + * in which case, if the box has tons of ram, we can end up telling + * the auto cache code to use more memory than the process can address. + * so we cap the number returned here. + */ +#if defined(__LP64__) || defined (_LP64) +#else + { + size_t one_gig_pages = GIGABYTE / *pagesize; + if (*pages > (2 * one_gig_pages) ) { + LDAPDebug(LDAP_DEBUG_TRACE,"More than 2Gbytes physical memory detected. Since this is a 32-bit process, truncating memory size used for auto cache calculations to 2Gbytes\n", + 0, 0, 0); + *pages = (2 * one_gig_pages); + } + } +#endif + + /* This is stupid. If you set %u to %zu to print a size_t, you get literal %zu in your logs + * So do the filthy cast instead. 
+ */ + slapi_log_error(SLAPI_LOG_FATAL,"util_info_sys_pages", "USING pages=%lu, procpages=%lu, availpages=%lu \n", + (unsigned long)*pages, (unsigned long)*procpages, (unsigned long)*availpages); + return 0; + +} + +int util_is_cachesize_sane(size_t *cachesize) +{ + size_t pages = 0; + size_t pagesize = 0; + size_t procpages = 0; + size_t availpages = 0; + + size_t cachepages = 0; + + int issane = 1; + + if (util_info_sys_pages(&pagesize, &pages, &procpages, &availpages) != 0) { + goto out; + } +#ifdef LINUX + /* Linux we calculate availpages correctly, so USE IT */ + if (!pagesize || !availpages) { + goto out; + } +#else + if (!pagesize || !pages) { + goto out; + } +#endif + /* do nothing when we can't get the avail mem */ + + + /* If the requested cache size is larger than the remaining physical memory + * after the current working set size for this process has been subtracted, + * then we say that's insane and try to correct. + */ + + cachepages = *cachesize / pagesize; + LDAPDebug(LDAP_DEBUG_TRACE,"util_is_cachesize_sane cachesize=%lu / pagesize=%lu \n", + (unsigned long)*cachesize,(unsigned long)pagesize,0); + +#ifdef LINUX + /* Linux we calculate availpages correctly, so USE IT */ + issane = (int)(cachepages <= availpages); + LDAPDebug(LDAP_DEBUG_TRACE,"util_is_cachesize_sane cachepages=%lu <= availpages=%lu\n", + (unsigned long)cachepages,(unsigned long)availpages,0); + + if (!issane) { + /* Since we are ask for more than what's available, we give half of + * the remaining system mem to the cachesize instead, and log a warning + */ + *cachesize = (size_t)((availpages / 2) * pagesize); + slapi_log_error(SLAPI_LOG_FATAL, "util_is_cachesize_sane", "WARNING adjusted cachesize to %lu\n", (unsigned long)*cachesize); + } +#else + size_t freepages = 0; + freepages = pages - procpages; + LDAPDebug(LDAP_DEBUG_TRACE,"util_is_cachesize_sane pages=%lu - procpages=%lu\n", + (unsigned long)pages,(unsigned long)procpages,0); + + issane = (int)(cachepages <= freepages); + 
LDAPDebug(LDAP_DEBUG_TRACE,"util_is_cachesize_sane cachepages=%lu <= freepages=%lu\n", + (unsigned long)cachepages,(unsigned long)freepages,0); + + if (!issane) { + *cachesize = (size_t)((pages - procpages) * pagesize); + slapi_log_error(SLAPI_LOG_FATAL, "util_is_cachesize_sane", "util_is_cachesize_sane WARNING adjusted cachesize to %lu\n", + (unsigned long )*cachesize); + } +#endif +out: + if (!issane) { + slapi_log_error(SLAPI_LOG_FATAL,"util_is_cachesize_sane", "WARNING: Cachesize not sane \n"); + } + + return issane; +} + + +