
Many of these are pointless (e.g. there is no need to explicitly turn on spellchecking and language dictionaries for the manpages by default). The only useful modelines are the ones enforcing the project coding standards for indentation style (and "maybe" filetype/syntax, but everything except the asciidoc manpages and makepkg.conf is already autodetected), and indent style can be applied more easily with .editorconfig Signed-off-by: Eli Schwartz <eschwartz@archlinux.org> Signed-off-by: Allan McRae <allan@archlinux.org>
351 lines
9.5 KiB
C
351 lines
9.5 KiB
C
/*
|
|
* pkghash.c
|
|
*
|
|
* Copyright (c) 2011-2018 Pacman Development Team <pacman-dev@archlinux.org>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include <errno.h>
|
|
|
|
#include "pkghash.h"
|
|
#include "util.h"
|
|
|
|
/* List of primes for possible sizes of hash tables.
|
|
*
|
|
* The maximum table size is the last prime under 1,000,000. That is
|
|
* more than an order of magnitude greater than the number of packages
|
|
* in any Linux distribution, and well under UINT_MAX.
|
|
*/
|
|
static const unsigned int prime_list[] =
|
|
{
|
|
11u, 13u, 17u, 19u, 23u, 29u, 31u, 37u, 41u, 43u, 47u,
|
|
53u, 59u, 61u, 67u, 71u, 73u, 79u, 83u, 89u, 97u, 103u,
|
|
109u, 113u, 127u, 137u, 139u, 149u, 157u, 167u, 179u, 193u,
|
|
199u, 211u, 227u, 241u, 257u, 277u, 293u, 313u, 337u, 359u,
|
|
383u, 409u, 439u, 467u, 503u, 541u, 577u, 619u, 661u, 709u,
|
|
761u, 823u, 887u, 953u, 1031u, 1109u, 1193u, 1289u, 1381u,
|
|
1493u, 1613u, 1741u, 1879u, 2029u, 2179u, 2357u, 2549u,
|
|
2753u, 2971u, 3209u, 3469u, 3739u, 4027u, 4349u, 4703u,
|
|
5087u, 5503u, 5953u, 6427u, 6949u, 7517u, 8123u, 8783u,
|
|
9497u, 10273u, 11113u, 12011u, 12983u, 14033u, 15173u,
|
|
16411u, 17749u, 19183u, 20753u, 22447u, 24281u, 26267u,
|
|
28411u, 30727u, 33223u, 35933u, 38873u, 42043u, 45481u,
|
|
49201u, 53201u, 57557u, 62233u, 67307u, 72817u, 78779u,
|
|
85229u, 92203u, 99733u, 107897u, 116731u, 126271u, 136607u,
|
|
147793u, 159871u, 172933u, 187091u, 202409u, 218971u, 236897u,
|
|
256279u, 277261u, 299951u, 324503u, 351061u, 379787u, 410857u,
|
|
444487u, 480881u, 520241u, 562841u, 608903u, 658753u, 712697u,
|
|
771049u, 834181u, 902483u, 976369u
|
|
};
|
|
|
|
/* How far forward do we look when linear probing for a spot? */
|
|
static const unsigned int stride = 1;
|
|
/* What is the maximum load percentage of our hash table? */
|
|
static const double max_hash_load = 0.68;
|
|
/* Initial load percentage given a certain size */
|
|
static const double initial_hash_load = 0.58;
|
|
|
|
/* Allocate a hash table with space for at least "size" elements */
|
|
alpm_pkghash_t *_alpm_pkghash_create(unsigned int size)
|
|
{
|
|
alpm_pkghash_t *hash = NULL;
|
|
unsigned int i, loopsize;
|
|
|
|
CALLOC(hash, 1, sizeof(alpm_pkghash_t), return NULL);
|
|
size = size / initial_hash_load + 1;
|
|
|
|
loopsize = ARRAYSIZE(prime_list);
|
|
for(i = 0; i < loopsize; i++) {
|
|
if(prime_list[i] > size) {
|
|
hash->buckets = prime_list[i];
|
|
hash->limit = hash->buckets * max_hash_load;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if(hash->buckets < size) {
|
|
errno = ERANGE;
|
|
free(hash);
|
|
return NULL;
|
|
}
|
|
|
|
CALLOC(hash->hash_table, hash->buckets, sizeof(alpm_list_t *), \
|
|
free(hash); return NULL);
|
|
|
|
return hash;
|
|
}
|
|
|
|
static unsigned int get_hash_position(unsigned long name_hash,
|
|
alpm_pkghash_t *hash)
|
|
{
|
|
unsigned int position;
|
|
|
|
position = name_hash % hash->buckets;
|
|
|
|
/* collision resolution using open addressing with linear probing */
|
|
while(hash->hash_table[position] != NULL) {
|
|
position += stride;
|
|
while(position >= hash->buckets) {
|
|
position -= hash->buckets;
|
|
}
|
|
}
|
|
|
|
return position;
|
|
}
|
|
|
|
/* Expand the hash table size to the next increment and rebin the entries */
|
|
static alpm_pkghash_t *rehash(alpm_pkghash_t *oldhash)
|
|
{
|
|
alpm_pkghash_t *newhash;
|
|
unsigned int newsize, i;
|
|
|
|
/* Hash tables will need resized in two cases:
|
|
* - adding packages to the local database
|
|
* - poor estimation of the number of packages in sync database
|
|
*
|
|
* For small hash tables sizes (<500) the increase in size is by a
|
|
* minimum of a factor of 2 for optimal rehash efficiency. For
|
|
* larger database sizes, this increase is reduced to avoid excess
|
|
* memory allocation as both scenarios requiring a rehash should not
|
|
* require a table size increase that large. */
|
|
if(oldhash->buckets < 500) {
|
|
newsize = oldhash->buckets * 2;
|
|
} else if(oldhash->buckets < 2000) {
|
|
newsize = oldhash->buckets * 3 / 2;
|
|
} else if(oldhash->buckets < 5000) {
|
|
newsize = oldhash->buckets * 4 / 3;
|
|
} else {
|
|
newsize = oldhash->buckets + 1;
|
|
}
|
|
|
|
newhash = _alpm_pkghash_create(newsize);
|
|
if(newhash == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
newhash->list = oldhash->list;
|
|
oldhash->list = NULL;
|
|
|
|
for(i = 0; i < oldhash->buckets; i++) {
|
|
if(oldhash->hash_table[i] != NULL) {
|
|
alpm_pkg_t *package = oldhash->hash_table[i]->data;
|
|
unsigned int position = get_hash_position(package->name_hash, newhash);
|
|
|
|
newhash->hash_table[position] = oldhash->hash_table[i];
|
|
oldhash->hash_table[i] = NULL;
|
|
}
|
|
}
|
|
|
|
newhash->entries = oldhash->entries;
|
|
|
|
_alpm_pkghash_free(oldhash);
|
|
|
|
return newhash;
|
|
}
|
|
|
|
static alpm_pkghash_t *pkghash_add_pkg(alpm_pkghash_t **hashref, alpm_pkg_t *pkg,
|
|
int sorted)
|
|
{
|
|
alpm_list_t *ptr;
|
|
unsigned int position;
|
|
alpm_pkghash_t *hash;
|
|
|
|
if(pkg == NULL || hashref == NULL || *hashref == NULL) {
|
|
return NULL;
|
|
}
|
|
hash = *hashref;
|
|
|
|
if(hash->entries >= hash->limit) {
|
|
if((hash = rehash(hash)) == NULL) {
|
|
/* resizing failed and there are no more open buckets */
|
|
return NULL;
|
|
}
|
|
*hashref = hash;
|
|
}
|
|
|
|
position = get_hash_position(pkg->name_hash, hash);
|
|
|
|
MALLOC(ptr, sizeof(alpm_list_t), return NULL);
|
|
|
|
ptr->data = pkg;
|
|
ptr->prev = ptr;
|
|
ptr->next = NULL;
|
|
|
|
hash->hash_table[position] = ptr;
|
|
if(!sorted) {
|
|
hash->list = alpm_list_join(hash->list, ptr);
|
|
} else {
|
|
hash->list = alpm_list_mmerge(hash->list, ptr, _alpm_pkg_cmp);
|
|
}
|
|
|
|
hash->entries += 1;
|
|
return hash;
|
|
}
|
|
|
|
alpm_pkghash_t *_alpm_pkghash_add(alpm_pkghash_t **hash, alpm_pkg_t *pkg)
|
|
{
|
|
return pkghash_add_pkg(hash, pkg, 0);
|
|
}
|
|
|
|
alpm_pkghash_t *_alpm_pkghash_add_sorted(alpm_pkghash_t **hash, alpm_pkg_t *pkg)
|
|
{
|
|
return pkghash_add_pkg(hash, pkg, 1);
|
|
}
|
|
|
|
static unsigned int move_one_entry(alpm_pkghash_t *hash,
|
|
unsigned int start, unsigned int end)
|
|
{
|
|
/* Iterate backwards from 'end' to 'start', seeing if any of the items
|
|
* would hash to 'start'. If we find one, we move it there and break. If
|
|
* we get all the way back to position and find none that hash to it, we
|
|
* also end iteration. Iterating backwards helps prevent needless shuffles;
|
|
* we will never need to move more than one item per function call. The
|
|
* return value is our current iteration location; if this is equal to
|
|
* 'start' we can stop this madness. */
|
|
while(end != start) {
|
|
alpm_list_t *i = hash->hash_table[end];
|
|
alpm_pkg_t *info = i->data;
|
|
unsigned int new_position = get_hash_position(info->name_hash, hash);
|
|
|
|
if(new_position == start) {
|
|
hash->hash_table[start] = i;
|
|
hash->hash_table[end] = NULL;
|
|
break;
|
|
}
|
|
|
|
/* the odd math ensures we are always positive, e.g.
|
|
* e.g. (0 - 1) % 47 == -1
|
|
* e.g. (47 + 0 - 1) % 47 == 46 */
|
|
end = (hash->buckets + end - stride) % hash->buckets;
|
|
}
|
|
return end;
|
|
}
|
|
|
|
/**
|
|
* @brief Remove a package from a pkghash.
|
|
*
|
|
* @param hash the hash to remove the package from
|
|
* @param pkg the package we are removing
|
|
* @param data output parameter containing the removed item
|
|
*
|
|
* @return the resultant hash
|
|
*/
|
|
alpm_pkghash_t *_alpm_pkghash_remove(alpm_pkghash_t *hash, alpm_pkg_t *pkg,
|
|
alpm_pkg_t **data)
|
|
{
|
|
alpm_list_t *i;
|
|
unsigned int position;
|
|
|
|
if(data) {
|
|
*data = NULL;
|
|
}
|
|
|
|
if(pkg == NULL || hash == NULL) {
|
|
return hash;
|
|
}
|
|
|
|
position = pkg->name_hash % hash->buckets;
|
|
while((i = hash->hash_table[position]) != NULL) {
|
|
alpm_pkg_t *info = i->data;
|
|
|
|
if(info->name_hash == pkg->name_hash &&
|
|
strcmp(info->name, pkg->name) == 0) {
|
|
unsigned int stop, prev;
|
|
|
|
/* remove from list and hash */
|
|
hash->list = alpm_list_remove_item(hash->list, i);
|
|
if(data) {
|
|
*data = info;
|
|
}
|
|
hash->hash_table[position] = NULL;
|
|
free(i);
|
|
hash->entries -= 1;
|
|
|
|
/* Potentially move entries following removed entry to keep open
|
|
* addressing collision resolution working. We start by finding the
|
|
* next null bucket to know how far we have to look. */
|
|
stop = position + stride;
|
|
while(stop >= hash->buckets) {
|
|
stop -= hash->buckets;
|
|
}
|
|
while(hash->hash_table[stop] != NULL && stop != position) {
|
|
stop += stride;
|
|
while(stop >= hash->buckets) {
|
|
stop -= hash->buckets;
|
|
}
|
|
}
|
|
stop = (hash->buckets + stop - stride) % hash->buckets;
|
|
|
|
/* We now search backwards from stop to position. If we find an
|
|
* item that now hashes to position, we will move it, and then try
|
|
* to plug the new hole we just opened up, until we finally don't
|
|
* move anything. */
|
|
while((prev = move_one_entry(hash, position, stop)) != position) {
|
|
position = prev;
|
|
}
|
|
|
|
return hash;
|
|
}
|
|
|
|
position += stride;
|
|
while(position >= hash->buckets) {
|
|
position -= hash->buckets;
|
|
}
|
|
}
|
|
|
|
return hash;
|
|
}
|
|
|
|
void _alpm_pkghash_free(alpm_pkghash_t *hash)
|
|
{
|
|
if(hash != NULL) {
|
|
unsigned int i;
|
|
for(i = 0; i < hash->buckets; i++) {
|
|
free(hash->hash_table[i]);
|
|
}
|
|
free(hash->hash_table);
|
|
}
|
|
free(hash);
|
|
}
|
|
|
|
alpm_pkg_t *_alpm_pkghash_find(alpm_pkghash_t *hash, const char *name)
|
|
{
|
|
alpm_list_t *lp;
|
|
unsigned long name_hash;
|
|
unsigned int position;
|
|
|
|
if(name == NULL || hash == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
name_hash = _alpm_hash_sdbm(name);
|
|
|
|
position = name_hash % hash->buckets;
|
|
|
|
while((lp = hash->hash_table[position]) != NULL) {
|
|
alpm_pkg_t *info = lp->data;
|
|
|
|
if(info->name_hash == name_hash && strcmp(info->name, name) == 0) {
|
|
return info;
|
|
}
|
|
|
|
position += stride;
|
|
while(position >= hash->buckets) {
|
|
position -= hash->buckets;
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|