diff options
Diffstat (limited to 'openssl/doc/crypto/lhash.pod')
-rw-r--r-- | openssl/doc/crypto/lhash.pod | 294 |
1 files changed, 294 insertions, 0 deletions
diff --git a/openssl/doc/crypto/lhash.pod b/openssl/doc/crypto/lhash.pod new file mode 100644 index 000000000..dcdbb43a8 --- /dev/null +++ b/openssl/doc/crypto/lhash.pod @@ -0,0 +1,294 @@ +=pod + +=head1 NAME + +lh_new, lh_free, lh_insert, lh_delete, lh_retrieve, lh_doall, lh_doall_arg, lh_error - dynamic hash table + +=head1 SYNOPSIS + + #include <openssl/lhash.h> + + LHASH *lh_new(LHASH_HASH_FN_TYPE hash, LHASH_COMP_FN_TYPE compare); + void lh_free(LHASH *table); + + void *lh_insert(LHASH *table, void *data); + void *lh_delete(LHASH *table, void *data); + void *lh_retrieve(LHASH *table, void *data); + + void lh_doall(LHASH *table, LHASH_DOALL_FN_TYPE func); + void lh_doall_arg(LHASH *table, LHASH_DOALL_ARG_FN_TYPE func, + void *arg); + + int lh_error(LHASH *table); + + typedef int (*LHASH_COMP_FN_TYPE)(const void *, const void *); + typedef unsigned long (*LHASH_HASH_FN_TYPE)(const void *); + typedef void (*LHASH_DOALL_FN_TYPE)(const void *); + typedef void (*LHASH_DOALL_ARG_FN_TYPE)(const void *, const void *); + +=head1 DESCRIPTION + +This library implements dynamic hash tables. The hash table entries +can be arbitrary structures. Usually they consist of key and value +fields. + +lh_new() creates a new B<LHASH> structure to store arbitrary data +entries, and provides the 'hash' and 'compare' callbacks to be used in +organising the table's entries. The B<hash> callback takes a pointer +to a table entry as its argument and returns an unsigned long hash +value for its key field. The hash value is normally truncated to a +power of 2, so make sure that your hash function returns well mixed +low order bits. The B<compare> callback takes two arguments (pointers +to two hash table entries), and returns 0 if their keys are equal, +non-zero otherwise. If your hash table will contain items of some +particular type and the B<hash> and B<compare> callbacks hash/compare +these types, then the B<DECLARE_LHASH_HASH_FN> and +B<IMPLEMENT_LHASH_COMP_FN> macros can be used to create callback +wrappers of the prototypes required by lh_new(). These provide +per-variable casts before calling the type-specific callbacks written +by the application author. These macros, as well as those used for +the "doall" callbacks, are defined as; + + #define DECLARE_LHASH_HASH_FN(f_name,o_type) \ + unsigned long f_name##_LHASH_HASH(const void *); + #define IMPLEMENT_LHASH_HASH_FN(f_name,o_type) \ + unsigned long f_name##_LHASH_HASH(const void *arg) { \ + o_type a = (o_type)arg; \ + return f_name(a); } + #define LHASH_HASH_FN(f_name) f_name##_LHASH_HASH + + #define DECLARE_LHASH_COMP_FN(f_name,o_type) \ + int f_name##_LHASH_COMP(const void *, const void *); + #define IMPLEMENT_LHASH_COMP_FN(f_name,o_type) \ + int f_name##_LHASH_COMP(const void *arg1, const void *arg2) { \ + o_type a = (o_type)arg1; \ + o_type b = (o_type)arg2; \ + return f_name(a,b); } + #define LHASH_COMP_FN(f_name) f_name##_LHASH_COMP + + #define DECLARE_LHASH_DOALL_FN(f_name,o_type) \ + void f_name##_LHASH_DOALL(const void *); + #define IMPLEMENT_LHASH_DOALL_FN(f_name,o_type) \ + void f_name##_LHASH_DOALL(const void *arg) { \ + o_type a = (o_type)arg; \ + f_name(a); } + #define LHASH_DOALL_FN(f_name) f_name##_LHASH_DOALL + + #define DECLARE_LHASH_DOALL_ARG_FN(f_name,o_type,a_type) \ + void f_name##_LHASH_DOALL_ARG(const void *, const void *); + #define IMPLEMENT_LHASH_DOALL_ARG_FN(f_name,o_type,a_type) \ + void f_name##_LHASH_DOALL_ARG(const void *arg1, const void *arg2) { \ + o_type a = (o_type)arg1; \ + a_type b = (a_type)arg2; \ + f_name(a,b); } + #define LHASH_DOALL_ARG_FN(f_name) f_name##_LHASH_DOALL_ARG + +An example of a hash table storing (pointers to) structures of type 'STUFF' +could be defined as follows; + + /* Calculates the hash value of 'tohash' (implemented elsewhere) */ + unsigned long STUFF_hash(const STUFF *tohash); + /* Orders 'arg1' and 'arg2' (implemented elsewhere) */ + int STUFF_cmp(const STUFF *arg1, const STUFF *arg2); + /* Create the type-safe wrapper functions for use in the LHASH internals */ + static IMPLEMENT_LHASH_HASH_FN(STUFF_hash, const STUFF *) + static IMPLEMENT_LHASH_COMP_FN(STUFF_cmp, const STUFF *); + /* ... */ + int main(int argc, char *argv[]) { + /* Create the new hash table using the hash/compare wrappers */ + LHASH *hashtable = lh_new(LHASH_HASH_FN(STUFF_hash), + LHASH_COMP_FN(STUFF_cmp)); + /* ... */ + } + +lh_free() frees the B<LHASH> structure B<table>. Allocated hash table +entries will not be freed; consider using lh_doall() to deallocate any +remaining entries in the hash table (see below). + +lh_insert() inserts the structure pointed to by B<data> into B<table>. +If there already is an entry with the same key, the old value is +replaced. Note that lh_insert() stores pointers, the data are not +copied. + +lh_delete() deletes an entry from B<table>. + +lh_retrieve() looks up an entry in B<table>. Normally, B<data> is +a structure with the key field(s) set; the function will return a +pointer to a fully populated structure. + +lh_doall() will, for every entry in the hash table, call B<func> with +the data item as its parameter. For lh_doall() and lh_doall_arg(), +function pointer casting should be avoided in the callbacks (see +B<NOTE>) - instead, either declare the callbacks to match the +prototype required in lh_new() or use the declare/implement macros to +create type-safe wrappers that cast variables prior to calling your +type-specific callbacks. An example of this is illustrated here where +the callback is used to cleanup resources for items in the hash table +prior to the hashtable itself being deallocated: + + /* Cleans up resources belonging to 'a' (this is implemented elsewhere) */ + void STUFF_cleanup(STUFF *a); + /* Implement a prototype-compatible wrapper for "STUFF_cleanup" */ + IMPLEMENT_LHASH_DOALL_FN(STUFF_cleanup, STUFF *) + /* ... then later in the code ... */ + /* So to run "STUFF_cleanup" against all items in a hash table ... */ + lh_doall(hashtable, LHASH_DOALL_FN(STUFF_cleanup)); + /* Then the hash table itself can be deallocated */ + lh_free(hashtable); + +When doing this, be careful if you delete entries from the hash table +in your callbacks: the table may decrease in size, moving the item +that you are currently on down lower in the hash table - this could +cause some entries to be skipped during the iteration. The second +best solution to this problem is to set hash-E<gt>down_load=0 before +you start (which will stop the hash table ever decreasing in size). +The best solution is probably to avoid deleting items from the hash +table inside a "doall" callback! + +lh_doall_arg() is the same as lh_doall() except that B<func> will be +called with B<arg> as the second argument and B<func> should be of +type B<LHASH_DOALL_ARG_FN_TYPE> (a callback prototype that is passed +both the table entry and an extra argument). As with lh_doall(), you +can instead choose to declare your callback with a prototype matching +the types you are dealing with and use the declare/implement macros to +create compatible wrappers that cast variables before calling your +type-specific callbacks. An example of this is demonstrated here +(printing all hash table entries to a BIO that is provided by the +caller): + + /* Prints item 'a' to 'output_bio' (this is implemented elsewhere) */ + void STUFF_print(const STUFF *a, BIO *output_bio); + /* Implement a prototype-compatible wrapper for "STUFF_print" */ + static IMPLEMENT_LHASH_DOALL_ARG_FN(STUFF_print, const STUFF *, BIO *) + /* ... then later in the code ... */ + /* Print out the entire hashtable to a particular BIO */ + lh_doall_arg(hashtable, LHASH_DOALL_ARG_FN(STUFF_print), logging_bio); + +lh_error() can be used to determine if an error occurred in the last +operation. lh_error() is a macro. + +=head1 RETURN VALUES + +lh_new() returns B<NULL> on error, otherwise a pointer to the new +B<LHASH> structure. + +When a hash table entry is replaced, lh_insert() returns the value +being replaced. B<NULL> is returned on normal operation and on error. + +lh_delete() returns the entry being deleted. B<NULL> is returned if +there is no such value in the hash table. + +lh_retrieve() returns the hash table entry if it has been found, +B<NULL> otherwise. + +lh_error() returns 1 if an error occurred in the last operation, 0 +otherwise. + +lh_free(), lh_doall() and lh_doall_arg() return no values. + +=head1 NOTE + +The various LHASH macros and callback types exist to make it possible +to write type-safe code without resorting to function-prototype +casting - an evil that makes application code much harder to +audit/verify and also opens the window of opportunity for stack +corruption and other hard-to-find bugs. It also, apparently, violates +ANSI-C. + +The LHASH code regards table entries as constant data. As such, it +internally represents lh_insert()'d items with a "const void *" +pointer type. This is why callbacks such as those used by lh_doall() +and lh_doall_arg() declare their prototypes with "const", even for the +parameters that pass back the table items' data pointers - for +consistency, user-provided data is "const" at all times as far as the +LHASH code is concerned. However, as callers are themselves providing +these pointers, they can choose whether they too should be treating +all such parameters as constant. + +As an example, a hash table may be maintained by code that, for +reasons of encapsulation, has only "const" access to the data being +indexed in the hash table (ie. it is returned as "const" from +elsewhere in their code) - in this case the LHASH prototypes are +appropriate as-is. Conversely, if the caller is responsible for the +life-time of the data in question, then they may well wish to make +modifications to table item passed back in the lh_doall() or +lh_doall_arg() callbacks (see the "STUFF_cleanup" example above). If +so, the caller can either cast the "const" away (if they're providing +the raw callbacks themselves) or use the macros to declare/implement +the wrapper functions without "const" types. + +Callers that only have "const" access to data they're indexing in a +table, yet declare callbacks without constant types (or cast the +"const" away themselves), are therefore creating their own risks/bugs +without being encouraged to do so by the API. On a related note, +those auditing code should pay special attention to any instances of +DECLARE/IMPLEMENT_LHASH_DOALL_[ARG_]_FN macros that provide types +without any "const" qualifiers. + +=head1 BUGS + +lh_insert() returns B<NULL> both for success and error. + +=head1 INTERNALS + +The following description is based on the SSLeay documentation: + +The B<lhash> library implements a hash table described in the +I<Communications of the ACM> in 1991. What makes this hash table +different is that as the table fills, the hash table is increased (or +decreased) in size via OPENSSL_realloc(). When a 'resize' is done, instead of +all hashes being redistributed over twice as many 'buckets', one +bucket is split. So when an 'expand' is done, there is only a minimal +cost to redistribute some values. Subsequent inserts will cause more +single 'bucket' redistributions but there will never be a sudden large +cost due to redistributing all the 'buckets'. + +The state for a particular hash table is kept in the B<LHASH> structure. +The decision to increase or decrease the hash table size is made +depending on the 'load' of the hash table. The load is the number of +items in the hash table divided by the size of the hash table. The +default values are as follows. If (hash->up_load E<lt> load) =E<gt> +expand. if (hash-E<gt>down_load E<gt> load) =E<gt> contract. The +B<up_load> has a default value of 1 and B<down_load> has a default value +of 2. These numbers can be modified by the application by just +playing with the B<up_load> and B<down_load> variables. The 'load' is +kept in a form which is multiplied by 256. So +hash-E<gt>up_load=8*256; will cause a load of 8 to be set. + +If you are interested in performance the field to watch is +num_comp_calls. The hash library keeps track of the 'hash' value for +each item so when a lookup is done, the 'hashes' are compared, if +there is a match, then a full compare is done, and +hash-E<gt>num_comp_calls is incremented. If num_comp_calls is not equal +to num_delete plus num_retrieve it means that your hash function is +generating hashes that are the same for different values. It is +probably worth changing your hash function if this is the case because +even if your hash table has 10 items in a 'bucket', it can be searched +with 10 B<unsigned long> compares and 10 linked list traverses. This +will be much less expensive that 10 calls to your compare function. + +lh_strhash() is a demo string hashing function: + + unsigned long lh_strhash(const char *c); + +Since the B<LHASH> routines would normally be passed structures, this +routine would not normally be passed to lh_new(), rather it would be +used in the function passed to lh_new(). + +=head1 SEE ALSO + +L<lh_stats(3)|lh_stats(3)> + +=head1 HISTORY + +The B<lhash> library is available in all versions of SSLeay and OpenSSL. +lh_error() was added in SSLeay 0.9.1b. + +This manpage is derived from the SSLeay documentation. + +In OpenSSL 0.9.7, all lhash functions that were passed function pointers +were changed for better type safety, and the function types LHASH_COMP_FN_TYPE, +LHASH_HASH_FN_TYPE, LHASH_DOALL_FN_TYPE and LHASH_DOALL_ARG_FN_TYPE +became available. + +=cut |