Proposition: use a hashtable instead of bsearch to locate applets

Jason Cipriani jason.cipriani at gmail.com
Thu May 29 15:24:06 UTC 2014


Does this provided a noticeable performance increase? Do you have
benchmarks?

J
On May 29, 2014 7:17 AM, "Bartosz Golaszewski" <bartekgola at gmail.com> wrote:

> Hi!
>
> Busybox uses bsearch() to locate the applet's main function in
> find_applet_by_name(). Running 'make defconfig' results in 355 applets
> being built and this in turn results in 8-9 calls to strcmp() on average
> per busybox execution.
>
> Maybe we should switch to using a simple static hashtable? The following
> patch is a simple & dirty proof of concept to show what I mean. It modifies
> applet_tables.c to generate a static hashtable containing indicies of
> fields
> in applet_nameofs. I used a simple and fast hash function taken from
> Robert Jenkins.
>
> With this patch, on each execution and after the hash computation, the
> number of calls to strcmp() has been limited to four at most, and mostly
> it's just one or two. There are no calls to applet_name_compare() too.
>
> The patch results in bigger code, but there's room for improvement as
> we could probably get rid of some of the arrays generated by applet_tables
> and unify the hashtables used in busybox applets.
>
> If there's any interest, I can prepare a better, more memory-wise optimized
> version.
>
> Best regards,
> Bartosz Golaszewski
>
> ---
>  applets/applet_tables.c | 58
> ++++++++++++++++++++++++++++++++++++++++++++++++-
>  libbb/appletlib.c       | 45 ++++++++++++++++++++++++++++++++------
>  2 files changed, 95 insertions(+), 8 deletions(-)
>
> diff --git a/applets/applet_tables.c b/applets/applet_tables.c
> index 94b974e..9656b5c 100644
> --- a/applets/applet_tables.c
> +++ b/applets/applet_tables.c
> @@ -14,6 +14,7 @@
>  #include <string.h>
>  #include <stdio.h>
>  #include <unistd.h>
> +#include <stdint.h>
>
>  #undef ARRAY_SIZE
>  #define ARRAY_SIZE(x) ((unsigned)(sizeof(x) / sizeof((x)[0])))
> @@ -42,6 +43,26 @@ enum { NUM_APPLETS = ARRAY_SIZE(applets) };
>
>  static int offset[NUM_APPLETS];
>
> +#define MAX_BUCKET_SIZE 16
> +static int applet_hashtable[NUM_APPLETS][16];
> +
> +static unsigned jenkins_hash(const char *key, size_t len)
> +{
> +       unsigned hash, i;
> +
> +       for(hash = i = 0; i < len; ++i) {
> +               hash += key[i];
> +               hash += (hash << 10);
> +               hash ^= (hash >> 6);
> +       }
> +
> +       hash += (hash << 3);
> +       hash ^= (hash >> 11);
> +       hash += (hash << 15);
> +
> +       return hash;
> +}
> +
>  static int cmp_name(const void *a, const void *b)
>  {
>         const struct bb_applet *aa = a;
> @@ -51,7 +72,7 @@ static int cmp_name(const void *a, const void *b)
>
>  int main(int argc, char **argv)
>  {
> -       int i;
> +       int i, j;
>         int ofs;
>  //     unsigned MAX_APPLET_NAME_LEN = 1;
>
> @@ -129,6 +150,41 @@ int main(int argc, char **argv)
>         }
>         printf("};\n");
>  #endif
> +
> +       // Initialize local hashtable
> +       for (i = 0; i < NUM_APPLETS; i++) {
> +               for (j = 0; j < MAX_BUCKET_SIZE; j++)
> +                       applet_hashtable[i][j] = -1;
> +       }
> +
> +       // For each applet - place it in appropriate bucket
> +       for (i = 0; i < NUM_APPLETS; i++) {
> +               unsigned ind = jenkins_hash(applets[i].name,
> +                                       strlen(applets[i].name)) %
> NUM_APPLETS;
> +
> +               for (j = 0; j < MAX_BUCKET_SIZE; j++) {
> +                       if (applet_hashtable[ind][j] < 0) {
> +                               applet_hashtable[ind][j] = i;
> +                               break;
> +                       }
> +               }
> +       }
> +
> +       // Create a static array for each bucket
> +       for (i = 0; i < NUM_APPLETS; i++) {
> +               printf("const int16_t bucket%d[] = { ", i);
> +               for (j = 0; applet_hashtable[i][j] >= 0; j++) {
> +                       printf("%d, ", applet_hashtable[i][j]);
> +               }
> +               printf(" -1 };\n");
> +       }
> +
> +       // Create a static array of pointers to the buckets
> +       printf("\nconst int16_t *applet_hashtab[] = {\n");
> +       for (i = 0; i < NUM_APPLETS; i++)
> +               printf("\tbucket%d,\n", i);
> +       printf("};\n");
> +
>         //printf("#endif /* SKIP_definitions */\n");
>  //     printf("\n");
>  //     printf("#define MAX_APPLET_NAME_LEN %u\n", MAX_APPLET_NAME_LEN);
> diff --git a/libbb/appletlib.c b/libbb/appletlib.c
> index f7c416e..6be536d 100644
> --- a/libbb/appletlib.c
> +++ b/libbb/appletlib.c
> @@ -52,7 +52,6 @@
>
>  #include "usage_compressed.h"
>
> -
>  #if ENABLE_SHOW_USAGE && !ENABLE_FEATURE_COMPRESS_USAGE
>  static const char usage_messages[] ALIGN1 = UNPACKED_USAGE;
>  #else
> @@ -140,23 +139,55 @@ void FAST_FUNC bb_show_usage(void)
>  }
>
>  #if NUM_APPLETS > 8
> -static int applet_name_compare(const void *name, const void *idx)
> +static unsigned jenkins_hash(const char *key, size_t len)
>  {
> -       int i = (int)(ptrdiff_t)idx - 1;
> -       return strcmp(name, APPLET_NAME(i));
> +       unsigned hash, i;
> +
> +       for(hash = i = 0; i < len; ++i) {
> +               hash += key[i];
> +               hash += (hash << 10);
> +               hash ^= (hash >> 6);
> +       }
> +
> +       hash += (hash << 3);
> +       hash ^= (hash >> 11);
> +       hash += (hash << 15);
> +
> +       return hash;
>  }
> +
> +//static int applet_name_compare(const void *name, const void *idx)
> +//{
> +//     int i = (int)(ptrdiff_t)idx - 1;
> +//     return strcmp(name, APPLET_NAME(i));
> +//}
>  #endif
>  int FAST_FUNC find_applet_by_name(const char *name)
>  {
>  #if NUM_APPLETS > 8
>         /* Do a binary search to find the applet entry given the name. */
> -       const char *p;
> -       p = bsearch(name, (void*)(ptrdiff_t)1, ARRAY_SIZE(applet_main), 1,
> applet_name_compare);
> +       //const char *p;
> +       //p = bsearch(name, (void*)(ptrdiff_t)1, ARRAY_SIZE(applet_main),
> 1, applet_name_compare);
> +
> +       unsigned ind;
> +       int ret = -1, i = 0;
> +
> +       ind = jenkins_hash(name, strlen(name)) % NUM_APPLETS;
> +       while (applet_hashtab[ind][i] != -1) {
> +               if (strcmp(name, APPLET_NAME(applet_hashtab[ind][i])) ==
> 0) {
> +                       ret = applet_hashtab[ind][i];
> +                       break;
> +               }
> +               ++i;
> +       }
> +
> +       return ret;
> +
>         /*
>          * if (!p) return -1;
>          * ^^^^^^^^^^^^^^^^^^ the code below will do this if p == NULL :)
>          */
> -       return (int)(ptrdiff_t)p - 1;
> +       //return (int)(ptrdiff_t)p - 1;
>  #else
>         /* A version which does not pull in bsearch */
>         int i = 0;
> --
> 1.9.1
>
> _______________________________________________
> busybox mailing list
> busybox at busybox.net
> http://lists.busybox.net/mailman/listinfo/busybox
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.busybox.net/pipermail/busybox/attachments/20140529/7cf45aa3/attachment.html>


More information about the busybox mailing list