From 54245985fb3c89d72e285c4db39d38ed2f5fb0de Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter.wuille@gmail.com>
Date: Sun, 31 Mar 2019 11:41:05 -0700
Subject: Squashed 'src/secp256k1/' changes from 0b70241850..b19c000063
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

b19c000063 Merge #607: Use size_t shifts when computing a size_t
4d01bc2d9c Merge #606: travis: Remove unused sudo:false
e6d01e9347 Use size_t shifts when computing a size_t
7667532bd7 travis: Remove unused sudo:false
ee99f12f3d Merge #599: Switch x86_64 asm to use "i" instead of "n" for immediate values.
d58bc93f2c Switch x86_64 asm to use "i" instead of "n" for immediate values.
05362ee042 Merge #597: Add $(COMMON_LIB) to exhaustive tests to fix ARM asm build
83483869ac Add $(COMMON_LIB) to exhaustive tests to fix ARM asm build
aa15154a48 Merge #568: Fix integer overflow in ecmult_multi_var when n is large
2277af5ff0 Fix integer overflow in ecmult_multi_var when n is large
85d0e1bcce Merge #591: Make bench_internal obey secp256k1_fe_sqrt's contract wrt aliasing.
14196379ec Merge #580: Add trivial ecmult_multi algorithm which does not require a scratch space
a697d82da9 Add trivial ecmult_multi to the benchmark tool
bade617417 Add trivial ecmult_multi algorithm. It is selected when no scratch space is given and just multiplies and adds the points.
5545e13dea Merge #584: configure: Use CFLAGS_FOR_BUILD when checking native compiler
20c5869df2 Merge #516: improvements to random seed in src/tests.c
b76e45d5d6 Make bench_internal obey secp256k1_fe_sqrt's contract wrt aliasing.
870a977644 Merge #562: Make use of TAG_PUBKEY constants in secp256k1_eckey_pubkey_parse
be40c4d0b5 Fixup for C90 mixed declarations.
c71dd2c08f Merge #509: Fix algorithm selection in bench_ecmult
6492bf88cc Merge #518: Summarize build options after running configure
0e9ada1941 Merge #567: Correct order of libs returned on pkg-config --libs --static libsecp2…
e96901a4b9 Merge #587: Make randomization of a non-signing context a noop
58df8d03ad Merge #511: Portability fix for the configure scripts generated
2ebdad772a Merge #552: Make constants static:
1c131affd3 Merge #551: secp256k1_fe_sqrt: Verify that the arguments don't alias.
ba698f883b Merge #539: Assorted minor corrections
949e85b009 Merge #550: Optimize secp256k1_fe_normalize_weak calls.
a34bcaadf1 Actually pass CFLAGS_FOR_BUILD and LDFLAGS_FOR_BUILD to linker
2d5f4cebdc configure: Use CFLAGS_FOR_BUILD when checking native compiler
b408c6a8b2 Merge #579: Use __GNUC_PREREQ for detecting __builtin_expect
6198375218 Make randomization of a non-signing context a noop
c663397f46 Use __GNUC_PREREQ for detecting __builtin_expect
e34ceb333b Merge #557: Eliminate scratch memory used when generating contexts
b3bf5f99a3 ecmult_impl: expand comment to explain how effective affine interacts with everything
efa783f8f0 Store z-ratios in the 'x' coord they'll recover
ffd3b346fe add `secp256k1_ge_set_all_gej_var` test which deals with many infinite points
84740acd2a ecmult_impl: save one fe_inv_var
47045270fa ecmult_impl: eliminate scratch memory used when generating context
7f7a2ed3a8 ecmult_gen_impl: eliminate scratch memory used when generating context
314a61d724 Merge #553: add static context object which has no capabilities
89a20a8945 Correct order of libs returned on pkg-config --libs --static libsecp256k1 call.
1086fda4c1 Merge #354: [ECDH API change] Support custom hash function
d3cb1f95eb Make use of TAG_PUBKEY constants in secp256k1_eckey_pubkey_parse
40fde611bd prevent attempts to modify `secp256k1_context_no_precomp`
ed7c08417a add static context object which has no capabilities
496c5b43b8 Make constants static: static const secp256k1_ge secp256k1_ge_const_g; static const int CURVE_B;
bf8b86cc07 secp256k1_fe_sqrt: Verify that the arguments don't alias.
9bd89c836b Optimize secp256k1_fe_normalize_weak calls. Move secp256k1_fe_normalize_weak calls out of ECMULT_TABLE_GET_GE and ECMULT_TABLE_GET_GE_STORAGE and into secp256k1_ge_globalz_set_table_gej instead.
52ab96fedb clean dependendies in field_*_impl.h
deff5edd42 Correct math typos in field_*.h
4efb3f8dd1 Add check that restrict pointers don't alias with all parameters.
1e6f1f5ad5 Merge #529: fix tests.c in the count == 0 case
c8fbc3c397 [ECDH API change] Allow pass arbitrary data to hash function
b00be65056 [ECDH API change] Support custom hash function
95e99f196f fix tests.c in the count == 0 case
452d8e4d2a Merge #523: scratch: add stack frame support
6fe50439ae scratch: add stack frame support
9bc2e26502 Merge #522: parameterize ecmult_const over input size
7c1b91ba4b parameterize ecmult_const over input size
dbc3ddd5e2 Merge #513: Increase sparsity of pippenger fixed window naf representation
3965027c81 Summarize build options in configure script
0f0517369c Fix algorithm selection in bench_ecmult
fb9271dcf0 Merge #510: add a couple missing `const`s to ecmult_pippenger_wnaf
cd5f6028e5 Merge #515: Fix typo
09146ae854 Merge #512: secp256k1_ec_privkey_negate - fix documentation
ec0a7b3ae3 Don't touch leading zeros in wnaf_fixed.
9e36d1bfe2 Fix bug in wnaf_fixed where the wnaf array is not completely zeroed when given a 0 scalar.
96f68a0afc Don't invert scalar in wnaf_fixed when it is even because a caller might intentionally give a scalar with many leading zeros.
8b3841c91d fix bug in fread() failure check
cddef0c0be tests: add warning message when /dev/urandom fails
9b7c47a21e Fix typo
6dbb007869 Increase sparsity of pippenger fixed window naf representation
1646ace4d5 secp256k1_ec_privkey_negate - fix documentation
270f6c80db Portability fix for the configure scripts generated
9b3ff0309d add a couple missing `const`s to ecmult_pippenger_wnaf
cd329dbc3e Merge #460: [build] Update ax_jni_include_dir.m4 macro
7f9c1a1565 Merge #498: tests: Avoid calling fclose(...) with an invalid argument
f99aa8d4d3 Merge #499: tests: Make sure we get the requested number of bytes from /dev/urandom
b549d3d5f7 Merge #472: [build] Set --enable-jni to no by default instead of auto.
d333521516 Merge #494: Support OpenSSL versions >= 1.1 for ENABLE_OPENSSL_TESTS
2ef8ea5d21 Merge #495: Add bench_ecmult to .gitignore
82a96e4587 tests: Make sure we get the requested number of bytes from /dev/urandom
5aae5b5bb2 Avoid calling fclose(...) with an invalid argument
cb32940df3 Add bench_ecmult to .gitignore
31abd3ab8d Support OpenSSL versions >= 1.1 for ENABLE_OPENSSL_TESTS
c95f6f1360 Merge #487: fix tests typo, s/changed/unchanged
fb46c83881 Merge #463: Reduce usage of hardcoded size constants
02f5001dfc Merge #490: Disambiguate bench functions and types
1f46d6089e Disambiguate bench functions and types
f54c6c5083 Merge #480: Enable benchmark building by default
c77fc08597 Merge #486: Add pippenger_wnaf for multi-multiplication
d2f9c6b5dc Use more precise pippenger bucket windows
4c950bbeaf Save some additions per window in _pippenger_wnaf
a58f543f5a Add flags for choosing algorithm in ecmult_multi benchmark
36b22c9337 Use scratch space dependent batching in ecmult_multi
355a38f113 Add pippenger_wnaf ecmult_multi
bc65aa794e Add bench_ecmult
dba5471b69 Add ecmult_multi tests
8c1c831bdb Generalize Strauss to support multiple points
548de42ecf add resizeable scratch space API
0e96cdc6b6 fix typo, s/changed/unchanged
c7680e570f Reduce usage of hardcoded size constants
6ad5cdb42a Merge #479: Get rid of reserved _t in type names
7a78f60598 Print whether we're building benchmarks
4afec9f1ae Build benchmarks by default
d1dc9dfc0a Get rid of reserved _t in type names
57752d28b3 [build] Set --enable-jni to no by default instead of auto.
e7daa9b3c2 [build] Tweak JNI macro to warn instead of error for JNI not found.
5b22977922 [build] Update ax_jni_include_dir.m4 macro to deal with recent versions of macOS

git-subtree-dir: src/secp256k1
git-subtree-split: b19c000063be11018b4d1a6b0a85871ab9d0bdcf
---
 src/bench.h                                   |  16 +
 src/bench_ecdh.c                              |  10 +-
 src/bench_ecmult.c                            | 207 ++++++
 src/bench_internal.c                          |  83 +--
 src/bench_recover.c                           |   8 +-
 src/bench_sign.c                              |  12 +-
 src/eckey_impl.h                              |   2 +-
 src/ecmult.h                                  |  19 +-
 src/ecmult_const.h                            |   4 +-
 src/ecmult_const_impl.h                       |  97 +--
 src/ecmult_gen_impl.h                         |   4 +-
 src/ecmult_impl.h                             | 929 +++++++++++++++++++++++---
 src/field_10x26.h                             |   4 +-
 src/field_10x26_impl.h                        |   5 +-
 src/field_5x52.h                              |   4 +-
 src/field_5x52_impl.h                         |   2 +-
 src/field_5x52_int128_impl.h                  |   4 +-
 src/field_impl.h                              |   3 +
 src/gen_context.c                             |   2 +-
 src/group.h                                   |  10 +-
 src/group_impl.h                              |  75 ++-
 src/hash.h                                    |  26 +-
 src/hash_impl.h                               |  49 +-
 src/java/org/bitcoin/NativeSecp256k1Test.java |   2 +-
 src/java/org_bitcoin_NativeSecp256k1.c        |   6 +-
 src/modules/ecdh/main_impl.h                  |  41 +-
 src/modules/ecdh/tests_impl.h                 |  59 +-
 src/scalar_4x64_impl.h                        |   6 +-
 src/scratch.h                                 |  39 ++
 src/scratch_impl.h                            |  86 +++
 src/secp256k1.c                               |  51 +-
 src/testrand_impl.h                           |   2 +-
 src/tests.c                                   | 668 +++++++++++++++++-
 src/tests_exhaustive.c                        |  43 +-
 src/util.h                                    |  10 +-
 35 files changed, 2239 insertions(+), 349 deletions(-)
 create mode 100644 src/bench_ecmult.c
 create mode 100644 src/scratch.h
 create mode 100644 src/scratch_impl.h

(limited to 'src')

diff --git a/src/bench.h b/src/bench.h
index d5ebe01301..5b59783f68 100644
--- a/src/bench.h
+++ b/src/bench.h
@@ -8,6 +8,7 @@
 #define SECP256K1_BENCH_H
 
 #include <stdio.h>
+#include <string.h>
 #include <math.h>
 #include "sys/time.h"
 
@@ -63,4 +64,19 @@ void run_benchmark(char *name, void (*benchmark)(void*), void (*setup)(void*), v
     printf("us\n");
 }
 
+int have_flag(int argc, char** argv, char *flag) { /* Returns 1 if `flag` equals one of the command-line arguments, or if no arguments were given at all; otherwise 0. */
+    char** argm = argv + argc; /* one past the last argument */
+    argv++; /* skip argv[0] */
+    if (argv == argm) {
+        return 1; /* no arguments: every flag is treated as present */
+    }
+    while (argv != NULL && argv != argm) {
+        if (strcmp(*argv, flag) == 0) {
+            return 1;
+        }
+        argv++;
+    }
+    return 0; /* arguments were given, but none matched */
+}
+
 #endif /* SECP256K1_BENCH_H */
diff --git a/src/bench_ecdh.c b/src/bench_ecdh.c
index cde5e2dbb4..c1dd5a6ac9 100644
--- a/src/bench_ecdh.c
+++ b/src/bench_ecdh.c
@@ -15,11 +15,11 @@ typedef struct {
     secp256k1_context *ctx;
     secp256k1_pubkey point;
     unsigned char scalar[32];
-} bench_ecdh_t;
+} bench_ecdh_data;
 
 static void bench_ecdh_setup(void* arg) {
     int i;
-    bench_ecdh_t *data = (bench_ecdh_t*)arg;
+    bench_ecdh_data *data = (bench_ecdh_data*)arg;
     const unsigned char point[] = {
         0x03,
         0x54, 0x94, 0xc1, 0x5d, 0x32, 0x09, 0x97, 0x06,
@@ -39,15 +39,15 @@ static void bench_ecdh_setup(void* arg) {
 static void bench_ecdh(void* arg) {
     int i;
     unsigned char res[32];
-    bench_ecdh_t *data = (bench_ecdh_t*)arg;
+    bench_ecdh_data *data = (bench_ecdh_data*)arg;
 
     for (i = 0; i < 20000; i++) {
-        CHECK(secp256k1_ecdh(data->ctx, res, &data->point, data->scalar) == 1);
+        CHECK(secp256k1_ecdh(data->ctx, res, &data->point, data->scalar, NULL, NULL) == 1);
     }
 }
 
 int main(void) {
-    bench_ecdh_t data;
+    bench_ecdh_data data;
 
     run_benchmark("ecdh", bench_ecdh, bench_ecdh_setup, NULL, &data, 10, 20000);
     return 0;
diff --git a/src/bench_ecmult.c b/src/bench_ecmult.c
new file mode 100644
index 0000000000..6d0ed1f436
--- /dev/null
+++ b/src/bench_ecmult.c
@@ -0,0 +1,207 @@
+/**********************************************************************
+ * Copyright (c) 2017 Pieter Wuille                                   *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+#include <stdio.h>
+
+#include "include/secp256k1.h"
+
+#include "util.h"
+#include "hash_impl.h"
+#include "num_impl.h"
+#include "field_impl.h"
+#include "group_impl.h"
+#include "scalar_impl.h"
+#include "ecmult_impl.h"
+#include "bench.h"
+#include "secp256k1.c"
+
+#define POINTS 32768
+#define ITERS 10000
+
+typedef struct {
+    /* Setup once in advance */
+    secp256k1_context* ctx;
+    secp256k1_scratch_space* scratch; /* set to NULL by main when the "simple" algorithm is selected */
+    secp256k1_scalar* scalars; /* POINTS scalars filled in by generate_scalar */
+    secp256k1_ge* pubkeys; /* POINTS points: G, then each entry is the previous one doubled */
+    secp256k1_scalar* seckeys; /* POINTS scalars: 1, then each entry is the previous one doubled (parallel to pubkeys) */
+    secp256k1_gej* expected_output; /* negated expected result for each iteration (ITERS + 1 entries) */
+    secp256k1_ecmult_multi_func ecmult_multi; /* the multi-multiplication function being benchmarked */
+
+    /* Changes per test */
+    size_t count; /* terms per multiplication, counting the G term */
+    int includes_g; /* nonzero: the G scalar is passed directly rather than through the callback */
+
+    /* Changes per test iteration */
+    size_t offset1; /* current start index into scalars */
+    size_t offset2; /* current start index into pubkeys */
+
+    /* Test output. */
+    secp256k1_gej* output; /* one result per iteration (ITERS + 1 entries) */
+} bench_data;
+
+static int bench_callback(secp256k1_scalar* sc, secp256k1_ge* ge, size_t idx, void* arg) { /* Writes the idx-th (scalar, point) term from the bench_data in `arg` to *sc and *ge; always returns 1. */
+    bench_data* data = (bench_data*)arg;
+    if (data->includes_g) ++idx; /* term 0 (the G term) is supplied by the caller, so callback terms start at 1 */
+    if (idx == 0) {
+        *sc = data->scalars[data->offset1];
+        *ge = secp256k1_ge_const_g;
+    } else {
+        *sc = data->scalars[(data->offset1 + idx) % POINTS]; /* indices wrap around the POINTS-sized tables */
+        *ge = data->pubkeys[(data->offset2 + idx - 1) % POINTS]; /* pubkeys are only consumed from term 1 on, hence idx - 1 */
+    }
+    return 1;
+}
+
+static void bench_ecmult(void* arg) { /* Benchmark body: performs 1 + ITERS / count multi-multiplications of `count` terms each. */
+    bench_data* data = (bench_data*)arg;
+
+    size_t count = data->count;
+    int includes_g = data->includes_g;
+    size_t iters = 1 + ITERS / count;
+    size_t iter;
+
+    for (iter = 0; iter < iters; ++iter) {
+        data->ecmult_multi(&data->ctx->ecmult_ctx, data->scratch, &data->output[iter], data->includes_g ? &data->scalars[data->offset1] : NULL, bench_callback, arg, count - includes_g); /* output[iter] is verified later, in bench_ecmult_teardown */
+        data->offset1 = (data->offset1 + count) % POINTS; /* each iteration consumes `count` scalars */
+        data->offset2 = (data->offset2 + count - 1) % POINTS; /* ...but only count - 1 pubkeys, since the first term uses G */
+    }
+}
+
+static void bench_ecmult_setup(void* arg) { /* Resets both offsets to start values derived from data->count (the same formulas run_test uses for the expected outputs). */
+    bench_data* data = (bench_data*)arg;
+    data->offset1 = (data->count * 0x537b7f6f + 0x8f66a481) % POINTS; /* start index into scalars */
+    data->offset2 = (data->count * 0x7f6f537b + 0x6a1a8f49) % POINTS; /* start index into pubkeys */
+}
+
+static void bench_ecmult_teardown(void* arg) { /* Checks every output[iter] produced by bench_ecmult against expected_output[iter]. */
+    bench_data* data = (bench_data*)arg;
+    size_t iters = 1 + ITERS / data->count; /* same iteration count as bench_ecmult */
+    size_t iter;
+    /* Verify the results in teardown, to avoid doing comparisons while benchmarking. */
+    for (iter = 0; iter < iters; ++iter) {
+        secp256k1_gej tmp;
+        secp256k1_gej_add_var(&tmp, &data->output[iter], &data->expected_output[iter], NULL); /* expected_output holds the negated result (see run_test) */
+        CHECK(secp256k1_gej_is_infinity(&tmp)); /* ...so a correct output must sum with it to infinity */
+    }
+}
+
+static void generate_scalar(uint32_t num, secp256k1_scalar* scalar) { /* Sets *scalar to a deterministic value: the SHA-256 digest of a fixed tag followed by `num`. */
+    secp256k1_sha256 sha256;
+    unsigned char c[11] = {'e', 'c', 'm', 'u', 'l', 't', 0, 0, 0, 0}; /* 11 bytes: the tag, 4 bytes for num, and one trailing byte that stays zero */
+    unsigned char buf[32];
+    int overflow = 0;
+    c[6] = num; /* num is stored least-significant byte first */
+    c[7] = num >> 8;
+    c[8] = num >> 16;
+    c[9] = num >> 24;
+    secp256k1_sha256_initialize(&sha256);
+    secp256k1_sha256_write(&sha256, c, sizeof(c)); /* sizeof(c) is 11, so the trailing zero byte is hashed too */
+    secp256k1_sha256_finalize(&sha256, buf);
+    secp256k1_scalar_set_b32(scalar, buf, &overflow);
+    CHECK(!overflow); /* fail if converting the digest to a scalar reported overflow */
+}
+
+static void run_test(bench_data* data, size_t count, int includes_g) { /* Precomputes the (negated) expected results for `count` terms per multiplication, then runs the benchmark. */
+    char str[32]; /* benchmark name */
+    static const secp256k1_scalar zero = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0);
+    size_t iters = 1 + ITERS / count; /* same iteration count bench_ecmult and bench_ecmult_teardown compute */
+    size_t iter;
+
+    data->count = count;
+    data->includes_g = includes_g;
+
+    /* Compute (the negation of) the expected results directly. */
+    data->offset1 = (data->count * 0x537b7f6f + 0x8f66a481) % POINTS; /* same start offsets as bench_ecmult_setup */
+    data->offset2 = (data->count * 0x7f6f537b + 0x6a1a8f49) % POINTS;
+    for (iter = 0; iter < iters; ++iter) {
+        secp256k1_scalar tmp;
+        secp256k1_scalar total = data->scalars[(data->offset1++) % POINTS]; /* first term: the scalar applied to G */
+        size_t i = 0;
+        for (i = 0; i + 1 < count; ++i) { /* the remaining count - 1 terms */
+            secp256k1_scalar_mul(&tmp, &data->seckeys[(data->offset2++) % POINTS], &data->scalars[(data->offset1++) % POINTS]); /* seckey stands in for the matching pubkey, so the sum can be formed on scalars */
+            secp256k1_scalar_add(&total, &total, &tmp);
+        }
+        secp256k1_scalar_negate(&total, &total);
+        secp256k1_ecmult(&data->ctx->ecmult_ctx, &data->expected_output[iter], NULL, &zero, &total); /* R = na*A + ng*G with na == 0, i.e. (-total)*G */
+    }
+
+    /* Run the benchmark. */
+    sprintf(str, includes_g ? "ecmult_%ig" : "ecmult_%i", (int)count); /* name is "ecmult_<count>", with a "g" suffix when includes_g is set */
+    run_benchmark(str, bench_ecmult, bench_ecmult_setup, bench_ecmult_teardown, data, 10, count * (1 + ITERS / count)); /* last argument: count * iters, the number of terms processed per run */
+}
+
+int main(int argc, char **argv) { /* Builds the scalar/key tables, optionally selects one algorithm from the command line, and benchmarks term counts from 1 up to POINTS. */
+    bench_data data;
+    int i, p;
+    secp256k1_gej* pubkeys_gej;
+    size_t scratch_size;
+
+    data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY);
+    scratch_size = secp256k1_strauss_scratch_size(POINTS) + STRAUSS_SCRATCH_OBJECTS*16; /* NOTE(review): the extra STRAUSS_SCRATCH_OBJECTS*16 looks like per-object alignment slack -- confirm against the scratch implementation */
+    data.scratch = secp256k1_scratch_space_create(data.ctx, scratch_size);
+    data.ecmult_multi = secp256k1_ecmult_multi_var; /* default: the combined algorithm */
+
+    if (argc > 1) { /* an argument selects a single algorithm instead */
+        if(have_flag(argc, argv, "pippenger_wnaf")) {
+            printf("Using pippenger_wnaf:\n");
+            data.ecmult_multi = secp256k1_ecmult_pippenger_batch_single;
+        } else if(have_flag(argc, argv, "strauss_wnaf")) {
+            printf("Using strauss_wnaf:\n");
+            data.ecmult_multi = secp256k1_ecmult_strauss_batch_single;
+        } else if(have_flag(argc, argv, "simple")) {
+            printf("Using simple algorithm:\n");
+            data.ecmult_multi = secp256k1_ecmult_multi_var;
+            secp256k1_scratch_space_destroy(data.scratch); /* "simple" runs with no scratch space */
+            data.scratch = NULL;
+        } else {
+            fprintf(stderr, "%s: unrecognized argument '%s'.\n", argv[0], argv[1]);
+            fprintf(stderr, "Use 'pippenger_wnaf', 'strauss_wnaf', 'simple' or no argument to benchmark a combined algorithm.\n");
+            return 1; /* NOTE(review): returns without destroying data.ctx and data.scratch */
+        }
+    }
+
+    /* Allocate stuff */
+    data.scalars = malloc(sizeof(secp256k1_scalar) * POINTS); /* NOTE(review): none of these malloc results are checked before use */
+    data.seckeys = malloc(sizeof(secp256k1_scalar) * POINTS);
+    data.pubkeys = malloc(sizeof(secp256k1_ge) * POINTS);
+    data.expected_output = malloc(sizeof(secp256k1_gej) * (ITERS + 1)); /* ITERS + 1 is the largest iteration count (1 + ITERS / count at count == 1) */
+    data.output = malloc(sizeof(secp256k1_gej) * (ITERS + 1));
+
+    /* Generate a set of scalars, and private/public keypairs. */
+    pubkeys_gej = malloc(sizeof(secp256k1_gej) * POINTS);
+    secp256k1_gej_set_ge(&pubkeys_gej[0], &secp256k1_ge_const_g); /* first public key is G ... */
+    secp256k1_scalar_set_int(&data.seckeys[0], 1); /* ... with secret key 1 */
+    for (i = 0; i < POINTS; ++i) {
+        generate_scalar(i, &data.scalars[i]);
+        if (i) {
+            secp256k1_gej_double_var(&pubkeys_gej[i], &pubkeys_gej[i - 1], NULL); /* each public key is the previous one doubled */
+            secp256k1_scalar_add(&data.seckeys[i], &data.seckeys[i - 1], &data.seckeys[i - 1]); /* ... and so is each secret key */
+        }
+    }
+    secp256k1_ge_set_all_gej_var(data.pubkeys, pubkeys_gej, POINTS); /* fill the pubkeys table from the Jacobian points */
+    free(pubkeys_gej);
+
+    for (i = 1; i <= 8; ++i) { /* term counts 1..8 */
+        run_test(&data, i, 1);
+    }
+
+    for (p = 0; p <= 11; ++p) { /* term counts 9..16, each scaled by 2^p */
+        for (i = 9; i <= 16; ++i) {
+            run_test(&data, i << p, 1); /* the largest count is 16 << 11 == POINTS */
+        }
+    }
+    secp256k1_context_destroy(data.ctx);
+    if (data.scratch != NULL) { /* already destroyed above when "simple" was selected */
+        secp256k1_scratch_space_destroy(data.scratch);
+    }
+    free(data.scalars);
+    free(data.pubkeys);
+    free(data.seckeys);
+    free(data.output);
+    free(data.expected_output);
+
+    return(0);
+}
diff --git a/src/bench_internal.c b/src/bench_internal.c
index 0809f77bda..9071724331 100644
--- a/src/bench_internal.c
+++ b/src/bench_internal.c
@@ -25,10 +25,10 @@ typedef struct {
     secp256k1_gej gej_x, gej_y;
     unsigned char data[64];
     int wnaf[256];
-} bench_inv_t;
+} bench_inv;
 
 void bench_setup(void* arg) {
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     static const unsigned char init_x[32] = {
         0x02, 0x03, 0x05, 0x07, 0x0b, 0x0d, 0x11, 0x13,
@@ -58,7 +58,7 @@ void bench_setup(void* arg) {
 
 void bench_scalar_add(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 2000000; i++) {
         secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
@@ -67,7 +67,7 @@ void bench_scalar_add(void* arg) {
 
 void bench_scalar_negate(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 2000000; i++) {
         secp256k1_scalar_negate(&data->scalar_x, &data->scalar_x);
@@ -76,7 +76,7 @@ void bench_scalar_negate(void* arg) {
 
 void bench_scalar_sqr(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 200000; i++) {
         secp256k1_scalar_sqr(&data->scalar_x, &data->scalar_x);
@@ -85,7 +85,7 @@ void bench_scalar_sqr(void* arg) {
 
 void bench_scalar_mul(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 200000; i++) {
         secp256k1_scalar_mul(&data->scalar_x, &data->scalar_x, &data->scalar_y);
@@ -95,7 +95,7 @@ void bench_scalar_mul(void* arg) {
 #ifdef USE_ENDOMORPHISM
 void bench_scalar_split(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 20000; i++) {
         secp256k1_scalar l, r;
@@ -107,7 +107,7 @@ void bench_scalar_split(void* arg) {
 
 void bench_scalar_inverse(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 2000; i++) {
         secp256k1_scalar_inverse(&data->scalar_x, &data->scalar_x);
@@ -117,7 +117,7 @@ void bench_scalar_inverse(void* arg) {
 
 void bench_scalar_inverse_var(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 2000; i++) {
         secp256k1_scalar_inverse_var(&data->scalar_x, &data->scalar_x);
@@ -127,7 +127,7 @@ void bench_scalar_inverse_var(void* arg) {
 
 void bench_field_normalize(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 2000000; i++) {
         secp256k1_fe_normalize(&data->fe_x);
@@ -136,7 +136,7 @@ void bench_field_normalize(void* arg) {
 
 void bench_field_normalize_weak(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 2000000; i++) {
         secp256k1_fe_normalize_weak(&data->fe_x);
@@ -145,7 +145,7 @@ void bench_field_normalize_weak(void* arg) {
 
 void bench_field_mul(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 200000; i++) {
         secp256k1_fe_mul(&data->fe_x, &data->fe_x, &data->fe_y);
@@ -154,7 +154,7 @@ void bench_field_mul(void* arg) {
 
 void bench_field_sqr(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 200000; i++) {
         secp256k1_fe_sqr(&data->fe_x, &data->fe_x);
@@ -163,7 +163,7 @@ void bench_field_sqr(void* arg) {
 
 void bench_field_inverse(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 20000; i++) {
         secp256k1_fe_inv(&data->fe_x, &data->fe_x);
@@ -173,7 +173,7 @@ void bench_field_inverse(void* arg) {
 
 void bench_field_inverse_var(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 20000; i++) {
         secp256k1_fe_inv_var(&data->fe_x, &data->fe_x);
@@ -183,17 +183,19 @@ void bench_field_inverse_var(void* arg) {
 
 void bench_field_sqrt(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
+    secp256k1_fe t; /* separate input operand for secp256k1_fe_sqrt */
 
     for (i = 0; i < 20000; i++) {
-        secp256k1_fe_sqrt(&data->fe_x, &data->fe_x);
+        t = data->fe_x; /* secp256k1_fe_sqrt's contract forbids aliased arguments, so the input is copied first */
+        secp256k1_fe_sqrt(&data->fe_x, &t);
         secp256k1_fe_add(&data->fe_x, &data->fe_y);
     }
 }
 
 void bench_group_double_var(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 200000; i++) {
         secp256k1_gej_double_var(&data->gej_x, &data->gej_x, NULL);
@@ -202,7 +204,7 @@ void bench_group_double_var(void* arg) {
 
 void bench_group_add_var(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 200000; i++) {
         secp256k1_gej_add_var(&data->gej_x, &data->gej_x, &data->gej_y, NULL);
@@ -211,7 +213,7 @@ void bench_group_add_var(void* arg) {
 
 void bench_group_add_affine(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 200000; i++) {
         secp256k1_gej_add_ge(&data->gej_x, &data->gej_x, &data->ge_y);
@@ -220,7 +222,7 @@ void bench_group_add_affine(void* arg) {
 
 void bench_group_add_affine_var(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 200000; i++) {
         secp256k1_gej_add_ge_var(&data->gej_x, &data->gej_x, &data->ge_y, NULL);
@@ -229,7 +231,7 @@ void bench_group_add_affine_var(void* arg) {
 
 void bench_group_jacobi_var(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 20000; i++) {
         secp256k1_gej_has_quad_y_var(&data->gej_x);
@@ -238,7 +240,7 @@ void bench_group_jacobi_var(void* arg) {
 
 void bench_ecmult_wnaf(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 20000; i++) {
         secp256k1_ecmult_wnaf(data->wnaf, 256, &data->scalar_x, WINDOW_A);
@@ -248,10 +250,10 @@ void bench_ecmult_wnaf(void* arg) {
 
 void bench_wnaf_const(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
 
     for (i = 0; i < 20000; i++) {
-        secp256k1_wnaf_const(data->wnaf, data->scalar_x, WINDOW_A);
+        secp256k1_wnaf_const(data->wnaf, data->scalar_x, WINDOW_A, 256);
         secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
     }
 }
@@ -259,8 +261,8 @@ void bench_wnaf_const(void* arg) {
 
 void bench_sha256(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
-    secp256k1_sha256_t sha;
+    bench_inv *data = (bench_inv*)arg;
+    secp256k1_sha256 sha;
 
     for (i = 0; i < 20000; i++) {
         secp256k1_sha256_initialize(&sha);
@@ -271,8 +273,8 @@ void bench_sha256(void* arg) {
 
 void bench_hmac_sha256(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
-    secp256k1_hmac_sha256_t hmac;
+    bench_inv *data = (bench_inv*)arg;
+    secp256k1_hmac_sha256 hmac;
 
     for (i = 0; i < 20000; i++) {
         secp256k1_hmac_sha256_initialize(&hmac, data->data, 32);
@@ -283,8 +285,8 @@ void bench_hmac_sha256(void* arg) {
 
 void bench_rfc6979_hmac_sha256(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
-    secp256k1_rfc6979_hmac_sha256_t rng;
+    bench_inv *data = (bench_inv*)arg;
+    secp256k1_rfc6979_hmac_sha256 rng;
 
     for (i = 0; i < 20000; i++) {
         secp256k1_rfc6979_hmac_sha256_initialize(&rng, data->data, 64);
@@ -311,7 +313,7 @@ void bench_context_sign(void* arg) {
 #ifndef USE_NUM_NONE
 void bench_num_jacobi(void* arg) {
     int i;
-    bench_inv_t *data = (bench_inv_t*)arg;
+    bench_inv *data = (bench_inv*)arg;
     secp256k1_num nx, norder;
 
     secp256k1_scalar_get_num(&nx, &data->scalar_x);
@@ -324,23 +326,8 @@ void bench_num_jacobi(void* arg) {
 }
 #endif
 
-int have_flag(int argc, char** argv, char *flag) {
-    char** argm = argv + argc;
-    argv++;
-    if (argv == argm) {
-        return 1;
-    }
-    while (argv != NULL && argv != argm) {
-        if (strcmp(*argv, flag) == 0) {
-            return 1;
-        }
-        argv++;
-    }
-    return 0;
-}
-
 int main(int argc, char **argv) {
-    bench_inv_t data;
+    bench_inv data;
     if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "add")) run_benchmark("scalar_add", bench_scalar_add, bench_setup, NULL, &data, 10, 2000000);
     if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "negate")) run_benchmark("scalar_negate", bench_scalar_negate, bench_setup, NULL, &data, 10, 2000000);
     if (have_flag(argc, argv, "scalar") || have_flag(argc, argv, "sqr")) run_benchmark("scalar_sqr", bench_scalar_sqr, bench_setup, NULL, &data, 10, 200000);
diff --git a/src/bench_recover.c b/src/bench_recover.c
index 6489378cc6..b806eed94e 100644
--- a/src/bench_recover.c
+++ b/src/bench_recover.c
@@ -13,11 +13,11 @@ typedef struct {
     secp256k1_context *ctx;
     unsigned char msg[32];
     unsigned char sig[64];
-} bench_recover_t;
+} bench_recover_data;
 
 void bench_recover(void* arg) {
     int i;
-    bench_recover_t *data = (bench_recover_t*)arg;
+    bench_recover_data *data = (bench_recover_data*)arg;
     secp256k1_pubkey pubkey;
     unsigned char pubkeyc[33];
 
@@ -38,7 +38,7 @@ void bench_recover(void* arg) {
 
 void bench_recover_setup(void* arg) {
     int i;
-    bench_recover_t *data = (bench_recover_t*)arg;
+    bench_recover_data *data = (bench_recover_data*)arg;
 
     for (i = 0; i < 32; i++) {
         data->msg[i] = 1 + i;
@@ -49,7 +49,7 @@ void bench_recover_setup(void* arg) {
 }
 
 int main(void) {
-    bench_recover_t data;
+    bench_recover_data data;
 
     data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_VERIFY);
 
diff --git a/src/bench_sign.c b/src/bench_sign.c
index ed7224d757..544b43963c 100644
--- a/src/bench_sign.c
+++ b/src/bench_sign.c
@@ -12,11 +12,11 @@ typedef struct {
     secp256k1_context* ctx;
     unsigned char msg[32];
     unsigned char key[32];
-} bench_sign_t;
+} bench_sign;
 
 static void bench_sign_setup(void* arg) {
     int i;
-    bench_sign_t *data = (bench_sign_t*)arg;
+    bench_sign *data = (bench_sign*)arg;
 
     for (i = 0; i < 32; i++) {
         data->msg[i] = i + 1;
@@ -26,9 +26,9 @@ static void bench_sign_setup(void* arg) {
     }
 }
 
-static void bench_sign(void* arg) {
+static void bench_sign_run(void* arg) {
     int i;
-    bench_sign_t *data = (bench_sign_t*)arg;
+    bench_sign *data = (bench_sign*)arg;
 
     unsigned char sig[74];
     for (i = 0; i < 20000; i++) {
@@ -45,11 +45,11 @@ static void bench_sign(void* arg) {
 }
 
 int main(void) {
-    bench_sign_t data;
+    bench_sign data;
 
     data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN);
 
-    run_benchmark("ecdsa_sign", bench_sign, bench_sign_setup, NULL, &data, 10, 20000);
+    run_benchmark("ecdsa_sign", bench_sign_run, bench_sign_setup, NULL, &data, 10, 20000);
 
     secp256k1_context_destroy(data.ctx);
     return 0;
diff --git a/src/eckey_impl.h b/src/eckey_impl.h
index 1ab9a68ec0..7c5b789325 100644
--- a/src/eckey_impl.h
+++ b/src/eckey_impl.h
@@ -18,7 +18,7 @@ static int secp256k1_eckey_pubkey_parse(secp256k1_ge *elem, const unsigned char
     if (size == 33 && (pub[0] == SECP256K1_TAG_PUBKEY_EVEN || pub[0] == SECP256K1_TAG_PUBKEY_ODD)) {
         secp256k1_fe x;
         return secp256k1_fe_set_b32(&x, pub+1) && secp256k1_ge_set_xo_var(elem, &x, pub[0] == SECP256K1_TAG_PUBKEY_ODD);
-    } else if (size == 65 && (pub[0] == 0x04 || pub[0] == 0x06 || pub[0] == 0x07)) {
+    } else if (size == 65 && (pub[0] == SECP256K1_TAG_PUBKEY_UNCOMPRESSED || pub[0] == SECP256K1_TAG_PUBKEY_HYBRID_EVEN || pub[0] == SECP256K1_TAG_PUBKEY_HYBRID_ODD)) {
         secp256k1_fe x, y;
         if (!secp256k1_fe_set_b32(&x, pub+1) || !secp256k1_fe_set_b32(&y, pub+33)) {
             return 0;
diff --git a/src/ecmult.h b/src/ecmult.h
index 6d44aba60b..3d75a960f4 100644
--- a/src/ecmult.h
+++ b/src/ecmult.h
@@ -1,5 +1,5 @@
 /**********************************************************************
- * Copyright (c) 2013, 2014 Pieter Wuille                             *
+ * Copyright (c) 2013, 2014, 2017 Pieter Wuille, Andrew Poelstra      *
  * Distributed under the MIT software license, see the accompanying   *
  * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
  **********************************************************************/
@@ -9,6 +9,8 @@
 
 #include "num.h"
 #include "group.h"
+#include "scalar.h"
+#include "scratch.h"
 
 typedef struct {
     /* For accelerating the computation of a*P + b*G: */
@@ -28,4 +30,19 @@ static int secp256k1_ecmult_context_is_built(const secp256k1_ecmult_context *ctx
 /** Double multiply: R = na*A + ng*G */
 static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng);
 
+typedef int (secp256k1_ecmult_multi_callback)(secp256k1_scalar *sc, secp256k1_ge *pt, size_t idx, void *data);
+
+/**
+ * Multi-multiply: R = inp_g_sc * G + sum_i ni * Ai.
+ * Chooses the right algorithm for a given number of points and scratch space
+ * size. Resets and overwrites the given scratch space. If the points do not
+ * fit in the scratch space the algorithm is repeatedly run with batches of
+ * points. If no scratch space is given then a simple algorithm is used that
+ * simply multiplies the points with the corresponding scalars and adds them up.
+ * Returns: 1 on success (including when inp_g_sc is NULL and n is 0)
+ *          0 if there is not enough scratch space for a single point or
+ *          callback returns 0
+ */
+static int secp256k1_ecmult_multi_var(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n);
+
 #endif /* SECP256K1_ECMULT_H */
diff --git a/src/ecmult_const.h b/src/ecmult_const.h
index 72bf7d7582..d4804b8b68 100644
--- a/src/ecmult_const.h
+++ b/src/ecmult_const.h
@@ -10,6 +10,8 @@
 #include "scalar.h"
 #include "group.h"
 
-static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, const secp256k1_scalar *q);
+/* Here `bits` should be set to the maximum bitlength of the _absolute value_ of `q`, plus
+ * one because we internally sometimes add 2 to the number during the WNAF conversion. */
+static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, const secp256k1_scalar *q, int bits);
 
 #endif /* SECP256K1_ECMULT_CONST_H */
diff --git a/src/ecmult_const_impl.h b/src/ecmult_const_impl.h
index 7d7a172b7b..8411752eb0 100644
--- a/src/ecmult_const_impl.h
+++ b/src/ecmult_const_impl.h
@@ -12,13 +12,6 @@
 #include "ecmult_const.h"
 #include "ecmult_impl.h"
 
-#ifdef USE_ENDOMORPHISM
-    #define WNAF_BITS 128
-#else
-    #define WNAF_BITS 256
-#endif
-#define WNAF_SIZE(w) ((WNAF_BITS + (w) - 1) / (w))
-
 /* This is like `ECMULT_TABLE_GET_GE` but is constant time */
 #define ECMULT_CONST_TABLE_GET_GE(r,pre,n,w) do { \
     int m; \
@@ -55,7 +48,7 @@
  *
  *  Numbers reference steps of `Algorithm SPA-resistant Width-w NAF with Odd Scalar` on pp. 335
  */
-static int secp256k1_wnaf_const(int *wnaf, secp256k1_scalar s, int w) {
+static int secp256k1_wnaf_const(int *wnaf, secp256k1_scalar s, int w, int size) {
     int global_sign;
     int skew = 0;
     int word = 0;
@@ -74,9 +67,14 @@ static int secp256k1_wnaf_const(int *wnaf, secp256k1_scalar s, int w) {
      * and we'd lose any performance benefit. Instead, we use a technique from
      * Section 4.2 of the Okeya/Tagaki paper, which is to add either 1 (for even)
      * or 2 (for odd) to the number we are encoding, returning a skew value indicating
-     * this, and having the caller compensate after doing the multiplication. */
-
-    /* Negative numbers will be negated to keep their bit representation below the maximum width */
+     * this, and having the caller compensate after doing the multiplication.
+     *
+     * In fact, we _do_ want to negate numbers to minimize their bit-lengths (and in
+     * particular, to ensure that the outputs from the endomorphism-split fit into
+     * 128 bits). If we negate, the parity of our number flips, inverting which of
+     * {1, 2} we want to add to the scalar when ensuring that it's odd. Further
+     * complicating things, -1 interacts badly with `secp256k1_scalar_cadd_bit` and
+     * we need to special-case it in this logic. */
     flip = secp256k1_scalar_is_high(&s);
     /* We add 1 to even numbers, 2 to odd ones, noting that negation flips parity */
     bit = flip ^ !secp256k1_scalar_is_even(&s);
@@ -95,7 +93,7 @@ static int secp256k1_wnaf_const(int *wnaf, secp256k1_scalar s, int w) {
 
     /* 4 */
     u_last = secp256k1_scalar_shr_int(&s, w);
-    while (word * w < WNAF_BITS) {
+    while (word * w < size) {
         int sign;
         int even;
 
@@ -115,37 +113,44 @@ static int secp256k1_wnaf_const(int *wnaf, secp256k1_scalar s, int w) {
     wnaf[word] = u * global_sign;
 
     VERIFY_CHECK(secp256k1_scalar_is_zero(&s));
-    VERIFY_CHECK(word == WNAF_SIZE(w));
+    VERIFY_CHECK(word == WNAF_SIZE_BITS(size, w));
     return skew;
 }
 
-
-static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, const secp256k1_scalar *scalar) {
+static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, const secp256k1_scalar *scalar, int size) {
     secp256k1_ge pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
     secp256k1_ge tmpa;
     secp256k1_fe Z;
 
     int skew_1;
-    int wnaf_1[1 + WNAF_SIZE(WINDOW_A - 1)];
 #ifdef USE_ENDOMORPHISM
     secp256k1_ge pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
     int wnaf_lam[1 + WNAF_SIZE(WINDOW_A - 1)];
     int skew_lam;
     secp256k1_scalar q_1, q_lam;
 #endif
+    int wnaf_1[1 + WNAF_SIZE(WINDOW_A - 1)];
 
     int i;
     secp256k1_scalar sc = *scalar;
 
     /* build wnaf representation for q. */
+    int rsize = size;
+#ifdef USE_ENDOMORPHISM
+    if (size > 128) {
+        rsize = 128;
+        /* split q into q_1 and q_lam (where q = q_1 + q_lam*lambda, and q_1 and q_lam are ~128 bit) */
+        secp256k1_scalar_split_lambda(&q_1, &q_lam, &sc);
+        skew_1   = secp256k1_wnaf_const(wnaf_1,   q_1,   WINDOW_A - 1, 128);
+        skew_lam = secp256k1_wnaf_const(wnaf_lam, q_lam, WINDOW_A - 1, 128);
+    } else
+#endif
+    {
+        skew_1   = secp256k1_wnaf_const(wnaf_1, sc, WINDOW_A - 1, size);
 #ifdef USE_ENDOMORPHISM
-    /* split q into q_1 and q_lam (where q = q_1 + q_lam*lambda, and q_1 and q_lam are ~128 bit) */
-    secp256k1_scalar_split_lambda(&q_1, &q_lam, &sc);
-    skew_1   = secp256k1_wnaf_const(wnaf_1,   q_1,   WINDOW_A - 1);
-    skew_lam = secp256k1_wnaf_const(wnaf_lam, q_lam, WINDOW_A - 1);
-#else
-    skew_1   = secp256k1_wnaf_const(wnaf_1, sc, WINDOW_A - 1);
+        skew_lam = 0;
 #endif
+    }
 
     /* Calculate odd multiples of a.
      * All multiples are brought to the same Z 'denominator', which is stored
@@ -159,26 +164,30 @@ static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, cons
         secp256k1_fe_normalize_weak(&pre_a[i].y);
     }
 #ifdef USE_ENDOMORPHISM
-    for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
-        secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]);
+    if (size > 128) {
+        for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
+            secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]);
+        }
     }
 #endif
 
     /* first loop iteration (separated out so we can directly set r, rather
      * than having it start at infinity, get doubled several times, then have
      * its new value added to it) */
-    i = wnaf_1[WNAF_SIZE(WINDOW_A - 1)];
+    i = wnaf_1[WNAF_SIZE_BITS(rsize, WINDOW_A - 1)];
     VERIFY_CHECK(i != 0);
     ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, i, WINDOW_A);
     secp256k1_gej_set_ge(r, &tmpa);
 #ifdef USE_ENDOMORPHISM
-    i = wnaf_lam[WNAF_SIZE(WINDOW_A - 1)];
-    VERIFY_CHECK(i != 0);
-    ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a_lam, i, WINDOW_A);
-    secp256k1_gej_add_ge(r, r, &tmpa);
+    if (size > 128) {
+        i = wnaf_lam[WNAF_SIZE_BITS(rsize, WINDOW_A - 1)];
+        VERIFY_CHECK(i != 0);
+        ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a_lam, i, WINDOW_A);
+        secp256k1_gej_add_ge(r, r, &tmpa);
+    }
 #endif
     /* remaining loop iterations */
-    for (i = WNAF_SIZE(WINDOW_A - 1) - 1; i >= 0; i--) {
+    for (i = WNAF_SIZE_BITS(rsize, WINDOW_A - 1) - 1; i >= 0; i--) {
         int n;
         int j;
         for (j = 0; j < WINDOW_A - 1; ++j) {
@@ -190,10 +199,12 @@ static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, cons
         VERIFY_CHECK(n != 0);
         secp256k1_gej_add_ge(r, r, &tmpa);
 #ifdef USE_ENDOMORPHISM
-        n = wnaf_lam[i];
-        ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A);
-        VERIFY_CHECK(n != 0);
-        secp256k1_gej_add_ge(r, r, &tmpa);
+        if (size > 128) {
+            n = wnaf_lam[i];
+            ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A);
+            VERIFY_CHECK(n != 0);
+            secp256k1_gej_add_ge(r, r, &tmpa);
+        }
 #endif
     }
 
@@ -213,14 +224,18 @@ static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, cons
         secp256k1_ge_set_gej(&correction, &tmpj);
         secp256k1_ge_to_storage(&correction_1_stor, a);
 #ifdef USE_ENDOMORPHISM
-        secp256k1_ge_to_storage(&correction_lam_stor, a);
+        if (size > 128) {
+            secp256k1_ge_to_storage(&correction_lam_stor, a);
+        }
 #endif
         secp256k1_ge_to_storage(&a2_stor, &correction);
 
         /* For odd numbers this is 2a (so replace it), for even ones a (so no-op) */
         secp256k1_ge_storage_cmov(&correction_1_stor, &a2_stor, skew_1 == 2);
 #ifdef USE_ENDOMORPHISM
-        secp256k1_ge_storage_cmov(&correction_lam_stor, &a2_stor, skew_lam == 2);
+        if (size > 128) {
+            secp256k1_ge_storage_cmov(&correction_lam_stor, &a2_stor, skew_lam == 2);
+        }
 #endif
 
         /* Apply the correction */
@@ -229,10 +244,12 @@ static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, cons
         secp256k1_gej_add_ge(r, r, &correction);
 
 #ifdef USE_ENDOMORPHISM
-        secp256k1_ge_from_storage(&correction, &correction_lam_stor);
-        secp256k1_ge_neg(&correction, &correction);
-        secp256k1_ge_mul_lambda(&correction, &correction);
-        secp256k1_gej_add_ge(r, r, &correction);
+        if (size > 128) {
+            secp256k1_ge_from_storage(&correction, &correction_lam_stor);
+            secp256k1_ge_neg(&correction, &correction);
+            secp256k1_ge_mul_lambda(&correction, &correction);
+            secp256k1_gej_add_ge(r, r, &correction);
+        }
 #endif
     }
 }
diff --git a/src/ecmult_gen_impl.h b/src/ecmult_gen_impl.h
index 9615b932dd..d64505dc00 100644
--- a/src/ecmult_gen_impl.h
+++ b/src/ecmult_gen_impl.h
@@ -77,7 +77,7 @@ static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx
                 secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej, NULL);
             }
         }
-        secp256k1_ge_set_all_gej_var(prec, precj, 1024, cb);
+        secp256k1_ge_set_all_gej_var(prec, precj, 1024);
     }
     for (j = 0; j < 64; j++) {
         for (i = 0; i < 16; i++) {
@@ -161,7 +161,7 @@ static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context *ctx, const
     secp256k1_gej gb;
     secp256k1_fe s;
     unsigned char nonce32[32];
-    secp256k1_rfc6979_hmac_sha256_t rng;
+    secp256k1_rfc6979_hmac_sha256 rng;
     int retry;
     unsigned char keydata[64] = {0};
     if (seed32 == NULL) {
diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h
index 93d3794cb4..1986914a4f 100644
--- a/src/ecmult_impl.h
+++ b/src/ecmult_impl.h
@@ -1,13 +1,14 @@
-/**********************************************************************
- * Copyright (c) 2013, 2014 Pieter Wuille                             *
- * Distributed under the MIT software license, see the accompanying   *
- * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
- **********************************************************************/
+/*****************************************************************************
+ * Copyright (c) 2013, 2014, 2017 Pieter Wuille, Andrew Poelstra, Jonas Nick *
+ * Distributed under the MIT software license, see the accompanying          *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.       *
+ *****************************************************************************/
 
 #ifndef SECP256K1_ECMULT_IMPL_H
 #define SECP256K1_ECMULT_IMPL_H
 
 #include <string.h>
+#include <stdint.h>
 
 #include "group.h"
 #include "scalar.h"
@@ -41,9 +42,36 @@
 #endif
 #endif
 
+#ifdef USE_ENDOMORPHISM
+    #define WNAF_BITS 128
+#else
+    #define WNAF_BITS 256
+#endif
+#define WNAF_SIZE_BITS(bits, w) (((bits) + (w) - 1) / (w))
+#define WNAF_SIZE(w) WNAF_SIZE_BITS(WNAF_BITS, w)
+
 /** The number of entries a table with precomputed multiples needs to have. */
 #define ECMULT_TABLE_SIZE(w) (1 << ((w)-2))
 
+/* The number of objects allocated on the scratch space for ecmult_multi algorithms */
+#define PIPPENGER_SCRATCH_OBJECTS 6
+#define STRAUSS_SCRATCH_OBJECTS 6
+
+#define PIPPENGER_MAX_BUCKET_WINDOW 12
+
+/* Minimum number of points for which pippenger_wnaf is faster than strauss wnaf */
+#ifdef USE_ENDOMORPHISM
+    #define ECMULT_PIPPENGER_THRESHOLD 88
+#else
+    #define ECMULT_PIPPENGER_THRESHOLD 160
+#endif
+
+#ifdef USE_ENDOMORPHISM
+    #define ECMULT_MAX_POINTS_PER_BATCH 5000000
+#else
+    #define ECMULT_MAX_POINTS_PER_BATCH 10000000
+#endif
+
 /** Fill a table 'prej' with precomputed odd multiples of a. Prej will contain
  *  the values [1*a,3*a,...,(2*n-1)*a], so it space for n values. zr[0] will
  *  contain prej[0].z / a.z. The other zr[i] values = prej[i].z / prej[i-1].z.
@@ -109,24 +137,135 @@ static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge *p
     secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A), pre, globalz, prej, zr);
 }
 
-static void secp256k1_ecmult_odd_multiples_table_storage_var(int n, secp256k1_ge_storage *pre, const secp256k1_gej *a, const secp256k1_callback *cb) {
-    secp256k1_gej *prej = (secp256k1_gej*)checked_malloc(cb, sizeof(secp256k1_gej) * n);
-    secp256k1_ge *prea = (secp256k1_ge*)checked_malloc(cb, sizeof(secp256k1_ge) * n);
-    secp256k1_fe *zr = (secp256k1_fe*)checked_malloc(cb, sizeof(secp256k1_fe) * n);
+static void secp256k1_ecmult_odd_multiples_table_storage_var(const int n, secp256k1_ge_storage *pre, const secp256k1_gej *a) {
+    secp256k1_gej d;
+    secp256k1_ge d_ge, p_ge;
+    secp256k1_gej pj;
+    secp256k1_fe zi;
+    secp256k1_fe zr;
+    secp256k1_fe dx_over_dz_squared;
     int i;
 
-    /* Compute the odd multiples in Jacobian form. */
-    secp256k1_ecmult_odd_multiples_table(n, prej, zr, a);
-    /* Convert them in batch to affine coordinates. */
-    secp256k1_ge_set_table_gej_var(prea, prej, zr, n);
-    /* Convert them to compact storage form. */
-    for (i = 0; i < n; i++) {
-        secp256k1_ge_to_storage(&pre[i], &prea[i]);
+    VERIFY_CHECK(!a->infinity);
+
+    secp256k1_gej_double_var(&d, a, NULL);
+
+    /* First, we perform all the additions in an isomorphic curve obtained by multiplying
+     * all `z` coordinates by 1/`d.z`. In these coordinates `d` is affine so we can use
+     * `secp256k1_gej_add_ge_var` to perform the additions. For each addition, we store
+     * the resulting y-coordinate and the z-ratio, since we only have enough memory to
+     * store two field elements. These are sufficient to efficiently undo the isomorphism
+     * and recompute all the `x`s.
+     */
+    d_ge.x = d.x;
+    d_ge.y = d.y;
+    d_ge.infinity = 0;
+
+    secp256k1_ge_set_gej_zinv(&p_ge, a, &d.z);
+    pj.x = p_ge.x;
+    pj.y = p_ge.y;
+    pj.z = a->z;
+    pj.infinity = 0;
+
+    for (i = 0; i < (n - 1); i++) {
+        secp256k1_fe_normalize_var(&pj.y);
+        secp256k1_fe_to_storage(&pre[i].y, &pj.y);
+        secp256k1_gej_add_ge_var(&pj, &pj, &d_ge, &zr);
+        secp256k1_fe_normalize_var(&zr);
+        secp256k1_fe_to_storage(&pre[i].x, &zr);
     }
 
-    free(prea);
-    free(prej);
-    free(zr);
+    /* Invert d.z in the same batch, preserving pj.z so we can extract 1/d.z */
+    secp256k1_fe_mul(&zi, &pj.z, &d.z);
+    secp256k1_fe_inv_var(&zi, &zi);
+
+    /* Directly set `pre[n - 1]` to `pj`, saving the inverted z-coordinate so
+     * that we can combine it with the saved z-ratios to compute the other zs
+     * without any more inversions. */
+    secp256k1_ge_set_gej_zinv(&p_ge, &pj, &zi);
+    secp256k1_ge_to_storage(&pre[n - 1], &p_ge);
+
+    /* Compute the actual x-coordinate of D, which will be needed below. */
+    secp256k1_fe_mul(&d.z, &zi, &pj.z);  /* d.z = 1/d.z */
+    secp256k1_fe_sqr(&dx_over_dz_squared, &d.z);
+    secp256k1_fe_mul(&dx_over_dz_squared, &dx_over_dz_squared, &d.x);
+
+    /* Going into the second loop, we have set `pre[n-1]` to its final affine
+     * form, but still need to set `pre[i]` for `i` in 0 through `n-2`. We
+     * have `zi = (p.z * d.z)^-1`, where
+     *
+     *     `p.z` is the z-coordinate of the point on the isomorphic curve
+     *           which was ultimately assigned to `pre[n-1]`.
+     *     `d.z` is the multiplier that must be applied to all z-coordinates
+     *           to move from our isomorphic curve back to secp256k1; so the
+     *           product `p.z * d.z` is the z-coordinate of the secp256k1
+     *           point assigned to `pre[n-1]`.
+     *
+     * All subsequent inverse-z-coordinates can be obtained by multiplying this
+     * factor by successive z-ratios, which is much more efficient than directly
+     * computing each one.
+     *
+     * Importantly, these inverse-zs will be coordinates of points on secp256k1,
+     * while our other stored values come from computations on the isomorphic
+     * curve. So in the below loop, we will take care not to actually use `zi`
+     * or any derived values until we're back on secp256k1.
+     */
+    i = n - 1;
+    while (i > 0) {
+        secp256k1_fe zi2, zi3;
+        const secp256k1_fe *rzr;
+        i--;
+
+        secp256k1_ge_from_storage(&p_ge, &pre[i]);
+
+        /* For each remaining point, we extract the z-ratio from the stored
+         * x-coordinate, compute its z^-1 from that, and compute the full
+         * point from that. */
+        rzr = &p_ge.x;
+        secp256k1_fe_mul(&zi, &zi, rzr);
+        secp256k1_fe_sqr(&zi2, &zi);
+        secp256k1_fe_mul(&zi3, &zi2, &zi);
+        /* To compute the actual x-coordinate, we use the stored z ratio and
+         * y-coordinate, which we obtained from `secp256k1_gej_add_ge_var`
+         * in the loop above, as well as the inverse of the square of its
+         * z-coordinate. We store the latter in the `zi2` variable, which is
+         * computed iteratively starting from the overall Z inverse then
+         * multiplying by each z-ratio in turn.
+         *
+         * Denoting the z-ratio as `rzr`, we observe that it is equal to `h`
+         * from the inside of the above `gej_add_ge_var` call. This satisfies
+         *
+         *    rzr = d_x * z^2 - x * d_z^2
+         *
+         * where (`d_x`, `d_z`) are Jacobian coordinates of `D` and `(x, z)`
+         * are Jacobian coordinates of our desired point -- except both are on
+         * the isomorphic curve that we were using when we called `gej_add_ge_var`.
+         * To get back to secp256k1, we must multiply both `z`s by `d_z`, or
+         * equivalently divide both `x`s by `d_z^2`. Our equation then becomes
+         *
+         *    rzr = d_x * z^2 / d_z^2 - x
+         *
+         * (The left-hand-side, being a ratio of z-coordinates, is unaffected
+         * by the isomorphism.)
+         *
+         * Rearranging to solve for `x`, we have
+         *
+         *     x = d_x * z^2 / d_z^2 - rzr
+         *
+         * But what we actually want is the affine coordinate `X = x/z^2`,
+         * which will satisfy
+         *
+         *     X = d_x / d_z^2 - rzr / z^2
+         *       = dx_over_dz_squared - rzr * zi2
+         */
+        secp256k1_fe_mul(&p_ge.x, rzr, &zi2);
+        secp256k1_fe_negate(&p_ge.x, &p_ge.x, 1);
+        secp256k1_fe_add(&p_ge.x, &dx_over_dz_squared);
+        /* y is stored_y/z^3, as we expect */
+        secp256k1_fe_mul(&p_ge.y, &p_ge.y, &zi3);
+        /* Store */
+        secp256k1_ge_to_storage(&pre[i], &p_ge);
+    }
 }
 
 /** The following two macro retrieves a particular odd multiple from a table
@@ -138,7 +277,8 @@ static void secp256k1_ecmult_odd_multiples_table_storage_var(int n, secp256k1_ge
     if ((n) > 0) { \
         *(r) = (pre)[((n)-1)/2]; \
     } else { \
-        secp256k1_ge_neg((r), &(pre)[(-(n)-1)/2]); \
+        *(r) = (pre)[(-(n)-1)/2]; \
+        secp256k1_fe_negate(&((r)->y), &((r)->y), 1); \
     } \
 } while(0)
 
@@ -150,7 +290,7 @@ static void secp256k1_ecmult_odd_multiples_table_storage_var(int n, secp256k1_ge
         secp256k1_ge_from_storage((r), &(pre)[((n)-1)/2]); \
     } else { \
         secp256k1_ge_from_storage((r), &(pre)[(-(n)-1)/2]); \
-        secp256k1_ge_neg((r), (r)); \
+        secp256k1_fe_negate(&((r)->y), &((r)->y), 1); \
     } \
 } while(0)
 
@@ -174,7 +314,7 @@ static void secp256k1_ecmult_context_build(secp256k1_ecmult_context *ctx, const
     ctx->pre_g = (secp256k1_ge_storage (*)[])checked_malloc(cb, sizeof((*ctx->pre_g)[0]) * ECMULT_TABLE_SIZE(WINDOW_G));
 
     /* precompute the tables with odd multiples */
-    secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g, &gj, cb);
+    secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g, &gj);
 
 #ifdef USE_ENDOMORPHISM
     {
@@ -188,7 +328,7 @@ static void secp256k1_ecmult_context_build(secp256k1_ecmult_context *ctx, const
         for (i = 0; i < 128; i++) {
             secp256k1_gej_double_var(&g_128j, &g_128j, NULL);
         }
-        secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g_128, &g_128j, cb);
+        secp256k1_ecmult_odd_multiples_table_storage_var(ECMULT_TABLE_SIZE(WINDOW_G), *ctx->pre_g_128, &g_128j);
     }
 #endif
 }
@@ -283,50 +423,78 @@ static int secp256k1_ecmult_wnaf(int *wnaf, int len, const secp256k1_scalar *a,
     return last_set_bit + 1;
 }
 
-static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) {
-    secp256k1_ge pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
-    secp256k1_ge tmpa;
-    secp256k1_fe Z;
+struct secp256k1_strauss_point_state {
 #ifdef USE_ENDOMORPHISM
-    secp256k1_ge pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
     secp256k1_scalar na_1, na_lam;
-    /* Splitted G factors. */
-    secp256k1_scalar ng_1, ng_128;
     int wnaf_na_1[130];
     int wnaf_na_lam[130];
     int bits_na_1;
     int bits_na_lam;
-    int wnaf_ng_1[129];
-    int bits_ng_1;
-    int wnaf_ng_128[129];
-    int bits_ng_128;
 #else
     int wnaf_na[256];
     int bits_na;
+#endif
+    size_t input_pos;
+};
+
+struct secp256k1_strauss_state { /* Caller-provided working buffers used by secp256k1_ecmult_strauss_wnaf. */
+    secp256k1_gej* prej; /* Odd multiples in Jacobian form, ECMULT_TABLE_SIZE(WINDOW_A) entries per point. */
+    secp256k1_fe* zr; /* z-ratios filled in alongside prej, one per prej entry. */
+    secp256k1_ge* pre_a; /* The prej entries after being brought to a common Z denominator. */
+#ifdef USE_ENDOMORPHISM
+    secp256k1_ge* pre_a_lam; /* The pre_a entries passed through secp256k1_ge_mul_lambda. */
+#endif
+    struct secp256k1_strauss_point_state* ps; /* Per-point wNAF state, indexed by processed-point number. */
+};
+
+static void secp256k1_ecmult_strauss_wnaf(const secp256k1_ecmult_context *ctx, const struct secp256k1_strauss_state *state, secp256k1_gej *r, int num, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) {
+    secp256k1_ge tmpa;
+    secp256k1_fe Z;
+#ifdef USE_ENDOMORPHISM
+    /* Split G factors. */
+    secp256k1_scalar ng_1, ng_128;
+    int wnaf_ng_1[129];
+    int bits_ng_1 = 0;
+    int wnaf_ng_128[129];
+    int bits_ng_128 = 0;
+#else
     int wnaf_ng[256];
-    int bits_ng;
+    int bits_ng = 0;
 #endif
     int i;
-    int bits;
+    int bits = 0;
+    int np;
+    int no = 0;
 
+    for (np = 0; np < num; ++np) {
+        if (secp256k1_scalar_is_zero(&na[np]) || secp256k1_gej_is_infinity(&a[np])) {
+            continue;
+        }
+        state->ps[no].input_pos = np;
 #ifdef USE_ENDOMORPHISM
-    /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */
-    secp256k1_scalar_split_lambda(&na_1, &na_lam, na);
-
-    /* build wnaf representation for na_1 and na_lam. */
-    bits_na_1   = secp256k1_ecmult_wnaf(wnaf_na_1,   130, &na_1,   WINDOW_A);
-    bits_na_lam = secp256k1_ecmult_wnaf(wnaf_na_lam, 130, &na_lam, WINDOW_A);
-    VERIFY_CHECK(bits_na_1 <= 130);
-    VERIFY_CHECK(bits_na_lam <= 130);
-    bits = bits_na_1;
-    if (bits_na_lam > bits) {
-        bits = bits_na_lam;
-    }
+        /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */
+        secp256k1_scalar_split_lambda(&state->ps[no].na_1, &state->ps[no].na_lam, &na[np]);
+
+        /* build wnaf representation for na_1 and na_lam. */
+        state->ps[no].bits_na_1   = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na_1,   130, &state->ps[no].na_1,   WINDOW_A);
+        state->ps[no].bits_na_lam = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na_lam, 130, &state->ps[no].na_lam, WINDOW_A);
+        VERIFY_CHECK(state->ps[no].bits_na_1 <= 130);
+        VERIFY_CHECK(state->ps[no].bits_na_lam <= 130);
+        if (state->ps[no].bits_na_1 > bits) {
+            bits = state->ps[no].bits_na_1;
+        }
+        if (state->ps[no].bits_na_lam > bits) {
+            bits = state->ps[no].bits_na_lam;
+        }
 #else
-    /* build wnaf representation for na. */
-    bits_na     = secp256k1_ecmult_wnaf(wnaf_na,     256, na,      WINDOW_A);
-    bits = bits_na;
+        /* build wnaf representation for na. */
+        state->ps[no].bits_na     = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na,     256, &na[np],      WINDOW_A);
+        if (state->ps[no].bits_na > bits) {
+            bits = state->ps[no].bits_na;
+        }
 #endif
+        ++no;
+    }
 
     /* Calculate odd multiples of a.
      * All multiples are brought to the same Z 'denominator', which is stored
@@ -338,29 +506,51 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej
      * of 1/Z, so we can use secp256k1_gej_add_zinv_var, which uses the same
      * isomorphism to efficiently add with a known Z inverse.
      */
-    secp256k1_ecmult_odd_multiples_table_globalz_windowa(pre_a, &Z, a);
+    if (no > 0) {
+        /* Compute the odd multiples in Jacobian form. */
+        secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->prej, state->zr, &a[state->ps[0].input_pos]);
+        for (np = 1; np < no; ++np) {
+            secp256k1_gej tmp = a[state->ps[np].input_pos];
+#ifdef VERIFY
+            secp256k1_fe_normalize_var(&(state->prej[(np - 1) * ECMULT_TABLE_SIZE(WINDOW_A) + ECMULT_TABLE_SIZE(WINDOW_A) - 1].z));
+#endif
+            secp256k1_gej_rescale(&tmp, &(state->prej[(np - 1) * ECMULT_TABLE_SIZE(WINDOW_A) + ECMULT_TABLE_SIZE(WINDOW_A) - 1].z));
+            secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->prej + np * ECMULT_TABLE_SIZE(WINDOW_A), state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), &tmp);
+            secp256k1_fe_mul(state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), &(a[state->ps[np].input_pos].z));
+        }
+        /* Bring them to the same Z denominator. */
+        secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A) * no, state->pre_a, &Z, state->prej, state->zr);
+    } else {
+        secp256k1_fe_set_int(&Z, 1);
+    }
 
 #ifdef USE_ENDOMORPHISM
-    for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
-        secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]);
+    for (np = 0; np < no; ++np) {
+        for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
+            secp256k1_ge_mul_lambda(&state->pre_a_lam[np * ECMULT_TABLE_SIZE(WINDOW_A) + i], &state->pre_a[np * ECMULT_TABLE_SIZE(WINDOW_A) + i]);
+        }
     }
 
-    /* split ng into ng_1 and ng_128 (where gn = gn_1 + gn_128*2^128, and gn_1 and gn_128 are ~128 bit) */
-    secp256k1_scalar_split_128(&ng_1, &ng_128, ng);
+    if (ng) {
+        /* split ng into ng_1 and ng_128 (where ng = ng_1 + ng_128*2^128, and ng_1 and ng_128 are ~128 bit) */
+        secp256k1_scalar_split_128(&ng_1, &ng_128, ng);
 
-    /* Build wnaf representation for ng_1 and ng_128 */
-    bits_ng_1   = secp256k1_ecmult_wnaf(wnaf_ng_1,   129, &ng_1,   WINDOW_G);
-    bits_ng_128 = secp256k1_ecmult_wnaf(wnaf_ng_128, 129, &ng_128, WINDOW_G);
-    if (bits_ng_1 > bits) {
-        bits = bits_ng_1;
-    }
-    if (bits_ng_128 > bits) {
-        bits = bits_ng_128;
+        /* Build wnaf representation for ng_1 and ng_128 */
+        bits_ng_1   = secp256k1_ecmult_wnaf(wnaf_ng_1,   129, &ng_1,   WINDOW_G);
+        bits_ng_128 = secp256k1_ecmult_wnaf(wnaf_ng_128, 129, &ng_128, WINDOW_G);
+        if (bits_ng_1 > bits) {
+            bits = bits_ng_1;
+        }
+        if (bits_ng_128 > bits) {
+            bits = bits_ng_128;
+        }
     }
 #else
-    bits_ng     = secp256k1_ecmult_wnaf(wnaf_ng,     256, ng,      WINDOW_G);
-    if (bits_ng > bits) {
-        bits = bits_ng;
+    if (ng) {
+        bits_ng     = secp256k1_ecmult_wnaf(wnaf_ng,     256, ng,      WINDOW_G);
+        if (bits_ng > bits) {
+            bits = bits_ng;
+        }
     }
 #endif
 
@@ -370,13 +560,15 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej
         int n;
         secp256k1_gej_double_var(r, r, NULL);
 #ifdef USE_ENDOMORPHISM
-        if (i < bits_na_1 && (n = wnaf_na_1[i])) {
-            ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A);
-            secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
-        }
-        if (i < bits_na_lam && (n = wnaf_na_lam[i])) {
-            ECMULT_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A);
-            secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
+        for (np = 0; np < no; ++np) {
+            if (i < state->ps[np].bits_na_1 && (n = state->ps[np].wnaf_na_1[i])) {
+                ECMULT_TABLE_GET_GE(&tmpa, state->pre_a + np * ECMULT_TABLE_SIZE(WINDOW_A), n, WINDOW_A);
+                secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
+            }
+            if (i < state->ps[np].bits_na_lam && (n = state->ps[np].wnaf_na_lam[i])) {
+                ECMULT_TABLE_GET_GE(&tmpa, state->pre_a_lam + np * ECMULT_TABLE_SIZE(WINDOW_A), n, WINDOW_A);
+                secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
+            }
         }
         if (i < bits_ng_1 && (n = wnaf_ng_1[i])) {
             ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G);
@@ -387,9 +579,11 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej
             secp256k1_gej_add_zinv_var(r, r, &tmpa, &Z);
         }
 #else
-        if (i < bits_na && (n = wnaf_na[i])) {
-            ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A);
-            secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
+        for (np = 0; np < no; ++np) {
+            if (i < state->ps[np].bits_na && (n = state->ps[np].wnaf_na[i])) {
+                ECMULT_TABLE_GET_GE(&tmpa, state->pre_a + np * ECMULT_TABLE_SIZE(WINDOW_A), n, WINDOW_A);
+                secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
+            }
         }
         if (i < bits_ng && (n = wnaf_ng[i])) {
             ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G);
@@ -403,4 +597,585 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej
     }
 }
 
+static void secp256k1_ecmult(const secp256k1_ecmult_context *ctx, secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) {
+    secp256k1_gej prej[ECMULT_TABLE_SIZE(WINDOW_A)];
+    secp256k1_fe zr[ECMULT_TABLE_SIZE(WINDOW_A)];
+    secp256k1_ge pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
+    struct secp256k1_strauss_point_state ps[1]; /* per-point state for the one input point */
+#ifdef USE_ENDOMORPHISM
+    secp256k1_ge pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
+#endif
+    struct secp256k1_strauss_state state; /* only holds pointers to the stack buffers declared above */
+
+    state.prej = prej;
+    state.zr = zr;
+    state.pre_a = pre_a;
+#ifdef USE_ENDOMORPHISM
+    state.pre_a_lam = pre_a_lam;
+#endif
+    state.ps = ps;
+    secp256k1_ecmult_strauss_wnaf(ctx, &state, r, 1, a, na, ng); /* single-point call: count 1, point a, scalar na, G scalar ng */
+}
+
+static size_t secp256k1_strauss_scratch_size(size_t n_points) {
+#ifdef USE_ENDOMORPHISM
+    static const size_t point_size = (2 * sizeof(secp256k1_ge) + sizeof(secp256k1_gej) + sizeof(secp256k1_fe)) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar);
+#else
+    static const size_t point_size = (sizeof(secp256k1_ge) + sizeof(secp256k1_gej) + sizeof(secp256k1_fe)) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar);
+#endif
+    return n_points*point_size; /* bytes needed for n_points points; the product is not checked for overflow here */
+}
+
+static int secp256k1_ecmult_strauss_batch(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points, size_t cb_offset) {
+    secp256k1_gej* points;
+    secp256k1_scalar* scalars;
+    struct secp256k1_strauss_state state;
+    size_t i;
+
+    secp256k1_gej_set_infinity(r);
+    if (inp_g_sc == NULL && n_points == 0) {
+        return 1; /* nothing to compute: r stays at infinity */
+    }
+
+    if (!secp256k1_scratch_allocate_frame(scratch, secp256k1_strauss_scratch_size(n_points), STRAUSS_SCRATCH_OBJECTS)) {
+        return 0; /* could not reserve a scratch frame of the required size */
+    }
+    points = (secp256k1_gej*)secp256k1_scratch_alloc(scratch, n_points * sizeof(secp256k1_gej));
+    scalars = (secp256k1_scalar*)secp256k1_scratch_alloc(scratch, n_points * sizeof(secp256k1_scalar));
+    state.prej = (secp256k1_gej*)secp256k1_scratch_alloc(scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_gej));
+    state.zr = (secp256k1_fe*)secp256k1_scratch_alloc(scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_fe));
+#ifdef USE_ENDOMORPHISM
+    state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(scratch, n_points * 2 * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge));
+    state.pre_a_lam = state.pre_a + n_points * ECMULT_TABLE_SIZE(WINDOW_A); /* second half of the doubled pre_a allocation */
+#else
+    state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge));
+#endif
+    state.ps = (struct secp256k1_strauss_point_state*)secp256k1_scratch_alloc(scratch, n_points * sizeof(struct secp256k1_strauss_point_state));
+
+    for (i = 0; i < n_points; i++) {
+        secp256k1_ge point;
+        if (!cb(&scalars[i], &point, i+cb_offset, cbdata)) { /* callback index is shifted by cb_offset */
+            secp256k1_scratch_deallocate_frame(scratch); /* release the frame before reporting failure */
+            return 0;
+        }
+        secp256k1_gej_set_ge(&points[i], &point);
+    }
+    secp256k1_ecmult_strauss_wnaf(ctx, &state, r, n_points, points, scalars, inp_g_sc);
+    secp256k1_scratch_deallocate_frame(scratch);
+    return 1;
+}
+
+/* Wrapper for secp256k1_ecmult_multi_func interface: runs one Strauss batch over all n points with cb_offset 0 */
+static int secp256k1_ecmult_strauss_batch_single(const secp256k1_ecmult_context *actx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) {
+    return secp256k1_ecmult_strauss_batch(actx, scratch, r, inp_g_sc, cb, cbdata, n, 0);
+}
+
+static size_t secp256k1_strauss_max_points(secp256k1_scratch *scratch) {
+    return secp256k1_scratch_max_allocation(scratch, STRAUSS_SCRATCH_OBJECTS) / secp256k1_strauss_scratch_size(1); /* max allocation divided by the per-point size, rounded down */
+}
+
+/** Convert a number to WNAF notation.
+ *  The number becomes represented by sum(2^{wi} * wnaf[i], i=0..WNAF_SIZE(w)-1) - return_val.
+ *  It has the following guarantees:
+ *  - each wnaf[i] is either 0 or an odd integer between -(1 << w) and (1 << w)
+ *  - the number of words set is always WNAF_SIZE(w)
+ *  - the returned skew is 0 or 1
+ */
+static int secp256k1_wnaf_fixed(int *wnaf, const secp256k1_scalar *s, int w) {
+    int skew = 0;
+    int pos;
+    int max_pos;
+    int last_w;
+    const secp256k1_scalar *work = s;
+
+    if (secp256k1_scalar_is_zero(s)) {
+        for (pos = 0; pos < WNAF_SIZE(w); pos++) {
+            wnaf[pos] = 0;
+        }
+        return 0; /* zero scalar: every word is 0 and there is no skew */
+    }
+
+    if (secp256k1_scalar_is_even(s)) {
+        skew = 1; /* even scalar: 1 is added to the lowest word below and reported via the return value */
+    }
+
+    wnaf[0] = secp256k1_scalar_get_bits_var(work, 0, w) + skew;
+    /* Compute last window size. Relevant when window size doesn't divide the
+     * number of bits in the scalar */
+    last_w = WNAF_BITS - (WNAF_SIZE(w) - 1) * w;
+
+    /* Store the position of the first nonzero word in max_pos to allow
+     * skipping leading zeros when calculating the wnaf. */
+    for (pos = WNAF_SIZE(w) - 1; pos > 0; pos--) {
+        int val = secp256k1_scalar_get_bits_var(work, pos * w, pos == WNAF_SIZE(w)-1 ? last_w : w);
+        if(val != 0) {
+            break;
+        }
+        wnaf[pos] = 0;
+    }
+    max_pos = pos;
+    pos = 1;
+
+    while (pos <= max_pos) {
+        int val = secp256k1_scalar_get_bits_var(work, pos * w, pos == WNAF_SIZE(w)-1 ? last_w : w);
+        if ((val & 1) == 0) {
+            wnaf[pos - 1] -= (1 << w); /* borrow 2^w from the previous word so this word becomes odd */
+            wnaf[pos] = (val + 1);
+        } else {
+            wnaf[pos] = val;
+        }
+        /* Set a coefficient to zero if it is 1 or -1 and the preceding digit
+         * is strictly negative or strictly positive respectively. Only change
+         * coefficients at previous positions because above code assumes that
+         * wnaf[pos - 1] is odd.
+         */
+        if (pos >= 2 && ((wnaf[pos - 1] == 1 && wnaf[pos - 2] < 0) || (wnaf[pos - 1] == -1 && wnaf[pos - 2] > 0))) {
+            if (wnaf[pos - 1] == 1) {
+                wnaf[pos - 2] += 1 << w;
+            } else {
+                wnaf[pos - 2] -= 1 << w;
+            }
+            wnaf[pos - 1] = 0;
+        }
+        ++pos;
+    }
+
+    return skew;
+}
+
+struct secp256k1_pippenger_point_state {
+    int skew_na; /* skew returned by secp256k1_wnaf_fixed for this point's scalar */
+    size_t input_pos; /* index of this point in the input scalar/point arrays */
+};
+
+struct secp256k1_pippenger_state {
+    int *wnaf_na; /* wnaf words of all retained points, stored back to back per point */
+    struct secp256k1_pippenger_point_state* ps; /* one entry per retained point */
+};
+
+/*
+ * pippenger_wnaf computes the result of a multi-point multiplication as
+ * follows: The scalars are brought into wnaf with n_wnaf elements each. Then
+ * for every i < n_wnaf, first each point is added to a "bucket" corresponding
+ * to the point's wnaf[i]. Second, the buckets are added together such that
+ * r += 1*bucket[0] + 3*bucket[1] + 5*bucket[2] + ...
+ */
+static int secp256k1_ecmult_pippenger_wnaf(secp256k1_gej *buckets, int bucket_window, struct secp256k1_pippenger_state *state, secp256k1_gej *r, const secp256k1_scalar *sc, const secp256k1_ge *pt, size_t num) {
+    size_t n_wnaf = WNAF_SIZE(bucket_window+1);
+    size_t np;
+    size_t no = 0; /* number of inputs kept after filtering */
+    int i;
+    int j;
+
+    for (np = 0; np < num; ++np) {
+        if (secp256k1_scalar_is_zero(&sc[np]) || secp256k1_ge_is_infinity(&pt[np])) {
+            continue; /* zero scalar or point at infinity: skipped, gets no state slot */
+        }
+        state->ps[no].input_pos = np;
+        state->ps[no].skew_na = secp256k1_wnaf_fixed(&state->wnaf_na[no*n_wnaf], &sc[np], bucket_window+1);
+        no++;
+    }
+    secp256k1_gej_set_infinity(r);
+
+    if (no == 0) {
+        return 1; /* no usable inputs: r is the point at infinity */
+    }
+
+    for (i = n_wnaf - 1; i >= 0; i--) { /* most significant wnaf word first */
+        secp256k1_gej running_sum;
+
+        for(j = 0; j < ECMULT_TABLE_SIZE(bucket_window+2); j++) {
+            secp256k1_gej_set_infinity(&buckets[j]);
+        }
+
+        for (np = 0; np < no; ++np) {
+            int n = state->wnaf_na[np*n_wnaf + i];
+            struct secp256k1_pippenger_point_state point_state = state->ps[np];
+            secp256k1_ge tmp;
+            int idx;
+
+            if (i == 0) {
+                /* correct for wnaf skew */
+                int skew = point_state.skew_na;
+                if (skew) {
+                    secp256k1_ge_neg(&tmp, &pt[point_state.input_pos]);
+                    secp256k1_gej_add_ge_var(&buckets[0], &buckets[0], &tmp, NULL);
+                }
+            }
+            if (n > 0) {
+                idx = (n - 1)/2; /* odd digit n maps to bucket (n-1)/2 */
+                secp256k1_gej_add_ge_var(&buckets[idx], &buckets[idx], &pt[point_state.input_pos], NULL);
+            } else if (n < 0) {
+                idx = -(n + 1)/2; /* negative digit: same bucket as |n|, with the negated point */
+                secp256k1_ge_neg(&tmp, &pt[point_state.input_pos]);
+                secp256k1_gej_add_ge_var(&buckets[idx], &buckets[idx], &tmp, NULL);
+            }
+        }
+
+        for(j = 0; j < bucket_window; j++) {
+            secp256k1_gej_double_var(r, r, NULL);
+        }
+
+        secp256k1_gej_set_infinity(&running_sum);
+        /* Accumulate the sum: bucket[0] + 3*bucket[1] + 5*bucket[2] + 7*bucket[3] + ...
+         *                   = bucket[0] +   bucket[1] +   bucket[2] +   bucket[3] + ...
+         *                   +         2 *  (bucket[1] + 2*bucket[2] + 3*bucket[3] + ...)
+         * using an intermediate running sum:
+         * running_sum = bucket[0] +   bucket[1] +   bucket[2] + ...
+         *
+         * The doubling is done implicitly by deferring the final window doubling (of 'r').
+         */
+        for(j = ECMULT_TABLE_SIZE(bucket_window+2) - 1; j > 0; j--) {
+            secp256k1_gej_add_var(&running_sum, &running_sum, &buckets[j], NULL);
+            secp256k1_gej_add_var(r, r, &running_sum, NULL);
+        }
+
+        secp256k1_gej_add_var(&running_sum, &running_sum, &buckets[0], NULL);
+        secp256k1_gej_double_var(r, r, NULL); /* the deferred final doubling mentioned above */
+        secp256k1_gej_add_var(r, r, &running_sum, NULL);
+    }
+    return 1;
+}
+
+/**
+ * Returns optimal bucket_window (number of bits of a scalar represented by a
+ * set of buckets) for a given number of points.
+ */
+static int secp256k1_pippenger_bucket_window(size_t n) {
+#ifdef USE_ENDOMORPHISM
+    if (n <= 1) {
+        return 1;
+    } else if (n <= 4) {
+        return 2;
+    } else if (n <= 20) {
+        return 3;
+    } else if (n <= 57) {
+        return 4;
+    } else if (n <= 136) {
+        return 5;
+    } else if (n <= 235) {
+        return 6;
+    } else if (n <= 1260) {
+        return 7;
+    } else if (n <= 4420) {
+        return 9; /* this table jumps from 7 to 9; window 8 is never returned in the endomorphism build */
+    } else if (n <= 7880) {
+        return 10;
+    } else if (n <= 16050) {
+        return 11;
+    } else {
+        return PIPPENGER_MAX_BUCKET_WINDOW;
+    }
+#else
+    if (n <= 1) {
+        return 1;
+    } else if (n <= 11) {
+        return 2;
+    } else if (n <= 45) {
+        return 3;
+    } else if (n <= 100) {
+        return 4;
+    } else if (n <= 275) {
+        return 5;
+    } else if (n <= 625) {
+        return 6;
+    } else if (n <= 1850) {
+        return 7;
+    } else if (n <= 3400) {
+        return 8;
+    } else if (n <= 9630) {
+        return 9;
+    } else if (n <= 17900) {
+        return 10;
+    } else if (n <= 32800) {
+        return 11;
+    } else {
+        return PIPPENGER_MAX_BUCKET_WINDOW;
+    }
+#endif
+}
+
+/**
+ * Returns the maximum optimal number of points for a bucket_window.
+ */
+static size_t secp256k1_pippenger_bucket_window_inv(int bucket_window) {
+    switch(bucket_window) {
+#ifdef USE_ENDOMORPHISM
+        case 1: return 1;
+        case 2: return 4;
+        case 3: return 20;
+        case 4: return 57;
+        case 5: return 136;
+        case 6: return 235;
+        case 7: return 1260;
+        case 8: return 1260; /* same bound as window 7: secp256k1_pippenger_bucket_window never returns 8 in this build */
+        case 9: return 4420;
+        case 10: return 7880;
+        case 11: return 16050;
+        case PIPPENGER_MAX_BUCKET_WINDOW: return SIZE_MAX; /* the largest window has no upper bound */
+#else
+        case 1: return 1;
+        case 2: return 11;
+        case 3: return 45;
+        case 4: return 100;
+        case 5: return 275;
+        case 6: return 625;
+        case 7: return 1850;
+        case 8: return 3400;
+        case 9: return 9630;
+        case 10: return 17900;
+        case 11: return 32800;
+        case PIPPENGER_MAX_BUCKET_WINDOW: return SIZE_MAX; /* the largest window has no upper bound */
+#endif
+    }
+    return 0; /* bucket_window outside the table */
+}
+
+
+#ifdef USE_ENDOMORPHISM
+SECP256K1_INLINE static void secp256k1_ecmult_endo_split(secp256k1_scalar *s1, secp256k1_scalar *s2, secp256k1_ge *p1, secp256k1_ge *p2) {
+    secp256k1_scalar tmp = *s1; /* copy of the input scalar, because s1 is overwritten by the split */
+    secp256k1_scalar_split_lambda(s1, s2, &tmp);
+    secp256k1_ge_mul_lambda(p2, p1); /* p2 is derived from p1; p1 itself is only changed by the negation below */
+
+    if (secp256k1_scalar_is_high(s1)) {
+        secp256k1_scalar_negate(s1, s1); /* negate scalar and point together (presumably so that s1*p1 is unchanged) */
+        secp256k1_ge_neg(p1, p1);
+    }
+    if (secp256k1_scalar_is_high(s2)) {
+        secp256k1_scalar_negate(s2, s2); /* same paired negation for the second half */
+        secp256k1_ge_neg(p2, p2);
+    }
+}
+#endif
+
+/**
+ * Returns the scratch size required for a given number of points (excluding
+ * base point G) without considering alignment.
+ */
+static size_t secp256k1_pippenger_scratch_size(size_t n_points, int bucket_window) {
+#ifdef USE_ENDOMORPHISM
+    size_t entries = 2*n_points + 2; /* same entry count as secp256k1_ecmult_pippenger_batch uses */
+#else
+    size_t entries = n_points + 1; /* same entry count as secp256k1_ecmult_pippenger_batch uses */
+#endif
+    size_t entry_size = sizeof(secp256k1_ge) + sizeof(secp256k1_scalar) + sizeof(struct secp256k1_pippenger_point_state) + (WNAF_SIZE(bucket_window+1)+1)*sizeof(int);
+    return (sizeof(secp256k1_gej) << bucket_window) + sizeof(struct secp256k1_pippenger_state) + entries * entry_size; /* buckets + state struct + per-entry data */
+}
+
+static int secp256k1_ecmult_pippenger_batch(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points, size_t cb_offset) {
+    /* Use 2(n+1) with the endomorphism, n+1 without, when calculating batch
+     * sizes. The reason for +1 is that we add the G scalar to the list of
+     * other scalars. */
+#ifdef USE_ENDOMORPHISM
+    size_t entries = 2*n_points + 2;
+#else
+    size_t entries = n_points + 1;
+#endif
+    secp256k1_ge *points;
+    secp256k1_scalar *scalars;
+    secp256k1_gej *buckets;
+    struct secp256k1_pippenger_state *state_space;
+    size_t idx = 0; /* number of scalar/point entries filled so far */
+    size_t point_idx = 0; /* number of callback points consumed so far */
+    int i, j;
+    int bucket_window;
+
+    (void)ctx; /* ctx is not used by this routine */
+    secp256k1_gej_set_infinity(r);
+    if (inp_g_sc == NULL && n_points == 0) {
+        return 1; /* nothing to compute: r stays at infinity */
+    }
+
+    bucket_window = secp256k1_pippenger_bucket_window(n_points);
+    if (!secp256k1_scratch_allocate_frame(scratch, secp256k1_pippenger_scratch_size(n_points, bucket_window), PIPPENGER_SCRATCH_OBJECTS)) {
+        return 0; /* could not reserve a scratch frame of the required size */
+    }
+    points = (secp256k1_ge *) secp256k1_scratch_alloc(scratch, entries * sizeof(*points));
+    scalars = (secp256k1_scalar *) secp256k1_scratch_alloc(scratch, entries * sizeof(*scalars));
+    state_space = (struct secp256k1_pippenger_state *) secp256k1_scratch_alloc(scratch, sizeof(*state_space));
+    state_space->ps = (struct secp256k1_pippenger_point_state *) secp256k1_scratch_alloc(scratch, entries * sizeof(*state_space->ps));
+    state_space->wnaf_na = (int *) secp256k1_scratch_alloc(scratch, entries*(WNAF_SIZE(bucket_window+1)) * sizeof(int));
+    buckets = (secp256k1_gej *) secp256k1_scratch_alloc(scratch, sizeof(*buckets) << bucket_window);
+
+    if (inp_g_sc != NULL) {
+        scalars[0] = *inp_g_sc; /* the G term is stored as the first entry */
+        points[0] = secp256k1_ge_const_g;
+        idx++;
+#ifdef USE_ENDOMORPHISM
+        secp256k1_ecmult_endo_split(&scalars[0], &scalars[1], &points[0], &points[1]);
+        idx++;
+#endif
+    }
+
+    while (point_idx < n_points) {
+        if (!cb(&scalars[idx], &points[idx], point_idx + cb_offset, cbdata)) { /* callback index is shifted by cb_offset */
+            secp256k1_scratch_deallocate_frame(scratch); /* release the frame before reporting failure */
+            return 0;
+        }
+        idx++;
+#ifdef USE_ENDOMORPHISM
+        secp256k1_ecmult_endo_split(&scalars[idx - 1], &scalars[idx], &points[idx - 1], &points[idx]); /* split the entry just read into two adjacent entries */
+        idx++;
+#endif
+        point_idx++;
+    }
+
+    secp256k1_ecmult_pippenger_wnaf(buckets, bucket_window, state_space, r, scalars, points, idx);
+
+    /* Clear data */
+    for(i = 0; (size_t)i < idx; i++) {
+        secp256k1_scalar_clear(&scalars[i]);
+        state_space->ps[i].skew_na = 0;
+        for(j = 0; j < WNAF_SIZE(bucket_window+1); j++) {
+            state_space->wnaf_na[i * WNAF_SIZE(bucket_window+1) + j] = 0;
+        }
+    }
+    for(i = 0; i < 1<<bucket_window; i++) {
+        secp256k1_gej_clear(&buckets[i]);
+    }
+    secp256k1_scratch_deallocate_frame(scratch);
+    return 1;
+}
+
+/* Wrapper for secp256k1_ecmult_multi_func interface: runs one Pippenger batch over all n points with cb_offset 0 */
+static int secp256k1_ecmult_pippenger_batch_single(const secp256k1_ecmult_context *actx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) {
+    return secp256k1_ecmult_pippenger_batch(actx, scratch, r, inp_g_sc, cb, cbdata, n, 0);
+}
+
+/**
+ * Returns the maximum number of points in addition to G that can be used with
+ * a given scratch space. The function ensures that fewer points may also be
+ * used.
+ */
+static size_t secp256k1_pippenger_max_points(secp256k1_scratch *scratch) {
+    size_t max_alloc = secp256k1_scratch_max_allocation(scratch, PIPPENGER_SCRATCH_OBJECTS);
+    int bucket_window;
+    size_t res = 0; /* best point count found so far; 0 if even the smallest window does not fit */
+
+    for (bucket_window = 1; bucket_window <= PIPPENGER_MAX_BUCKET_WINDOW; bucket_window++) {
+        size_t n_points;
+        size_t max_points = secp256k1_pippenger_bucket_window_inv(bucket_window);
+        size_t space_for_points;
+        size_t space_overhead;
+        size_t entry_size = sizeof(secp256k1_ge) + sizeof(secp256k1_scalar) + sizeof(struct secp256k1_pippenger_point_state) + (WNAF_SIZE(bucket_window+1)+1)*sizeof(int);
+
+#ifdef USE_ENDOMORPHISM
+        entry_size = 2*entry_size; /* every point occupies two entries in the endomorphism build */
+#endif
+        space_overhead = (sizeof(secp256k1_gej) << bucket_window) + entry_size + sizeof(struct secp256k1_pippenger_state); /* buckets + one extra point's entries + state struct */
+        if (space_overhead > max_alloc) {
+            break; /* not even the fixed overhead fits at this window size */
+        }
+        space_for_points = max_alloc - space_overhead;
+
+        n_points = space_for_points/entry_size;
+        n_points = n_points > max_points ? max_points : n_points; /* cap at the largest count that selects this window */
+        if (n_points > res) {
+            res = n_points;
+        }
+        if (n_points < max_points) {
+            /* A larger bucket_window may support even more points. But if we
+             * would choose that then the caller couldn't safely use any number
+             * smaller than what this function returns */
+            break;
+        }
+    }
+    return res;
+}
+
+/* Computes ecmult_multi by simply multiplying and adding each point. Does not
+ * require a scratch space */
+static int secp256k1_ecmult_multi_simple_var(const secp256k1_ecmult_context *ctx, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points) {
+    size_t point_idx;
+    secp256k1_scalar szero;
+    secp256k1_gej tmpj;
+
+    secp256k1_scalar_set_int(&szero, 0);
+    secp256k1_gej_set_infinity(r);
+    secp256k1_gej_set_infinity(&tmpj);
+    /* r = inp_g_sc*G */
+    secp256k1_ecmult(ctx, r, &tmpj, &szero, inp_g_sc); /* the point term is infinity with a zero scalar, so only the G term contributes */
+    for (point_idx = 0; point_idx < n_points; point_idx++) {
+        secp256k1_ge point;
+        secp256k1_gej pointj;
+        secp256k1_scalar scalar;
+        if (!cb(&scalar, &point, point_idx, cbdata)) {
+            return 0; /* callback reported failure; r holds only a partial sum */
+        }
+        /* r += scalar*point */
+        secp256k1_gej_set_ge(&pointj, &point);
+        secp256k1_ecmult(ctx, &tmpj, &pointj, &scalar, NULL); /* NULL G scalar: no G term for this call */
+        secp256k1_gej_add_var(r, r, &tmpj, NULL);
+    }
+    return 1;
+}
+
+/* Compute the number of batches and the batch size given the maximum batch size and the
+ * total number of points */
+static int secp256k1_ecmult_multi_batch_size_helper(size_t *n_batches, size_t *n_batch_points, size_t max_n_batch_points, size_t n) {
+    if (max_n_batch_points == 0) {
+        return 0; /* no points fit in a batch; outputs are left untouched */
+    }
+    if (max_n_batch_points > ECMULT_MAX_POINTS_PER_BATCH) {
+        max_n_batch_points = ECMULT_MAX_POINTS_PER_BATCH; /* clamp to the per-batch limit */
+    }
+    if (n == 0) {
+        *n_batches = 0;
+        *n_batch_points = 0;
+        return 1;
+    }
+    /* Compute ceil(n/max_n_batch_points) and ceil(n/n_batches) */
+    *n_batches = 1 + (n - 1) / max_n_batch_points; /* n >= 1 here, so n - 1 cannot wrap */
+    *n_batch_points = 1 + (n - 1) / *n_batches;
+    return 1;
+}
+
+typedef int (*secp256k1_ecmult_multi_func)(const secp256k1_ecmult_context*, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t);
+static int secp256k1_ecmult_multi_var(const secp256k1_ecmult_context *ctx, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n) {
+    size_t i;
+
+    int (*f)(const secp256k1_ecmult_context*, secp256k1_scratch*, secp256k1_gej*, const secp256k1_scalar*, secp256k1_ecmult_multi_callback cb, void*, size_t, size_t);
+    size_t n_batches;
+    size_t n_batch_points;
+
+    secp256k1_gej_set_infinity(r);
+    if (inp_g_sc == NULL && n == 0) {
+        return 1; /* empty input: r is the point at infinity */
+    } else if (n == 0) {
+        secp256k1_scalar szero;
+        secp256k1_scalar_set_int(&szero, 0);
+        secp256k1_ecmult(ctx, r, r, &szero, inp_g_sc); /* only a G term: point input is infinity with a zero scalar */
+        return 1;
+    }
+    if (scratch == NULL) {
+        return secp256k1_ecmult_multi_simple_var(ctx, r, inp_g_sc, cb, cbdata, n); /* no scratch space: evaluate point by point */
+    }
+
+    /* Compute the batch sizes for pippenger given a scratch space. If it's greater than a threshold
+     * use pippenger. Otherwise use strauss */
+    if (!secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, secp256k1_pippenger_max_points(scratch), n)) {
+        return 0;
+    }
+    if (n_batch_points >= ECMULT_PIPPENGER_THRESHOLD) {
+        f = secp256k1_ecmult_pippenger_batch;
+    } else {
+        if (!secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, secp256k1_strauss_max_points(scratch), n)) { /* recompute the batch sizes with the Strauss limit */
+            return 0;
+        }
+        f = secp256k1_ecmult_strauss_batch;
+    }
+    for(i = 0; i < n_batches; i++) {
+        size_t nbp = n < n_batch_points ? n : n_batch_points; /* the last batch may be smaller */
+        size_t offset = n_batch_points*i; /* index of this batch's first point, passed through to the callback */
+        secp256k1_gej tmp;
+        if (!f(ctx, scratch, &tmp, i == 0 ? inp_g_sc : NULL, cb, cbdata, nbp, offset)) { /* the G scalar is passed to the first batch only */
+            return 0;
+        }
+        secp256k1_gej_add_var(r, r, &tmp, NULL);
+        n -= nbp; /* points still to be processed */
+    }
+    return 1;
+}
+
 #endif /* SECP256K1_ECMULT_IMPL_H */
diff --git a/src/field_10x26.h b/src/field_10x26.h
index 727c5267fb..5ff03c8abc 100644
--- a/src/field_10x26.h
+++ b/src/field_10x26.h
@@ -10,7 +10,9 @@
 #include <stdint.h>
 
 typedef struct {
-    /* X = sum(i=0..9, elem[i]*2^26) mod n */
+    /* X = sum(i=0..9, n[i]*2^(i*26)) mod p
+     * where p = 2^256 - 0x1000003D1
+     */
     uint32_t n[10];
 #ifdef VERIFY
     int magnitude;
diff --git a/src/field_10x26_impl.h b/src/field_10x26_impl.h
index 94f8132fc8..4ae4fdcec8 100644
--- a/src/field_10x26_impl.h
+++ b/src/field_10x26_impl.h
@@ -8,7 +8,6 @@
 #define SECP256K1_FIELD_REPR_IMPL_H
 
 #include "util.h"
-#include "num.h"
 #include "field.h"
 
 #ifdef VERIFY
@@ -486,7 +485,8 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t
     VERIFY_BITS(b[9], 26);
 
     /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
-     *  px is a shorthand for sum(a[i]*b[x-i], i=0..x).
+     *  for 0 <= x <= 9, px is a shorthand for sum(a[i]*b[x-i], i=0..x).
+     *  for 9 <= x <= 18, px is a shorthand for sum(a[i]*b[x-i], i=(x-9)..9)
      *  Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
      */
 
@@ -1069,6 +1069,7 @@ static void secp256k1_fe_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp2
     secp256k1_fe_verify(a);
     secp256k1_fe_verify(b);
     VERIFY_CHECK(r != b);
+    VERIFY_CHECK(a != b);
 #endif
     secp256k1_fe_mul_inner(r->n, a->n, b->n);
 #ifdef VERIFY
diff --git a/src/field_5x52.h b/src/field_5x52.h
index bccd8feb4d..fc5bfe357e 100644
--- a/src/field_5x52.h
+++ b/src/field_5x52.h
@@ -10,7 +10,9 @@
 #include <stdint.h>
 
 typedef struct {
-    /* X = sum(i=0..4, elem[i]*2^52) mod n */
+    /* X = sum(i=0..4, n[i]*2^(i*52)) mod p
+     * where p = 2^256 - 0x1000003D1
+     */
     uint64_t n[5];
 #ifdef VERIFY
     int magnitude;
diff --git a/src/field_5x52_impl.h b/src/field_5x52_impl.h
index 957c61b014..f4263320d5 100644
--- a/src/field_5x52_impl.h
+++ b/src/field_5x52_impl.h
@@ -12,7 +12,6 @@
 #endif
 
 #include "util.h"
-#include "num.h"
 #include "field.h"
 
 #if defined(USE_ASM_X86_64)
@@ -422,6 +421,7 @@ static void secp256k1_fe_mul(secp256k1_fe *r, const secp256k1_fe *a, const secp2
     secp256k1_fe_verify(a);
     secp256k1_fe_verify(b);
     VERIFY_CHECK(r != b);
+    VERIFY_CHECK(a != b);
 #endif
     secp256k1_fe_mul_inner(r->n, a->n, b->n);
 #ifdef VERIFY
diff --git a/src/field_5x52_int128_impl.h b/src/field_5x52_int128_impl.h
index 95a0d1791c..bcbfb92ac2 100644
--- a/src/field_5x52_int128_impl.h
+++ b/src/field_5x52_int128_impl.h
@@ -32,9 +32,11 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t
     VERIFY_BITS(b[3], 56);
     VERIFY_BITS(b[4], 52);
     VERIFY_CHECK(r != b);
+    VERIFY_CHECK(a != b);
 
     /*  [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n.
-     *  px is a shorthand for sum(a[i]*b[x-i], i=0..x).
+     *  for 0 <= x <= 4, px is a shorthand for sum(a[i]*b[x-i], i=0..x).
+     *  for 4 <= x <= 8, px is a shorthand for sum(a[i]*b[x-i], i=(x-4)..4)
      *  Note that [x 0 0 0 0 0] = [x*R].
      */
 
diff --git a/src/field_impl.h b/src/field_impl.h
index 20428648af..6070caccfe 100644
--- a/src/field_impl.h
+++ b/src/field_impl.h
@@ -12,6 +12,7 @@
 #endif
 
 #include "util.h"
+#include "num.h"
 
 #if defined(USE_FIELD_10X26)
 #include "field_10x26_impl.h"
@@ -48,6 +49,8 @@ static int secp256k1_fe_sqrt(secp256k1_fe *r, const secp256k1_fe *a) {
     secp256k1_fe x2, x3, x6, x9, x11, x22, x44, x88, x176, x220, x223, t1;
     int j;
 
+    VERIFY_CHECK(r != a);
+
     /** The binary representation of (p + 1)/4 has 3 blocks of 1s, with lengths in
      *  { 2, 22, 223 }. Use an addition chain to calculate 2^n - 1 for each block:
      *  1, [2], 3, 6, 9, 11, [22], 44, 88, 176, 220, [223]
diff --git a/src/gen_context.c b/src/gen_context.c
index 1835fd491d..87d296ebf0 100644
--- a/src/gen_context.c
+++ b/src/gen_context.c
@@ -41,7 +41,7 @@ int main(int argc, char **argv) {
     
     fprintf(fp, "#ifndef _SECP256K1_ECMULT_STATIC_CONTEXT_\n");
     fprintf(fp, "#define _SECP256K1_ECMULT_STATIC_CONTEXT_\n");
-    fprintf(fp, "#include \"group.h\"\n");
+    fprintf(fp, "#include \"src/group.h\"\n");
     fprintf(fp, "#define SC SECP256K1_GE_STORAGE_CONST\n");
     fprintf(fp, "static const secp256k1_ge_storage secp256k1_ecmult_static_context[64][16] = {\n");
 
diff --git a/src/group.h b/src/group.h
index ea1302deb8..8e122ab429 100644
--- a/src/group.h
+++ b/src/group.h
@@ -65,12 +65,7 @@ static void secp256k1_ge_neg(secp256k1_ge *r, const secp256k1_ge *a);
 static void secp256k1_ge_set_gej(secp256k1_ge *r, secp256k1_gej *a);
 
 /** Set a batch of group elements equal to the inputs given in jacobian coordinates */
-static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len, const secp256k1_callback *cb);
-
-/** Set a batch of group elements equal to the inputs given in jacobian
- *  coordinates (with known z-ratios). zr must contain the known z-ratios such
- *  that mul(a[i].z, zr[i+1]) == a[i+1].z. zr[0] is ignored. */
-static void secp256k1_ge_set_table_gej_var(secp256k1_ge *r, const secp256k1_gej *a, const secp256k1_fe *zr, size_t len);
+static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len);
 
 /** Bring a batch inputs given in jacobian coordinates (with known z-ratios) to
  *  the same global z "denominator". zr must contain the known z-ratios such
@@ -79,6 +74,9 @@ static void secp256k1_ge_set_table_gej_var(secp256k1_ge *r, const secp256k1_gej
  *  stored in globalz. */
 static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge *r, secp256k1_fe *globalz, const secp256k1_gej *a, const secp256k1_fe *zr);
 
+/** Set a group element (affine) equal to the point at infinity. */
+static void secp256k1_ge_set_infinity(secp256k1_ge *r);
+
 /** Set a group element (jacobian) equal to the point at infinity. */
 static void secp256k1_gej_set_infinity(secp256k1_gej *r);
 
diff --git a/src/group_impl.h b/src/group_impl.h
index b31b6c12ef..9b93c39e92 100644
--- a/src/group_impl.h
+++ b/src/group_impl.h
@@ -38,22 +38,22 @@
  */
 #if defined(EXHAUSTIVE_TEST_ORDER)
 #  if EXHAUSTIVE_TEST_ORDER == 199
-const secp256k1_ge secp256k1_ge_const_g = SECP256K1_GE_CONST(
+static const secp256k1_ge secp256k1_ge_const_g = SECP256K1_GE_CONST(
     0xFA7CC9A7, 0x0737F2DB, 0xA749DD39, 0x2B4FB069,
     0x3B017A7D, 0xA808C2F1, 0xFB12940C, 0x9EA66C18,
     0x78AC123A, 0x5ED8AEF3, 0x8732BC91, 0x1F3A2868,
     0x48DF246C, 0x808DAE72, 0xCFE52572, 0x7F0501ED
 );
 
-const int CURVE_B = 4;
+static const int CURVE_B = 4;
 #  elif EXHAUSTIVE_TEST_ORDER == 13
-const secp256k1_ge secp256k1_ge_const_g = SECP256K1_GE_CONST(
+static const secp256k1_ge secp256k1_ge_const_g = SECP256K1_GE_CONST(
     0xedc60018, 0xa51a786b, 0x2ea91f4d, 0x4c9416c0,
     0x9de54c3b, 0xa1316554, 0x6cf4345c, 0x7277ef15,
     0x54cb1b6b, 0xdc8c1273, 0x087844ea, 0x43f4603e,
     0x0eaf9a43, 0xf6effe55, 0x939f806d, 0x37adf8ac
 );
-const int CURVE_B = 2;
+static const int CURVE_B = 2;
 #  else
 #    error No known generator for the specified exhaustive test group order.
 #  endif
@@ -68,7 +68,7 @@ static const secp256k1_ge secp256k1_ge_const_g = SECP256K1_GE_CONST(
     0xFD17B448UL, 0xA6855419UL, 0x9C47D08FUL, 0xFB10D4B8UL
 );
 
-const int CURVE_B = 7;
+static const int CURVE_B = 7;
 #endif
 
 static void secp256k1_ge_set_gej_zinv(secp256k1_ge *r, const secp256k1_gej *a, const secp256k1_fe *zi) {
@@ -126,46 +126,56 @@ static void secp256k1_ge_set_gej_var(secp256k1_ge *r, secp256k1_gej *a) {
     r->y = a->y;
 }
 
-static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len, const secp256k1_callback *cb) {
-    secp256k1_fe *az;
-    secp256k1_fe *azi;
+/** Convert the len jacobian points in a[] to affine points in r[] using a single
+ *  field inversion. Prefix products of the non-infinity z coordinates are built up
+ *  in r[].x, the final product is inverted once, and a backward pass recovers each
+ *  individual 1/z. Every r[i].infinity is set from a[i].infinity, including when
+ *  all inputs are infinity (in which case no inversion is performed). */
+static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len) {
+    secp256k1_fe u;
     size_t i;
-    size_t count = 0;
-    az = (secp256k1_fe *)checked_malloc(cb, sizeof(secp256k1_fe) * len);
+    size_t last_i = SIZE_MAX;
+
     for (i = 0; i < len; i++) {
         if (!a[i].infinity) {
-            az[count++] = a[i].z;
+            /* Use destination's x coordinates as scratch space */
+            if (last_i == SIZE_MAX) {
+                r[i].x = a[i].z;
+            } else {
+                secp256k1_fe_mul(&r[i].x, &r[last_i].x, &a[i].z);
+            }
+            last_i = i;
         }
     }
+    if (last_i == SIZE_MAX) {
+        /* No finite input, so there is nothing to invert; the outputs still have
+         * to be flagged as infinity rather than left as the caller passed them in. */
+        for (i = 0; i < len; i++) {
+            r[i].infinity = 1;
+        }
+        return;
+    }
+    secp256k1_fe_inv_var(&u, &r[last_i].x);
 
-    azi = (secp256k1_fe *)checked_malloc(cb, sizeof(secp256k1_fe) * count);
-    secp256k1_fe_inv_all_var(azi, az, count);
-    free(az);
-
-    count = 0;
-    for (i = 0; i < len; i++) {
-        r[i].infinity = a[i].infinity;
+    /* Walk backwards: u is the inverse of the prefix product up to last_i, so
+     * multiplying it by the previous prefix product leaves 1/a[last_i].z alone. */
+    i = last_i;
+    while (i > 0) {
+        i--;
         if (!a[i].infinity) {
-            secp256k1_ge_set_gej_zinv(&r[i], &a[i], &azi[count++]);
+            secp256k1_fe_mul(&r[last_i].x, &r[i].x, &u);
+            secp256k1_fe_mul(&u, &u, &a[last_i].z);
+            last_i = i;
         }
     }
-    free(azi);
-}
-
-static void secp256k1_ge_set_table_gej_var(secp256k1_ge *r, const secp256k1_gej *a, const secp256k1_fe *zr, size_t len) {
-    size_t i = len - 1;
-    secp256k1_fe zi;
+    VERIFY_CHECK(!a[last_i].infinity);
+    r[last_i].x = u;
 
-    if (len > 0) {
-        /* Compute the inverse of the last z coordinate, and use it to compute the last affine output. */
-        secp256k1_fe_inv(&zi, &a[i].z);
-        secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zi);
-
-        /* Work out way backwards, using the z-ratios to scale the x/y values. */
-        while (i > 0) {
-            secp256k1_fe_mul(&zi, &zi, &zr[i]);
-            i--;
-            secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zi);
+    /* r[i].x now holds 1/a[i].z for every non-infinity entry; finish the conversion. */
+    for (i = 0; i < len; i++) {
+        r[i].infinity = a[i].infinity;
+        if (!a[i].infinity) {
+            secp256k1_ge_set_gej_zinv(&r[i], &a[i], &r[i].x);
         }
     }
 }
@@ -178,6 +188,8 @@ static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge *r, secp
         /* The z of the final point gives us the "global Z" for the table. */
         r[i].x = a[i].x;
         r[i].y = a[i].y;
+        /* Ensure all y values are in weak normal form for fast negation of points */
+        secp256k1_fe_normalize_weak(&r[i].y);
         *globalz = a[i].z;
         r[i].infinity = 0;
         zs = zr[i];
@@ -200,6 +212,12 @@ static void secp256k1_gej_set_infinity(secp256k1_gej *r) {
     secp256k1_fe_clear(&r->z);
 }
 
+static void secp256k1_ge_set_infinity(secp256k1_ge *r) {
+    r->infinity = 1;
+    secp256k1_fe_clear(&r->x);
+    secp256k1_fe_clear(&r->y);
+}
+
 static void secp256k1_gej_clear(secp256k1_gej *r) {
     r->infinity = 0;
     secp256k1_fe_clear(&r->x);
diff --git a/src/hash.h b/src/hash.h
index e08d25d225..de26e4b89f 100644
--- a/src/hash.h
+++ b/src/hash.h
@@ -14,28 +14,28 @@ typedef struct {
     uint32_t s[8];
     uint32_t buf[16]; /* In big endian */
     size_t bytes;
-} secp256k1_sha256_t;
+} secp256k1_sha256;
 
-static void secp256k1_sha256_initialize(secp256k1_sha256_t *hash);
-static void secp256k1_sha256_write(secp256k1_sha256_t *hash, const unsigned char *data, size_t size);
-static void secp256k1_sha256_finalize(secp256k1_sha256_t *hash, unsigned char *out32);
+static void secp256k1_sha256_initialize(secp256k1_sha256 *hash);
+static void secp256k1_sha256_write(secp256k1_sha256 *hash, const unsigned char *data, size_t size);
+static void secp256k1_sha256_finalize(secp256k1_sha256 *hash, unsigned char *out32);
 
 typedef struct {
-    secp256k1_sha256_t inner, outer;
-} secp256k1_hmac_sha256_t;
+    secp256k1_sha256 inner, outer;
+} secp256k1_hmac_sha256;
 
-static void secp256k1_hmac_sha256_initialize(secp256k1_hmac_sha256_t *hash, const unsigned char *key, size_t size);
-static void secp256k1_hmac_sha256_write(secp256k1_hmac_sha256_t *hash, const unsigned char *data, size_t size);
-static void secp256k1_hmac_sha256_finalize(secp256k1_hmac_sha256_t *hash, unsigned char *out32);
+static void secp256k1_hmac_sha256_initialize(secp256k1_hmac_sha256 *hash, const unsigned char *key, size_t size);
+static void secp256k1_hmac_sha256_write(secp256k1_hmac_sha256 *hash, const unsigned char *data, size_t size);
+static void secp256k1_hmac_sha256_finalize(secp256k1_hmac_sha256 *hash, unsigned char *out32);
 
 typedef struct {
     unsigned char v[32];
     unsigned char k[32];
     int retry;
-} secp256k1_rfc6979_hmac_sha256_t;
+} secp256k1_rfc6979_hmac_sha256;
 
-static void secp256k1_rfc6979_hmac_sha256_initialize(secp256k1_rfc6979_hmac_sha256_t *rng, const unsigned char *key, size_t keylen);
-static void secp256k1_rfc6979_hmac_sha256_generate(secp256k1_rfc6979_hmac_sha256_t *rng, unsigned char *out, size_t outlen);
-static void secp256k1_rfc6979_hmac_sha256_finalize(secp256k1_rfc6979_hmac_sha256_t *rng);
+static void secp256k1_rfc6979_hmac_sha256_initialize(secp256k1_rfc6979_hmac_sha256 *rng, const unsigned char *key, size_t keylen);
+static void secp256k1_rfc6979_hmac_sha256_generate(secp256k1_rfc6979_hmac_sha256 *rng, unsigned char *out, size_t outlen);
+static void secp256k1_rfc6979_hmac_sha256_finalize(secp256k1_rfc6979_hmac_sha256 *rng);
 
 #endif /* SECP256K1_HASH_H */
diff --git a/src/hash_impl.h b/src/hash_impl.h
index 4c9964ee06..009f26beba 100644
--- a/src/hash_impl.h
+++ b/src/hash_impl.h
@@ -33,7 +33,7 @@
 #define BE32(p) ((((p) & 0xFF) << 24) | (((p) & 0xFF00) << 8) | (((p) & 0xFF0000) >> 8) | (((p) & 0xFF000000) >> 24))
 #endif
 
-static void secp256k1_sha256_initialize(secp256k1_sha256_t *hash) {
+static void secp256k1_sha256_initialize(secp256k1_sha256 *hash) {
     hash->s[0] = 0x6a09e667ul;
     hash->s[1] = 0xbb67ae85ul;
     hash->s[2] = 0x3c6ef372ul;
@@ -128,14 +128,15 @@ static void secp256k1_sha256_transform(uint32_t* s, const uint32_t* chunk) {
     s[7] += h;
 }
 
-static void secp256k1_sha256_write(secp256k1_sha256_t *hash, const unsigned char *data, size_t len) {
+static void secp256k1_sha256_write(secp256k1_sha256 *hash, const unsigned char *data, size_t len) {
     size_t bufsize = hash->bytes & 0x3F;
     hash->bytes += len;
     while (bufsize + len >= 64) {
         /* Fill the buffer, and process it. */
-        memcpy(((unsigned char*)hash->buf) + bufsize, data, 64 - bufsize);
-        data += 64 - bufsize;
-        len -= 64 - bufsize;
+        size_t chunk_len = 64 - bufsize;
+        memcpy(((unsigned char*)hash->buf) + bufsize, data, chunk_len);
+        data += chunk_len;
+        len -= chunk_len;
         secp256k1_sha256_transform(hash->s, hash->buf);
         bufsize = 0;
     }
@@ -145,7 +146,7 @@ static void secp256k1_sha256_write(secp256k1_sha256_t *hash, const unsigned char
     }
 }
 
-static void secp256k1_sha256_finalize(secp256k1_sha256_t *hash, unsigned char *out32) {
+static void secp256k1_sha256_finalize(secp256k1_sha256 *hash, unsigned char *out32) {
     static const unsigned char pad[64] = {0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
     uint32_t sizedesc[2];
     uint32_t out[8];
@@ -161,14 +162,14 @@ static void secp256k1_sha256_finalize(secp256k1_sha256_t *hash, unsigned char *o
     memcpy(out32, (const unsigned char*)out, 32);
 }
 
-static void secp256k1_hmac_sha256_initialize(secp256k1_hmac_sha256_t *hash, const unsigned char *key, size_t keylen) {
-    int n;
+static void secp256k1_hmac_sha256_initialize(secp256k1_hmac_sha256 *hash, const unsigned char *key, size_t keylen) {
+    size_t n;
     unsigned char rkey[64];
-    if (keylen <= 64) {
+    if (keylen <= sizeof(rkey)) {
         memcpy(rkey, key, keylen);
-        memset(rkey + keylen, 0, 64 - keylen);
+        memset(rkey + keylen, 0, sizeof(rkey) - keylen);
     } else {
-        secp256k1_sha256_t sha256;
+        secp256k1_sha256 sha256;
         secp256k1_sha256_initialize(&sha256);
         secp256k1_sha256_write(&sha256, key, keylen);
         secp256k1_sha256_finalize(&sha256, rkey);
@@ -176,24 +177,24 @@ static void secp256k1_hmac_sha256_initialize(secp256k1_hmac_sha256_t *hash, cons
     }
 
     secp256k1_sha256_initialize(&hash->outer);
-    for (n = 0; n < 64; n++) {
+    for (n = 0; n < sizeof(rkey); n++) {
         rkey[n] ^= 0x5c;
     }
-    secp256k1_sha256_write(&hash->outer, rkey, 64);
+    secp256k1_sha256_write(&hash->outer, rkey, sizeof(rkey));
 
     secp256k1_sha256_initialize(&hash->inner);
-    for (n = 0; n < 64; n++) {
+    for (n = 0; n < sizeof(rkey); n++) {
         rkey[n] ^= 0x5c ^ 0x36;
     }
-    secp256k1_sha256_write(&hash->inner, rkey, 64);
-    memset(rkey, 0, 64);
+    secp256k1_sha256_write(&hash->inner, rkey, sizeof(rkey));
+    memset(rkey, 0, sizeof(rkey));
 }
 
-static void secp256k1_hmac_sha256_write(secp256k1_hmac_sha256_t *hash, const unsigned char *data, size_t size) {
+static void secp256k1_hmac_sha256_write(secp256k1_hmac_sha256 *hash, const unsigned char *data, size_t size) {
     secp256k1_sha256_write(&hash->inner, data, size);
 }
 
-static void secp256k1_hmac_sha256_finalize(secp256k1_hmac_sha256_t *hash, unsigned char *out32) {
+static void secp256k1_hmac_sha256_finalize(secp256k1_hmac_sha256 *hash, unsigned char *out32) {
     unsigned char temp[32];
     secp256k1_sha256_finalize(&hash->inner, temp);
     secp256k1_sha256_write(&hash->outer, temp, 32);
@@ -202,8 +203,8 @@ static void secp256k1_hmac_sha256_finalize(secp256k1_hmac_sha256_t *hash, unsign
 }
 
 
-static void secp256k1_rfc6979_hmac_sha256_initialize(secp256k1_rfc6979_hmac_sha256_t *rng, const unsigned char *key, size_t keylen) {
-    secp256k1_hmac_sha256_t hmac;
+static void secp256k1_rfc6979_hmac_sha256_initialize(secp256k1_rfc6979_hmac_sha256 *rng, const unsigned char *key, size_t keylen) {
+    secp256k1_hmac_sha256 hmac;
     static const unsigned char zero[1] = {0x00};
     static const unsigned char one[1] = {0x01};
 
@@ -232,11 +233,11 @@ static void secp256k1_rfc6979_hmac_sha256_initialize(secp256k1_rfc6979_hmac_sha2
     rng->retry = 0;
 }
 
-static void secp256k1_rfc6979_hmac_sha256_generate(secp256k1_rfc6979_hmac_sha256_t *rng, unsigned char *out, size_t outlen) {
+static void secp256k1_rfc6979_hmac_sha256_generate(secp256k1_rfc6979_hmac_sha256 *rng, unsigned char *out, size_t outlen) {
     /* RFC6979 3.2.h. */
     static const unsigned char zero[1] = {0x00};
     if (rng->retry) {
-        secp256k1_hmac_sha256_t hmac;
+        secp256k1_hmac_sha256 hmac;
         secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32);
         secp256k1_hmac_sha256_write(&hmac, rng->v, 32);
         secp256k1_hmac_sha256_write(&hmac, zero, 1);
@@ -247,7 +248,7 @@ static void secp256k1_rfc6979_hmac_sha256_generate(secp256k1_rfc6979_hmac_sha256
     }
 
     while (outlen > 0) {
-        secp256k1_hmac_sha256_t hmac;
+        secp256k1_hmac_sha256 hmac;
         int now = outlen;
         secp256k1_hmac_sha256_initialize(&hmac, rng->k, 32);
         secp256k1_hmac_sha256_write(&hmac, rng->v, 32);
@@ -263,7 +264,7 @@ static void secp256k1_rfc6979_hmac_sha256_generate(secp256k1_rfc6979_hmac_sha256
     rng->retry = 1;
 }
 
-static void secp256k1_rfc6979_hmac_sha256_finalize(secp256k1_rfc6979_hmac_sha256_t *rng) {
+static void secp256k1_rfc6979_hmac_sha256_finalize(secp256k1_rfc6979_hmac_sha256 *rng) {
     memset(rng->k, 0, 32);
     memset(rng->v, 0, 32);
     rng->retry = 0;
diff --git a/src/java/org/bitcoin/NativeSecp256k1Test.java b/src/java/org/bitcoin/NativeSecp256k1Test.java
index c00d08899b..d766a1029c 100644
--- a/src/java/org/bitcoin/NativeSecp256k1Test.java
+++ b/src/java/org/bitcoin/NativeSecp256k1Test.java
@@ -52,7 +52,7 @@ public class NativeSecp256k1Test {
     }
 
     /**
-      * This tests secret key verify() for a invalid secretkey
+      * This tests secret key verify() for an invalid secretkey
       */
     public static void testSecKeyVerifyNeg() throws AssertFailException{
         boolean result = false;
diff --git a/src/java/org_bitcoin_NativeSecp256k1.c b/src/java/org_bitcoin_NativeSecp256k1.c
index bcef7b32ce..b50970b4f2 100644
--- a/src/java/org_bitcoin_NativeSecp256k1.c
+++ b/src/java/org_bitcoin_NativeSecp256k1.c
@@ -83,7 +83,7 @@ SECP256K1_API jobjectArray JNICALL Java_org_bitcoin_NativeSecp256k1_secp256k1_1e
 
   secp256k1_ecdsa_signature sig[72];
 
-  int ret = secp256k1_ecdsa_sign(ctx, sig, data, secKey, NULL, NULL );
+  int ret = secp256k1_ecdsa_sign(ctx, sig, data, secKey, NULL, NULL);
 
   unsigned char outputSer[72];
   size_t outputLen = 72;
@@ -353,7 +353,9 @@ SECP256K1_API jobjectArray JNICALL Java_org_bitcoin_NativeSecp256k1_secp256k1_1e
       ctx,
       nonce_res,
       &pubkey,
-      secdata
+      secdata,
+      NULL,
+      NULL
     );
   }
 
diff --git a/src/modules/ecdh/main_impl.h b/src/modules/ecdh/main_impl.h
index 01ecba4d53..44cb68e750 100644
--- a/src/modules/ecdh/main_impl.h
+++ b/src/modules/ecdh/main_impl.h
@@ -10,16 +10,35 @@
 #include "include/secp256k1_ecdh.h"
 #include "ecmult_const_impl.h"
 
-int secp256k1_ecdh(const secp256k1_context* ctx, unsigned char *result, const secp256k1_pubkey *point, const unsigned char *scalar) {
+static int ecdh_hash_function_sha256(unsigned char *output, const unsigned char *x, const unsigned char *y, void *data) {
+    /* SHA256 over a one-byte tag (0x02 for even y, 0x03 for odd y) followed by the 32-byte x. */
+    secp256k1_sha256 hasher;
+    unsigned char tag = 0x02 | (y[31] & 0x01);
+    (void)data;
+    secp256k1_sha256_initialize(&hasher);
+    secp256k1_sha256_write(&hasher, &tag, 1);
+    secp256k1_sha256_write(&hasher, x, 32);
+    secp256k1_sha256_finalize(&hasher, output);
+
+    return 1;
+}
+
+const secp256k1_ecdh_hash_function secp256k1_ecdh_hash_function_sha256 = ecdh_hash_function_sha256;
+const secp256k1_ecdh_hash_function secp256k1_ecdh_hash_function_default = ecdh_hash_function_sha256;
+
+int secp256k1_ecdh(const secp256k1_context* ctx, unsigned char *output, const secp256k1_pubkey *point, const unsigned char *scalar, secp256k1_ecdh_hash_function hashfp, void *data) {
     int ret = 0;
     int overflow = 0;
     secp256k1_gej res;
     secp256k1_ge pt;
     secp256k1_scalar s;
     VERIFY_CHECK(ctx != NULL);
-    ARG_CHECK(result != NULL);
+    ARG_CHECK(output != NULL);
     ARG_CHECK(point != NULL);
     ARG_CHECK(scalar != NULL);
+    if (hashfp == NULL) {
+        hashfp = secp256k1_ecdh_hash_function_default;
+    }
 
     secp256k1_pubkey_load(ctx, &pt, point);
     secp256k1_scalar_set_b32(&s, scalar, &overflow);
@@ -27,24 +46,18 @@ int secp256k1_ecdh(const secp256k1_context* ctx, unsigned char *result, const se
         ret = 0;
     } else {
         unsigned char x[32];
-        unsigned char y[1];
-        secp256k1_sha256_t sha;
+        unsigned char y[32];
 
-        secp256k1_ecmult_const(&res, &pt, &s);
+        secp256k1_ecmult_const(&res, &pt, &s, 256);
         secp256k1_ge_set_gej(&pt, &res);
-        /* Compute a hash of the point in compressed form
-         * Note we cannot use secp256k1_eckey_pubkey_serialize here since it does not
-         * expect its output to be secret and has a timing sidechannel. */
+
+        /* Compute a hash of the point */
         secp256k1_fe_normalize(&pt.x);
         secp256k1_fe_normalize(&pt.y);
         secp256k1_fe_get_b32(x, &pt.x);
-        y[0] = 0x02 | secp256k1_fe_is_odd(&pt.y);
+        secp256k1_fe_get_b32(y, &pt.y);
 
-        secp256k1_sha256_initialize(&sha);
-        secp256k1_sha256_write(&sha, y, sizeof(y));
-        secp256k1_sha256_write(&sha, x, sizeof(x));
-        secp256k1_sha256_finalize(&sha, result);
-        ret = 1;
+        ret = hashfp(output, x, y, data);
     }
 
     secp256k1_scalar_clear(&s);
diff --git a/src/modules/ecdh/tests_impl.h b/src/modules/ecdh/tests_impl.h
index cec30b67c6..fe26e8fb69 100644
--- a/src/modules/ecdh/tests_impl.h
+++ b/src/modules/ecdh/tests_impl.h
@@ -7,6 +7,23 @@
 #ifndef SECP256K1_MODULE_ECDH_TESTS_H
 #define SECP256K1_MODULE_ECDH_TESTS_H
 
+int ecdh_hash_function_test_fail(unsigned char *output, const unsigned char *x, const unsigned char *y, void *data) {
+    /* Hash callback that ignores all of its arguments and always reports failure. */
+    const int failure = 0;
+    (void)output; (void)x;
+    (void)y; (void)data;
+    return failure;
+}
+
+int ecdh_hash_function_custom(unsigned char *output, const unsigned char *x, const unsigned char *y, void *data) {
+    /* Write the point as a 65-byte uncompressed public key: 0x04, then x, then y */
+    (void)data;
+    output[0] = 0x04;
+    memcpy(&output[1], x, 32);
+    memcpy(&output[1 + 32], y, 32);
+    return 1;
+}
+
 void test_ecdh_api(void) {
     /* Setup context that just counts errors */
     secp256k1_context *tctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN);
@@ -21,15 +38,15 @@ void test_ecdh_api(void) {
     CHECK(secp256k1_ec_pubkey_create(tctx, &point, s_one) == 1);
 
     /* Check all NULLs are detected */
-    CHECK(secp256k1_ecdh(tctx, res, &point, s_one) == 1);
+    CHECK(secp256k1_ecdh(tctx, res, &point, s_one, NULL, NULL) == 1);
     CHECK(ecount == 0);
-    CHECK(secp256k1_ecdh(tctx, NULL, &point, s_one) == 0);
+    CHECK(secp256k1_ecdh(tctx, NULL, &point, s_one, NULL, NULL) == 0);
     CHECK(ecount == 1);
-    CHECK(secp256k1_ecdh(tctx, res, NULL, s_one) == 0);
+    CHECK(secp256k1_ecdh(tctx, res, NULL, s_one, NULL, NULL) == 0);
     CHECK(ecount == 2);
-    CHECK(secp256k1_ecdh(tctx, res, &point, NULL) == 0);
+    CHECK(secp256k1_ecdh(tctx, res, &point, NULL, NULL, NULL) == 0);
     CHECK(ecount == 3);
-    CHECK(secp256k1_ecdh(tctx, res, &point, s_one) == 1);
+    CHECK(secp256k1_ecdh(tctx, res, &point, s_one, NULL, NULL) == 1);
     CHECK(ecount == 3);
 
     /* Cleanup */
@@ -44,29 +61,36 @@ void test_ecdh_generator_basepoint(void) {
     s_one[31] = 1;
     /* Check against pubkey creation when the basepoint is the generator */
     for (i = 0; i < 100; ++i) {
-        secp256k1_sha256_t sha;
+        secp256k1_sha256 sha;
         unsigned char s_b32[32];
-        unsigned char output_ecdh[32];
+        unsigned char output_ecdh[65];
         unsigned char output_ser[32];
-        unsigned char point_ser[33];
+        unsigned char point_ser[65];
         size_t point_ser_len = sizeof(point_ser);
         secp256k1_scalar s;
 
         random_scalar_order(&s);
         secp256k1_scalar_get_b32(s_b32, &s);
 
-        /* compute using ECDH function */
         CHECK(secp256k1_ec_pubkey_create(ctx, &point[0], s_one) == 1);
-        CHECK(secp256k1_ecdh(ctx, output_ecdh, &point[0], s_b32) == 1);
-        /* compute "explicitly" */
         CHECK(secp256k1_ec_pubkey_create(ctx, &point[1], s_b32) == 1);
+
+        /* compute using ECDH function with custom hash function */
+        CHECK(secp256k1_ecdh(ctx, output_ecdh, &point[0], s_b32, ecdh_hash_function_custom, NULL) == 1);
+        /* compute "explicitly" */
+        CHECK(secp256k1_ec_pubkey_serialize(ctx, point_ser, &point_ser_len, &point[1], SECP256K1_EC_UNCOMPRESSED) == 1);
+        /* compare */
+        CHECK(memcmp(output_ecdh, point_ser, 65) == 0);
+
+        /* compute using ECDH function with default hash function */
+        CHECK(secp256k1_ecdh(ctx, output_ecdh, &point[0], s_b32, NULL, NULL) == 1);
+        /* compute "explicitly" */
         CHECK(secp256k1_ec_pubkey_serialize(ctx, point_ser, &point_ser_len, &point[1], SECP256K1_EC_COMPRESSED) == 1);
-        CHECK(point_ser_len == sizeof(point_ser));
         secp256k1_sha256_initialize(&sha);
         secp256k1_sha256_write(&sha, point_ser, point_ser_len);
         secp256k1_sha256_finalize(&sha, output_ser);
         /* compare */
-        CHECK(memcmp(output_ecdh, output_ser, sizeof(output_ser)) == 0);
+        CHECK(memcmp(output_ecdh, output_ser, 32) == 0);
     }
 }
 
@@ -89,11 +113,14 @@ void test_bad_scalar(void) {
     CHECK(secp256k1_ec_pubkey_create(ctx, &point, s_rand) == 1);
 
     /* Try to multiply it by bad values */
-    CHECK(secp256k1_ecdh(ctx, output, &point, s_zero) == 0);
-    CHECK(secp256k1_ecdh(ctx, output, &point, s_overflow) == 0);
+    CHECK(secp256k1_ecdh(ctx, output, &point, s_zero, NULL, NULL) == 0);
+    CHECK(secp256k1_ecdh(ctx, output, &point, s_overflow, NULL, NULL) == 0);
     /* ...and a good one */
     s_overflow[31] -= 1;
-    CHECK(secp256k1_ecdh(ctx, output, &point, s_overflow) == 1);
+    CHECK(secp256k1_ecdh(ctx, output, &point, s_overflow, NULL, NULL) == 1);
+
+    /* Hash function failure results in ecdh failure */
+    CHECK(secp256k1_ecdh(ctx, output, &point, s_overflow, ecdh_hash_function_test_fail, NULL) == 0);
 }
 
 void run_ecdh_tests(void) {
diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
index db1ebf94be..d378335d99 100644
--- a/src/scalar_4x64_impl.h
+++ b/src/scalar_4x64_impl.h
@@ -376,7 +376,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
     /* extract m6 */
     "movq %%r8, %q6\n"
     : "=g"(m0), "=g"(m1), "=g"(m2), "=g"(m3), "=g"(m4), "=g"(m5), "=g"(m6)
-    : "S"(l), "n"(SECP256K1_N_C_0), "n"(SECP256K1_N_C_1)
+    : "S"(l), "i"(SECP256K1_N_C_0), "i"(SECP256K1_N_C_1)
     : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "cc");
 
     /* Reduce 385 bits into 258. */
@@ -455,7 +455,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
     /* extract p4 */
     "movq %%r9, %q4\n"
     : "=&g"(p0), "=&g"(p1), "=&g"(p2), "=g"(p3), "=g"(p4)
-    : "g"(m0), "g"(m1), "g"(m2), "g"(m3), "g"(m4), "g"(m5), "g"(m6), "n"(SECP256K1_N_C_0), "n"(SECP256K1_N_C_1)
+    : "g"(m0), "g"(m1), "g"(m2), "g"(m3), "g"(m4), "g"(m5), "g"(m6), "i"(SECP256K1_N_C_0), "i"(SECP256K1_N_C_1)
     : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "cc");
 
     /* Reduce 258 bits into 256. */
@@ -501,7 +501,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
     /* Extract c */
     "movq %%r9, %q0\n"
     : "=g"(c)
-    : "g"(p0), "g"(p1), "g"(p2), "g"(p3), "g"(p4), "D"(r), "n"(SECP256K1_N_C_0), "n"(SECP256K1_N_C_1)
+    : "g"(p0), "g"(p1), "g"(p2), "g"(p3), "g"(p4), "D"(r), "i"(SECP256K1_N_C_0), "i"(SECP256K1_N_C_1)
     : "rax", "rdx", "r8", "r9", "r10", "cc", "memory");
 #else
     uint128_t c;
diff --git a/src/scratch.h b/src/scratch.h
new file mode 100644
index 0000000000..fef377af0d
--- /dev/null
+++ b/src/scratch.h
@@ -0,0 +1,50 @@
+/**********************************************************************
+ * Copyright (c) 2017 Andrew Poelstra                                 *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef SECP256K1_SCRATCH_H
+#define SECP256K1_SCRATCH_H
+
+/* Maximum number of frames that can be allocated in one scratch space at a time. */
+#define SECP256K1_SCRATCH_MAX_FRAMES 5
+
+/* The typedef is used internally; the struct name is used in the public API
+ * (where it is exposed as a different typedef) */
+typedef struct secp256k1_scratch_space_struct {
+    /* Per-frame state; only indices below `frame` are in use. */
+    void *data[SECP256K1_SCRATCH_MAX_FRAMES];
+    size_t offset[SECP256K1_SCRATCH_MAX_FRAMES];
+    size_t frame_size[SECP256K1_SCRATCH_MAX_FRAMES];
+    /* Number of frames currently allocated. */
+    size_t frame;
+    /* Upper bound on the summed size of all frames. */
+    size_t max_size;
+    /* Handed to checked_malloc when a frame is allocated. */
+    const secp256k1_callback* error_callback;
+} secp256k1_scratch;
+
+/** Creates an empty scratch space whose frames may total at most `max_size` bytes.
+ *  Returns NULL if the underlying allocation yields NULL. */
+static secp256k1_scratch* secp256k1_scratch_create(const secp256k1_callback* error_callback, size_t max_size);
+
+/** Frees the scratch space; NULL is accepted. All frames should already be deallocated. */
+static void secp256k1_scratch_destroy(secp256k1_scratch* scratch);
+
+/** Attempts to allocate a new stack frame with `n` available bytes, plus alignment
+ *  padding reserved for `objects` separate allocations out of that frame.
+ *  Returns 1 on success, 0 on failure */
+static int secp256k1_scratch_allocate_frame(secp256k1_scratch* scratch, size_t n, size_t objects);
+
+/** Deallocates the most recently allocated stack frame */
+static void secp256k1_scratch_deallocate_frame(secp256k1_scratch* scratch);
+
+/** Returns the maximum allocation the scratch space will allow for a new frame with `objects` allocations */
+static size_t secp256k1_scratch_max_allocation(const secp256k1_scratch* scratch, size_t objects);
+
+/** Returns a pointer to `size` zeroed bytes in the most recently allocated frame, or NULL if
+ *  there is no frame or insufficient available space */
+static void *secp256k1_scratch_alloc(secp256k1_scratch* scratch, size_t size);
+
+#endif /* SECP256K1_SCRATCH_H */
diff --git a/src/scratch_impl.h b/src/scratch_impl.h
new file mode 100644
index 0000000000..abed713b21
--- /dev/null
+++ b/src/scratch_impl.h
@@ -0,0 +1,123 @@
+/**********************************************************************
+ * Copyright (c) 2017 Andrew Poelstra                                 *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef SECP256K1_SCRATCH_IMPL_H
+#define SECP256K1_SCRATCH_IMPL_H
+
+#include "scratch.h"
+
+/* Using 16 bytes alignment because common architectures never have alignment
+ * requirements above 8 for any of the types we care about. In addition we
+ * leave some room because currently we don't care about a few bytes.
+ * TODO: Determine this at configure time. */
+#define ALIGNMENT 16
+
+/** Allocates a scratch space with no frames and a total frame budget of max_size
+ *  bytes. The error callback is stored and reused for later frame allocations. */
+static secp256k1_scratch* secp256k1_scratch_create(const secp256k1_callback* error_callback, size_t max_size) {
+    secp256k1_scratch* ret = (secp256k1_scratch*)checked_malloc(error_callback, sizeof(*ret));
+    if (ret != NULL) {
+        memset(ret, 0, sizeof(*ret));
+        ret->max_size = max_size;
+        ret->error_callback = error_callback;
+    }
+    return ret;
+}
+
+/** Frees the scratch space (NULL is a no-op). Frames are not freed here, so all of
+ *  them must have been deallocated already; this is asserted with VERIFY_CHECK. */
+static void secp256k1_scratch_destroy(secp256k1_scratch* scratch) {
+    if (scratch != NULL) {
+        VERIFY_CHECK(scratch->frame == 0);
+        free(scratch);
+    }
+}
+
+/* Bytes of the max_size budget not yet claimed by existing frames. */
+static size_t secp256k1_scratch_unclaimed(const secp256k1_scratch* scratch) {
+    size_t i;
+    size_t allocated = 0;
+    for (i = 0; i < scratch->frame; i++) {
+        allocated += scratch->frame_size[i];
+    }
+    return scratch->max_size - allocated;
+}
+
+/** Returns the largest n that secp256k1_scratch_allocate_frame(scratch, n, objects)
+ *  can currently satisfy, or 0 if the per-object padding alone does not fit. */
+static size_t secp256k1_scratch_max_allocation(const secp256k1_scratch* scratch, size_t objects) {
+    size_t available = secp256k1_scratch_unclaimed(scratch);
+    /* Compare by division: objects * ALIGNMENT could wrap around for a huge
+     * object count and make an impossible request look small. */
+    if (objects > available / ALIGNMENT) {
+        return 0;
+    }
+    return available - objects * ALIGNMENT;
+}
+
+/** Pushes a frame of n usable bytes plus ALIGNMENT bytes of padding per object.
+ *  Returns 1 on success. Returns 0 without adding a frame if all frame slots
+ *  are in use, the request exceeds the remaining budget, or the allocation
+ *  yields NULL. */
+static int secp256k1_scratch_allocate_frame(secp256k1_scratch* scratch, size_t n, size_t objects) {
+    size_t available;
+    VERIFY_CHECK(scratch->frame < SECP256K1_SCRATCH_MAX_FRAMES);
+    if (scratch->frame >= SECP256K1_SCRATCH_MAX_FRAMES) {
+        /* Do not rely on VERIFY_CHECK alone to keep the index inside the per-frame arrays. */
+        return 0;
+    }
+
+    available = secp256k1_scratch_unclaimed(scratch);
+    /* Check the padding and the payload separately so that neither the product
+     * nor the sum can wrap, and so that a zero-byte request cannot be granted
+     * padding that does not fit in the budget. */
+    if (objects > available / ALIGNMENT || n > available - objects * ALIGNMENT) {
+        return 0;
+    }
+    n += objects * ALIGNMENT;
+    scratch->data[scratch->frame] = checked_malloc(scratch->error_callback, n);
+    if (scratch->data[scratch->frame] == NULL) {
+        return 0;
+    }
+    scratch->frame_size[scratch->frame] = n;
+    scratch->offset[scratch->frame] = 0;
+    scratch->frame++;
+    return 1;
+}
+
+/** Pops the most recently allocated frame and frees its buffer. */
+static void secp256k1_scratch_deallocate_frame(secp256k1_scratch* scratch) {
+    VERIFY_CHECK(scratch->frame > 0);
+    scratch->frame -= 1;
+    free(scratch->data[scratch->frame]);
+}
+
+/** Hands out size bytes (rounded up to a multiple of ALIGNMENT) of zeroed memory
+ *  from the most recent frame. Returns NULL if there is no frame, the frame has
+ *  too little room left, or rounding size up would overflow. */
+static void *secp256k1_scratch_alloc(secp256k1_scratch* scratch, size_t size) {
+    void *ret;
+    size_t frame;
+    size_t rounded = ((size + ALIGNMENT - 1) / ALIGNMENT) * ALIGNMENT;
+
+    /* rounded < size means the round-up wrapped around */
+    if (scratch->frame == 0 || rounded < size) {
+        return NULL;
+    }
+    frame = scratch->frame - 1;
+    /* offset never exceeds frame_size, so subtracting cannot wrap the way
+     * adding the request to the offset could. */
+    if (rounded > scratch->frame_size[frame] - scratch->offset[frame]) {
+        return NULL;
+    }
+    ret = (void *) ((unsigned char *) scratch->data[frame] + scratch->offset[frame]);
+    memset(ret, 0, rounded);
+    scratch->offset[frame] += rounded;
+
+    return ret;
+}
+
+#endif /* SECP256K1_SCRATCH_IMPL_H */
diff --git a/src/secp256k1.c b/src/secp256k1.c
index 4f8c01655b..15981f46e2 100644
--- a/src/secp256k1.c
+++ b/src/secp256k1.c
@@ -17,6 +17,7 @@
 #include "ecdsa_impl.h"
 #include "eckey_impl.h"
 #include "hash_impl.h"
+#include "scratch_impl.h"
 
 #define ARG_CHECK(cond) do { \
     if (EXPECT(!(cond), 0)) { \
@@ -55,6 +56,14 @@ struct secp256k1_context_struct {
     secp256k1_callback error_callback;
 };
 
+static const secp256k1_context secp256k1_context_no_precomp_ = { /* statically allocated context, exported through the pointer below */
+    { 0 }, /* first member zeroed; presumably the ecmult context, left unbuilt (confirm against the struct definition) */
+    { 0 }, /* second member zeroed; presumably the ecmult_gen context, left unbuilt (confirm against the struct definition) */
+    { default_illegal_callback_fn, 0 }, /* default illegal-argument handler, data pointer 0 */
+    { default_error_callback_fn, 0 } /* error_callback: default handler, data pointer 0 */
+};
+const secp256k1_context *secp256k1_context_no_precomp = &secp256k1_context_no_precomp_; /* destroy and the callback setters CHECK that they are not handed this pointer */
+
 secp256k1_context* secp256k1_context_create(unsigned int flags) {
     secp256k1_context* ret = (secp256k1_context*)checked_malloc(&default_error_callback, sizeof(secp256k1_context));
     ret->illegal_callback = default_illegal_callback;
@@ -90,6 +99,7 @@ secp256k1_context* secp256k1_context_clone(const secp256k1_context* ctx) {
 }
 
 void secp256k1_context_destroy(secp256k1_context* ctx) {
+    CHECK(ctx != secp256k1_context_no_precomp);
     if (ctx != NULL) {
         secp256k1_ecmult_context_clear(&ctx->ecmult_ctx);
         secp256k1_ecmult_gen_context_clear(&ctx->ecmult_gen_ctx);
@@ -99,6 +109,7 @@ void secp256k1_context_destroy(secp256k1_context* ctx) {
 }
 
 void secp256k1_context_set_illegal_callback(secp256k1_context* ctx, void (*fun)(const char* message, void* data), const void* data) {
+    CHECK(ctx != secp256k1_context_no_precomp);
     if (fun == NULL) {
         fun = default_illegal_callback_fn;
     }
@@ -107,6 +118,7 @@ void secp256k1_context_set_illegal_callback(secp256k1_context* ctx, void (*fun)(
 }
 
 void secp256k1_context_set_error_callback(secp256k1_context* ctx, void (*fun)(const char* message, void* data), const void* data) {
+    CHECK(ctx != secp256k1_context_no_precomp);
     if (fun == NULL) {
         fun = default_error_callback_fn;
     }
@@ -114,13 +126,22 @@ void secp256k1_context_set_error_callback(secp256k1_context* ctx, void (*fun)(co
     ctx->error_callback.data = data;
 }
 
+secp256k1_scratch_space* secp256k1_scratch_space_create(const secp256k1_context* ctx, size_t max_size) { /* public wrapper around secp256k1_scratch_create */
+    VERIFY_CHECK(ctx != NULL);
+    return secp256k1_scratch_create(&ctx->error_callback, max_size); /* the scratch space keeps a pointer to the context's error callback */
+}
+
+void secp256k1_scratch_space_destroy(secp256k1_scratch_space* scratch) { /* public wrapper around secp256k1_scratch_destroy */
+    secp256k1_scratch_destroy(scratch); /* the internal routine ignores NULL */
+}
+
 static int secp256k1_pubkey_load(const secp256k1_context* ctx, secp256k1_ge* ge, const secp256k1_pubkey* pubkey) {
     if (sizeof(secp256k1_ge_storage) == 64) {
         /* When the secp256k1_ge_storage type is exactly 64 byte, use its
          * representation inside secp256k1_pubkey, as conversion is very fast.
          * Note that secp256k1_pubkey_save must use the same representation. */
         secp256k1_ge_storage s;
-        memcpy(&s, &pubkey->data[0], 64);
+        memcpy(&s, &pubkey->data[0], sizeof(s));
         secp256k1_ge_from_storage(ge, &s);
     } else {
         /* Otherwise, fall back to 32-byte big endian for X and Y. */
@@ -137,7 +158,7 @@ static void secp256k1_pubkey_save(secp256k1_pubkey* pubkey, secp256k1_ge* ge) {
     if (sizeof(secp256k1_ge_storage) == 64) {
         secp256k1_ge_storage s;
         secp256k1_ge_to_storage(&s, ge);
-        memcpy(&pubkey->data[0], &s, 64);
+        memcpy(&pubkey->data[0], &s, sizeof(s));
     } else {
         VERIFY_CHECK(!secp256k1_ge_is_infinity(ge));
         secp256k1_fe_normalize_var(&ge->x);
@@ -307,10 +328,15 @@ int secp256k1_ecdsa_verify(const secp256k1_context* ctx, const secp256k1_ecdsa_s
             secp256k1_ecdsa_sig_verify(&ctx->ecmult_ctx, &r, &s, &q, &m));
 }
 
+static SECP256K1_INLINE void buffer_append(unsigned char *buf, unsigned int *offset, const void *data, unsigned int len) {
+    memcpy(&buf[*offset], data, len); /* copy to the current write position; the caller sizes buf, nothing is checked here */
+    *offset = *offset + len; /* move the write position past the appended bytes */
+}
+
 static int nonce_function_rfc6979(unsigned char *nonce32, const unsigned char *msg32, const unsigned char *key32, const unsigned char *algo16, void *data, unsigned int counter) {
    unsigned char keydata[112];
-   int keylen = 64;
-   secp256k1_rfc6979_hmac_sha256_t rng;
+   unsigned int offset = 0;
+   secp256k1_rfc6979_hmac_sha256 rng;
    unsigned int i;
    /* We feed a byte array to the PRNG as input, consisting of:
     * - the private key (32 bytes) and message (32 bytes), see RFC 6979 3.2d.
@@ -320,17 +346,15 @@ static int nonce_function_rfc6979(unsigned char *nonce32, const unsigned char *m
     *  different argument mixtures to emulate each other and result in the same
     *  nonces.
     */
-   memcpy(keydata, key32, 32);
-   memcpy(keydata + 32, msg32, 32);
+   buffer_append(keydata, &offset, key32, 32);
+   buffer_append(keydata, &offset, msg32, 32);
    if (data != NULL) {
-       memcpy(keydata + 64, data, 32);
-       keylen = 96;
+       buffer_append(keydata, &offset, data, 32);
    }
    if (algo16 != NULL) {
-       memcpy(keydata + keylen, algo16, 16);
-       keylen += 16;
+       buffer_append(keydata, &offset, algo16, 16);
    }
-   secp256k1_rfc6979_hmac_sha256_initialize(&rng, keydata, keylen);
+   secp256k1_rfc6979_hmac_sha256_initialize(&rng, keydata, offset);
    memset(keydata, 0, sizeof(keydata));
    for (i = 0; i <= counter; i++) {
        secp256k1_rfc6979_hmac_sha256_generate(&rng, nonce32, 32);
@@ -546,8 +570,9 @@ int secp256k1_ec_pubkey_tweak_mul(const secp256k1_context* ctx, secp256k1_pubkey
 
 int secp256k1_context_randomize(secp256k1_context* ctx, const unsigned char *seed32) {
     VERIFY_CHECK(ctx != NULL);
-    ARG_CHECK(secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx));
-    secp256k1_ecmult_gen_blind(&ctx->ecmult_gen_ctx, seed32);
+    if (secp256k1_ecmult_gen_context_is_built(&ctx->ecmult_gen_ctx)) {
+        secp256k1_ecmult_gen_blind(&ctx->ecmult_gen_ctx, seed32);
+    }
     return 1;
 }
 
diff --git a/src/testrand_impl.h b/src/testrand_impl.h
index 1255574209..30a91e5296 100644
--- a/src/testrand_impl.h
+++ b/src/testrand_impl.h
@@ -13,7 +13,7 @@
 #include "testrand.h"
 #include "hash.h"
 
-static secp256k1_rfc6979_hmac_sha256_t secp256k1_test_rng;
+static secp256k1_rfc6979_hmac_sha256 secp256k1_test_rng;
 static uint32_t secp256k1_test_rng_precomputed[8];
 static int secp256k1_test_rng_precomputed_used = 8;
 static uint64_t secp256k1_test_rng_integer;
diff --git a/src/tests.c b/src/tests.c
index 3d9bd5ebb4..f1c4db929a 100644
--- a/src/tests.c
+++ b/src/tests.c
@@ -23,6 +23,9 @@
 #include "openssl/ec.h"
 #include "openssl/ecdsa.h"
 #include "openssl/obj_mac.h"
+# if OPENSSL_VERSION_NUMBER < 0x10100000L
+void ECDSA_SIG_get0(const ECDSA_SIG *sig, const BIGNUM **pr, const BIGNUM **ps) {if (pr != NULL) {*pr = sig->r;} if (ps != NULL) {*ps = sig->s;}} /* shim for OpenSSL < 1.1.0; as in the real API, pr or ps may be NULL */
+# endif
 #endif
 
 #include "contrib/lax_der_parsing.c"
@@ -215,8 +218,12 @@ void run_context_tests(void) {
     CHECK(ecount == 3);
     CHECK(secp256k1_ec_pubkey_tweak_mul(vrfy, &pubkey, ctmp) == 1);
     CHECK(ecount == 3);
-    CHECK(secp256k1_context_randomize(vrfy, ctmp) == 0);
-    CHECK(ecount == 4);
+    CHECK(secp256k1_context_randomize(vrfy, ctmp) == 1);
+    CHECK(ecount == 3);
+    CHECK(secp256k1_context_randomize(vrfy, NULL) == 1);
+    CHECK(ecount == 3);
+    CHECK(secp256k1_context_randomize(sign, ctmp) == 1);
+    CHECK(ecount2 == 14);
     CHECK(secp256k1_context_randomize(sign, NULL) == 1);
     CHECK(ecount2 == 14);
     secp256k1_context_set_illegal_callback(vrfy, NULL, NULL);
@@ -248,6 +255,44 @@ void run_context_tests(void) {
     secp256k1_context_destroy(NULL);
 }
 
+/* Exercise the scratch space: creation through the public API, then frame push/alloc/pop through the internal API. */
+void run_scratch_tests(void) {
+    int32_t ecount = 0;
+    secp256k1_context *none = secp256k1_context_create(SECP256K1_CONTEXT_NONE);
+    secp256k1_scratch_space *scratch;
+
+    /* Test public API */
+    secp256k1_context_set_illegal_callback(none, counting_illegal_callback_fn, &ecount);
+    scratch = secp256k1_scratch_space_create(none, 1000);
+    CHECK(scratch != NULL);
+    CHECK(ecount == 0); /* creation must not have triggered the illegal-argument callback */
+
+    /* Test internal API */
+    CHECK(secp256k1_scratch_max_allocation(scratch, 0) == 1000);
+    CHECK(secp256k1_scratch_max_allocation(scratch, 1) < 1000);
+
+    /* Allocating 500 bytes with no frame fails */
+    CHECK(secp256k1_scratch_alloc(scratch, 500) == NULL);
+    CHECK(secp256k1_scratch_max_allocation(scratch, 0) == 1000);
+
+    /* ...but pushing a new stack frame does affect the max allocation */
+    CHECK(secp256k1_scratch_allocate_frame(scratch, 500, 1)); /* frame of 500 bytes holding 1 object */
+    CHECK(secp256k1_scratch_max_allocation(scratch, 1) < 500); /* 500 - ALIGNMENT */
+    CHECK(secp256k1_scratch_alloc(scratch, 500) != NULL);
+    CHECK(secp256k1_scratch_alloc(scratch, 500) == NULL);
+
+    CHECK(secp256k1_scratch_allocate_frame(scratch, 500, 1) == 0); /* a second 500-byte frame must be refused while the first is still open */
+
+    /* ...and this effect is undone by popping the frame */
+    secp256k1_scratch_deallocate_frame(scratch);
+    CHECK(secp256k1_scratch_max_allocation(scratch, 0) == 1000);
+    CHECK(secp256k1_scratch_alloc(scratch, 500) == NULL);
+
+    /* cleanup */
+    secp256k1_scratch_space_destroy(scratch);
+    secp256k1_context_destroy(none);
+}
+
 /***** HASH TESTS *****/
 
 void run_sha256_tests(void) {
@@ -270,7 +315,7 @@ void run_sha256_tests(void) {
     int i;
     for (i = 0; i < 8; i++) {
         unsigned char out[32];
-        secp256k1_sha256_t hasher;
+        secp256k1_sha256 hasher;
         secp256k1_sha256_initialize(&hasher);
         secp256k1_sha256_write(&hasher, (const unsigned char*)(inputs[i]), strlen(inputs[i]));
         secp256k1_sha256_finalize(&hasher, out);
@@ -313,7 +358,7 @@ void run_hmac_sha256_tests(void) {
     };
     int i;
     for (i = 0; i < 6; i++) {
-        secp256k1_hmac_sha256_t hasher;
+        secp256k1_hmac_sha256 hasher;
         unsigned char out[32];
         secp256k1_hmac_sha256_initialize(&hasher, (const unsigned char*)(keys[i]), strlen(keys[i]));
         secp256k1_hmac_sha256_write(&hasher, (const unsigned char*)(inputs[i]), strlen(inputs[i]));
@@ -345,7 +390,7 @@ void run_rfc6979_hmac_sha256_tests(void) {
         {0x75, 0x97, 0x88, 0x7c, 0xbd, 0x76, 0x32, 0x1f, 0x32, 0xe3, 0x04, 0x40, 0x67, 0x9a, 0x22, 0xcf, 0x7f, 0x8d, 0x9d, 0x2e, 0xac, 0x39, 0x0e, 0x58, 0x1f, 0xea, 0x09, 0x1c, 0xe2, 0x02, 0xba, 0x94}
     };
 
-    secp256k1_rfc6979_hmac_sha256_t rng;
+    secp256k1_rfc6979_hmac_sha256 rng;
     unsigned char out[32];
     int i;
 
@@ -2054,7 +2099,6 @@ void test_ge(void) {
     /* Test batch gej -> ge conversion with and without known z ratios. */
     {
         secp256k1_fe *zr = (secp256k1_fe *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_fe));
-        secp256k1_ge *ge_set_table = (secp256k1_ge *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_ge));
         secp256k1_ge *ge_set_all = (secp256k1_ge *)checked_malloc(&ctx->error_callback, (4 * runs + 1) * sizeof(secp256k1_ge));
         for (i = 0; i < 4 * runs + 1; i++) {
             /* Compute gej[i + 1].z / gez[i].z (with gej[n].z taken to be 1). */
@@ -2062,20 +2106,33 @@ void test_ge(void) {
                 secp256k1_fe_mul(&zr[i + 1], &zinv[i], &gej[i + 1].z);
             }
         }
-        secp256k1_ge_set_table_gej_var(ge_set_table, gej, zr, 4 * runs + 1);
-        secp256k1_ge_set_all_gej_var(ge_set_all, gej, 4 * runs + 1, &ctx->error_callback);
+        secp256k1_ge_set_all_gej_var(ge_set_all, gej, 4 * runs + 1);
         for (i = 0; i < 4 * runs + 1; i++) {
             secp256k1_fe s;
             random_fe_non_zero(&s);
             secp256k1_gej_rescale(&gej[i], &s);
-            ge_equals_gej(&ge_set_table[i], &gej[i]);
             ge_equals_gej(&ge_set_all[i], &gej[i]);
         }
-        free(ge_set_table);
         free(ge_set_all);
         free(zr);
     }
 
+    /* Test batch gej -> ge conversion with many infinities. */
+    for (i = 0; i < 4 * runs + 1; i++) {
+        random_group_element_test(&ge[i]);
+        /* randomly set half the points to infinity */
+        if(secp256k1_fe_is_odd(&ge[i].x)) {
+            secp256k1_ge_set_infinity(&ge[i]);
+        }
+        secp256k1_gej_set_ge(&gej[i], &ge[i]);
+    }
+    /* batch invert */
+    secp256k1_ge_set_all_gej_var(ge, gej, 4 * runs + 1);
+    /* check result */
+    for (i = 0; i < 4 * runs + 1; i++) {
+        ge_equals_gej(&ge[i], &gej[i]);
+    }
+
     free(ge);
     free(gej);
     free(zinv);
@@ -2405,7 +2462,7 @@ void ecmult_const_random_mult(void) {
         0xb84e4e1b, 0xfb77e21f, 0x96baae2a, 0x63dec956
     );
     secp256k1_gej b;
-    secp256k1_ecmult_const(&b, &a, &xn);
+    secp256k1_ecmult_const(&b, &a, &xn, 256);
 
     CHECK(secp256k1_ge_is_valid_var(&a));
     ge_equals_gej(&expected_b, &b);
@@ -2421,12 +2478,12 @@ void ecmult_const_commutativity(void) {
     random_scalar_order_test(&a);
     random_scalar_order_test(&b);
 
-    secp256k1_ecmult_const(&res1, &secp256k1_ge_const_g, &a);
-    secp256k1_ecmult_const(&res2, &secp256k1_ge_const_g, &b);
+    secp256k1_ecmult_const(&res1, &secp256k1_ge_const_g, &a, 256);
+    secp256k1_ecmult_const(&res2, &secp256k1_ge_const_g, &b, 256);
     secp256k1_ge_set_gej(&mid1, &res1);
     secp256k1_ge_set_gej(&mid2, &res2);
-    secp256k1_ecmult_const(&res1, &mid1, &b);
-    secp256k1_ecmult_const(&res2, &mid2, &a);
+    secp256k1_ecmult_const(&res1, &mid1, &b, 256);
+    secp256k1_ecmult_const(&res2, &mid2, &a, 256);
     secp256k1_ge_set_gej(&mid1, &res1);
     secp256k1_ge_set_gej(&mid2, &res2);
     ge_equals_ge(&mid1, &mid2);
@@ -2442,13 +2499,13 @@ void ecmult_const_mult_zero_one(void) {
     secp256k1_scalar_negate(&negone, &one);
 
     random_group_element_test(&point);
-    secp256k1_ecmult_const(&res1, &point, &zero);
+    secp256k1_ecmult_const(&res1, &point, &zero, 3);
     secp256k1_ge_set_gej(&res2, &res1);
     CHECK(secp256k1_ge_is_infinity(&res2));
-    secp256k1_ecmult_const(&res1, &point, &one);
+    secp256k1_ecmult_const(&res1, &point, &one, 2);
     secp256k1_ge_set_gej(&res2, &res1);
     ge_equals_ge(&res2, &point);
-    secp256k1_ecmult_const(&res1, &point, &negone);
+    secp256k1_ecmult_const(&res1, &point, &negone, 256);
     secp256k1_gej_neg(&res1, &res1);
     secp256k1_ge_set_gej(&res2, &res1);
     ge_equals_ge(&res2, &point);
@@ -2474,7 +2531,7 @@ void ecmult_const_chain_multiply(void) {
     for (i = 0; i < 100; ++i) {
         secp256k1_ge tmp;
         secp256k1_ge_set_gej(&tmp, &point);
-        secp256k1_ecmult_const(&point, &tmp, &scalar);
+        secp256k1_ecmult_const(&point, &tmp, &scalar, 256);
     }
     secp256k1_ge_set_gej(&res, &point);
     ge_equals_gej(&res, &expected_point);
@@ -2487,6 +2544,446 @@ void run_ecmult_const_tests(void) {
     ecmult_const_chain_multiply();
 }
 
+typedef struct { /* Callback payload for the ecmult_multi tests: parallel arrays read by index. */
+    secp256k1_scalar *sc; /* scalars; entry idx is copied out by ecmult_multi_callback */
+    secp256k1_ge *pt; /* points; entry idx is copied out by ecmult_multi_callback */
+} ecmult_multi_data;
+
+static int ecmult_multi_callback(secp256k1_scalar *sc, secp256k1_ge *pt, size_t idx, void *cbdata) { /* Supplies the idx-th (scalar, point) pair. */
+    ecmult_multi_data *data = (ecmult_multi_data*) cbdata; /* cbdata must point to an ecmult_multi_data */
+    *sc = data->sc[idx]; /* no range check on idx: the arrays must hold more than idx entries */
+    *pt = data->pt[idx];
+    return 1; /* never fails (compare ecmult_multi_false_callback, which returns 0) */
+}
+
+static int ecmult_multi_false_callback(secp256k1_scalar *sc, secp256k1_ge *pt, size_t idx, void *cbdata) { /* Callback that always returns 0. */
+    (void)sc; /* the casts to void mark the parameters as deliberately unused */
+    (void)pt;
+    (void)idx;
+    (void)cbdata;
+    return 0; /* test_ecmult_multi expects ecmult_multi to return 0 when given this callback */
+}
+
+void test_ecmult_multi(secp256k1_scratch *scratch, secp256k1_ecmult_multi_func ecmult_multi) {
+    int ncount;
+    secp256k1_scalar szero;
+    secp256k1_scalar sc[32];
+    secp256k1_ge pt[32];
+    secp256k1_gej r;
+    secp256k1_gej r2;
+    ecmult_multi_data data;
+    secp256k1_scratch *scratch_empty;
+
+    data.sc = sc;
+    data.pt = pt;
+    secp256k1_scalar_set_int(&szero, 0);
+
+    /* No points to multiply */
+    CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, NULL, ecmult_multi_callback, &data, 0));
+
+    /* Check 1- and 2-point multiplies against ecmult */
+    for (ncount = 0; ncount < count; ncount++) {
+        secp256k1_ge ptg;
+        secp256k1_gej ptgj;
+        random_scalar_order(&sc[0]);
+        random_scalar_order(&sc[1]);
+
+        random_group_element_test(&ptg);
+        secp256k1_gej_set_ge(&ptgj, &ptg);
+        pt[0] = ptg;
+        pt[1] = secp256k1_ge_const_g;
+
+        /* only G scalar */
+        secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &ptgj, &szero, &sc[0]);
+        CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &sc[0], ecmult_multi_callback, &data, 0));
+        secp256k1_gej_neg(&r2, &r2);
+        secp256k1_gej_add_var(&r, &r, &r2, NULL);
+        CHECK(secp256k1_gej_is_infinity(&r));
+
+        /* 1-point */
+        secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &ptgj, &sc[0], &szero);
+        CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 1));
+        secp256k1_gej_neg(&r2, &r2);
+        secp256k1_gej_add_var(&r, &r, &r2, NULL);
+        CHECK(secp256k1_gej_is_infinity(&r));
+
+        /* Try to multiply 1 point, but scratch space is empty */
+        scratch_empty = secp256k1_scratch_create(&ctx->error_callback, 0);
+        CHECK(!ecmult_multi(&ctx->ecmult_ctx, scratch_empty, &r, &szero, ecmult_multi_callback, &data, 1));
+        secp256k1_scratch_destroy(scratch_empty);
+
+        /* Try to multiply 1 point, but callback returns false */
+        CHECK(!ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_false_callback, &data, 1));
+
+        /* 2-point */
+        secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &ptgj, &sc[0], &sc[1]);
+        CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 2));
+        secp256k1_gej_neg(&r2, &r2);
+        secp256k1_gej_add_var(&r, &r, &r2, NULL);
+        CHECK(secp256k1_gej_is_infinity(&r));
+
+        /* 2-point with G scalar */
+        secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &ptgj, &sc[0], &sc[1]);
+        CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &sc[1], ecmult_multi_callback, &data, 1));
+        secp256k1_gej_neg(&r2, &r2);
+        secp256k1_gej_add_var(&r, &r, &r2, NULL);
+        CHECK(secp256k1_gej_is_infinity(&r));
+    }
+
+    /* Check infinite outputs of various forms */
+    for (ncount = 0; ncount < count; ncount++) {
+        secp256k1_ge ptg;
+        size_t i, j;
+        size_t sizes[] = { 2, 10, 32 };
+
+        for (j = 0; j < 3; j++) {
+            for (i = 0; i < 32; i++) {
+                random_scalar_order(&sc[i]);
+                secp256k1_ge_set_infinity(&pt[i]);
+            }
+            CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, sizes[j]));
+            CHECK(secp256k1_gej_is_infinity(&r));
+        }
+
+        for (j = 0; j < 3; j++) {
+            for (i = 0; i < 32; i++) {
+                random_group_element_test(&ptg);
+                pt[i] = ptg;
+                secp256k1_scalar_set_int(&sc[i], 0);
+            }
+            CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, sizes[j]));
+            CHECK(secp256k1_gej_is_infinity(&r));
+        }
+
+        for (j = 0; j < 3; j++) {
+            random_group_element_test(&ptg);
+            for (i = 0; i < 16; i++) {
+                random_scalar_order(&sc[2*i]);
+                secp256k1_scalar_negate(&sc[2*i + 1], &sc[2*i]);
+                pt[2 * i] = ptg;
+                pt[2 * i + 1] = ptg;
+            }
+
+            CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, sizes[j]));
+            CHECK(secp256k1_gej_is_infinity(&r));
+
+            random_scalar_order(&sc[0]);
+            for (i = 0; i < 16; i++) {
+                random_group_element_test(&ptg);
+
+                sc[2*i] = sc[0];
+                sc[2*i+1] = sc[0];
+                pt[2 * i] = ptg;
+                secp256k1_ge_neg(&pt[2*i+1], &pt[2*i]);
+            }
+
+            CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, sizes[j]));
+            CHECK(secp256k1_gej_is_infinity(&r));
+        }
+
+        random_group_element_test(&ptg);
+        secp256k1_scalar_set_int(&sc[0], 0);
+        pt[0] = ptg;
+        for (i = 1; i < 32; i++) {
+            pt[i] = ptg;
+
+            random_scalar_order(&sc[i]);
+            secp256k1_scalar_add(&sc[0], &sc[0], &sc[i]);
+            secp256k1_scalar_negate(&sc[i], &sc[i]);
+        }
+
+        CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 32));
+        CHECK(secp256k1_gej_is_infinity(&r));
+    }
+
+    /* Check random points, constant scalar */
+    for (ncount = 0; ncount < count; ncount++) {
+        size_t i;
+        secp256k1_gej_set_infinity(&r);
+
+        random_scalar_order(&sc[0]);
+        for (i = 0; i < 20; i++) {
+            secp256k1_ge ptg;
+            sc[i] = sc[0];
+            random_group_element_test(&ptg);
+            pt[i] = ptg;
+            secp256k1_gej_add_ge_var(&r, &r, &pt[i], NULL);
+        }
+
+        secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &r, &sc[0], &szero);
+        CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 20));
+        secp256k1_gej_neg(&r2, &r2);
+        secp256k1_gej_add_var(&r, &r, &r2, NULL);
+        CHECK(secp256k1_gej_is_infinity(&r));
+    }
+
+    /* Check random scalars, constant point */
+    for (ncount = 0; ncount < count; ncount++) {
+        size_t i;
+        secp256k1_ge ptg;
+        secp256k1_gej p0j;
+        secp256k1_scalar rs;
+        secp256k1_scalar_set_int(&rs, 0);
+
+        random_group_element_test(&ptg);
+        for (i = 0; i < 20; i++) {
+            random_scalar_order(&sc[i]);
+            pt[i] = ptg;
+            secp256k1_scalar_add(&rs, &rs, &sc[i]);
+        }
+
+        secp256k1_gej_set_ge(&p0j, &pt[0]);
+        secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &p0j, &rs, &szero);
+        CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 20));
+        secp256k1_gej_neg(&r2, &r2);
+        secp256k1_gej_add_var(&r, &r, &r2, NULL);
+        CHECK(secp256k1_gej_is_infinity(&r));
+    }
+
+    /* Sanity check that zero scalars don't cause problems */
+    for (ncount = 0; ncount < 20; ncount++) {
+        random_scalar_order(&sc[ncount]);
+        random_group_element_test(&pt[ncount]);
+    }
+
+    secp256k1_scalar_clear(&sc[0]);
+    CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 20));
+    secp256k1_scalar_clear(&sc[1]);
+    secp256k1_scalar_clear(&sc[2]);
+    secp256k1_scalar_clear(&sc[3]);
+    secp256k1_scalar_clear(&sc[4]);
+    CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 6));
+    CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &r, &szero, ecmult_multi_callback, &data, 5));
+    CHECK(secp256k1_gej_is_infinity(&r));
+
+    /* Run through s0*(t0*P) + s1*(t1*P) exhaustively for many small values of s0, s1, t0, t1 */
+    {
+        const size_t TOP = 8;
+        size_t s0i, s1i;
+        size_t t0i, t1i;
+        secp256k1_ge ptg;
+        secp256k1_gej ptgj;
+
+        random_group_element_test(&ptg);
+        secp256k1_gej_set_ge(&ptgj, &ptg);
+
+        for(t0i = 0; t0i < TOP; t0i++) {
+            for(t1i = 0; t1i < TOP; t1i++) {
+                secp256k1_gej t0p, t1p;
+                secp256k1_scalar t0, t1;
+
+                secp256k1_scalar_set_int(&t0, (t0i + 1) / 2);
+                secp256k1_scalar_cond_negate(&t0, t0i & 1);
+                secp256k1_scalar_set_int(&t1, (t1i + 1) / 2);
+                secp256k1_scalar_cond_negate(&t1, t1i & 1);
+
+                secp256k1_ecmult(&ctx->ecmult_ctx, &t0p, &ptgj, &t0, &szero);
+                secp256k1_ecmult(&ctx->ecmult_ctx, &t1p, &ptgj, &t1, &szero);
+
+                for(s0i = 0; s0i < TOP; s0i++) {
+                    for(s1i = 0; s1i < TOP; s1i++) {
+                        secp256k1_scalar tmp1, tmp2;
+                        secp256k1_gej expected, actual;
+
+                        secp256k1_ge_set_gej(&pt[0], &t0p);
+                        secp256k1_ge_set_gej(&pt[1], &t1p);
+
+                        secp256k1_scalar_set_int(&sc[0], (s0i + 1) / 2);
+                        secp256k1_scalar_cond_negate(&sc[0], s0i & 1);
+                        secp256k1_scalar_set_int(&sc[1], (s1i + 1) / 2);
+                        secp256k1_scalar_cond_negate(&sc[1], s1i & 1);
+
+                        secp256k1_scalar_mul(&tmp1, &t0, &sc[0]);
+                        secp256k1_scalar_mul(&tmp2, &t1, &sc[1]);
+                        secp256k1_scalar_add(&tmp1, &tmp1, &tmp2);
+
+                        secp256k1_ecmult(&ctx->ecmult_ctx, &expected, &ptgj, &tmp1, &szero);
+                        CHECK(ecmult_multi(&ctx->ecmult_ctx, scratch, &actual, &szero, ecmult_multi_callback, &data, 2));
+                        secp256k1_gej_neg(&expected, &expected);
+                        secp256k1_gej_add_var(&actual, &actual, &expected, NULL);
+                        CHECK(secp256k1_gej_is_infinity(&actual));
+                    }
+                }
+            }
+        }
+    }
+}
+
+void test_secp256k1_pippenger_bucket_window_inv(void) { /* Checks bucket_window_inv against bucket_window for every window size. */
+    int i;
+
+    CHECK(secp256k1_pippenger_bucket_window_inv(0) == 0);
+    for(i = 1; i <= PIPPENGER_MAX_BUCKET_WINDOW; i++) {
+#ifdef USE_ENDOMORPHISM
+        /* Bucket_window of 8 is not used with endo */
+        if (i == 8) {
+            continue;
+        }
+#endif
+        CHECK(secp256k1_pippenger_bucket_window(secp256k1_pippenger_bucket_window_inv(i)) == i); /* round trip: inv(i) maps back to window i */
+        if (i != PIPPENGER_MAX_BUCKET_WINDOW) { /* presumably no larger window exists past the maximum, so skip the next check there */
+            CHECK(secp256k1_pippenger_bucket_window(secp256k1_pippenger_bucket_window_inv(i)+1) > i); /* inv(i) + 1 already selects a larger window */
+        }
+    }
+}
+
+/**
+ * Probabilistically test the function returning the maximum number of possible points
+ * for a given scratch space.
+ */
+void test_ecmult_multi_pippenger_max_points(void) {
+    size_t scratch_size = secp256k1_rand_int(256);
+    size_t max_size = secp256k1_pippenger_scratch_size(secp256k1_pippenger_bucket_window_inv(PIPPENGER_MAX_BUCKET_WINDOW-1)+512, 12);
+    secp256k1_scratch *scratch;
+    size_t n_points_supported;
+    int bucket_window = 0;
+
+    for(; scratch_size < max_size; scratch_size+=256) {
+        scratch = secp256k1_scratch_create(&ctx->error_callback, scratch_size);
+        CHECK(scratch != NULL);
+        n_points_supported = secp256k1_pippenger_max_points(scratch);
+        if (n_points_supported == 0) {
+            secp256k1_scratch_destroy(scratch);
+            continue;
+        }
+        bucket_window = secp256k1_pippenger_bucket_window(n_points_supported);
+        CHECK(secp256k1_scratch_allocate_frame(scratch, secp256k1_pippenger_scratch_size(n_points_supported, bucket_window), PIPPENGER_SCRATCH_OBJECTS));
+        secp256k1_scratch_deallocate_frame(scratch);
+        secp256k1_scratch_destroy(scratch);
+    }
+    CHECK(bucket_window == PIPPENGER_MAX_BUCKET_WINDOW);
+}
+
+void test_ecmult_multi_batch_size_helper(void) {
+    size_t n_batches, n_batch_points, max_n_batch_points, n;
+
+    max_n_batch_points = 0;
+    n = 1;
+    CHECK(secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, max_n_batch_points, n) == 0);
+
+    max_n_batch_points = 1;
+    n = 0;
+    CHECK(secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, max_n_batch_points, n) == 1);
+    CHECK(n_batches == 0);
+    CHECK(n_batch_points == 0);
+
+    max_n_batch_points = 2;
+    n = 5;
+    CHECK(secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, max_n_batch_points, n) == 1);
+    CHECK(n_batches == 3);
+    CHECK(n_batch_points == 2);
+
+    max_n_batch_points = ECMULT_MAX_POINTS_PER_BATCH;
+    n = ECMULT_MAX_POINTS_PER_BATCH;
+    CHECK(secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, max_n_batch_points, n) == 1);
+    CHECK(n_batches == 1);
+    CHECK(n_batch_points == ECMULT_MAX_POINTS_PER_BATCH);
+
+    max_n_batch_points = ECMULT_MAX_POINTS_PER_BATCH + 1;
+    n = ECMULT_MAX_POINTS_PER_BATCH + 1;
+    CHECK(secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, max_n_batch_points, n) == 1);
+    CHECK(n_batches == 2);
+    CHECK(n_batch_points == ECMULT_MAX_POINTS_PER_BATCH/2 + 1);
+
+    max_n_batch_points = 1;
+    n = SIZE_MAX;
+    CHECK(secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, max_n_batch_points, n) == 1);
+    CHECK(n_batches == SIZE_MAX);
+    CHECK(n_batch_points == 1);
+
+    max_n_batch_points = 2;
+    n = SIZE_MAX;
+    CHECK(secp256k1_ecmult_multi_batch_size_helper(&n_batches, &n_batch_points, max_n_batch_points, n) == 1);
+    CHECK(n_batches == SIZE_MAX/2 + 1);
+    CHECK(n_batch_points == 2);
+}
+
+/**
+ * Run secp256k1_ecmult_multi_var on n_points points, once for every i with
+ * 1 <= i <= n_points, each time with a scratch space sized for only i points.
+ */
+void test_ecmult_multi_batching(void) {
+    static const int n_points = 2*ECMULT_PIPPENGER_THRESHOLD;
+    secp256k1_scalar scG;
+    secp256k1_scalar szero;
+    secp256k1_scalar *sc = (secp256k1_scalar *)checked_malloc(&ctx->error_callback, sizeof(secp256k1_scalar) * n_points);
+    secp256k1_ge *pt = (secp256k1_ge *)checked_malloc(&ctx->error_callback, sizeof(secp256k1_ge) * n_points);
+    secp256k1_gej r;
+    secp256k1_gej r2;
+    ecmult_multi_data data;
+    int i;
+    secp256k1_scratch *scratch;
+
+    secp256k1_gej_set_infinity(&r2);
+    secp256k1_scalar_set_int(&szero, 0);
+
+    /* Get random scalars and group elements and compute result */
+    random_scalar_order(&scG);
+    secp256k1_ecmult(&ctx->ecmult_ctx, &r2, &r2, &szero, &scG);
+    for(i = 0; i < n_points; i++) {
+        secp256k1_ge ptg;
+        secp256k1_gej ptgj;
+        random_group_element_test(&ptg);
+        secp256k1_gej_set_ge(&ptgj, &ptg);
+        pt[i] = ptg;
+        random_scalar_order(&sc[i]);
+        secp256k1_ecmult(&ctx->ecmult_ctx, &ptgj, &ptgj, &sc[i], NULL);
+        secp256k1_gej_add_var(&r2, &r2, &ptgj, NULL);
+    }
+    data.sc = sc;
+    data.pt = pt;
+
+    /* Test with empty scratch space */
+    scratch = secp256k1_scratch_create(&ctx->error_callback, 0);
+    CHECK(!secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, 1));
+    secp256k1_scratch_destroy(scratch);
+
+    /* Test with space for 1 point in pippenger. That's not enough because
+     * ecmult_multi selects strauss which requires more memory. */
+    scratch = secp256k1_scratch_create(&ctx->error_callback, secp256k1_pippenger_scratch_size(1, 1) + PIPPENGER_SCRATCH_OBJECTS*ALIGNMENT);
+    CHECK(!secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, 1));
+    secp256k1_scratch_destroy(scratch);
+
+    secp256k1_gej_neg(&r2, &r2);
+    for(i = 1; i <= n_points; i++) {
+        if (i > ECMULT_PIPPENGER_THRESHOLD) {
+            int bucket_window = secp256k1_pippenger_bucket_window(i);
+            size_t scratch_size = secp256k1_pippenger_scratch_size(i, bucket_window);
+            scratch = secp256k1_scratch_create(&ctx->error_callback, scratch_size + PIPPENGER_SCRATCH_OBJECTS*ALIGNMENT);
+        } else {
+            size_t scratch_size = secp256k1_strauss_scratch_size(i);
+            scratch = secp256k1_scratch_create(&ctx->error_callback, scratch_size + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT);
+        }
+        CHECK(secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &r, &scG, ecmult_multi_callback, &data, n_points));
+        secp256k1_gej_add_var(&r, &r, &r2, NULL);
+        CHECK(secp256k1_gej_is_infinity(&r));
+        secp256k1_scratch_destroy(scratch);
+    }
+    free(sc);
+    free(pt);
+}
+
+void run_ecmult_multi_tests(void) { /* Runs the ecmult_multi test functions defined above. */
+    secp256k1_scratch *scratch;
+
+    test_secp256k1_pippenger_bucket_window_inv();
+    test_ecmult_multi_pippenger_max_points();
+    scratch = secp256k1_scratch_create(&ctx->error_callback, 819200); /* one scratch space shared by the runs below */
+    test_ecmult_multi(scratch, secp256k1_ecmult_multi_var);
+    test_ecmult_multi(NULL, secp256k1_ecmult_multi_var); /* no scratch space at all */
+    test_ecmult_multi(scratch, secp256k1_ecmult_pippenger_batch_single);
+    test_ecmult_multi(scratch, secp256k1_ecmult_strauss_batch_single);
+    secp256k1_scratch_destroy(scratch);
+
+    /* Run test_ecmult_multi with space for exactly one point */
+    scratch = secp256k1_scratch_create(&ctx->error_callback, secp256k1_strauss_scratch_size(1) + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT);
+    test_ecmult_multi(scratch, secp256k1_ecmult_multi_var);
+    secp256k1_scratch_destroy(scratch);
+
+    test_ecmult_multi_batch_size_helper();
+    test_ecmult_multi_batching();
+}
+
 void test_wnaf(const secp256k1_scalar *number, int w) {
     secp256k1_scalar x, two, t;
     int wnaf[256];
@@ -2541,6 +3038,7 @@ void test_constant_wnaf(const secp256k1_scalar *number, int w) {
     int wnaf[256] = {0};
     int i;
     int skew;
+    int bits = 256;
     secp256k1_scalar num = *number;
 
     secp256k1_scalar_set_int(&x, 0);
@@ -2550,10 +3048,11 @@ void test_constant_wnaf(const secp256k1_scalar *number, int w) {
     for (i = 0; i < 16; ++i) {
         secp256k1_scalar_shr_int(&num, 8);
     }
+    bits = 128;
 #endif
-    skew = secp256k1_wnaf_const(wnaf, num, w);
+    skew = secp256k1_wnaf_const(wnaf, num, w, bits);
 
-    for (i = WNAF_SIZE(w); i >= 0; --i) {
+    for (i = WNAF_SIZE_BITS(bits, w); i >= 0; --i) {
         secp256k1_scalar t;
         int v = wnaf[i];
         CHECK(v != 0); /* check nonzero */
@@ -2575,6 +3074,110 @@ void test_constant_wnaf(const secp256k1_scalar *number, int w) {
     CHECK(secp256k1_scalar_eq(&x, &num));
 }
 
+void test_fixed_wnaf(const secp256k1_scalar *number, int w) {
+    secp256k1_scalar x, shift;
+    int wnaf[256] = {0};
+    int i;
+    int skew;
+    secp256k1_scalar num = *number;
+
+    secp256k1_scalar_set_int(&x, 0);
+    secp256k1_scalar_set_int(&shift, 1 << w);
+    /* With USE_ENDOMORPHISM on we only consider 128-bit numbers */
+#ifdef USE_ENDOMORPHISM
+    for (i = 0; i < 16; ++i) {
+        secp256k1_scalar_shr_int(&num, 8);
+    }
+#endif
+    skew = secp256k1_wnaf_fixed(wnaf, &num, w);
+
+    for (i = WNAF_SIZE(w)-1; i >= 0; --i) {
+        secp256k1_scalar t;
+        int v = wnaf[i];
+        CHECK(v == 0 || v & 1);  /* check parity */
+        CHECK(v > -(1 << w)); /* check range above */
+        CHECK(v < (1 << w));  /* check range below */
+
+        secp256k1_scalar_mul(&x, &x, &shift);
+        if (v >= 0) {
+            secp256k1_scalar_set_int(&t, v);
+        } else {
+            secp256k1_scalar_set_int(&t, -v);
+            secp256k1_scalar_negate(&t, &t);
+        }
+        secp256k1_scalar_add(&x, &x, &t);
+    }
+    /* If skew is 1 then add 1 to num */
+    secp256k1_scalar_cadd_bit(&num, 0, skew == 1);
+    CHECK(secp256k1_scalar_eq(&x, &num));
+}
+
+/* Checks that the first 8 elements of wnaf are equal to wnaf_expected and the
+ * rest is 0. Entries from index WNAF_SIZE(w) upwards are not examined. */
+void test_fixed_wnaf_small_helper(int *wnaf, int *wnaf_expected, int w) {
+    int i;
+    for (i = WNAF_SIZE(w)-1; i >= 8; --i) { /* tail: indices 8 .. WNAF_SIZE(w)-1 */
+        CHECK(wnaf[i] == 0);
+    }
+    for (i = 7; i >= 0; --i) { /* head: indices 0 .. 7 */
+        CHECK(wnaf[i] == wnaf_expected[i]);
+    }
+}
+
+void test_fixed_wnaf_small(void) {
+    int w = 4;
+    int wnaf[256] = {0};
+    int i;
+    int skew;
+    secp256k1_scalar num;
+
+    secp256k1_scalar_set_int(&num, 0);
+    skew = secp256k1_wnaf_fixed(wnaf, &num, w);
+    for (i = WNAF_SIZE(w)-1; i >= 0; --i) {
+        int v = wnaf[i];
+        CHECK(v == 0);
+    }
+    CHECK(skew == 0);
+
+    secp256k1_scalar_set_int(&num, 1);
+    skew = secp256k1_wnaf_fixed(wnaf, &num, w);
+    for (i = WNAF_SIZE(w)-1; i >= 1; --i) {
+        int v = wnaf[i];
+        CHECK(v == 0);
+    }
+    CHECK(wnaf[0] == 1);
+    CHECK(skew == 0);
+
+    {
+        int wnaf_expected[8] = { 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf };
+        secp256k1_scalar_set_int(&num, 0xffffffff);
+        skew = secp256k1_wnaf_fixed(wnaf, &num, w);
+        test_fixed_wnaf_small_helper(wnaf, wnaf_expected, w);
+        CHECK(skew == 0);
+    }
+    {
+        int wnaf_expected[8] = { -1, -1, -1, -1, -1, -1, -1, 0xf };
+        secp256k1_scalar_set_int(&num, 0xeeeeeeee);
+        skew = secp256k1_wnaf_fixed(wnaf, &num, w);
+        test_fixed_wnaf_small_helper(wnaf, wnaf_expected, w);
+        CHECK(skew == 1);
+    }
+    {
+        int wnaf_expected[8] = { 1, 0, 1, 0, 1, 0, 1, 0 };
+        secp256k1_scalar_set_int(&num, 0x01010101);
+        skew = secp256k1_wnaf_fixed(wnaf, &num, w);
+        test_fixed_wnaf_small_helper(wnaf, wnaf_expected, w);
+        CHECK(skew == 0);
+    }
+    {
+        int wnaf_expected[8] = { -0xf, 0, 0xf, -0xf, 0, 0xf, 1, 0 };
+        secp256k1_scalar_set_int(&num, 0x01ef1ef1);
+        skew = secp256k1_wnaf_fixed(wnaf, &num, w);
+        test_fixed_wnaf_small_helper(wnaf, wnaf_expected, w);
+        CHECK(skew == 0);
+    }
+}
+
 void run_wnaf(void) {
     int i;
     secp256k1_scalar n = {{0}};
@@ -2585,12 +3188,15 @@ void run_wnaf(void) {
     test_constant_wnaf(&n, 4);
     n.d[0] = 2;
     test_constant_wnaf(&n, 4);
+    /* Test fixed wnaf on 0 and a few other small values */
+    test_fixed_wnaf_small();
     /* Random tests */
     for (i = 0; i < count; i++) {
         random_scalar_order(&n);
         test_wnaf(&n, 4+(i%10));
         test_constant_wnaf_negate(&n);
         test_constant_wnaf(&n, 4 + (i % 10));
+        test_fixed_wnaf(&n, 4 + (i % 10));
     }
     secp256k1_scalar_set_int(&n, 0);
     CHECK(secp256k1_scalar_cond_negate(&n, 1) == -1);
@@ -3055,6 +3661,7 @@ void run_ec_pubkey_parse_test(void) {
     ecount = 0;
     VG_UNDEF(&pubkey, sizeof(pubkey));
     CHECK(secp256k1_ec_pubkey_parse(ctx, &pubkey, pubkeyc, 65) == 1);
+    CHECK(secp256k1_ec_pubkey_parse(secp256k1_context_no_precomp, &pubkey, pubkeyc, 65) == 1);
     VG_CHECK(&pubkey, sizeof(pubkey));
     CHECK(ecount == 0);
     VG_UNDEF(&ge, sizeof(ge));
@@ -3177,7 +3784,7 @@ void run_eckey_edge_case_test(void) {
     VG_CHECK(&pubkey, sizeof(pubkey));
     CHECK(memcmp(&pubkey, zeros, sizeof(secp256k1_pubkey)) > 0);
     pubkey_negone = pubkey;
-    /* Tweak of zero leaves the value changed. */
+    /* Tweak of zero leaves the value unchanged. */
     memset(ctmp2, 0, 32);
     CHECK(secp256k1_ec_privkey_tweak_add(ctx, ctmp, ctmp2) == 1);
     CHECK(memcmp(orderc, ctmp, 31) == 0 && ctmp[31] == 0x40);
@@ -3668,6 +4275,7 @@ int test_ecdsa_der_parse(const unsigned char *sig, size_t siglen, int certainly_
 
 #ifdef ENABLE_OPENSSL_TESTS
     ECDSA_SIG *sig_openssl;
+    const BIGNUM *r = NULL, *s = NULL;
     const unsigned char *sigptr;
     unsigned char roundtrip_openssl[2048];
     int len_openssl = 2048;
@@ -3719,15 +4327,16 @@ int test_ecdsa_der_parse(const unsigned char *sig, size_t siglen, int certainly_
     sigptr = sig;
     parsed_openssl = (d2i_ECDSA_SIG(&sig_openssl, &sigptr, siglen) != NULL);
     if (parsed_openssl) {
-        valid_openssl = !BN_is_negative(sig_openssl->r) && !BN_is_negative(sig_openssl->s) && BN_num_bits(sig_openssl->r) > 0 && BN_num_bits(sig_openssl->r) <= 256 && BN_num_bits(sig_openssl->s) > 0 && BN_num_bits(sig_openssl->s) <= 256;
+        ECDSA_SIG_get0(sig_openssl, &r, &s);
+        valid_openssl = !BN_is_negative(r) && !BN_is_negative(s) && BN_num_bits(r) > 0 && BN_num_bits(r) <= 256 && BN_num_bits(s) > 0 && BN_num_bits(s) <= 256;
         if (valid_openssl) {
             unsigned char tmp[32] = {0};
-            BN_bn2bin(sig_openssl->r, tmp + 32 - BN_num_bytes(sig_openssl->r));
+            BN_bn2bin(r, tmp + 32 - BN_num_bytes(r));
             valid_openssl = memcmp(tmp, max_scalar, 32) < 0;
         }
         if (valid_openssl) {
             unsigned char tmp[32] = {0};
-            BN_bn2bin(sig_openssl->s, tmp + 32 - BN_num_bytes(sig_openssl->s));
+            BN_bn2bin(s, tmp + 32 - BN_num_bytes(s));
             valid_openssl = memcmp(tmp, max_scalar, 32) < 0;
         }
     }
@@ -4431,8 +5040,9 @@ int main(int argc, char **argv) {
         }
     } else {
         FILE *frand = fopen("/dev/urandom", "r");
-        if ((frand == NULL) || !fread(&seed16, sizeof(seed16), 1, frand)) {
+        if ((frand == NULL) || fread(&seed16, 1, sizeof(seed16), frand) != sizeof(seed16)) {
             uint64_t t = time(NULL) * (uint64_t)1337;
+            fprintf(stderr, "WARNING: could not read 16 bytes from /dev/urandom; falling back to insecure PRNG\n");
             seed16[0] ^= t;
             seed16[1] ^= t >> 8;
             seed16[2] ^= t >> 16;
@@ -4442,7 +5052,9 @@ int main(int argc, char **argv) {
             seed16[6] ^= t >> 48;
             seed16[7] ^= t >> 56;
         }
-        fclose(frand);
+        if (frand) {
+            fclose(frand);
+        }
     }
     secp256k1_rand_seed(seed16);
 
@@ -4451,6 +5063,7 @@ int main(int argc, char **argv) {
 
     /* initialize */
     run_context_tests();
+    run_scratch_tests();
     ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY);
     if (secp256k1_rand_bits(1)) {
         secp256k1_rand256(run32);
@@ -4492,6 +5105,7 @@ int main(int argc, char **argv) {
     run_ecmult_constants();
     run_ecmult_gen_blind();
     run_ecmult_const_tests();
+    run_ecmult_multi_tests();
     run_ec_combine();
 
     /* endomorphism tests */
diff --git a/src/tests_exhaustive.c b/src/tests_exhaustive.c
index b040bb0733..ab9779b02f 100644
--- a/src/tests_exhaustive.c
+++ b/src/tests_exhaustive.c
@@ -174,7 +174,7 @@ void test_exhaustive_ecmult(const secp256k1_context *ctx, const secp256k1_ge *gr
                 ge_equals_gej(&group[(i * r_log + j) % order], &tmp);
 
                 if (i > 0) {
-                    secp256k1_ecmult_const(&tmp, &group[i], &ng);
+                    secp256k1_ecmult_const(&tmp, &group[i], &ng, 256);
                     ge_equals_gej(&group[(i * j) % order], &tmp);
                 }
             }
@@ -182,6 +182,46 @@ void test_exhaustive_ecmult(const secp256k1_context *ctx, const secp256k1_ge *gr
     }
 }
 
+typedef struct { /* Callback data for the exhaustive ecmult_multi test: two scalar/point pairs. */
+    secp256k1_scalar sc[2]; /* sc[idx] is copied out by ecmult_multi_callback */
+    secp256k1_ge pt[2]; /* pt[idx] is copied out by ecmult_multi_callback */
+} ecmult_multi_data;
+
+static int ecmult_multi_callback(secp256k1_scalar *sc, secp256k1_ge *pt, size_t idx, void *cbdata) { /* Copies entry idx of the ecmult_multi_data behind cbdata into *sc and *pt; always returns 1. */
+    ecmult_multi_data *data = (ecmult_multi_data*) cbdata;
+    *sc = data->sc[idx]; /* idx is not range-checked here; the arrays hold 2 entries */
+    *pt = data->pt[idx];
+    return 1;
+}
+
+void test_exhaustive_ecmult_multi(const secp256k1_context *ctx, const secp256k1_ge *group, int order) { /* For every (i, j, k, x, y) in [0, order)^5, runs secp256k1_ecmult_multi_var and compares the result with a group[] lookup. */
+    int i, j, k, x, y;
+    secp256k1_scratch *scratch = secp256k1_scratch_create(&ctx->error_callback, 4096); /* one scratch space shared by all iterations; destroyed at the end */
+    for (i = 0; i < order; i++) {
+        for (j = 0; j < order; j++) {
+            for (k = 0; k < order; k++) {
+                for (x = 0; x < order; x++) {
+                    for (y = 0; y < order; y++) {
+                        secp256k1_gej tmp;
+                        secp256k1_scalar g_sc;
+                        ecmult_multi_data data;
+
+                        secp256k1_scalar_set_int(&data.sc[0], i); /* callback entry 0: scalar i with point group[x] */
+                        secp256k1_scalar_set_int(&data.sc[1], j); /* callback entry 1: scalar j with point group[y] */
+                        secp256k1_scalar_set_int(&g_sc, k);
+                        data.pt[0] = group[x];
+                        data.pt[1] = group[y];
+
+                        secp256k1_ecmult_multi_var(&ctx->ecmult_ctx, scratch, &tmp, &g_sc, ecmult_multi_callback, &data, 2); /* NOTE(review): the trailing 2 is presumably the number of callback entries; any return value is ignored here */
+                        ge_equals_gej(&group[(i * x + j * y + k) % order], &tmp); /* expected index assumes group[n] is n times the generator -- TODO confirm against the caller */
+                    }
+                }
+            }
+        }
+    }
+    secp256k1_scratch_destroy(scratch);
+}
+
 void r_from_k(secp256k1_scalar *r, const secp256k1_ge *group, int k) {
     secp256k1_fe x;
     unsigned char x_bin[32];
@@ -456,6 +496,7 @@ int main(void) {
 #endif
     test_exhaustive_addition(group, groupj, EXHAUSTIVE_TEST_ORDER);
     test_exhaustive_ecmult(ctx, group, groupj, EXHAUSTIVE_TEST_ORDER);
+    test_exhaustive_ecmult_multi(ctx, group, EXHAUSTIVE_TEST_ORDER);
     test_exhaustive_sign(ctx, group, EXHAUSTIVE_TEST_ORDER);
     test_exhaustive_verify(ctx, group, EXHAUSTIVE_TEST_ORDER);
 
diff --git a/src/util.h b/src/util.h
index b0441d8e30..e1f5b76452 100644
--- a/src/util.h
+++ b/src/util.h
@@ -36,7 +36,7 @@ static SECP256K1_INLINE void secp256k1_callback_call(const secp256k1_callback *
 } while(0)
 #endif
 
-#ifdef HAVE_BUILTIN_EXPECT
+#if SECP256K1_GNUC_PREREQ(3, 0)
 #define EXPECT(x,c) __builtin_expect((x),(c))
 #else
 #define EXPECT(x,c) (x)
@@ -76,6 +76,14 @@ static SECP256K1_INLINE void *checked_malloc(const secp256k1_callback* cb, size_
     return ret;
 }
 
+static SECP256K1_INLINE void *checked_realloc(const secp256k1_callback* cb, void *ptr, size_t size) { /* realloc wrapper that reports a NULL result through cb; returns whatever realloc returned. */
+    void *ret = realloc(ptr, size);
+    if (ret == NULL) { /* a NULL result is reported to the callback as "Out of memory" */
+        secp256k1_callback_call(cb, "Out of memory");
+    }
+    return ret; /* still NULL here if the callback returned instead of aborting */
+}
+
 /* Macro for restrict, when available and not in a VERIFY build. */
 #if defined(SECP256K1_BUILD) && defined(VERIFY)
 # define SECP256K1_RESTRICT
-- 
cgit v1.2.3